FFT accuracy using (py)vkfft

The methodology follows http://www.fftw.org/accuracy/method.html:

* random values are generated with a uniform distribution between -0.5 and 0.5 (for both real and imaginary values)
* the comparison is made with long double precision calculations performed with (py)fftw
* the comparison is made using the norms \(L_n(y) = \left[\sum{\left|y\right|^n}\right]^{1/n}\) (n = 1, 2 or \(\infty\))
* the reported average accuracy is the relative error \(\frac{L_n(fft_{ref} - fft)}{L_n(fft_{ref})}\)

Note that the observed differences between the OpenCL and CUDA backends of VkFFT are due to the different sine and cosine functions used when useLUT is not specified. With useLUT=1 these differences disappear; they may also be absent on other GPUs.

[1]:
%matplotlib notebook
import numpy as np
import matplotlib.pyplot as plt
# pyfftw supports long double accuracy
from pyfftw.interfaces.scipy_fft import fftn as fftwn, ifftn as ifftwn
from scipy.fft import fftn as fftsn, ifftn as ifftsn
from scipy import stats
from pyvkfft.fft import fftn as vkfftn, ifftn as ivkfftn
from pyvkfft.base import primes
from pyvkfft.version import __version__, vkfft_version

from IPython.display import display, HTML
# Inject a CSS rule into the notebook page (targets the `.container` element width).
_full_width_css = "<style>.container { width:100% !important; }</style>"
display(HTML(_full_width_css))

# Report the pyvkfft and VkFFT versions used for this run.
_version_banner = "pyvkfft %s, VkFFT %s" % (__version__, vkfft_version())
print(_version_banner)
pyvkfft 2023.2, VkFFT 1.3.1
[2]:
# Init pycuda
# On any failure of the CUDA stack, has_pycuda and has_cufft are set to False.
# Only `Exception` is caught, so that an interrupt (Ctrl-C) during initialisation
# is not misreported as "CUDA is not available".
cuda_device_name = None
try:
    import pycuda.autoinit
    import pycuda.gpuarray as cua
    from pyvkfft.cuda import VkFFTApp as VkFFTAppcu
    has_pycuda = True
    cuda_device_name = pycuda.autoinit.device.name()
    print("Selected CUDA device: ", cuda_device_name)

    # cuFFT plans are stored here (never destroyed) when cufftGetVersion() >= 10200,
    # see the warning printed below.
    v_cufft_plan = []
    def fftncu(d):
        """Transform `d` with cuFFT (through skcuda) and return the result as a host array.

        The array is copied to the GPU, transformed in place without scaling,
        and copied back. `cu_fft` is only looked up when the function is called;
        it is imported further down in this cell.
        """
        dcu = cua.to_gpu(d)
        plan = cu_fft.Plan(d.shape, d.dtype, d.dtype)
        cu_fft.fft(dcu, dcu, plan, scale=False)
        if cu_fft.cufft.cufftGetVersion() >= 10200:
            # Keep a reference so the plan is not destroyed (workaround, see warning below)
            v_cufft_plan.append(plan)
        return dcu.get()

    def fftnvcu(d):
        """Transform `d` with the VkFFT CUDA backend, created with useLUT=0; return a host array."""
        dcu = cua.to_gpu(d)
        app = VkFFTAppcu(d.shape, d.dtype, useLUT=0)
        return app.fft(dcu).get()

    def fftnvculut(d):
        """Transform `d` with the VkFFT CUDA backend, created with useLUT=1; return a host array."""
        dcu = cua.to_gpu(d)
        app = VkFFTAppcu(d.shape, d.dtype, useLUT=1)
        return app.fft(dcu).get()

    try:
        import skcuda.fft as cu_fft
        has_cufft = True
        if cu_fft.cufft.cufftGetVersion() >= 10200:
            print("WARNING: cuFFT plans destruction is inhibited as a workaround for "
                  "an issue with CUDA>=11.0. See https://github.com/lebedov/scikit-cuda/issues/308\n"
                  "=> all cuFFT plans will be kept in GPU memory, effectively creating a memory leak "
                  "(this should be fine to run this notebook)")
    except Exception:
        # skcuda (or the cuFFT library it loads) is unavailable: skip the cuFFT comparison
        has_cufft = False
except Exception:
    print("CUDA is not available")
    has_pycuda = False
    has_cufft = False

Selected CUDA device:  NVIDIA A40
/home/esrf/favre/miniconda3/envs/pynx-py311-cu11.7/lib/python3.11/site-packages/skcuda/cublas.py:284: UserWarning: creating CUBLAS context to get version number
  warnings.warn('creating CUBLAS context to get version number')
WARNING: cuFFT plans destruction is inhibited as a workaround for an issue with CUDA>=11.0. See https://github.com/lebedov/scikit-cuda/issues/308
=> all cuFFT plans will be kept in GPU memory, effectively creating a memory leak (this should be fine to run this notebook)
[3]:
# Init pyopencl
# On any failure of the OpenCL stack, has_pyopencl is set to False.
# Only `Exception` is caught, so that an interrupt (Ctrl-C) is not misreported
# as "OpenCL is not available".
cl_device_name = None
try:
    import pyopencl as cl
    import pyopencl.array as cla
    import os
    from pyvkfft.opencl import VkFFTApp as VkFFTAppcl

    if 'PYOPENCL_CTX' in os.environ:
        # Let pyopencl choose the context when PYOPENCL_CTX is set
        ctx = cl.create_some_context()
        # Record the device name here too: it is needed later for the figure titles
        cl_device_name = ctx.devices[0].name
        print("Selected OpenCL device: ", cl_device_name)
    else:
        ctx = None
        # Find the first OpenCL GPU available and use it
        for p in cl.get_platforms():
            for d in p.get_devices():
                if d.type & cl.device_type.GPU == 0:
                    continue
                cl_device_name = d.name
                print("Selected OpenCL device: ", d.name)
                ctx = cl.Context(devices=(d,))
                break
            if ctx is not None:
                break
    if ctx is None:
        # Fail explicitly rather than relying on CommandQueue(None) to raise
        raise RuntimeError("No OpenCL GPU device was found")
    cq = cl.CommandQueue(ctx)

    def fftnvcl(d):
        """Transform `d` with the VkFFT OpenCL backend, created with useLUT=0; return a host array."""
        dcl = cla.to_device(cq, d)
        app = VkFFTAppcl(d.shape, d.dtype, queue=cq, useLUT=0)
        return app.fft(dcl).get()

    def fftnvcllut(d):
        """Transform `d` with the VkFFT OpenCL backend, created with useLUT=1; return a host array."""
        dcl = cla.to_device(cq, d)
        app = VkFFTAppcl(d.shape, d.dtype, queue=cq, useLUT=1)
        return app.fft(dcl).get()

    has_pyopencl = True
except Exception:
    print("OpenCL is not available")
    has_pyopencl = False

Selected OpenCL device:  NVIDIA A40
[4]:
def l1(a,b):
    """Relative L1 error of `b` against the reference `a`: sum|a-b| / sum|a|."""
    deviation = abs(a - b)
    reference = abs(a)
    return deviation.sum() / reference.sum()

def l2(a,b):
    """Relative L2 error of `b` against the reference `a`: sqrt(sum|a-b|^2 / sum|a|^2)."""
    deviation_sq = abs(a - b) ** 2
    reference_sq = abs(a) ** 2
    ratio = deviation_sq.sum() / reference_sq.sum()
    return np.sqrt(ratio)

def li(a,b):
    """Relative L-infinity error of `b` against the reference `a`: max|a-b| / max|a|."""
    worst_deviation = abs(a - b).max()
    largest_reference = abs(a).max()
    return worst_deviation / largest_reference

def latex_float(f):
    """Format `f` with two significant digits as a LaTeX snippet.

    Values rendered without an exponent are returned unchanged (e.g. ``0.5``);
    values rendered in scientific notation become ``m \\times 10^{e}``.
    """
    text = "{0:.2g}".format(f)
    if "e" not in text:
        return text
    mantissa, power = text.split("e")
    # int() drops the sign padding and leading zeros of the exponent ("-07" -> -7)
    return r"{0} \times 10^{{{1}}}".format(mantissa, int(power))

[5]:
# Label identifying the device, appended to the figure titles.
if has_pycuda:
    # CUDA
    device_name = "CUDA: " + cuda_device_name
elif has_pyopencl:
    # OpenCL - the device name can be None (e.g. context selected through PYOPENCL_CTX),
    # in which case concatenating it directly would raise a TypeError
    device_name = "OpenCL: " + (cl_device_name or "unknown device")
else:
    # No GPU backend could be initialised: only the fftw results will be available
    device_name = "no GPU backend available"

# Transforms to evaluate, keyed by the name used in the tables and figure legends.
fft_dic = {"fftw": fftwn}
if has_pycuda:
    fft_dic["vkfft-cuda"] = fftnvcu
    fft_dic["vkfft-cuda-LUT"] = fftnvculut
if has_cufft:
    fft_dic["cufft"] = fftncu
if has_pyopencl:
    fft_dic["vkfft-opencl"] = fftnvcl
    fft_dic["vkfft-opencl-LUT"] = fftnvcllut

1D, single precision

[6]:
nmax = 2**18
# Seeded generator so that the input data, and therefore the reported errors, are reproducible
rng = np.random.default_rng(42)
# Complex input with real and imaginary parts uniformly distributed in [-0.5, 0.5)
d0 = rng.uniform(-0.5, 0.5, nmax) + 1j * rng.uniform(-0.5, 0.5, nmax)
d0ld = d0.astype(np.clongdouble)  # long double copy, input of the reference transform
d0s = d0.astype(np.complex64)  # single precision copy, input of the tested transforms



def accu_1d(n, fft_dic):
    """Measure the relative errors of every transform in `fft_dic` for a 1D size `n`.

    The reference is `fftwn` applied to the first `n` values of `d0ld`; each
    transform is applied to the first `n` values of `d0s`.

    Returns a dict mapping each key of `fft_dic` to the tuple (l1, l2, li)
    of errors relative to the reference.
    """
    reference = fftwn(d0ld[:n])

    def relative_errors(transform):
        out = transform(d0s[:n])
        return l1(reference, out), l2(reference, out), li(reference, out)

    return {name: relative_errors(transform) for name, transform in fft_dic.items()}

# print(accu_1d(16, fft_dic))

# vn: tested sizes; vl1/vl2/vli: backend name -> list of L1 / L2 / L-infinity
# relative errors (one entry per tested size).
vn, vl1, vl2, vli = [], {}, {}, {}

# GPU backends listed after the fftw column, in display order
gpu_backends = ["vkfft-cuda", "vkfft-cuda-LUT", "vkfft-opencl", "vkfft-opencl-LUT", "cufft"]

# Header line: run a small transform to find out which backends are available.
# Each header cell is 18 characters wide ("  %16s") to match the data cells
# below ("    %14e"), so the names stay aligned with their columns.
s = "%7s  %14s"%("N", "fftw   ")
r = accu_1d(8, fft_dic)
for k in gpu_backends:
    if k in r:
        s += "  %16s" % k
print(s)
for n in range(8, len(d0)+1):
    if max(primes(n)) <= 3:  # Change this for a more complete test (only 2^N1*3^N2 sizes)
        r = accu_1d(n, fft_dic)
        vn.append(n)
        for k, v in r.items():
            vl1.setdefault(k, []).append(v[0])
            vl2.setdefault(k, []).append(v[1])
            vli.setdefault(k, []).append(v[2])
        err_ref = vl2["fftw"][-1]
        s = "%7d  %14e" % (n, err_ref)
        for k in gpu_backends:
            if k in vl2:
                err = vl2[k][-1]
                # Red level grows with log10(error / fftw error): 0 when equal or better,
                # 255 when 100x worse. A zero or NaN error would make the logarithm
                # unusable (int() raises on inf/nan), so those cases are shown uncoloured.
                if err > 0 and err_ref > 0:
                    red = int(np.clip(np.log10(err / err_ref) / np.log10(100) * 255, 0, 255))
                else:
                    red = 0
                s += "\x1b[38;2;%d;0;0m    %14e\x1b[0m" % (red, err)
        print(s)

      N         fftw         vkfft-cuda  vkfft-cuda-LUT    vkfft-opencl  vkfft-opencl-LUT           cufft
      8    5.661812e-08      4.771311e-08      4.771311e-08      4.771311e-08      4.771311e-08      4.538441e-08
      9    6.925965e-08      7.249566e-08      7.249566e-08      7.422968e-08      7.422968e-08      8.148424e-08
     12    6.159262e-08      6.638158e-08      6.638158e-08      6.638158e-08      6.638158e-08      7.102478e-08
     16    6.543330e-08      1.039333e-07      8.245534e-08      1.031274e-07      8.245534e-08      1.048754e-07
     18    6.431882e-08      1.475902e-07      9.247269e-08      1.475902e-07      1.026794e-07      7.987485e-08
     24    7.437298e-08      1.118194e-07      7.039635e-08      1.063454e-07      7.663216e-08      9.952901e-08
     27    1.101631e-07      2.058048e-07      7.791600e-08      1.934317e-07      8.185001e-08      8.483861e-08
     32    8.698629e-08      1.209513e-07      6.916893e-08      1.241982e-07      8.133395e-08      1.410946e-07
     36    7.085077e-08      2.411464e-07      9.978294e-08      2.446531e-07      9.479782e-08      1.257602e-07
     48    8.651990e-08      1.417475e-07      7.830167e-08      1.494853e-07      8.045021e-08      1.064689e-07
     54    8.836157e-08      2.339144e-07      8.168266e-08      2.323761e-07      8.433767e-08      1.439560e-07
     64    1.022756e-07      1.441626e-07      9.541947e-08      1.436288e-07      9.778236e-08      1.194721e-07
     72    8.032949e-08      1.897170e-07      8.997575e-08      1.934350e-07      9.029303e-08      1.011575e-07
     81    9.224252e-08      2.426904e-07      1.057223e-07      2.317988e-07      9.535692e-08      1.291147e-07
     96    9.801735e-08      2.243387e-07      9.796188e-08      2.162218e-07      1.000744e-07      1.313988e-07
    108    1.095297e-07      2.498205e-07      1.109149e-07      2.554678e-07      1.061535e-07      1.772444e-07
    128    9.785448e-08      1.850773e-07      9.511398e-08      1.859862e-07      9.421877e-08      1.527004e-07
    144    1.153802e-07      2.039221e-07      9.885693e-08      2.074026e-07      9.331911e-08      1.763623e-07
    162    1.058271e-07      2.952651e-07      1.100295e-07      2.893044e-07      1.089239e-07      1.298426e-07
    192    1.034159e-07      2.286529e-07      9.648514e-08      2.320203e-07      9.776815e-08      1.936020e-07
    216    1.026397e-07      3.446967e-07      1.136366e-07      3.540606e-07      1.162158e-07      1.348444e-07
    243    1.146960e-07      2.997211e-07      1.136536e-07      2.975615e-07      1.138712e-07      1.489338e-07
    256    9.955169e-08      2.118876e-07      1.024745e-07      2.105416e-07      1.041131e-07      2.010968e-07
    288    1.184379e-07      2.616714e-07      9.917606e-08      2.621172e-07      9.961721e-08      1.667383e-07
    324    1.213902e-07      2.986930e-07      1.147992e-07      2.982502e-07      1.092042e-07      1.575466e-07
    384    1.105922e-07      2.723698e-07      1.081798e-07      2.741561e-07      1.041250e-07      1.502640e-07
    432    1.279715e-07      2.773929e-07      1.120886e-07      2.748347e-07      1.099727e-07      2.185309e-07
    486    1.264691e-07      3.618835e-07      1.241485e-07      3.655857e-07      1.288156e-07      1.400701e-07
    512    1.150365e-07      2.727073e-07      1.173454e-07      2.744505e-07      1.176425e-07      3.322761e-07
    576    1.293662e-07      3.059793e-07      1.099459e-07      3.064660e-07      1.071335e-07      2.345285e-07
    648    1.188700e-07      3.512993e-07      1.143462e-07      3.473531e-07      1.174056e-07      1.991699e-07
    729    1.256747e-07      3.280571e-07      1.258247e-07      3.217211e-07      1.262471e-07      1.795872e-07
    768    1.144708e-07      3.169585e-07      1.175144e-07      3.150117e-07      1.139605e-07      1.985681e-07
    864    1.375433e-07      3.426251e-07      1.174357e-07      3.447054e-07      1.176099e-07      1.868742e-07
    972    1.351672e-07      3.669900e-07      1.207861e-07      3.641820e-07      1.193669e-07      2.001350e-07
   1024    1.279255e-07      2.957719e-07      1.179705e-07      2.986286e-07      1.189138e-07      2.900168e-07
   1152    1.315614e-07      3.665685e-07      1.177501e-07      3.656964e-07      1.166467e-07      2.262170e-07
   1296    1.338465e-07      4.333911e-07      1.261911e-07      4.354325e-07      1.260905e-07      1.690936e-07
   1458    1.331758e-07      4.355626e-07      1.299837e-07      4.336216e-07      1.288716e-07      2.429227e-07
   1536    1.270332e-07      3.558997e-07      1.188751e-07      3.565947e-07      1.211796e-07      3.016196e-07
   1728    1.439379e-07      3.477974e-07      1.173882e-07      3.484380e-07      1.206126e-07      2.288359e-07
   1944    1.294519e-07      4.033637e-07      1.265031e-07      4.040633e-07      1.282332e-07      1.825159e-07
   2048    1.311234e-07      3.512463e-07      1.309433e-07      3.392700e-07      1.257402e-07      2.668943e-07
   2187    1.386178e-07      3.917401e-07      1.358261e-07      3.933939e-07      1.353502e-07      1.952800e-07
   2304    1.291952e-07      4.086238e-07      1.222116e-07      4.116580e-07      1.238758e-07      2.479581e-07
   2592    1.395300e-07      3.983599e-07      1.263009e-07      3.968862e-07      1.254950e-07      2.427453e-07
   2916    1.378609e-07      4.355173e-07      1.338088e-07      4.329371e-07      1.302515e-07      3.612562e-07
   3072    1.365998e-07      4.001772e-07      1.220737e-07      3.955397e-07      1.238067e-07      2.876277e-07
   3456    1.448430e-07      3.929084e-07      1.278417e-07      3.924585e-07      1.279892e-07      2.359315e-07
   3888    1.452707e-07      3.951435e-07      1.303788e-07      3.958168e-07      1.300982e-07      1.896389e-07
   4096    1.374327e-07      4.227348e-07      1.419898e-07      4.013398e-07      1.346181e-07      2.739278e-07
   4374    1.490227e-07      4.911756e-07      1.398336e-07      4.920331e-07      1.421305e-07      2.199837e-07
   4608    1.473382e-07      4.699489e-07      1.277424e-07      4.686752e-07      1.270572e-07      2.177657e-07
   5184    1.497707e-07      4.054957e-07      1.324129e-07      4.067427e-07      1.313687e-07      2.614033e-07
   5832    1.427670e-07      4.637798e-07      1.419198e-07      4.627156e-07      1.411020e-07      3.163732e-07
   6144    1.461489e-07      4.403711e-07      1.320455e-07      3.709079e-07      1.378164e-07      2.272958e-07
   6561    1.506496e-07      4.284306e-07      1.478323e-07      4.919648e-07      1.479551e-07      2.114005e-07
   6912    1.443263e-07      4.305760e-07      1.318745e-07      4.061516e-07      1.367010e-07      2.460669e-07
   7776    1.523758e-07      4.987578e-07      1.451305e-07      4.606680e-07      1.425706e-07      2.235600e-07
   8192    1.463298e-07      4.469246e-07      1.478802e-07      4.409693e-07      1.396761e-07      4.389304e-07
   8748    1.496005e-07      4.923488e-07      1.427488e-07      4.894587e-07      1.441992e-07      2.195366e-07
   9216    1.593096e-07      5.081581e-07      1.350635e-07      4.455401e-07      1.387715e-07      3.256638e-07
  10368    1.540761e-07      4.429276e-07      1.390027e-07      4.341219e-07      1.399476e-07      2.597733e-07
  11664    1.527387e-07      4.646269e-07      1.408878e-07      4.853375e-07      1.441207e-07      2.112244e-07
  12288    1.551555e-07      4.841449e-07      1.387407e-07      4.052009e-07      1.437321e-07      2.526851e-07
  13122    1.544250e-07      5.482336e-07      1.543670e-07      5.477291e-07      1.539420e-07      2.187722e-07
  13824    1.567230e-07      4.407193e-07      1.447505e-07      4.394896e-07      1.450354e-07      2.272706e-07
  15552    1.604332e-07      4.544390e-07      1.423638e-07      4.542038e-07      1.422464e-07      2.214296e-07
  16384    1.532192e-07      4.011272e-07      1.499668e-07      4.850540e-07      1.468326e-07      2.936496e-07
  17496    1.527100e-07      5.505847e-07      1.508470e-07      5.499863e-07      1.506793e-07      2.218428e-07
  18432    1.647677e-07      4.411765e-07      1.450592e-07      4.381201e-07      1.444041e-07      2.992014e-07
  19683    1.620029e-07      5.751107e-07      1.588397e-07      5.739652e-07      1.574418e-07      2.406210e-07
  20736    1.546362e-07      3.857048e-07      1.418489e-07      3.849317e-07      1.428486e-07      2.741637e-07
  23328    1.586530e-07      5.139474e-07      1.496062e-07      5.147107e-07      1.505448e-07      2.494354e-07
  24576    1.551876e-07      4.433321e-07      1.483676e-07      4.419779e-07      1.498213e-07      2.764359e-07
  26244    1.605376e-07      6.059876e-07      1.595808e-07      6.062914e-07      1.587859e-07      2.201023e-07
  27648    1.676835e-07      4.429513e-07      1.468292e-07      4.436884e-07      1.453715e-07      3.281937e-07
  31104    1.638566e-07      5.540975e-07      1.542658e-07      5.541096e-07      1.547016e-07      2.575961e-07
  32768    1.609546e-07      4.459117e-07      1.557306e-07      4.446131e-07      1.569180e-07      3.061112e-07
  34992    1.640913e-07      6.296191e-07      1.606009e-07      6.293764e-07      1.613646e-07      2.256591e-07
  36864    1.677148e-07      5.174434e-07      1.500451e-07      5.178042e-07      1.495052e-07      2.600174e-07
  39366    1.653863e-07      6.178460e-07      1.648943e-07      6.182127e-07      1.635293e-07      2.688381e-07
  41472    1.645688e-07      5.443656e-07      1.565446e-07      5.443770e-07      1.572641e-07      2.637117e-07
  46656    1.664472e-07      6.631293e-07      1.624201e-07      6.634179e-07      1.628490e-07      2.448943e-07
  49152    1.625402e-07      4.688899e-07      1.540039e-07      4.667786e-07      1.544734e-07      3.103901e-07
  52488    1.636480e-07      6.225651e-07      1.656405e-07      6.218054e-07      1.662586e-07      2.725278e-07
  55296    1.761434e-07      5.524049e-07      1.607641e-07      5.506604e-07      1.614658e-07      3.018288e-07
  59049    1.690452e-07      5.764474e-07      1.703297e-07      5.771309e-07      1.689326e-07      2.383780e-07
  62208    1.637051e-07      5.286435e-07      1.634514e-07      5.270071e-07      1.632802e-07      2.677715e-07
  65536    1.656372e-07      4.759224e-07      1.603294e-07      4.745700e-07      1.612591e-07      3.164788e-07
  69984    1.703471e-07      5.628977e-07      1.613945e-07      5.626291e-07      1.604559e-07      2.571305e-07
  73728    1.660060e-07      5.250533e-07      1.558256e-07      5.222549e-07      1.562680e-07      2.901202e-07
  78732    1.709068e-07      6.223701e-07      1.660930e-07      6.229767e-07      1.659434e-07      2.668760e-07
  82944    1.726384e-07      4.990407e-07      1.537106e-07      4.985179e-07      1.523260e-07      3.565637e-07
  93312    1.715934e-07      5.716983e-07      1.574825e-07      5.723901e-07      1.574283e-07      2.794871e-07
  98304    1.728357e-07      5.076330e-07      1.595007e-07      5.063123e-07      1.597662e-07      3.215259e-07
 104976    1.712175e-07      6.111326e-07      1.627277e-07      6.111089e-07      1.619584e-07      2.616428e-07
 110592    1.776956e-07      5.400699e-07      1.570824e-07      5.396469e-07      1.572924e-07      2.797803e-07
 118098    1.739740e-07      6.371366e-07      1.743872e-07      6.364468e-07      1.741531e-07      2.580930e-07
 124416    1.752412e-07      5.952115e-07      1.621353e-07      5.949920e-07      1.618229e-07      2.520142e-07
 131072    1.696012e-07      5.097098e-07      1.697233e-07      4.981079e-07      1.655631e-07      3.035412e-07
 139968    1.770749e-07      5.943713e-07      1.621396e-07      5.941507e-07      1.620690e-07      2.562030e-07
 147456    1.803107e-07      5.952785e-07      1.616329e-07      5.954645e-07      1.612240e-07      3.236669e-07
 157464    1.709750e-07      6.801359e-07      1.711262e-07      6.803316e-07      1.710486e-07      2.627029e-07
 165888    1.809574e-07      5.459304e-07      1.611403e-07      5.462046e-07      1.611718e-07      3.295721e-07
 177147    1.788280e-07      6.012366e-07      1.803042e-07      6.015966e-07      1.799429e-07      2.595503e-07
 186624    1.748429e-07      5.852788e-07      1.614547e-07      5.857742e-07      1.615886e-07      2.878387e-07
 196608    1.817188e-07      5.496654e-07      1.660428e-07      5.490561e-07      1.667814e-07      3.334495e-07
 209952    1.778060e-07      6.405874e-07      1.701896e-07      6.404317e-07      1.701566e-07      3.014692e-07
 221184    1.775296e-07      5.586580e-07      1.664262e-07      5.583502e-07      1.670919e-07      3.003483e-07
 236196    1.783809e-07      7.032948e-07      1.794682e-07      7.033912e-07      1.797219e-07      2.582962e-07
 248832    1.820535e-07      6.318924e-07      1.751290e-07      6.316520e-07      1.755134e-07      3.489647e-07
 262144    1.785678e-07      5.437365e-07      1.707511e-07      5.433821e-07      1.723307e-07      2.862827e-07
[7]:
# Figure 1: one panel per backend, showing the L1, L2 and L-infinity errors versus N,
# with a linear fit of the L2 and L-infinity errors against log10(size).
n_backends = len(vl2)
plt.figure(figsize=(13, 1 + (n_backends + 1) * 1.5))
vk = vl2.keys()

x = np.array(vn, dtype=np.float32)
xl = d0.ndim * np.log10(x)  # Use the size of the array

n_rows = (n_backends + 1) // 2
i = 1
for k in vk:
    plt.subplot(n_rows, 2, i)
    plt.semilogx(vn, vl1[k], '-ob', label="$L1$")

    # L2 error and its fit
    r2 = stats.linregress(xl, np.array(vl2[k], dtype=np.float32))
    slope2, intercept2 = r2[0], r2[1]
    label_l2 = r"$L2\approx %s+%s\log(size)$" % (latex_float(intercept2), latex_float(slope2))
    plt.semilogx(vn, vl2[k], '-ok', label=label_l2)

    # L-infinity error and its fit
    ri = stats.linregress(xl, np.array(vli[k], dtype=np.float32))
    slopei, intercepti = ri[0], ri[1]
    label_li = r"$L_{\infty}\approx %s+%s\log(size)$" % (latex_float(intercepti), latex_float(slopei))
    plt.semilogx(vn, vli[k], '-og', label=label_li)

    # Fitted lines (drawn without a legend entry)
    plt.semilogx(x, intercept2 + slope2*xl, "k-")
    plt.semilogx(x, intercepti + slopei*xl, "g-")
    plt.title(k)
    plt.grid(True)
    plt.legend(loc='upper left')
    plt.xlabel("N", loc='right')
    i += 1


plt.suptitle("1D FFT errors (single precision, radix-2,3) - " + device_name)

plt.tight_layout()

# Figure 2: L2 error of all backends on a single plot
plt.figure()
ms = 3

# Line/marker style per backend
clrs = {
    "fftw": '-og',
    "vkfft-cuda": "-^k",
    "vkfft-cuda-LUT": "-^b",
    "vkfft-opencl": "-vk",
    "vkfft-opencl-LUT": "-vb",
    "cufft": "-or",
}

for k, v in vl2.items():
    plt.semilogx(vn, v, clrs[k], markersize=ms, label=k)
plt.legend(loc='upper left')
plt.suptitle("1D FFT L2 error (single precision, radix-2,3) - " + device_name)
plt.xlabel("N", loc='right')
plt.grid(True)
plt.tight_layout()

1D, double precision

[8]:
nmax = 2**18
# Seeded generator so that the input data, and therefore the reported errors, are reproducible
rng = np.random.default_rng(42)
# Complex input with real and imaginary parts uniformly distributed in [-0.5, 0.5)
d0 = rng.uniform(-0.5, 0.5, nmax) + 1j * rng.uniform(-0.5, 0.5, nmax)
d0ld = d0.astype(np.clongdouble)  # long double copy, input of the reference transform
d0d = d0.astype(np.complex128)  # double precision copy, input of the tested transforms



def accu_1d(n, fft_dic):
    """Measure the relative errors of every transform in `fft_dic` for a 1D size `n`.

    The reference is `fftwn` applied to the first `n` values of `d0ld`; each
    transform is applied to the first `n` values of `d0d`.

    Returns a dict mapping each key of `fft_dic` to the tuple (l1, l2, li)
    of errors relative to the reference.
    """
    reference = fftwn(d0ld[:n])

    def relative_errors(transform):
        out = transform(d0d[:n])
        return l1(reference, out), l2(reference, out), li(reference, out)

    return {name: relative_errors(transform) for name, transform in fft_dic.items()}

# print(accu_1d(16, fft_dic))

# vn: tested sizes; vl1/vl2/vli: backend name -> list of L1 / L2 / L-infinity
# relative errors (one entry per tested size).
vn, vl1, vl2, vli = [], {}, {}, {}

# GPU backends listed after the fftw column, in display order
gpu_backends = ["vkfft-cuda", "vkfft-cuda-LUT", "vkfft-opencl", "vkfft-opencl-LUT", "cufft"]

# Header line: run a small transform to find out which backends are available.
# Each header cell is 18 characters wide ("  %16s") to match the data cells
# below ("    %14e"), so the names stay aligned with their columns.
s = "%7s  %14s"%("N", "fftw   ")
r = accu_1d(8, fft_dic)
for k in gpu_backends:
    if k in r:
        s += "  %16s" % k
print(s)
for n in range(8, len(d0)+1):
    if max(primes(n)) <= 3:  # only sizes of the form 2^N1*3^N2 are tested
        r = accu_1d(n, fft_dic)
        vn.append(n)
        for k, v in r.items():
            vl1.setdefault(k, []).append(v[0])
            vl2.setdefault(k, []).append(v[1])
            vli.setdefault(k, []).append(v[2])
        err_ref = vl2["fftw"][-1]
        s = "%7d  %14e" % (n, err_ref)
        for k in gpu_backends:
            if k in vl2:
                err = vl2[k][-1]
                # Red level grows with log10(error / fftw error): 0 when equal or better,
                # 255 when 100x worse. A zero or NaN error would make the logarithm
                # unusable (int() raises on inf/nan), so those cases are shown uncoloured.
                if err > 0 and err_ref > 0:
                    red = int(np.clip(np.log10(err / err_ref) / np.log10(100) * 255, 0, 255))
                else:
                    red = 0
                s += "\x1b[38;2;%d;0;0m    %14e\x1b[0m" % (red, err)
        print(s)

      N         fftw         vkfft-cuda  vkfft-cuda-LUT    vkfft-opencl  vkfft-opencl-LUT           cufft
      8    5.367255e-17      5.656864e-17      5.656864e-17      5.656864e-17      5.656864e-17      5.840088e-17
      9    9.619862e-17      1.069195e-16      1.069195e-16      1.067132e-16      1.067132e-16      1.067132e-16
     12    7.087986e-17      1.069433e-16      1.069433e-16      1.069433e-16      1.069433e-16      9.472633e-17
     16    9.872177e-17      1.219751e-16      1.219751e-16      1.067727e-16      1.067727e-16      1.104271e-16
     18    1.133557e-16      1.162862e-16      1.162862e-16      1.087061e-16      1.087061e-16      1.259815e-16
     24    1.299068e-16      1.543545e-16      1.543545e-16      1.539512e-16      1.539512e-16      1.660673e-16
     27    1.494702e-16      1.227911e-16      1.227911e-16      1.501807e-16      1.501807e-16      1.830138e-16
     32    1.133334e-16      1.606026e-16      1.606026e-16      1.516121e-16      1.516121e-16      1.497297e-16
     36    1.885887e-16      1.740149e-16      1.740149e-16      2.135202e-16      2.135202e-16      2.157540e-16
     48    1.216451e-16      1.539245e-16      1.539245e-16      1.575012e-16      1.575012e-16      1.581150e-16
     54    1.575659e-16      2.347844e-16      2.347844e-16      2.206083e-16      2.206083e-16      2.230410e-16
     64    1.281604e-16      1.429013e-16      1.429013e-16      1.598804e-16      1.598804e-16      1.602454e-16
     72    1.643927e-16      1.607482e-16      1.607482e-16      1.630723e-16      1.630723e-16      1.749040e-16
     81    2.040083e-16      2.156954e-16      2.156954e-16      2.175756e-16      2.175756e-16      2.011994e-16
     96    1.961262e-16      1.544846e-16      1.544846e-16      1.698115e-16      1.698115e-16      1.626307e-16
    108    1.560056e-16      1.995488e-16      1.995488e-16      2.029792e-16      2.029792e-16      2.402584e-16
    128    1.679223e-16      1.627604e-16      1.627604e-16      1.623086e-16      1.623086e-16      1.970800e-16
    144    2.077158e-16      1.991140e-16      1.991140e-16      2.002485e-16      2.002485e-16      2.271230e-16
    162    1.923714e-16      2.607887e-16      2.607887e-16      2.687635e-16      2.687635e-16      2.261844e-16
    192    1.722049e-16      1.811760e-16      1.811760e-16      1.705169e-16      1.705169e-16      2.677313e-16
    216    2.117673e-16      2.514679e-16      2.514679e-16      2.615850e-16      2.615850e-16      2.457024e-16
    243    2.187502e-16      2.250994e-16      2.250994e-16      2.399898e-16      2.399898e-16      2.507859e-16
    256    2.005995e-16      1.766079e-16      1.766079e-16      1.815744e-16      1.815744e-16      2.074768e-16
    288    2.156484e-16      2.083801e-16      2.083801e-16      2.095753e-16      2.095753e-16      2.101710e-16
    324    2.105772e-16      2.466879e-16      2.466879e-16      2.542842e-16      2.542842e-16      2.617610e-16
    384    2.040326e-16      1.976065e-16      1.976065e-16      1.941932e-16      1.941932e-16      1.909575e-16
    432    2.326740e-16      2.278230e-16      2.278230e-16      2.342927e-16      2.342927e-16      3.047920e-16
    486    2.252192e-16      2.944502e-16      2.944502e-16      2.926094e-16      2.926094e-16      2.383041e-16
    512    1.952682e-16      2.006080e-16      2.006080e-16      2.078939e-16      2.078939e-16      2.358933e-16
    576    2.328841e-16      2.187861e-16      2.187861e-16      2.277219e-16      2.277219e-16      2.660536e-16
    648    2.279861e-16      2.741809e-16      2.741809e-16      2.765692e-16      2.765692e-16      2.812500e-16
    729    2.397941e-16      2.745695e-16      2.745695e-16      2.776495e-16      2.776495e-16      2.685338e-16
    768    2.217792e-16      2.050935e-16      2.050935e-16      2.112474e-16      2.112474e-16      2.549967e-16
    864    2.392887e-16      2.464955e-16      2.464955e-16      2.396781e-16      2.396781e-16      2.845579e-16
    972    2.437752e-16      2.969209e-16      2.969209e-16      2.948694e-16      2.948694e-16      2.879844e-16
   1024    2.134838e-16      2.192733e-16      2.192733e-16      2.213998e-16      2.213998e-16      2.892022e-16
   1152    2.344243e-16      2.362071e-16      2.362071e-16      2.263669e-16      2.263669e-16      3.024710e-16
   1296    2.415099e-16      3.039216e-16      3.039216e-16      3.007685e-16      3.007685e-16      2.961018e-16
   1458    2.618731e-16      3.227033e-16      3.227033e-16      3.204716e-16      3.204716e-16      3.092983e-16
   1536    2.268523e-16      2.204308e-16      2.204308e-16      2.229394e-16      2.229394e-16      3.047860e-16
   1728    2.469143e-16      2.590953e-16      2.590953e-16      2.587161e-16      2.587161e-16      3.184378e-16
   1944    2.645351e-16      3.034015e-16      3.034015e-16      3.052625e-16      3.052625e-16      3.385453e-16
   2048    2.262977e-16      2.262601e-16      2.262601e-16      2.295038e-16      2.295038e-16      3.141678e-16
   2187    2.805185e-16      3.080530e-16      3.080530e-16      3.109135e-16      3.109135e-16      3.259160e-16
   2304    2.510036e-16      2.384915e-16      2.384915e-16      2.450032e-16      2.450032e-16      3.448086e-16
   2592    2.577191e-16      2.855486e-16      2.855486e-16      2.833299e-16      2.833299e-16      3.582828e-16
   2916    2.718885e-16      3.276193e-16      3.276193e-16      3.252465e-16      3.252465e-16      3.454902e-16
   3072    2.375548e-16      2.306371e-16      2.306371e-16      2.240963e-16      2.240963e-16      3.153804e-16
   3456    2.513195e-16      2.715907e-16      2.715907e-16      2.570280e-16      2.570280e-16      3.364389e-16
   3888    2.780577e-16      3.033602e-16      3.033602e-16      3.142853e-16      3.142853e-16      3.734568e-16
   4096    2.407233e-16      2.445722e-16      2.445722e-16      2.416350e-16      2.416350e-16      5.001043e-16
   4374    2.844914e-16      3.535308e-16      3.535308e-16      3.357152e-16      3.357152e-16      4.219991e-16
   4608    2.704864e-16      2.544678e-16      2.544678e-16      2.450729e-16      2.450729e-16      3.487418e-16
   5184    2.716405e-16      2.914734e-16      2.914734e-16      3.059825e-16      3.059825e-16      3.633577e-16
   5832    2.793983e-16      3.411985e-16      3.411985e-16      3.195383e-16      3.195383e-16      3.389887e-16
   6144    2.587074e-16      2.417811e-16      2.417811e-16      2.322334e-16      2.322334e-16      3.255483e-16
   6561    2.957650e-16      3.294060e-16      3.294060e-16      3.317424e-16      3.317424e-16      3.745007e-16
   6912    2.754060e-16      2.746749e-16      2.746749e-16      2.785481e-16      2.785481e-16      3.569971e-16
   7776    2.849944e-16      2.955296e-16      2.955296e-16      2.915170e-16      2.915170e-16      3.987939e-16
   8192    2.674059e-16      2.445088e-16      2.445088e-16      2.512990e-16      2.512990e-16      3.306980e-16
   8748    2.857597e-16      3.321091e-16      3.321091e-16      3.312670e-16      3.312670e-16      4.658386e-16
   9216    2.744919e-16      2.505807e-16      2.505807e-16      2.524278e-16      2.524278e-16      3.408578e-16
  10368    2.771470e-16      2.939128e-16      2.939128e-16      2.921326e-16      2.921326e-16      3.594668e-16
  11664    2.900195e-16      3.325539e-16      3.325539e-16      3.322498e-16      3.322498e-16      3.400178e-16
  12288    2.663610e-16      2.410619e-16      2.410619e-16      2.424417e-16      2.424417e-16      4.097729e-16
  13122    3.118571e-16      3.684103e-16      3.684103e-16      3.667145e-16      3.667145e-16      4.058685e-16
  13824    2.864866e-16      2.664835e-16      2.664835e-16      2.689970e-16      2.689970e-16      3.573930e-16
  15552    2.927195e-16      3.165552e-16      3.165552e-16      3.163312e-16      3.163312e-16      3.997006e-16
  16384    2.734085e-16      2.520036e-16      2.520036e-16      2.592869e-16      2.592869e-16      3.431413e-16
  17496    3.097311e-16      3.696773e-16      3.696773e-16      3.694079e-16      3.694079e-16      4.567620e-16
  18432    2.838084e-16      2.566817e-16      2.566817e-16      2.591014e-16      2.591014e-16      3.491507e-16
  19683    3.225921e-16      3.571714e-16      3.571714e-16      3.576653e-16      3.576653e-16      4.474667e-16
  20736    2.907632e-16      3.041692e-16      3.041692e-16      3.044346e-16      3.044346e-16      4.007057e-16
  23328    3.018126e-16      3.562740e-16      3.562740e-16      3.559010e-16      3.559010e-16      3.700049e-16
  24576    2.801902e-16      2.504164e-16      2.504164e-16      2.522531e-16      2.522531e-16      4.033854e-16
  26244    3.163100e-16      4.030064e-16      4.030064e-16      4.007280e-16      4.007280e-16      4.392310e-16
  27648    2.965545e-16      2.900228e-16      2.900228e-16      2.904187e-16      2.904187e-16      3.614601e-16
  31104    2.963796e-16      3.366356e-16      3.366356e-16      3.349188e-16      3.349188e-16      3.981820e-16
  32768    2.804352e-16      2.754293e-16      2.754293e-16      2.791107e-16      2.791107e-16      3.668297e-16
  34992    3.184266e-16      4.035335e-16      4.035335e-16      4.032879e-16      4.032879e-16      4.843679e-16
  36864    2.919735e-16      2.755381e-16      2.755381e-16      2.750201e-16      2.750201e-16      4.280307e-16
  39366    3.327346e-16      3.960490e-16      3.960490e-16      3.962953e-16      3.962953e-16      4.883068e-16
  41472    3.021585e-16      3.357164e-16      3.357164e-16      3.377360e-16      3.377360e-16      3.986021e-16
  46656    3.150570e-16      4.027811e-16      4.027811e-16      4.046402e-16      4.046402e-16      3.781815e-16
  49152    2.894413e-16      2.618178e-16      2.618178e-16      2.639423e-16      2.639423e-16      4.046840e-16
  52488    3.231218e-16      3.973656e-16      3.973656e-16      3.978064e-16      3.978064e-16      4.343594e-16
  55296    3.008292e-16      3.013918e-16      3.013918e-16      3.020041e-16      3.020041e-16      3.712652e-16
  59049    3.382884e-16      3.927921e-16      3.927921e-16      3.907209e-16      3.907209e-16      4.450151e-16
  62208    3.181738e-16      3.016649e-16      3.016649e-16      3.001105e-16      3.001105e-16      4.324270e-16
  65536    2.906882e-16      2.838052e-16      2.838052e-16      2.897250e-16      2.897250e-16      4.082329e-16
  69984    3.277844e-16      3.545990e-16      3.545990e-16      3.536523e-16      3.536523e-16      5.056228e-16
  73728    3.095879e-16      2.748330e-16      2.748330e-16      2.754066e-16      2.754066e-16      4.172611e-16
  78732    3.318011e-16      3.939013e-16      3.939013e-16      3.928623e-16      3.928623e-16      5.219038e-16
  82944    3.143504e-16      3.188615e-16      3.188615e-16      3.186202e-16      3.186202e-16      3.957595e-16
  93312    3.185616e-16      3.584346e-16      3.584346e-16      3.580152e-16      3.580152e-16      3.815659e-16
  98304    3.006042e-16      2.701318e-16      2.701318e-16      2.722451e-16      2.722451e-16      4.328090e-16
 104976    3.304492e-16      3.974154e-16      3.974154e-16      3.975181e-16      3.975181e-16      4.638966e-16
 110592    3.070421e-16      3.068377e-16      3.068377e-16      3.048006e-16      3.048006e-16      4.357884e-16
 118098    3.558113e-16      4.234892e-16      4.234892e-16      4.216279e-16      4.216279e-16      4.775853e-16
 124416    3.258323e-16      3.449687e-16      3.449687e-16      3.451990e-16      3.451990e-16      4.284158e-16
 131072    2.993093e-16      2.998442e-16      2.998442e-16      3.004132e-16      3.004132e-16      4.158093e-16
 139968    3.335338e-16      3.809354e-16      3.809354e-16      3.807334e-16      3.807334e-16      5.071593e-16
 147456    3.222761e-16      2.946617e-16      2.946617e-16      2.960588e-16      2.960588e-16      4.198297e-16
 157464    3.495228e-16      4.260938e-16      4.260938e-16      4.262866e-16      4.262866e-16      5.188830e-16
 165888    3.242618e-16      3.282844e-16      3.282844e-16      3.283160e-16      3.283160e-16      4.091494e-16
 177147    3.665945e-16      4.186164e-16      4.186164e-16      4.173338e-16      4.173338e-16      5.334907e-16
 186624    3.299107e-16      3.633019e-16      3.633019e-16      3.630145e-16      3.630145e-16      4.071148e-16
 196608    3.112031e-16      2.856495e-16      2.856495e-16      2.877122e-16      2.877122e-16      4.781679e-16
 209952    3.407887e-16      4.094482e-16      4.094482e-16      4.089924e-16      4.089924e-16      4.856920e-16
 221184    3.273546e-16      3.042240e-16      3.042240e-16      3.045988e-16      3.045988e-16      4.278760e-16
 236196    3.594414e-16      4.542307e-16      4.542307e-16      4.561372e-16      4.561372e-16      4.968436e-16
 248832    3.348508e-16      3.354448e-16      3.354448e-16      3.355906e-16      3.355906e-16      4.245576e-16
 262144    3.192889e-16      3.152152e-16      3.152152e-16      3.203700e-16      3.203700e-16      4.084377e-16
[9]:
# Figure 1: one panel per backend with the L1, L2 and L-infinity errors versus N,
# plus a linear fit of the L2 and L-infinity errors against log10(array size).
plt.figure(figsize=(13, 1 + (len(vl2) + 1) * 1.5))
vk = vl2.keys()

x = np.array(vn, dtype=np.float32)
xl = d0.ndim * np.log10(x)  # log10 of the total array size (N ** ndim)

# Two panels per row, rounding up when the number of backends is odd.
n_rows = (len(vl2) + 1) // 2
for i, k in enumerate(vk, start=1):
    plt.subplot(n_rows, 2, i)
    plt.semilogx(vn, vl1[k], '-ob', label="$L1$")

    # Fit error = intercept + slope * log10(size); both values are shown in the legend.
    r2 = stats.linregress(xl, np.array(vl2[k], dtype=np.float32))
    plt.semilogx(vn, vl2[k], '-ok',
                 label=r"$L2\approx %s+%s\log(size)$" % (latex_float(r2.intercept), latex_float(r2.slope)))

    ri = stats.linregress(xl, np.array(vli[k], dtype=np.float32))
    plt.semilogx(vn, vli[k], '-og',
                 label=r"$L_{\infty}\approx %s+%s\log(size)$" % (latex_float(ri.intercept), latex_float(ri.slope)))

    # Overlay the fitted lines using the colour of the corresponding data series.
    plt.semilogx(x, r2.intercept + r2.slope * xl, "k-")
    plt.semilogx(x, ri.intercept + ri.slope * xl, "g-")
    plt.title(k)
    plt.grid(True)
    plt.legend(loc='upper left')
    plt.xlabel("N", loc='right')


# The panels show all three norms, so the title must not single out L2
# (the L2-only comparison is the next figure).
plt.suptitle("1D FFT errors (double precision, radix-2,3) - " + device_name)

plt.tight_layout()

# Figure 2: L2 error of every backend on a single set of axes.
plt.figure()
ms = 3

# Line/marker format string for each backend name.
clrs = {
    "fftw": '-og',
    "vkfft-cuda": "-^k",
    "vkfft-cuda-LUT": "-^b",
    "vkfft-opencl": "-vk",
    "vkfft-opencl-LUT": "-vb",
    "cufft": "-or",
}

for backend, l2_errors in vl2.items():
    plt.semilogx(vn, l2_errors, clrs[backend], markersize=ms, label=backend)
plt.grid(True)
plt.xlabel("N", loc='right')
plt.legend(loc='upper left')
plt.suptitle("1D FFT L2 error (double precision, radix-2,3) - " + device_name)
plt.tight_layout()

2D, single precision

[10]:
# Largest size tested: accu_2d slices the leading n x n corner out of one nmax x nmax array.
nmax = 512
# Complex test data, real and imaginary parts drawn uniformly between -0.5 and 0.5.
d0 = np.random.uniform(-0.5, 0.5, (nmax, nmax)) + 1j * np.random.uniform(-0.5, 0.5, (nmax, nmax))
# Long double complex copy, used by accu_2d for the reference transform.
d0ld = d0.astype(np.clongdouble)
# Single precision copy, passed by accu_2d to every transform under test.
# NOTE(review): the reference is computed from d0ld, not from d0s, so the reported errors
# presumably include the rounding of the input to complex64 - confirm this is intended.
d0s = d0.astype(np.complex64)

def accu_2d(n, fft_dic):
    """
    Compare each 2D transform against the long double reference on an n x n array.

    The reference is fftwn applied to a copy of the leading n x n corner of d0ld;
    every callable in fft_dic receives a copy of the same corner of d0s.

    :param n: number of rows and columns kept from d0ld and d0s.
    :param fft_dic: dictionary mapping a backend name to a callable which takes
        an array and returns its transform.
    :return: dictionary mapping each backend name to the tuple
        (l1(ref, out), l2(ref, out), li(ref, out)).
    """
    reference = fftwn(d0ld[:n, :n].copy())

    def errors(transform):
        # Each backend gets its own copy of the single precision input.
        out = transform(d0s[:n, :n].copy())
        return l1(reference, out), l2(reference, out), li(reference, out)

    return {name: errors(transform) for name, transform in fft_dic.items()}


# Accumulators: tested sizes, then per-backend lists of the L1, L2 and L-infinity errors.
vn, vl1, vl2, vli = [], {}, {}, {}

# GPU backends present in fft_dic, in the column order of the table. accu_2d returns
# exactly the keys of fft_dic, so no trial transform is needed to find them.
gpu_keys = [k for k in ["vkfft-cuda", "vkfft-cuda-LUT", "vkfft-opencl", "vkfft-opencl-LUT", "cufft"]
            if k in fft_dic]

# Header row. Each GPU column is 2 + 16 = 18 characters wide so that it lines up
# with the 4 + 14 characters used for the values in the rows below.
s = "%7s  %14s" % ("N", "fftw   ")
for k in gpu_keys:
    s += "  %16s" % k
print(s)

for n in range(8, len(d0) + 1):
    # Keep only the sizes whose largest prime factor is 2 or 3.
    if max(primes(n)) <= 3:
        r = accu_2d(n, fft_dic)
        vn.append(n)
        for k, v in r.items():
            vl1.setdefault(k, []).append(v[0])
            vl2.setdefault(k, []).append(v[1])
            vli.setdefault(k, []).append(v[2])

        s = "%7d  %14e" % (n, vl2["fftw"][-1])
        for k in gpu_keys:
            # Red level of the text: 0 when the L2 error is at most the fftw one,
            # 255 when it is at least 100 times larger (logarithmic scale in between).
            with np.errstate(divide="ignore", invalid="ignore"):
                level = np.log10(vl2[k][-1] / vl2["fftw"][-1]) / np.log10(100) * 255
            # A NaN error (failed transform) is shown as the worst case instead of
            # raising in int(); infinite levels are handled by the clipping.
            red = 255 if np.isnan(level) else int(np.clip(level, 0, 255))
            s += "\x1b[38;2;%d;0;0m    %14e\x1b[0m" % (red, vl2[k][-1])
        print(s)

      N         fftw         vkfft-cuda  vkfft-cuda-LUT    vkfft-opencl  vkfft-opencl-LUT           cufft
      8    8.527774e-08      7.266479e-08      7.266479e-08      7.266479e-08      7.266479e-08      9.498406e-08
      9    9.335190e-08      9.891788e-08      9.891788e-08      8.961366e-08      8.961366e-08      1.040164e-07
     12    8.536343e-08      9.431391e-08      9.431391e-08      9.556699e-08      9.556699e-08      8.830248e-08
     16    9.928524e-08      1.328003e-07      9.199517e-08      1.366377e-07      9.277042e-08      1.076327e-07
     18    1.134702e-07      2.237842e-07      1.165017e-07      2.278635e-07      1.159992e-07      1.362995e-07
     24    1.051133e-07      1.626263e-07      9.872837e-08      1.622961e-07      1.008451e-07      1.257961e-07
     27    1.276222e-07      3.197227e-07      1.178340e-07      3.181370e-07      1.196095e-07      1.366535e-07
     32    1.088421e-07      1.656500e-07      1.171188e-07      1.665413e-07      1.161752e-07      2.327413e-07
     36    1.249738e-07      3.031020e-07      1.241648e-07      3.027040e-07      1.232112e-07      1.426325e-07
     48    1.121796e-07      2.344185e-07      1.176478e-07      2.330087e-07      1.186958e-07      1.593699e-07
     54    1.371896e-07      3.735055e-07      1.342523e-07      3.733109e-07      1.363872e-07      2.123211e-07
     64    1.203335e-07      2.423184e-07      1.319832e-07      2.416615e-07      1.329602e-07      1.844784e-07
     72    1.246935e-07      2.994335e-07      1.296193e-07      2.986124e-07      1.299276e-07      1.639534e-07
     81    1.449360e-07      3.778978e-07      1.441958e-07      3.779404e-07      1.426087e-07      1.755133e-07
     96    1.334507e-07      3.268282e-07      1.336894e-07      3.279344e-07      1.323218e-07      2.305902e-07
    108    1.471534e-07      3.744680e-07      1.383366e-07      3.765097e-07      1.367569e-07      2.450587e-07
    128    1.312548e-07      3.073083e-07      1.431903e-07      3.049531e-07      1.436841e-07      2.448643e-07
    144    1.571986e-07      3.033074e-07      1.366514e-07      3.035363e-07      1.382916e-07      2.496395e-07
    162    1.557316e-07      5.001706e-07      1.547347e-07      5.022002e-07      1.538532e-07      1.839227e-07
    192    1.531230e-07      4.017444e-07      1.449496e-07      4.019279e-07      1.447932e-07      3.175503e-07
    216    1.507109e-07      5.479016e-07      1.589944e-07      5.472586e-07      1.589780e-07      1.960320e-07
    243    1.655783e-07      4.816184e-07      1.656449e-07      4.808104e-07      1.656831e-07      2.152531e-07
    256    1.509464e-07      3.599852e-07      1.533763e-07      3.576926e-07      1.548415e-07      2.779536e-07
    288    1.786177e-07      4.199985e-07      1.484688e-07      4.204004e-07      1.479948e-07      2.346000e-07
    324    1.647224e-07      5.043536e-07      1.585540e-07      5.038923e-07      1.581418e-07      2.490004e-07
    384    1.623741e-07      4.828982e-07      1.569511e-07      4.826630e-07      1.570796e-07      2.847407e-07
    432    1.740673e-07      4.695336e-07      1.578176e-07      4.695530e-07      1.575228e-07      2.996553e-07
    486    1.778964e-07      6.158958e-07      1.759388e-07      6.158558e-07      1.754382e-07      2.078483e-07
    512    1.617633e-07      4.453922e-07      1.668406e-07      4.446452e-07      1.680485e-07      4.012341e-07
[11]:
# Figure 1: one panel per backend with the L1, L2 and L-infinity errors versus N,
# plus a linear fit of the L2 and L-infinity errors against log10(array size).
plt.figure(figsize=(13, 1 + (len(vl2) + 1) * 1.5))
vk = vl2.keys()

x = np.array(vn, dtype=np.float32)
xl = d0.ndim * np.log10(x)  # log10 of the total array size (N ** ndim)

# Two panels per row, rounding up when the number of backends is odd.
n_rows = (len(vl2) + 1) // 2
for panel, backend in enumerate(vk, start=1):
    plt.subplot(n_rows, 2, panel)
    plt.semilogx(vn, vl1[backend], '-ob', label="$L1$")

    # Fit error = intercept + slope * log10(size); both values are shown in the legend.
    fit_l2 = stats.linregress(xl, np.array(vl2[backend], dtype=np.float32))
    label_l2 = r"$L2\approx %s+%s\log(size)$" % (latex_float(fit_l2.intercept), latex_float(fit_l2.slope))
    plt.semilogx(vn, vl2[backend], '-ok', label=label_l2)

    fit_li = stats.linregress(xl, np.array(vli[backend], dtype=np.float32))
    label_li = r"$L_{\infty}\approx %s+%s\log(size)$" % (latex_float(fit_li.intercept), latex_float(fit_li.slope))
    plt.semilogx(vn, vli[backend], '-og', label=label_li)

    # Overlay the fitted lines using the colour of the corresponding data series.
    plt.semilogx(x, fit_l2.intercept + fit_l2.slope * xl, "k-")
    plt.semilogx(x, fit_li.intercept + fit_li.slope * xl, "g-")
    plt.title(backend)
    plt.grid(True)
    plt.legend(loc='upper left')
    plt.xlabel("N", loc='right')


plt.suptitle("2D FFT errors (single precision, radix-2,3) - " + device_name)

plt.tight_layout()

# Figure 2: L2 error of every backend on a single set of axes.
plt.figure()
ms = 3

# Line/marker format string for each backend name.
clrs = {
    "fftw": '-og',
    "vkfft-cuda": "-^k",
    "vkfft-cuda-LUT": "-^b",
    "vkfft-opencl": "-vk",
    "vkfft-opencl-LUT": "-vb",
    "cufft": "-or",
}

for backend, l2_errors in vl2.items():
    plt.semilogx(vn, l2_errors, clrs[backend], markersize=ms, label=backend)
plt.xlabel("N", loc='right')
plt.grid(True)
plt.legend(loc='upper left')
plt.suptitle("2D FFT L2 error (single precision, radix-2,3) - " + device_name)
plt.tight_layout()


1D, non-radix (Bluestein or Rader) transforms, single precision

[12]:
# Largest size tested: accu_1d slices the first n elements out of one array of length nmax.
nmax = 512
# Complex test data, real and imaginary parts drawn uniformly between -0.5 and 0.5.
d0 = np.random.uniform(-0.5, 0.5, nmax) + 1j * np.random.uniform(-0.5, 0.5, nmax)
# Long double complex copy, used by accu_1d for the reference transform.
d0ld = d0.astype(np.clongdouble)
# Single precision copy, passed by accu_1d to every transform under test.
# NOTE(review): the reference is computed from d0ld, not from d0s, so the reported errors
# presumably include the rounding of the input to complex64 - confirm this is intended.
d0s = d0.astype(np.complex64)

def accu_1d(n, fft_dic):
    """
    Compare each 1D transform against the long double reference on n points.

    The reference is fftwn applied to the first n elements of d0ld; every callable
    in fft_dic receives the first n elements of d0s.

    :param n: number of leading elements kept from d0ld and d0s.
    :param fft_dic: dictionary mapping a backend name to a callable which takes
        an array and returns its transform.
    :return: dictionary mapping each backend name to the tuple
        (l1(ref, out), l2(ref, out), li(ref, out)).
    """
    reference = fftwn(d0ld[:n])

    def errors(transform):
        out = transform(d0s[:n])
        return l1(reference, out), l2(reference, out), li(reference, out)

    return {name: errors(transform) for name, transform in fft_dic.items()}

# Transforms to benchmark, keyed by the name used in the result table and the plots.
# fftw is always present; the GPU entries depend on which backends were initialised.
fft_dic = {"fftw": fftwn}
if has_pycuda:
    fft_dic.update({
        "vkfft-cuda": fftnvcu,
        "vkfft-cuda-LUT": fftnvculut,
        "cufft": fftncu,
    })
if has_pyopencl:
    fft_dic.update({
        "vkfft-opencl": fftnvcl,
        "vkfft-opencl-LUT": fftnvcllut,
    })


# Accumulators: tested sizes, then per-backend lists of the L1, L2 and L-infinity errors.
vn, vl1, vl2, vli = [], {}, {}, {}

# GPU backends present in fft_dic, in the column order of the table. accu_1d returns
# exactly the keys of fft_dic, so no trial transform is needed to find them.
gpu_keys = [k for k in ["vkfft-cuda", "vkfft-cuda-LUT", "vkfft-opencl", "vkfft-opencl-LUT", "cufft"]
            if k in fft_dic]

# Header row: every column after N is 18 characters wide, like the rows below.
s = "%7s  %16s" % ("N", "fftw   ")
for k in gpu_keys:
    s += "  %16s" % k
print(s)

for n in range(8, len(d0) + 1):
    if max(primes(n)) > 13:  # test only transforms with non-radix sizes
        r = accu_1d(n, fft_dic)
        vn.append(n)
        for k, v in r.items():
            vl1.setdefault(k, []).append(v[0])
            vl2.setdefault(k, []).append(v[1])
            vli.setdefault(k, []).append(v[2])

        s = "%7d  %16e" % (n, vl2["fftw"][-1])
        for k in gpu_keys:
            # Red level of the text: 0 when the L2 error is at most the fftw one,
            # 255 when it is at least 100 times larger (logarithmic scale in between).
            with np.errstate(divide="ignore", invalid="ignore"):
                level = np.log10(vl2[k][-1] / vl2["fftw"][-1]) / np.log10(100) * 255
            # A NaN error (failed transform) is shown as the worst case instead of
            # raising in int(); infinite levels are handled by the clipping.
            red = 255 if np.isnan(level) else int(np.clip(level, 0, 255))
            s += "\x1b[38;2;%d;0;0m    %14e\x1b[0m" % (red, vl2[k][-1])
        print(s)

      N           fftw           vkfft-cuda    vkfft-cuda-LUT      vkfft-opencl  vkfft-opencl-LUT             cufft
     17      6.254615e-08      1.015721e-07      1.102006e-07      1.102006e-07      1.102006e-07      7.038351e-08
     19      8.248360e-08      1.645417e-07      1.248262e-07      1.105743e-07      1.105743e-07      7.713461e-08
     23      9.364838e-08      2.120295e-07      1.265788e-07      1.307243e-07      1.307243e-07      9.525661e-08
     29      9.151393e-08      1.697528e-07      1.473385e-07      1.526987e-07      1.526987e-07      6.975211e-08
     31      9.476318e-08      2.802384e-07      1.140413e-07      1.201944e-07      1.201944e-07      7.906383e-08
     34      7.735947e-08      1.776248e-07      1.045113e-07      1.034587e-07      1.034587e-07      8.285427e-08
     37      1.484575e-07      3.000786e-07      1.426029e-07      1.731795e-07      1.731795e-07      9.075627e-08
     38      9.740193e-08      2.774460e-07      1.702731e-07      1.763272e-07      1.763272e-07      1.093944e-07
     41      1.353334e-07      3.260490e-07      1.226760e-07      1.258329e-07      1.258329e-07      9.509407e-08
     43      9.872378e-08      2.399786e-07      1.835915e-07      1.852351e-07      1.852351e-07      1.046902e-07
     46      8.949255e-08      2.700867e-07      1.125207e-07      1.034681e-07      1.034681e-07      1.174656e-07
     47      1.830142e-07      1.198861e-07      1.198861e-07      1.198861e-07      1.198861e-07      1.057758e-07
     51      8.422249e-08      2.138944e-07      1.312271e-07      1.413012e-07      1.413012e-07      9.646047e-08
     53      2.024566e-07      2.477728e-07      1.450103e-07      1.570705e-07      1.570705e-07      1.102411e-07
     57      9.803056e-08      3.179224e-07      1.579285e-07      1.649120e-07      1.649120e-07      9.402163e-08
     58      9.713275e-08      2.461764e-07      1.489107e-07      1.607300e-07      1.607300e-07      1.089946e-07
     59      1.950429e-07      1.029824e-07      1.029824e-07      1.029824e-07      1.029824e-07      1.092878e-07
     61      1.497549e-07      2.960745e-07      1.713226e-07      1.514671e-07      1.514671e-07      1.104706e-07
     62      1.136793e-07      2.791304e-07      1.393296e-07      1.469882e-07      1.469882e-07      1.049168e-07
     67      2.155272e-07      3.261446e-07      1.541285e-07      1.611212e-07      1.611212e-07      1.426873e-07
     68      9.695182e-08      2.457007e-07      1.163033e-07      1.161389e-07      1.161389e-07      1.830814e-07
     69      9.709617e-08      3.187640e-07      1.400185e-07      1.366575e-07      1.366575e-07      1.235348e-07
     71      2.174195e-07      2.688835e-07      2.061562e-07      1.606750e-07      1.606750e-07      1.163606e-07
     73      1.692324e-07      3.746569e-07      1.741872e-07      1.690175e-07      1.690175e-07      1.162877e-07
     74      1.575586e-07      3.353020e-07      1.659695e-07      1.662891e-07      1.662891e-07      1.084640e-07
     76      9.244200e-08      2.818918e-07      1.555008e-07      1.548209e-07      1.548209e-07      1.972896e-07
     79      1.975269e-07      2.576685e-07      1.454759e-07      1.567628e-07      1.567628e-07      1.084098e-07
     82      1.635243e-07      3.941707e-07      1.374833e-07      1.506632e-07      1.506632e-07      1.209537e-07
     83      1.789268e-07      1.317153e-07      1.317153e-07      1.317153e-07      1.317153e-07      1.263885e-07
     85      1.095898e-07      2.689680e-07      1.206128e-07      1.350287e-07      1.350287e-07      1.750085e-07
     86      1.076581e-07      2.863250e-07      1.877223e-07      1.826287e-07      1.826287e-07      1.247761e-07
     87      1.133036e-07      2.994499e-07      1.791361e-07      1.937553e-07      1.937553e-07      1.125409e-07
     89      1.967461e-07      4.042257e-07      1.658388e-07      1.578106e-07      1.578106e-07      1.361586e-07
     92      1.008463e-07      3.297765e-07      1.286483e-07      1.372108e-07      1.372108e-07      2.697618e-07
     93      1.062135e-07      2.926745e-07      1.844804e-07      1.793787e-07      1.793787e-07      1.180430e-07
     94      1.852338e-07      1.138912e-07      1.138912e-07      1.137691e-07      1.137691e-07      1.217035e-07
     95      9.282513e-08      3.283934e-07      1.558627e-07      1.430664e-07      1.430664e-07      1.145523e-07
     97      1.614399e-07      3.290774e-07      1.905957e-07      1.978080e-07      1.978080e-07      1.307875e-07
    101      1.732766e-07      3.416956e-07      1.809738e-07      1.770801e-07      1.770801e-07      1.239909e-07
    102      1.060491e-07      2.507643e-07      1.439354e-07      1.360755e-07      1.360755e-07      1.296681e-07
    103      1.856866e-07      3.637793e-07      1.877914e-07      3.538714e-07      1.845640e-07      1.289173e-07
    106      2.004430e-07      3.015556e-07      1.506545e-07      1.355621e-07      1.355621e-07      1.373634e-07
    107      1.931322e-07      3.856692e-07      1.847519e-07      3.872953e-07      1.884483e-07      1.281276e-07
    109      2.000102e-07      3.536153e-07      1.780016e-07      1.988556e-07      1.988556e-07      1.377508e-07
    111      1.664198e-07      3.192866e-07      1.421667e-07      1.502838e-07      1.502838e-07      1.311545e-07
    113      2.054878e-07      3.076369e-07      1.966113e-07      2.046859e-07      2.046859e-07      1.374936e-07
    114      1.019928e-07      3.318454e-07      1.351128e-07      1.431855e-07      1.431855e-07      1.839490e-07
    115      1.023938e-07      3.135284e-07      1.434199e-07      1.309437e-07      1.309437e-07      1.140271e-07
    116      1.092353e-07      3.303176e-07      1.723933e-07      1.752890e-07      1.752890e-07      1.411988e-07
    118      1.852870e-07      1.243774e-07      1.243774e-07      1.250687e-07      1.250687e-07      1.553505e-07
    119      1.104477e-07      2.485960e-07      1.557393e-07      1.515781e-07      1.515781e-07      1.075155e-07
    122      1.581833e-07      3.233978e-07      1.773808e-07      1.685501e-07      1.685501e-07      1.341923e-07
    123      1.409362e-07      3.653380e-07      1.448166e-07      1.379628e-07      1.379628e-07      1.157736e-07
    124      1.003285e-07      2.969741e-07      1.668390e-07      1.808389e-07      1.808389e-07      1.165339e-07
    127      2.089652e-07      3.815689e-07      2.064010e-07      1.999192e-07      1.999192e-07      1.465482e-07
    129      1.193924e-07      3.597178e-07      1.871710e-07      1.774803e-07      1.774803e-07      1.274098e-07
    131      2.081924e-07      3.716765e-07      1.918856e-07      1.823460e-07      1.823460e-07      1.013079e+00
    133      1.018254e-07      2.984598e-07      1.557671e-07      1.501836e-07      1.501836e-07      2.170771e-07
    134      2.099063e-07      3.904764e-07      1.645518e-07      1.702173e-07      1.702173e-07      1.470246e-07
    136      1.098400e-07      3.201838e-07      1.493731e-07      1.498841e-07      1.498841e-07      2.141142e-07
    137      2.387834e-07      4.598919e-07      1.623450e-07      4.394354e-07      1.701358e-07      2.027433e-07
    138      1.095211e-07      3.408621e-07      1.376716e-07      1.298727e-07      1.298727e-07      2.245828e-07
    139      2.280636e-07      4.499628e-07      1.600395e-07      4.485685e-07      1.711647e-07      2.062495e-07
    141      1.990999e-07      1.150641e-07      1.150641e-07      1.123040e-07      1.123040e-07      1.204941e-07
    142      2.047809e-07      3.131161e-07      1.897272e-07      1.899545e-07      1.899545e-07      1.339982e-07
    145      1.150806e-07      2.897775e-07      1.821280e-07      1.971955e-07      1.971955e-07      1.101078e-07
    146      1.659277e-07      4.249103e-07      1.950035e-07      1.772116e-07      1.772116e-07      1.546777e-07
    148      1.579387e-07      3.514651e-07      1.569699e-07      1.581785e-07      1.581785e-07      1.166514e-07
    149      2.219864e-07      4.673630e-07      1.809679e-07      4.716310e-07      1.858944e-07      2.225737e-07
    151      1.971656e-07      2.967276e-07      1.910044e-07      1.835607e-07      1.835607e-07      2.252467e-07
    152      1.059442e-07      2.693980e-07      1.450913e-07      1.568306e-07      1.568306e-07      1.783349e-07
    153      1.025081e-07      2.127526e-07      1.397033e-07      1.368405e-07      1.368405e-07      2.066911e-07
    155      1.186708e-07      3.282051e-07      1.736079e-07      1.717431e-07      1.717431e-07      1.108088e-07
    157      1.967858e-07      3.286034e-07      1.837128e-07      1.744382e-07      1.744382e-07      2.352349e-07
    158      2.035188e-07      3.164353e-07      1.684070e-07      1.764726e-07      1.764726e-07      1.574034e-07
    159      1.794777e-07      3.200739e-07      1.532787e-07      1.430142e-07      1.430142e-07      1.335363e-07
    161      1.067734e-07      3.343597e-07      1.435854e-07      1.424633e-07      1.424633e-07      1.090317e-07
    163      2.089208e-07      4.299696e-07      1.937606e-07      1.967131e-07      1.967131e-07      2.044107e-07
    164      1.500700e-07      3.919023e-07      1.551416e-07      1.463539e-07      1.463539e-07      1.604627e-07
    166      2.001572e-07      1.282135e-07      1.282135e-07      1.276195e-07      1.276195e-07      1.356593e-07
    167      2.106085e-07      4.942549e-07      1.732278e-07      4.825893e-07      1.650029e-07      2.329364e-07
    170      1.135286e-07      2.669009e-07      1.454533e-07      1.388323e-07      1.388323e-07      2.498998e-07
    171      1.169343e-07      3.053043e-07      1.715374e-07      1.667814e-07      1.667814e-07      1.978128e-07
    172      1.280455e-07      3.232796e-07      1.898046e-07      1.929596e-07      1.929596e-07      1.323492e-07
    173      2.063084e-07      4.856868e-07      1.806599e-07      4.943003e-07      1.886242e-07      2.237001e-07
    174      1.123379e-07      3.299866e-07      1.801550e-07      1.921437e-07      1.921437e-07      1.367182e-07
    177      1.927455e-07      1.320034e-07      1.320034e-07      1.339027e-07      1.339027e-07      1.582679e-07
    178      2.105625e-07      4.347624e-07      1.596042e-07      1.638857e-07      1.638857e-07      1.524142e-07
    179      2.231056e-07      4.764158e-07      1.842309e-07      4.880476e-07      1.847664e-07      2.154776e-07
    181      1.929199e-07      3.110166e-07      1.871079e-07      1.806618e-07      1.806618e-07      2.252474e-07
    183      1.564172e-07      3.317619e-07      1.709997e-07      1.810320e-07      1.810320e-07      1.434001e-07
    184      1.085857e-07      3.120010e-07      1.317914e-07      1.359868e-07      1.359868e-07      2.275638e-07
    185      1.555445e-07      3.967476e-07      1.645497e-07      1.640616e-07      1.640616e-07      1.782655e-07
    186      1.067958e-07      3.180343e-07      1.788461e-07      1.658559e-07      1.658559e-07      1.543705e-07
    187      1.159999e-07      2.607448e-07      1.385812e-07      1.380326e-07      1.380326e-07      1.186911e-07
    188      1.916176e-07      1.122158e-07      1.122158e-07      1.089307e-07      1.089307e-07      1.255861e-07
    190      1.276445e-07      3.446052e-07      1.479991e-07      1.594947e-07      1.594947e-07      2.638597e-07
    191      2.153538e-07      4.911162e-07      1.867347e-07      4.815208e-07      1.804571e-07      2.182134e-07
    193      1.891501e-07      3.513702e-07      1.766511e-07      1.818459e-07      1.818459e-07      2.350272e-07
    194      1.623496e-07      3.814176e-07      1.905166e-07      1.879265e-07      1.879265e-07      1.634499e-07
    197      2.112167e-07      3.561558e-07      2.226806e-07      2.097722e-07      2.097722e-07      2.386824e-07
    199      2.132325e-07      4.444957e-07      1.875646e-07      1.922090e-07      1.922090e-07      2.273699e-07
    201      1.991247e-07      4.068708e-07      1.676640e-07      1.622211e-07      1.622211e-07      1.617963e-07
    202      1.716199e-07      3.756492e-07      1.874411e-07      1.744065e-07      1.744065e-07      1.608291e-07
    203      1.182969e-07      2.946635e-07      1.887379e-07      1.885407e-07      1.885407e-07      1.309971e-07
    204      1.141323e-07      2.440876e-07      1.583997e-07      1.529069e-07      1.529069e-07      2.074945e-07
    205      1.704316e-07      4.050686e-07      1.540238e-07      1.544637e-07      1.544637e-07      1.159754e-07
    206      1.986491e-07      4.488049e-07      2.115694e-07      4.661837e-07      2.150825e-07      1.511929e-07
    207      1.188873e-07      3.344837e-07      1.373426e-07      1.371272e-07      1.371272e-07      1.346638e-07
    209      1.146278e-07      3.246925e-07      1.530359e-07      1.487966e-07      1.487966e-07      2.378698e-07
    211      2.209989e-07      3.128723e-07      2.008453e-07      1.996888e-07      1.996888e-07      2.443957e-07
    212      1.850437e-07      3.224179e-07      1.433081e-07      1.363619e-07      1.363619e-07      1.545950e-07
    213      1.963910e-07      3.433657e-07      1.949126e-07      1.946143e-07      1.946143e-07      1.380735e-07
    214      2.055205e-07      4.556934e-07      2.114241e-07      4.605312e-07      1.992919e-07      1.539524e-07
    215      1.228424e-07      3.465688e-07      1.926393e-07      1.734769e-07      1.734769e-07      1.383893e-07
    217      1.149121e-07      3.645797e-07      1.747176e-07      1.663483e-07      1.663483e-07      1.487485e-07
    218      1.923187e-07      3.720207e-07      1.798481e-07      1.793363e-07      1.793363e-07      1.591430e-07
    219      1.627610e-07      4.714273e-07      1.901704e-07      1.850918e-07      1.850918e-07      1.640938e-07
    221      1.161102e-07      2.267586e-07      1.315326e-07      1.354092e-07      1.354092e-07      1.993351e-07
    222      1.634625e-07      3.810690e-07      1.619523e-07      1.570721e-07      1.570721e-07      1.739043e-07
    223      2.094943e-07      4.385517e-07      2.088867e-07      4.470930e-07      2.010502e-07      2.553330e-07
    226      2.219911e-07      3.661037e-07      1.895713e-07      1.964298e-07      1.964298e-07      1.521751e-07
    227      2.091631e-07      4.503239e-07      2.074894e-07      4.400035e-07      2.137942e-07      2.370926e-07
    228      1.113769e-07      3.727095e-07      1.734228e-07      1.639969e-07      1.639969e-07      2.000458e-07
    229      2.132539e-07      4.377515e-07      2.098218e-07      4.455166e-07      2.106596e-07      2.454666e-07
    230      1.157781e-07      3.321521e-07      1.455432e-07      1.372214e-07      1.372214e-07      1.690261e-07
    232      1.041392e-07      3.557029e-07      1.749468e-07      1.791588e-07      1.791588e-07      3.322553e-07
    233      2.118066e-07      4.420936e-07      2.168508e-07      4.429969e-07      2.162505e-07      2.370328e-07
    235      1.889062e-07      1.194290e-07      1.194290e-07      1.201400e-07      1.201400e-07      1.340496e-07
    236      1.997082e-07      1.239299e-07      1.239299e-07      1.249085e-07      1.249085e-07      1.541500e-07
    237      2.021329e-07      3.507753e-07      1.697484e-07      1.683454e-07      1.683454e-07      1.727904e-07
    238      1.200816e-07      2.791008e-07      1.499446e-07      1.449852e-07      1.449852e-07      1.298035e-07
    239      2.228663e-07      4.719928e-07      2.220813e-07      4.660047e-07      2.131480e-07      2.547764e-07
    241      1.815284e-07      3.981928e-07      2.010071e-07      1.983743e-07      1.983743e-07      2.420173e-07
    244      1.628528e-07      3.164522e-07      1.765841e-07      1.774213e-07      1.774213e-07      1.397179e-07
    246      1.588252e-07      3.849659e-07      1.429126e-07      1.416401e-07      1.416401e-07      1.580901e-07
    247      1.253356e-07      3.067324e-07      1.490375e-07      1.448856e-07      1.448856e-07      1.881051e-07
    248      1.093901e-07      3.226233e-07      1.759849e-07      1.597223e-07      1.597223e-07      3.386437e-07
    249      1.961131e-07      1.312664e-07      1.312664e-07      1.334024e-07      1.334024e-07      1.520260e-07
    251      2.078468e-07      4.599977e-07      2.041316e-07      1.966544e-07      1.966544e-07      2.643667e-07
    253      1.138023e-07      3.518098e-07      1.385994e-07      1.364449e-07      1.364449e-07      2.909594e-07
    254      2.137445e-07      3.606242e-07      2.057417e-07      2.044628e-07      2.044628e-07      1.634830e-07
    255      1.169972e-07      2.841816e-07      1.387739e-07      1.383085e-07      1.383085e-07      2.405275e-07
    257      1.860466e-07      3.666547e-07      2.074898e-07      2.182892e-07      2.182892e-07      3.579960e-07
    258      1.150828e-07      3.576258e-07      1.924915e-07      1.881525e-07      1.881525e-07      1.438081e-07
    259      1.705815e-07      3.788998e-07      1.709033e-07      1.658286e-07      1.658286e-07      1.358027e-07
    261      1.212339e-07      3.061716e-07      1.912142e-07      1.919352e-07      1.919352e-07      1.249266e-07
    262      2.153069e-07      3.803537e-07      1.853768e-07      1.859536e-07      1.859536e-07      3.763236e-07
    263      2.213543e-07      4.796229e-07      2.403416e-07      4.603458e-07      2.338935e-07      3.783692e-07
    265      2.019336e-07      3.566799e-07      1.516975e-07      1.531708e-07      1.531708e-07      1.309428e-07
    266      1.182956e-07      3.383422e-07      1.641217e-07      1.645517e-07      1.645517e-07      1.556428e-07
    267      2.033083e-07      5.448238e-07      1.674888e-07      1.708365e-07      1.708365e-07      1.648607e-07
    268      2.015187e-07      4.221798e-07      1.666308e-07      1.565711e-07      1.565711e-07      1.343170e-07
    269      2.062111e-07      5.254998e-07      2.387887e-07      5.274504e-07      2.406694e-07      3.798024e-07
    271      1.977533e-07      4.726542e-07      1.972593e-07      1.959547e-07      1.959547e-07      3.796388e-07
    272      1.145908e-07      3.577410e-07      1.402941e-07      1.390949e-07      1.390949e-07      1.788681e-07
    274      2.456598e-07      5.155301e-07      2.437044e-07      5.256583e-07      2.386551e-07      3.690020e-07
    276      1.233398e-07      3.986606e-07      1.365727e-07      1.329284e-07      1.329284e-07      2.615891e-07
    277      2.539705e-07      5.281953e-07      2.507689e-07      5.113510e-07      2.636458e-07      3.613178e-07
    278      2.500034e-07      5.125767e-07      2.543965e-07      4.943646e-07      2.586662e-07      3.722843e-07
    279      1.230800e-07      3.426067e-07      1.727915e-07      1.706629e-07      1.706629e-07      1.300775e-07
    281      2.600788e-07      4.421222e-07      2.178430e-07      2.192520e-07      2.192520e-07      3.548260e-07
    282      1.923835e-07      1.256465e-07      1.256465e-07      1.232156e-07      1.232156e-07      1.356765e-07
    283      2.539360e-07      5.033807e-07      2.497082e-07      5.140324e-07      2.414126e-07      3.838028e-07
    284      2.027077e-07      3.349201e-07      1.928199e-07      1.911002e-07      1.911002e-07      1.471158e-07
    285      1.150815e-07      3.526146e-07      1.717077e-07      1.675323e-07      1.675323e-07      2.339297e-07
    287      1.639109e-07      4.147068e-07      1.750788e-07      1.703897e-07      1.703897e-07      1.222591e-07
    289      1.116158e-07      2.800092e-07      1.795379e-07      1.757745e-07      1.757745e-07      1.930739e-07
    290      1.231251e-07      3.149545e-07      1.788850e-07      1.861414e-07      1.861414e-07      1.675623e-07
    291      1.689888e-07      4.075763e-07      1.925678e-07      2.027442e-07      2.027442e-07      1.506466e-07
    292      1.581902e-07      4.443587e-07      1.772448e-07      1.758394e-07      1.758394e-07      1.576791e-07
    293      2.145222e-07      5.690740e-07      2.249014e-07      5.626118e-07      2.197821e-07      3.759567e-07
    295      2.081281e-07      1.285269e-07      1.285269e-07      1.283674e-07      1.283674e-07      1.404634e-07
    296      1.628766e-07      3.589461e-07      1.556122e-07      1.612424e-07      1.612424e-07      4.028750e-07
    298      2.174732e-07      5.931939e-07      2.265769e-07      5.886718e-07      2.272150e-07      3.750535e-07
    299      1.251625e-07      3.636882e-07      1.384727e-07      1.381702e-07      1.381702e-07      2.277146e-07
    301      1.263431e-07      3.312216e-07      1.908233e-07      1.806836e-07      1.806836e-07      1.326826e-07
    302      2.098849e-07      3.635306e-07      1.855339e-07      1.893575e-07      1.893575e-07      3.865412e-07
    303      1.731870e-07      3.952747e-07      1.843101e-07      1.750927e-07      1.750927e-07      1.863328e-07
    304      1.100630e-07      3.539772e-07      1.561596e-07      1.586423e-07      1.586423e-07      1.829102e-07
    305      1.705174e-07      3.479583e-07      1.647859e-07      1.727334e-07      1.727334e-07      1.382707e-07
    306      1.141977e-07      3.333392e-07      1.460105e-07      1.460203e-07      1.460203e-07      2.167725e-07
    307      2.440031e-07      5.704432e-07      2.291278e-07      5.584193e-07      2.180736e-07      3.534893e-07
    309      1.988677e-07      6.125582e-07      2.252990e-07      6.210531e-07      2.312589e-07      1.559475e-07
    310      1.285205e-07      3.491847e-07      1.762986e-07      1.725355e-07      1.725355e-07      1.681429e-07
    311      2.426690e-07      6.063725e-07      2.161904e-07      5.889187e-07      2.295903e-07      3.794806e-07
    313      2.464821e-07      3.865626e-07      1.817521e-07      1.897758e-07      1.897758e-07      3.754765e-07
    314      1.917747e-07      3.616533e-07      1.686394e-07      1.728908e-07      1.728908e-07      3.787588e-07
    316      1.945554e-07      3.289450e-07      1.671818e-07      1.787079e-07      1.787079e-07      1.468917e-07
    317      2.208842e-07      5.542473e-07      1.927134e-07      5.618234e-07      1.891404e-07      3.794081e-07
    318      1.991046e-07      3.641487e-07      1.538132e-07      1.517044e-07      1.517044e-07      1.544278e-07
    319      1.214983e-07      3.224470e-07      1.846269e-07      1.805655e-07      1.805655e-07      1.241928e-07
    321      2.056992e-07      5.471295e-07      1.928324e-07      5.515586e-07      1.902652e-07      1.890142e-07
    322      1.226038e-07      3.495409e-07      1.575273e-07      1.578879e-07      1.578879e-07      1.443990e-07
    323      1.056029e-07      3.658308e-07      1.915645e-07      1.861628e-07      1.861628e-07      1.897334e-07
    326      1.961685e-07      4.727551e-07      1.964517e-07      2.006391e-07      2.006391e-07      3.792286e-07
    327      1.906094e-07      4.003689e-07      1.954245e-07      1.917423e-07      1.917423e-07      1.566012e-07
    328      1.611068e-07      4.071518e-07      1.474960e-07      1.538363e-07      1.538363e-07      4.229538e-07
    329      2.006236e-07      1.274322e-07      1.274322e-07      1.300759e-07      1.300759e-07      1.373526e-07
    331      2.159058e-07      4.720389e-07      2.057458e-07      2.115960e-07      2.115960e-07      3.938261e-07
    332      2.034817e-07      1.323201e-07      1.323201e-07      1.299925e-07      1.299925e-07      1.504214e-07
    333      1.734878e-07      3.816761e-07      1.794666e-07      1.683401e-07      1.683401e-07      1.357561e-07
    334      2.143846e-07      5.358465e-07      1.966540e-07      5.327061e-07      1.966666e-07      4.071275e-07
    335      2.087981e-07      4.658408e-07      1.697511e-07      1.647695e-07      1.647695e-07      1.449444e-07
    337      2.278881e-07      4.637756e-07      2.324619e-07      2.240918e-07      2.240918e-07      3.727136e-07
    339      2.086462e-07      3.530390e-07      2.099672e-07      2.078547e-07      2.078547e-07      1.653213e-07
    340      1.240866e-07      3.350963e-07      1.433581e-07      1.461282e-07      1.461282e-07      2.466206e-07
    341      1.234526e-07      3.809751e-07      1.752402e-07      1.733505e-07      1.733505e-07      1.684949e-07
    342      1.181279e-07      4.157423e-07      1.586675e-07      1.565297e-07      1.565297e-07      2.236661e-07
    344      1.182284e-07      3.338805e-07      1.849533e-07      1.796248e-07      1.796248e-07      3.418745e-07
    345      1.226118e-07      3.341517e-07      1.470779e-07      1.471805e-07      1.471805e-07      2.598271e-07
    346      1.993998e-07      5.553338e-07      2.046332e-07      5.613075e-07      1.963778e-07      3.755884e-07
    347      2.216287e-07      5.428358e-07      1.963689e-07      5.421260e-07      2.022821e-07      3.674438e-07
    348      1.214825e-07      3.640408e-07      1.731811e-07      1.764132e-07      1.764132e-07      3.111197e-07
    349      2.151976e-07      5.504587e-07      2.008980e-07      5.523555e-07      1.996018e-07      3.688477e-07
    353      2.264505e-07      5.409662e-07      1.934584e-07      1.861936e-07      1.861936e-07      3.660872e-07
    354      1.998465e-07      1.257226e-07      1.257226e-07      1.266527e-07      1.266527e-07      2.085274e-07
    355      2.054707e-07      3.689250e-07      2.035096e-07      2.026951e-07      2.026951e-07      1.386130e-07
    356      2.116076e-07      4.886805e-07      1.674753e-07      1.735857e-07      1.735857e-07      1.519563e-07
    357      1.203586e-07      3.012444e-07      1.577486e-07      1.588690e-07      1.588690e-07      2.204874e-07
    358      2.102375e-07      5.391917e-07      1.991985e-07      5.374192e-07      2.003993e-07      3.767501e-07
    359      2.179083e-07      5.633718e-07      2.044090e-07      5.541445e-07      2.116728e-07      3.888299e-07
    361      1.169275e-07      4.323907e-07      2.025631e-07      2.097343e-07      2.097343e-07      2.364930e-07
    362      1.900972e-07      3.643446e-07      1.873354e-07      1.914500e-07      1.914500e-07      3.850253e-07
    365      1.760859e-07      4.756318e-07      1.887352e-07      1.905305e-07      1.905305e-07      1.552590e-07
    366      1.624002e-07      3.487381e-07      1.752577e-07      1.751299e-07      1.751299e-07      1.635652e-07
    367      2.445333e-07      5.490395e-07      2.035403e-07      5.292860e-07      1.949966e-07      3.944209e-07
    368      1.227206e-07      3.478803e-07      1.402666e-07      1.405892e-07      1.405892e-07      2.679055e-07
    369      1.717121e-07      4.411172e-07      1.589857e-07      1.555127e-07      1.555127e-07      1.637991e-07
    370      1.795894e-07      4.042884e-07      1.741971e-07      1.756213e-07      1.756213e-07      2.346114e-07
    371      1.971203e-07      4.091323e-07      1.664423e-07      1.708177e-07      1.708177e-07      1.692075e-07
    372      1.235428e-07      3.312909e-07      1.713933e-07      1.646320e-07      1.646320e-07      3.646265e-07
    373      2.479865e-07      5.509212e-07      2.142649e-07      5.471139e-07      2.085351e-07      3.855961e-07
    374      1.159354e-07      3.151714e-07      1.370980e-07      1.455959e-07      1.455959e-07      1.346691e-07
    376      1.849804e-07      1.189638e-07      1.189638e-07      1.142045e-07      1.142045e-07      5.225659e-07
    377      1.223947e-07      2.816012e-07      1.776371e-07      1.757985e-07      1.757985e-07      1.427971e-07
    379      2.256487e-07      4.858061e-07      2.176086e-07      2.206778e-07      2.206778e-07      3.899603e-07
    380      1.146674e-07      4.304451e-07      1.549537e-07      1.572528e-07      1.572528e-07      2.871437e-07
    381      2.023679e-07      4.077683e-07      2.126635e-07      2.260456e-07      2.260456e-07      2.139849e-07
    382      2.373684e-07      5.818699e-07      2.167913e-07      5.705790e-07      2.100067e-07      4.063626e-07
    383      2.344242e-07      5.416479e-07      2.061691e-07      5.488713e-07      2.161354e-07      3.858209e-07
    386      2.024901e-07      4.119942e-07      1.949455e-07      1.931482e-07      1.931482e-07      4.051680e-07
    387      1.281457e-07      3.261754e-07      1.926199e-07      1.837260e-07      1.837260e-07      1.660771e-07
    388      1.731449e-07      4.111774e-07      1.927799e-07      1.940468e-07      1.940468e-07      4.273809e-07
    389      2.172676e-07      5.133044e-07      2.033750e-07      5.004174e-07      2.005164e-07      4.179661e-07
    391      1.231344e-07      3.756860e-07      1.723851e-07      1.698571e-07      1.698571e-07      2.499926e-07
    393      2.169994e-07      4.228823e-07      1.994616e-07      1.947048e-07      1.947048e-07      4.070797e-07
    394      2.276188e-07      3.889847e-07      2.306473e-07      2.114615e-07      2.114615e-07      4.184138e-07
    395      1.979007e-07      3.828043e-07      1.772292e-07      1.773488e-07      1.773488e-07      2.008334e-07
    397      2.196821e-07      4.728493e-07      1.949282e-07      1.950308e-07      1.950308e-07      3.920595e-07
    398      2.106061e-07      5.164568e-07      1.897553e-07      1.871118e-07      1.871118e-07      4.071116e-07
    399      1.162972e-07      3.982255e-07      1.725317e-07      1.718150e-07      1.718150e-07      2.836280e-07
    401      2.000908e-07      5.168932e-07      1.952368e-07      1.926290e-07      1.926290e-07      3.956637e-07
    402      2.104636e-07      4.237690e-07      1.711537e-07      1.740200e-07      1.740200e-07      1.867341e-07
    403      1.325543e-07      3.394525e-07      1.736128e-07      1.724151e-07      1.724151e-07      1.331267e-07
    404      1.743760e-07      4.090944e-07      1.775001e-07      1.697448e-07      1.697448e-07      4.105280e-07
    406      1.248159e-07      2.909439e-07      1.891626e-07      1.891914e-07      1.891914e-07      1.522211e-07
    407      1.735658e-07      4.081548e-07      1.646497e-07      1.711953e-07      1.711953e-07      1.457375e-07
    408      1.192708e-07      3.021921e-07      1.406938e-07      1.428666e-07      1.428666e-07      2.116826e-07
    409      2.617243e-07      5.228763e-07      2.070993e-07      4.955519e-07      2.043311e-07      4.020891e-07
    410      1.666759e-07      4.183887e-07      1.586831e-07      1.561989e-07      1.561989e-07      1.707850e-07
    411      2.439922e-07      5.070495e-07      2.102991e-07      5.155817e-07      1.978480e-07      4.067824e-07
    412      2.033801e-07      4.969668e-07      2.006337e-07      5.025138e-07      2.043601e-07      4.079360e-07
    413      1.935025e-07      1.388785e-07      1.388785e-07      1.394107e-07      1.394107e-07      1.387839e-07
    414      1.249509e-07      4.333397e-07      1.450005e-07      1.430800e-07      1.430800e-07      1.469408e-07
    415      1.958207e-07      1.408202e-07      1.408202e-07      1.440556e-07      1.440556e-07      1.490856e-07
    417      2.521934e-07      5.048626e-07      2.258710e-07      5.237951e-07      2.402501e-07      4.212701e-07
    418      1.139443e-07      3.926634e-07      1.592880e-07      1.617844e-07      1.617844e-07      1.584389e-07
    419      2.530188e-07      4.994945e-07      2.242183e-07      5.027571e-07      2.179493e-07      4.018248e-07
    421      2.705975e-07      4.421037e-07      2.237606e-07      2.111102e-07      2.111102e-07      4.032416e-07
    422      2.304052e-07      3.361239e-07      2.032661e-07      2.061795e-07      2.061795e-07      3.947762e-07
    423      1.895341e-07      1.242365e-07      1.242365e-07      1.257242e-07      1.257242e-07      1.569439e-07
    424      1.982191e-07      3.699608e-07      1.483311e-07      1.482508e-07      1.482508e-07      1.588054e-07
    425      1.183367e-07      3.837843e-07      1.479654e-07      1.459986e-07      1.459986e-07      1.356949e-07
    426      2.064544e-07      3.473485e-07      2.025404e-07      2.013154e-07      2.013154e-07      1.606851e-07
    427      1.715502e-07      3.426199e-07      1.770080e-07      1.901854e-07      1.901854e-07      1.371926e-07
    428      2.186324e-07      5.096769e-07      2.175050e-07      5.021566e-07      2.169037e-07      4.015141e-07
    430      1.390314e-07      3.663572e-07      1.927799e-07      1.951004e-07      1.951004e-07      1.769368e-07
    431      2.607433e-07      5.090831e-07      2.089387e-07      5.076196e-07      2.186953e-07      4.202045e-07
    433      2.199477e-07      5.861007e-07      2.052414e-07      2.088900e-07      2.088900e-07      4.066604e-07
    434      1.256917e-07      3.683642e-07      1.813943e-07      1.777079e-07      1.777079e-07      1.604449e-07
    435      1.275254e-07      3.040839e-07      1.809527e-07      1.858254e-07      1.858254e-07      1.736110e-07
    436      1.856051e-07      4.020655e-07      1.892757e-07      1.974237e-07      1.974237e-07      4.063403e-07
    437      1.208273e-07      4.721079e-07      1.901402e-07      1.976879e-07      1.976879e-07      1.526453e-07
    438      1.771345e-07      4.849630e-07      1.843541e-07      1.956542e-07      1.956542e-07      2.069160e-07
    439      2.265181e-07      5.146082e-07      2.180214e-07      5.173231e-07      2.224487e-07      4.034112e-07
    442      1.254011e-07      2.598154e-07      1.424844e-07      1.409673e-07      1.409673e-07      2.329137e-07
    443      2.380930e-07      5.319910e-07      2.309411e-07      5.285052e-07      2.325624e-07      4.136064e-07
    444      1.709316e-07      3.848045e-07      1.734499e-07      1.679520e-07      1.679520e-07      2.267555e-07
    445      2.069760e-07      5.080970e-07      1.678781e-07      1.760085e-07      1.760085e-07      1.659490e-07
    446      2.233213e-07      5.137934e-07      2.237258e-07      5.116345e-07      2.192639e-07      3.922768e-07
    447      2.162510e-07      4.952950e-07      2.143213e-07      5.139247e-07      2.236551e-07      3.989452e-07
    449      2.436216e-07      4.622922e-07      2.348649e-07      2.299427e-07      2.299427e-07      4.129329e-07
    451      1.675976e-07      3.959555e-07      1.617838e-07      1.590233e-07      1.590233e-07      1.280170e-07
    452      2.256701e-07      3.752432e-07      2.066080e-07      2.102768e-07      2.102768e-07      4.182558e-07
    453      2.001570e-07      3.876731e-07      2.054536e-07      1.905133e-07      1.905133e-07      4.025112e-07
    454      2.245039e-07      5.107622e-07      2.183724e-07      5.185578e-07      2.269084e-07      4.119558e-07
    456      1.152630e-07      4.146577e-07      1.554956e-07      1.587821e-07      1.587821e-07      2.047558e-07
    457      2.117934e-07      5.190966e-07      2.248397e-07      5.215551e-07      2.265207e-07      4.124146e-07
    458      2.143870e-07      5.249751e-07      2.375757e-07      5.318775e-07      2.382534e-07      4.181211e-07
    459      1.213021e-07      2.900938e-07      1.508174e-07      1.593493e-07      1.593493e-07      2.082768e-07
    460      1.209660e-07      4.101683e-07      1.491910e-07      1.444748e-07      1.444748e-07      2.112227e-07
    461      2.182038e-07      5.019176e-07      2.149358e-07      5.004436e-07      2.216786e-07      4.172298e-07
    463      2.169663e-07      5.210963e-07      2.260456e-07      2.201856e-07      2.201856e-07      4.124268e-07
    464      1.195802e-07      3.762592e-07      1.737731e-07      1.785273e-07      1.785273e-07      3.389835e-07
    465      1.320007e-07      3.409968e-07      1.738816e-07      1.712088e-07      1.712088e-07      1.766731e-07
    466      2.084511e-07      5.109973e-07      2.142472e-07      5.047934e-07      2.204741e-07      4.085223e-07
    467      2.223129e-07      5.237200e-07      2.343470e-07      5.231124e-07      2.162369e-07      4.418545e-07
    469      2.134477e-07      4.375466e-07      1.766562e-07      1.770066e-07      1.770066e-07      1.575051e-07
    470      1.939910e-07      1.220688e-07      1.220688e-07      1.207220e-07      1.207220e-07      1.907693e-07
    471      2.008678e-07      3.992884e-07      1.841018e-07      1.719335e-07      1.719335e-07      4.335573e-07
    472      1.926134e-07      1.276455e-07      1.276455e-07      1.289261e-07      1.289261e-07      1.711876e-07
    473      1.297388e-07      3.916882e-07      1.894324e-07      1.822792e-07      1.822792e-07      1.546751e-07
    474      1.946602e-07      3.982798e-07      1.811392e-07      1.883797e-07      1.883797e-07      2.106717e-07
    475      1.236633e-07      4.591152e-07      1.640005e-07      1.693252e-07      1.693252e-07      1.423350e-07
    476      1.230369e-07      3.127627e-07      1.502968e-07      1.543293e-07      1.543293e-07      2.400958e-07
    477      2.094209e-07      3.986784e-07      1.537925e-07      1.611200e-07      1.611200e-07      1.451774e-07
    478      2.186375e-07      5.027230e-07      2.170199e-07      5.069745e-07      2.217227e-07      4.268714e-07
    479      2.237072e-07      5.163222e-07      2.271152e-07      5.135320e-07      2.271299e-07      4.277150e-07
    481      1.732225e-07      3.729516e-07      1.666305e-07      1.794548e-07      1.794548e-07      1.299093e-07
    482      1.842885e-07      4.089928e-07      1.876269e-07      1.834506e-07      1.834506e-07      4.245059e-07
    483      1.244360e-07      3.564028e-07      1.616629e-07      1.564488e-07      1.564488e-07      1.825908e-07
    485      1.788327e-07      4.253253e-07      2.037094e-07      1.918958e-07      1.918958e-07      1.944200e-07
    487      2.229994e-07      5.296223e-07      2.369284e-07      2.288944e-07      2.288944e-07      4.310897e-07
    488      1.692659e-07      3.465961e-07      1.733630e-07      1.719035e-07      1.719035e-07      1.426343e-07
    489      2.062031e-07      5.178350e-07      2.042306e-07      2.127927e-07      2.127927e-07      4.234923e-07
    491      2.582021e-07      4.592675e-07      2.384360e-07      2.473744e-07      2.473744e-07      4.462432e-07
    492      1.724304e-07      4.194741e-07      1.597444e-07      1.564075e-07      1.564075e-07      4.958317e-07
    493      1.258842e-07      3.713533e-07      2.083126e-07      2.096025e-07      2.096025e-07      1.548449e-07
    494      1.345958e-07      3.429842e-07      1.548295e-07      1.573919e-07      1.573919e-07      2.452533e-07
    496      1.124039e-07      3.645980e-07      1.677611e-07      1.694262e-07      1.694262e-07      2.837890e-07
    497      2.158677e-07      3.431011e-07      2.144308e-07      2.079147e-07      2.079147e-07      1.566916e-07
    498      1.991789e-07      1.490081e-07      1.490081e-07      1.482831e-07      1.482831e-07      1.789563e-07
    499      2.615972e-07      5.283323e-07      2.384425e-07      5.273755e-07      2.335929e-07      4.323736e-07
    501      2.078116e-07      5.490043e-07      2.338488e-07      5.490212e-07      2.389284e-07      4.491719e-07
    502      2.111899e-07      5.074902e-07      2.051022e-07      2.101117e-07      2.101117e-07      4.236623e-07
    503      2.432694e-07      5.422333e-07      2.413805e-07      5.398226e-07      2.444753e-07      4.413455e-07
    505      1.854365e-07      4.179943e-07      1.867338e-07      1.905998e-07      1.905998e-07      1.635027e-07
    506      1.180782e-07      4.147211e-07      1.484697e-07      1.422107e-07      1.422107e-07      1.800469e-07
    508      1.985166e-07      3.892409e-07      2.175820e-07      2.139927e-07      2.139927e-07      4.507913e-07
    509      2.526551e-07      5.177221e-07      2.290690e-07      5.198127e-07      2.356156e-07      4.521368e-07
    510      1.221776e-07      3.492785e-07      1.486429e-07      1.442352e-07      1.442352e-07      2.301662e-07
    511      1.703786e-07      4.975219e-07      1.831423e-07      1.854142e-07      1.854142e-07      1.445589e-07
[13]:
# Figure 1: one panel per FFT backend (two columns) showing the L1, L2 and
# L-infinity relative errors against the transform length N, together with
# log-linear fits of the L2 and L-infinity curves.
plt.figure(figsize=(13, 1 + (len(vl2) + 1) * 1.5))
vk = vl2.keys()

# Regression abscissa: d0.ndim * log10(N)
x = np.array(vn, dtype=np.float32)
xl = d0.ndim * np.log10(x)

n_rows = (len(vl2) + 1) // 2
for i, k in enumerate(vk, start=1):
    plt.subplot(n_rows, 2, i)

    # Fit error ~ intercept + slope * xl for both norms
    r2 = stats.linregress(xl, np.array(vl2[k], dtype=np.float32))
    ri = stats.linregress(xl, np.array(vli[k], dtype=np.float32))
    label_l2 = r"$L2\approx %s+%s\log(size)$" % (latex_float(r2.intercept), latex_float(r2.slope))
    label_li = r"$L_{\infty}\approx %s+%s\log(size)$" % (latex_float(ri.intercept), latex_float(ri.slope))

    # Measured errors are drawn first so the legend reads L1, L2, L-infinity
    plt.semilogx(vn, vl1[k], '-ob', label="$L1$")
    plt.semilogx(vn, vl2[k], '-ok', label=label_l2)
    plt.semilogx(vn, vli[k], '-og', label=label_li)

    # Fitted trends (unlabelled, same colours as the curves they fit)
    plt.semilogx(x, r2.intercept + r2.slope * xl, "k-")
    plt.semilogx(x, ri.intercept + ri.slope * xl, "g-")

    plt.title(k)
    plt.grid(True)
    plt.legend(loc='upper left')
    plt.xlabel("N", loc='right')

plt.suptitle("1D FFT errors (single precision, Bluestein) - " + device_name)
plt.tight_layout()

# Figure 2: L2 error of every backend overlaid on a single axis
plt.figure()
ms = 3

# Line/marker format per backend label
clrs = {
    "fftw": '-og',
    "vkfft-cuda": "-^k",
    "vkfft-cuda-LUT": "-^b",
    "vkfft-opencl": "-vk",
    "vkfft-opencl-LUT": "-vb",
    "cufft": "-or",
}

for k, v in vl2.items():
    fmt = clrs[k]
    plt.semilogx(vn, v, fmt, markersize=ms, label=k)
plt.legend(loc='upper left')
plt.suptitle("1D FFT L2 error (single precision, Bluestein) - " + device_name)
plt.grid(True)
plt.xlabel("N", loc='right')
plt.tight_layout()


1D, non-radix (Bluestein) transforms, double precision

[14]:
# Largest transform length tested in this section
nmax = 512

# Random complex input, real and imaginary parts uniform in [-0.5, 0.5).
# The real part is drawn first, then the imaginary part.
# d0ld is the long double copy fed to the reference transform; d0 is the
# double precision copy fed to the transforms under test.
d0ld = (np.random.uniform(-0.5, 0.5, nmax)
        + 1j * np.random.uniform(-0.5, 0.5, nmax)).astype(np.clongdouble)
d0 = d0ld.astype(np.complex128)

def accu_1d(n, fft_dic):
    """Compare each transform in fft_dic with the long double reference.

    The reference is fftwn applied to the first n samples of the global
    long double array d0ld. Every callable in fft_dic is applied to the
    first n samples of the global array d0.

    :param n: number of leading samples to transform
    :param fft_dic: dict mapping a backend label to a callable taking an array
    :return: dict mapping each label to the tuple (l1, l2, li) returned by the
        notebook's l1, l2 and li helpers for (reference, result)
    """
    reference = fftwn(d0ld[:n])
    errors = {}
    for label in fft_dic:
        out = fft_dic[label](d0[:n])
        errors[label] = (l1(reference, out), l2(reference, out), li(reference, out))
    return errors

# Transforms to compare, keyed by the label used in the table and the plots.
# "fftw" is the baseline against which the other columns are coloured.
fft_dic = {"fftw": fftwn}
if has_pycuda:
    fft_dic["vkfft-cuda"] = fftnvcu
    fft_dic["vkfft-cuda-LUT"] = fftnvculut
    fft_dic["cufft"] = fftncu
if has_pyopencl:
    fft_dic["vkfft-opencl"] = fftnvcl
    fft_dic["vkfft-opencl-LUT"] = fftnvcllut

# Tested lengths and, per backend label, the list of L1 / L2 / L-infinity errors
vn, vl1, vl2, vli = [], {}, {}, {}

# GPU columns in print order, restricted to the backends actually registered.
# accu_1d returns exactly the keys of fft_dic, so no transform has to be run
# just to build the header.
gpu_backends = [k for k in ["vkfft-cuda", "vkfft-cuda-LUT", "vkfft-opencl", "vkfft-opencl-LUT", "cufft"]
                if k in fft_dic]

s = "%7s  %16s" % ("N", "fftw   ")
for k in gpu_backends:
    s += "  %16s" % k
print(s)

for n in range(8, len(d0) + 1):
    if max(primes(n)) <= 13:
        continue  # test only transforms with non-radix dimensions
    r = accu_1d(n, fft_dic)
    vn.append(n)
    for k, v in r.items():
        vl1.setdefault(k, []).append(v[0])
        vl2.setdefault(k, []).append(v[1])
        vli.setdefault(k, []).append(v[2])

    ref = vl2["fftw"][-1]
    s = "%7d  %16e" % (n, ref)
    for k in gpu_backends:
        err = vl2[k][-1]
        # Red intensity: 0 when the error equals the fftw one, 255 when it is
        # 100x larger (log scale). A zero, infinite or NaN error must not
        # abort the scan: int() raises on inf/NaN, so the value is clamped
        # before conversion and NaN is shown at full intensity.
        with np.errstate(divide="ignore", invalid="ignore"):
            level = np.log10(np.divide(err, ref)) / np.log10(100) * 255
        if np.isnan(level):
            red = 255
        else:
            red = int(min(max(level, 0), 255))
        s += "\x1b[38;2;%d;0;0m    %14e\x1b[0m" % (red, err)
    print(s)

      N           fftw           vkfft-cuda    vkfft-cuda-LUT      vkfft-opencl  vkfft-opencl-LUT             cufft
     17      1.066556e-16      2.032744e-16      2.032744e-16      1.932772e-16      1.932772e-16      1.024254e-16
     19      1.358658e-16      4.147412e-16      4.147412e-16      3.430604e-16      3.430604e-16      1.105945e-16
     23      1.222737e-16      2.413451e-16      2.413451e-16      2.516965e-16      2.516965e-16      1.148165e-16
     29      1.501406e-16      3.579374e-16      3.579374e-16      3.529350e-16      3.529350e-16      1.465995e-16
     31      1.883550e-16      3.082450e-16      3.082450e-16      2.744713e-16      2.744713e-16      1.223319e-16
     34      1.593584e-16      2.025947e-16      2.025947e-16      2.125895e-16      2.125895e-16      1.505484e-16
     37      3.676175e-16      3.702819e-16      3.702819e-16      3.570183e-16      3.570183e-16      1.801324e-16
     38      1.601503e-16      4.237837e-16      4.237837e-16      4.025553e-16      4.025553e-16      1.837089e-16
     41      3.152464e-16      3.281737e-16      3.281737e-16      3.527551e-16      3.527551e-16      1.717301e-16
     43      1.706233e-16      3.943163e-16      3.943163e-16      3.963092e-16      3.963092e-16      1.425642e-16
     46      1.883136e-16      2.610338e-16      2.610338e-16      2.461951e-16      2.461951e-16      2.052718e-16
     47      1.982760e-16      1.872361e-16      1.872361e-16      1.872361e-16      1.872361e-16      1.807893e-16
     51      1.770969e-16      2.013288e-16      2.013288e-16      2.244093e-16      2.244093e-16      1.617509e-16
     53      2.045219e-16      2.697018e-16      2.697018e-16      2.737883e-16      2.737883e-16      2.051781e-16
     57      1.669975e-16      4.105691e-16      4.105691e-16      4.015106e-16      4.015106e-16      1.896102e-16
     58      2.054844e-16      3.860140e-16      3.860140e-16      3.862643e-16      3.862643e-16      1.738323e-16
     59      3.658351e-16      1.443108e-16      1.443108e-16      1.443108e-16      1.443108e-16      1.729608e-16
     61      3.067896e-16      3.166648e-16      3.166648e-16      3.220561e-16      3.220561e-16      1.717918e-16
     62      1.668922e-16      3.051567e-16      3.051567e-16      3.128923e-16      3.128923e-16      1.628782e-16
     67      2.314708e-16      3.264476e-16      3.264476e-16      3.312149e-16      3.312149e-16      1.879661e-16
     68      1.662272e-16      2.178020e-16      2.178020e-16      2.049310e-16      2.049310e-16      1.746297e-16
     69      2.056034e-16      2.668543e-16      2.668543e-16      2.939738e-16      2.939738e-16      2.107158e-16
     71      3.728700e-16      4.401986e-16      4.401986e-16      4.499767e-16      4.499767e-16      2.336482e-16
     73      3.211524e-16      3.214904e-16      3.214904e-16      3.268311e-16      3.268311e-16      2.120713e-16
     74      3.410121e-16      4.176166e-16      4.176166e-16      3.968245e-16      3.968245e-16      1.868322e-16
     76      1.641933e-16      3.873916e-16      3.873916e-16      4.100067e-16      4.100067e-16      2.465960e-16
     79      3.712197e-16      3.675608e-16      3.675608e-16      3.433692e-16      3.433692e-16      2.077833e-16
     82      3.404349e-16      3.004966e-16      3.004966e-16      3.170214e-16      3.170214e-16      1.770641e-16
     83      3.830541e-16      2.051431e-16      2.051431e-16      2.051431e-16      2.051431e-16      2.208298e-16
     85      1.866519e-16      2.184219e-16      2.184219e-16      2.383326e-16      2.383326e-16      2.162865e-16
     86      2.014463e-16      4.268616e-16      4.268616e-16      4.419241e-16      4.419241e-16      1.894896e-16
     87      1.797631e-16      3.964417e-16      3.964417e-16      4.066787e-16      4.066787e-16      1.783458e-16
     89      4.216439e-16      2.905135e-16      2.905135e-16      2.927240e-16      2.927240e-16      2.524720e-16
     92      1.638247e-16      2.761034e-16      2.761034e-16      2.804747e-16      2.804747e-16      1.829325e-16
     93      1.968851e-16      3.083718e-16      3.083718e-16      3.203025e-16      3.203025e-16      2.133215e-16
     94      2.110990e-16      1.956110e-16      1.956110e-16      1.955015e-16      1.955015e-16      1.960184e-16
     95      2.079143e-16      4.075900e-16      4.075900e-16      3.762110e-16      3.762110e-16      1.857351e-16
     97      2.888369e-16      3.009593e-16      3.009593e-16      3.292453e-16      3.292453e-16      2.249417e-16
    101      3.412749e-16      3.585316e-16      3.585316e-16      3.766311e-16      3.766311e-16      2.442822e-16
    102      1.955521e-16      2.558419e-16      2.558419e-16      2.496954e-16      2.496954e-16      2.250308e-16
    103      3.560505e-16      3.447515e-16      3.447515e-16      3.473488e-16      3.473488e-16      2.512852e-16
    106      2.015267e-16      2.709866e-16      2.709866e-16      2.873229e-16      2.873229e-16      2.108172e-16
    107      3.985238e-16      3.363602e-16      3.363602e-16      3.484215e-16      3.484215e-16      2.343304e-16
    109      3.867450e-16      4.286241e-16      4.286241e-16      4.458877e-16      4.458877e-16      2.476720e-16
    111      3.207092e-16      4.836789e-16      4.836789e-16      4.604139e-16      4.604139e-16      2.075605e-16
    113      4.859810e-16      4.435949e-16      4.435949e-16      4.064003e-16      4.064003e-16      2.726163e-16
    114      1.872245e-16      4.348822e-16      4.348822e-16      4.603168e-16      4.603168e-16      2.509529e-16
    115      2.135527e-16      2.927767e-16      2.927767e-16      2.886522e-16      2.886522e-16      1.917635e-16
    116      1.875593e-16      4.010563e-16      4.010563e-16      4.214003e-16      4.214003e-16      2.104462e-16
    118      4.093327e-16      2.223871e-16      2.223871e-16      2.174214e-16      2.174214e-16      2.350523e-16
    119      1.960130e-16      2.813889e-16      2.813889e-16      2.667188e-16      2.667188e-16      1.905773e-16
    122      3.278021e-16      3.227205e-16      3.227205e-16      2.874891e-16      2.874891e-16      2.554785e-16
    123      3.100847e-16      3.311754e-16      3.311754e-16      3.237449e-16      3.237449e-16      2.661508e-16
    124      1.989735e-16      3.337404e-16      3.337404e-16      3.379497e-16      3.379497e-16      1.993340e-16
    127      3.720486e-16      4.861899e-16      4.861899e-16      5.231844e-16      5.231844e-16      3.109651e-16
    129      2.288744e-16      4.624426e-16      4.624426e-16      4.350685e-16      4.350685e-16      2.422310e-16
    131      4.504955e-16      3.698660e-16      3.698660e-16      3.238832e-16      3.238832e-16      5.140240e-16
    133      1.920109e-16      4.226630e-16      4.226630e-16      4.119946e-16      4.119946e-16      1.997344e-16
    134      2.198039e-16      3.891507e-16      3.891507e-16      4.012934e-16      4.012934e-16      2.384162e-16
    136      2.084956e-16      2.231501e-16      2.231501e-16      2.200405e-16      2.200405e-16      1.828021e-16
    137      4.284987e-16      4.205853e-16      4.205853e-16      4.245394e-16      4.245394e-16      4.720658e-16
    138      2.092334e-16      2.952860e-16      2.952860e-16      2.814172e-16      2.814172e-16      2.741673e-16
    139      4.188772e-16      4.621012e-16      4.621012e-16      4.252904e-16      4.252904e-16      4.974237e-16
    141      2.302913e-16      2.003624e-16      2.003624e-16      2.002325e-16      2.002325e-16      2.350283e-16
    142      4.153879e-16      4.624765e-16      4.624765e-16      4.479296e-16      4.479296e-16      2.577220e-16
    145      2.022253e-16      4.172456e-16      4.172456e-16      4.354485e-16      4.354485e-16      1.962583e-16
    146      3.808002e-16      3.435374e-16      3.435374e-16      3.524562e-16      3.524562e-16      2.303773e-16
    148      3.446394e-16      3.988110e-16      3.988110e-16      3.810151e-16      3.810151e-16      2.358893e-16
    149      4.062796e-16      3.337224e-16      3.337224e-16      3.507061e-16      3.507061e-16      4.933272e-16
    151      4.002610e-16      3.986196e-16      3.986196e-16      3.904609e-16      3.904609e-16      5.679273e-16
    152      1.979888e-16      3.762176e-16      3.762176e-16      3.926671e-16      3.926671e-16      2.324557e-16
    153      2.093900e-16      2.539712e-16      2.539712e-16      2.356858e-16      2.356858e-16      2.957654e-16
    155      2.125133e-16      3.159057e-16      3.159057e-16      3.243829e-16      3.243829e-16      2.704413e-16
    157      3.665922e-16      3.880987e-16      3.880987e-16      3.646680e-16      3.646680e-16      4.767506e-16
    158      3.511630e-16      3.891027e-16      3.891027e-16      4.002845e-16      4.002845e-16      2.469131e-16
    159      2.182624e-16      2.971595e-16      2.971595e-16      2.976541e-16      2.976541e-16      2.405648e-16
    161      2.185268e-16      3.262269e-16      3.262269e-16      3.159330e-16      3.159330e-16      2.090939e-16
    163      5.132026e-16      5.242517e-16      5.242517e-16      4.922753e-16      4.922753e-16      5.354424e-16
    164      3.540930e-16      3.204207e-16      3.204207e-16      3.287711e-16      3.287711e-16      2.272630e-16
    166      4.178859e-16      2.420661e-16      2.420661e-16      2.408705e-16      2.408705e-16      2.577730e-16
    167      4.430471e-16      3.688898e-16      3.688898e-16      3.807752e-16      3.807752e-16      7.740158e-16
    170      2.062950e-16      2.668753e-16      2.668753e-16      2.569752e-16      2.569752e-16      2.357323e-16
    171      2.173697e-16      4.420927e-16      4.420927e-16      4.220043e-16      4.220043e-16      2.832620e-16
    172      2.118488e-16      4.810829e-16      4.810829e-16      4.583838e-16      4.583838e-16      2.335068e-16
    173      4.386330e-16      3.545772e-16      3.545772e-16      3.684199e-16      3.684199e-16      5.030760e-16
    174      1.966625e-16      4.320914e-16      4.320914e-16      4.346338e-16      4.346338e-16      2.602417e-16
    177      3.980790e-16      2.457636e-16      2.457636e-16      2.445310e-16      2.445310e-16      2.222503e-16
    178      4.307858e-16      3.340961e-16      3.340961e-16      3.220665e-16      3.220665e-16      2.573625e-16
    179      4.408395e-16      3.728112e-16      3.728112e-16      3.939275e-16      3.939275e-16      6.536899e-16
    181      3.945121e-16      3.793207e-16      3.793207e-16      3.759944e-16      3.759944e-16      5.287358e-16
    183      3.198437e-16      3.591010e-16      3.591010e-16      3.373155e-16      3.373155e-16      2.407143e-16
    184      1.928248e-16      2.756121e-16      2.756121e-16      2.781794e-16      2.781794e-16      2.357639e-16
    185      3.572576e-16      4.350504e-16      4.350504e-16      4.208205e-16      4.208205e-16      1.995552e-16
    186      2.015435e-16      3.696052e-16      3.696052e-16      3.466084e-16      3.466084e-16      2.558334e-16
    187      1.952256e-16      2.622070e-16      2.622070e-16      2.600007e-16      2.600007e-16      2.987652e-16
    188      2.105454e-16      2.016576e-16      2.016576e-16      1.981467e-16      1.981467e-16      2.202767e-16
    190      2.106325e-16      3.773636e-16      3.773636e-16      3.852194e-16      3.852194e-16      2.212831e-16
    191      3.951166e-16      3.760999e-16      3.760999e-16      3.703903e-16      3.703903e-16      5.701175e-16
    193      3.320902e-16      3.630357e-16      3.630357e-16      3.585508e-16      3.585508e-16      6.086125e-16
    194      3.383942e-16      3.323661e-16      3.323661e-16      3.088008e-16      3.088008e-16      2.198132e-16
    197      3.981957e-16      5.515412e-16      5.515412e-16      5.652031e-16      5.652031e-16      7.284824e-16
    199      3.842122e-16      4.684283e-16      4.684283e-16      4.662818e-16      4.662818e-16      5.385862e-16
    201      2.267111e-16      4.151502e-16      4.151502e-16      4.308448e-16      4.308448e-16      2.314105e-16
    202      3.618266e-16      3.614846e-16      3.614846e-16      3.794759e-16      3.794759e-16      2.536718e-16
    203      2.008576e-16      4.592167e-16      4.592167e-16      4.548037e-16      4.548037e-16      2.248447e-16
    204      1.840370e-16      2.426777e-16      2.426777e-16      2.390891e-16      2.390891e-16      2.730761e-16
    205      3.534615e-16      3.265867e-16      3.265867e-16      3.193702e-16      3.193702e-16      2.500830e-16
    206      4.137018e-16      4.018045e-16      4.018045e-16      4.123807e-16      4.123807e-16      2.764055e-16
    207      2.029172e-16      2.854071e-16      2.854071e-16      3.052030e-16      3.052030e-16      2.353100e-16
    209      2.079554e-16      4.067970e-16      4.067970e-16      4.126865e-16      4.126865e-16      2.467300e-16
    211      4.207708e-16      4.749237e-16      4.749237e-16      4.828406e-16      4.828406e-16      5.606828e-16
    212      2.178517e-16      2.969481e-16      2.969481e-16      3.124649e-16      3.124649e-16      2.215099e-16
    213      4.066487e-16      4.161368e-16      4.161368e-16      4.567764e-16      4.567764e-16      2.626519e-16
    214      4.013386e-16      4.027706e-16      4.027706e-16      3.921899e-16      3.921899e-16      3.011889e-16
    215      2.323581e-16      4.631091e-16      4.631091e-16      4.520387e-16      4.520387e-16      2.313819e-16
    217      2.014112e-16      4.338211e-16      4.338211e-16      4.274623e-16      4.274623e-16      2.567983e-16
    218      3.738077e-16      3.991135e-16      3.991135e-16      4.358038e-16      4.358038e-16      2.651147e-16
    219      3.309379e-16      3.421003e-16      3.421003e-16      3.668241e-16      3.668241e-16      2.354015e-16
    221      2.254187e-16      2.547771e-16      2.547771e-16      2.525488e-16      2.525488e-16      2.421736e-16
    222      3.198251e-16      4.548480e-16      4.548480e-16      4.406397e-16      4.406397e-16      2.426449e-16
    223      4.649334e-16      4.140162e-16      4.140162e-16      4.234457e-16      4.234457e-16      5.556637e-16
    226      4.556817e-16      4.723412e-16      4.723412e-16      4.484493e-16      4.484493e-16      2.728222e-16
    227      4.050698e-16      4.189205e-16      4.189205e-16      3.990821e-16      3.990821e-16      5.731509e-16
    228      1.908995e-16      4.293922e-16      4.293922e-16      4.386623e-16      4.386623e-16      2.379902e-16
    229      4.116690e-16      4.060416e-16      4.060416e-16      3.987697e-16      3.987697e-16      5.650542e-16
    230      2.182557e-16      2.777721e-16      2.777721e-16      2.785502e-16      2.785502e-16      2.232426e-16
    232      1.935038e-16      4.295693e-16      4.295693e-16      4.025064e-16      4.025064e-16      2.059679e-16
    233      4.379194e-16      4.008820e-16      4.008820e-16      3.967666e-16      3.967666e-16      5.485785e-16
    235      2.359735e-16      2.289978e-16      2.289978e-16      2.263088e-16      2.263088e-16      2.225156e-16
    236      3.694819e-16      2.200808e-16      2.200808e-16      2.184442e-16      2.184442e-16      2.626275e-16
    237      3.994447e-16      4.142501e-16      4.142501e-16      3.793456e-16      3.793456e-16      2.599157e-16
    238      2.057381e-16      3.259522e-16      3.259522e-16      3.023206e-16      3.023206e-16      2.034470e-16
    239      4.303439e-16      4.129857e-16      4.129857e-16      4.312417e-16      4.312417e-16      6.068418e-16
    241      3.898456e-16      3.645930e-16      3.645930e-16      3.429079e-16      3.429079e-16      6.133368e-16
    244      3.153901e-16      3.459627e-16      3.459627e-16      3.356776e-16      3.356776e-16      2.512832e-16
    246      3.177716e-16      3.213002e-16      3.213002e-16      3.356220e-16      3.356220e-16      3.342745e-16
    247      2.098875e-16      4.184590e-16      4.184590e-16      4.107725e-16      4.107725e-16      2.544384e-16
    248      2.113783e-16      3.189088e-16      3.189088e-16      3.189737e-16      3.189737e-16      2.409934e-16
    249      4.189447e-16      2.434623e-16      2.434623e-16      2.434838e-16      2.434838e-16      2.735400e-16
    251      3.932799e-16      4.551911e-16      4.551911e-16      4.561028e-16      4.561028e-16      5.486927e-16
    253      1.935493e-16      3.144230e-16      3.144230e-16      3.085725e-16      3.085725e-16      2.833915e-16
    254      3.918738e-16      5.040980e-16      5.040980e-16      4.731031e-16      4.731031e-16      2.848777e-16
    255      2.257604e-16      2.373266e-16      2.373266e-16      2.583114e-16      2.583114e-16      3.236405e-16
    257      3.606148e-16      3.664810e-16      3.664810e-16      3.387670e-16      3.387670e-16      5.881190e-16
    258      2.240237e-16      4.603611e-16      4.603611e-16      4.727578e-16      4.727578e-16      3.269233e-16
    259      3.105543e-16      4.611773e-16      4.611773e-16      4.837644e-16      4.837644e-16      2.153012e-16
    261      2.335936e-16      4.292606e-16      4.292606e-16      4.224598e-16      4.224598e-16      2.179332e-16
    262      4.504228e-16      3.820135e-16      3.820135e-16      3.714197e-16      3.714197e-16      5.312039e-16
    263      4.499065e-16      5.306187e-16      5.306187e-16      5.315015e-16      5.315015e-16      6.039527e-16
    265      2.430568e-16      3.061193e-16      3.061193e-16      3.170129e-16      3.170129e-16      2.498435e-16
    266      1.986538e-16      4.141403e-16      4.141403e-16      4.265732e-16      4.265732e-16      2.319189e-16
    267      4.586972e-16      3.472214e-16      3.472214e-16      3.400729e-16      3.400729e-16      2.688599e-16
    268      2.401297e-16      3.873853e-16      3.873853e-16      3.907927e-16      3.907927e-16      2.403332e-16
    269      4.567868e-16      5.023242e-16      5.023242e-16      5.186498e-16      5.186498e-16      5.516824e-16
    271      4.212166e-16      4.398502e-16      4.398502e-16      4.263670e-16      4.263670e-16      5.161765e-16
    272      2.122421e-16      2.454751e-16      2.454751e-16      2.389727e-16      2.389727e-16      2.036869e-16
    274      4.139624e-16      5.341366e-16      5.341366e-16      5.133030e-16      5.133030e-16      5.176935e-16
    276      2.055527e-16      3.096288e-16      3.096288e-16      3.199842e-16      3.199842e-16      2.658387e-16
    277      4.404688e-16      5.113123e-16      5.113123e-16      5.119446e-16      5.119446e-16      5.377449e-16
    278      4.536224e-16      5.137798e-16      5.137798e-16      5.322734e-16      5.322734e-16      5.396899e-16
    279      2.217484e-16      3.674293e-16      3.674293e-16      3.599327e-16      3.599327e-16      2.126634e-16
    281      4.466869e-16      5.288474e-16      5.288474e-16      5.176549e-16      5.176549e-16      6.118109e-16
    282      2.255889e-16      2.286690e-16      2.286690e-16      2.318738e-16      2.318738e-16      2.819492e-16
    283      4.022360e-16      5.200618e-16      5.200618e-16      5.141904e-16      5.141904e-16      5.795763e-16
    284      4.056160e-16      4.505878e-16      4.505878e-16      4.625566e-16      4.625566e-16      2.489041e-16
    285      2.235094e-16      3.947394e-16      3.947394e-16      3.937464e-16      3.937464e-16      2.859044e-16
    287      3.616424e-16      3.786879e-16      3.786879e-16      3.699689e-16      3.699689e-16      2.459823e-16
    289      2.244310e-16      3.273120e-16      3.273120e-16      3.127297e-16      3.127297e-16      2.637970e-16
    290      2.127957e-16      4.129002e-16      4.129002e-16      4.218636e-16      4.218636e-16      2.210733e-16
    291      3.278867e-16      3.518950e-16      3.518950e-16      3.375856e-16      3.375856e-16      2.589637e-16
    292      3.496717e-16      3.753425e-16      3.753425e-16      3.843060e-16      3.843060e-16      2.459724e-16
    293      4.252048e-16      5.679563e-16      5.679563e-16      5.626417e-16      5.626417e-16      5.074463e-16
    295      3.987306e-16      2.542718e-16      2.542718e-16      2.547957e-16      2.547957e-16      2.650414e-16
    296      3.429963e-16      4.362115e-16      4.362115e-16      4.322252e-16      4.322252e-16      2.426680e-16
    298      3.976622e-16      5.603865e-16      5.603865e-16      5.528722e-16      5.528722e-16      5.211148e-16
    299      2.205340e-16      2.876882e-16      2.876882e-16      2.966815e-16      2.966815e-16      2.334871e-16
    301      2.247091e-16      4.978535e-16      4.978535e-16      4.815244e-16      4.815244e-16      2.437074e-16
    302      3.708065e-16      4.094584e-16      4.094584e-16      4.064159e-16      4.064159e-16      5.371969e-16
    303      3.519356e-16      3.771127e-16      3.771127e-16      4.015522e-16      4.015522e-16      2.845445e-16
    304      2.241276e-16      3.855226e-16      3.855226e-16      3.993954e-16      3.993954e-16      2.265670e-16
    305      3.431844e-16      3.589180e-16      3.589180e-16      3.551306e-16      3.551306e-16      2.559310e-16
    306      2.174840e-16      2.711774e-16      2.711774e-16      2.622224e-16      2.622224e-16      3.769455e-16
    307      4.765977e-16      5.909385e-16      5.909385e-16      5.975148e-16      5.975148e-16      5.422187e-16
    309      4.416886e-16      5.655617e-16      5.655617e-16      5.742615e-16      5.742615e-16      2.815834e-16
    310      2.274252e-16      3.539044e-16      3.539044e-16      3.601667e-16      3.601667e-16      2.914733e-16
    311      4.841171e-16      5.265861e-16      5.265861e-16      5.628839e-16      5.628839e-16      5.382997e-16
    313      4.301800e-16      4.043976e-16      4.043976e-16      4.118433e-16      4.118433e-16      6.817123e-16
    314      4.037395e-16      3.749561e-16      3.749561e-16      3.574227e-16      3.574227e-16      5.281438e-16
    316      3.884867e-16      3.949738e-16      3.949738e-16      3.885426e-16      3.885426e-16      2.689181e-16
    317      3.951267e-16      4.361437e-16      4.361437e-16      4.402120e-16      4.402120e-16      7.140741e-16
    318      2.296233e-16      3.107057e-16      3.107057e-16      3.179396e-16      3.179396e-16      2.561462e-16
    319      2.127062e-16      4.287095e-16      4.287095e-16      4.368500e-16      4.368500e-16      2.124074e-16
    321      4.404682e-16      4.496236e-16      4.496236e-16      4.348619e-16      4.348619e-16      2.944202e-16
    322      2.104195e-16      3.403837e-16      3.403837e-16      3.274292e-16      3.274292e-16      2.207092e-16
    323      2.305595e-16      4.115702e-16      4.115702e-16      4.061381e-16      4.061381e-16      2.704614e-16
    326      4.345080e-16      5.233691e-16      5.233691e-16      4.997438e-16      4.997438e-16      5.742139e-16
    327      4.004922e-16      4.442778e-16      4.442778e-16      4.549182e-16      4.549182e-16      2.697508e-16
    328      3.486878e-16      3.644010e-16      3.644010e-16      3.644615e-16      3.644615e-16      2.481652e-16
    329      2.315735e-16      2.703042e-16      2.703042e-16      2.639123e-16      2.639123e-16      2.305321e-16
    331      4.691042e-16      3.947969e-16      3.947969e-16      3.973702e-16      3.973702e-16      5.442337e-16
    332      4.222593e-16      2.512098e-16      2.512098e-16      2.537442e-16      2.537442e-16      2.801125e-16
    333      3.238470e-16      4.519223e-16      4.519223e-16      4.692280e-16      4.692280e-16      2.959127e-16
    334      4.169577e-16      4.573448e-16      4.573448e-16      4.666044e-16      4.666044e-16      7.417690e-16
    335      2.683303e-16      3.795257e-16      3.795257e-16      3.926932e-16      3.926932e-16      2.517444e-16
    337      4.850852e-16      4.733831e-16      4.733831e-16      5.063587e-16      5.063587e-16      5.896164e-16
    339      4.549069e-16      4.534603e-16      4.534603e-16      4.550205e-16      4.550205e-16      3.342645e-16
    340      2.244549e-16      2.638299e-16      2.638299e-16      2.684636e-16      2.684636e-16      2.995269e-16
    341      2.192033e-16      3.336948e-16      3.336948e-16      3.284659e-16      3.284659e-16      2.102115e-16
    342      2.196978e-16      4.394662e-16      4.394662e-16      4.420263e-16      4.420263e-16      3.396546e-16
    344      2.191307e-16      4.641263e-16      4.641263e-16      4.608098e-16      4.608098e-16      2.343412e-16
    345      2.318773e-16      2.965797e-16      2.965797e-16      2.949499e-16      2.949499e-16      4.133579e-16
    346      4.146525e-16      4.765834e-16      4.765834e-16      4.650029e-16      4.650029e-16      5.483127e-16
    347      4.576647e-16      4.919012e-16      4.919012e-16      4.833519e-16      4.833519e-16      5.781621e-16
    348      2.105215e-16      4.143864e-16      4.143864e-16      4.253093e-16      4.253093e-16      2.946552e-16
    349      4.803904e-16      4.328752e-16      4.328752e-16      4.483396e-16      4.483396e-16      5.556987e-16
    353      4.636717e-16      3.784891e-16      3.784891e-16      3.623746e-16      3.623746e-16      6.957535e-16
    354      3.667740e-16      2.455426e-16      2.455426e-16      2.521484e-16      2.521484e-16      2.724729e-16
    355      4.063957e-16      4.686091e-16      4.686091e-16      4.703910e-16      4.703910e-16      3.459256e-16
    356      4.275822e-16      3.434922e-16      3.434922e-16      3.310776e-16      3.310776e-16      2.705112e-16
    357      2.249814e-16      3.129011e-16      3.129011e-16      3.202446e-16      3.202446e-16      2.618991e-16
    358      4.423923e-16      4.540961e-16      4.540961e-16      4.566750e-16      4.566750e-16      6.605479e-16
    359      4.604351e-16      4.486373e-16      4.486373e-16      4.591504e-16      4.591504e-16      7.568075e-16
    361      2.152814e-16      6.109397e-16      6.109397e-16      6.480191e-16      6.480191e-16      2.499273e-16
    362      4.210193e-16      3.948298e-16      3.948298e-16      3.974537e-16      3.974537e-16      5.496687e-16
    365      3.416778e-16      3.522027e-16      3.522027e-16      3.785637e-16      3.785637e-16      3.180973e-16
    366      3.364970e-16      3.561232e-16      3.561232e-16      3.352433e-16      3.352433e-16      2.858453e-16
    367      4.738596e-16      4.788143e-16      4.788143e-16      4.883752e-16      4.883752e-16      5.437239e-16
    368      2.147653e-16      2.863575e-16      2.863575e-16      2.762466e-16      2.762466e-16      2.644321e-16
    369      3.477856e-16      3.532663e-16      3.532663e-16      3.374932e-16      3.374932e-16      3.011164e-16
    370      3.119328e-16      4.353787e-16      4.353787e-16      4.377745e-16      4.377745e-16      2.305324e-16
    371      2.306585e-16      3.381922e-16      3.381922e-16      3.473304e-16      3.473304e-16      2.854896e-16
    372      2.297127e-16      3.652244e-16      3.652244e-16      3.681493e-16      3.681493e-16      2.750904e-16
    373      4.784392e-16      4.596506e-16      4.596506e-16      4.525283e-16      4.525283e-16      5.684160e-16
    374      2.199592e-16      2.648148e-16      2.648148e-16      2.525418e-16      2.525418e-16      3.287146e-16
    376      2.254823e-16      2.193314e-16      2.193314e-16      2.187124e-16      2.187124e-16      2.351097e-16
    377      2.348134e-16      3.973743e-16      3.973743e-16      4.066423e-16      4.066423e-16      2.454729e-16
    379      4.366325e-16      5.573529e-16      5.573529e-16      5.587239e-16      5.587239e-16      8.446140e-16
    380      2.153783e-16      4.109959e-16      4.109959e-16      3.976027e-16      3.976027e-16      2.660615e-16
    381      3.945235e-16      5.194882e-16      5.194882e-16      4.908172e-16      4.908172e-16      2.949664e-16
    382      3.980268e-16      4.243672e-16      4.243672e-16      4.169758e-16      4.169758e-16      5.764166e-16
    383      4.208039e-16      4.350881e-16      4.350881e-16      4.254624e-16      4.254624e-16      6.145900e-16
    386      3.313086e-16      3.585882e-16      3.585882e-16      3.530873e-16      3.530873e-16      6.189568e-16
    387      2.476523e-16      5.005858e-16      5.005858e-16      4.940193e-16      4.940193e-16      2.765656e-16
    388      3.334975e-16      3.431799e-16      3.431799e-16      3.536376e-16      3.536376e-16      2.795207e-16
    389      4.534249e-16      4.012172e-16      4.012172e-16      4.136554e-16      4.136554e-16      6.685074e-16
    391      2.250132e-16      3.288109e-16      3.288109e-16      3.344614e-16      3.344614e-16      2.332006e-16
    393      4.483899e-16      3.660825e-16      3.660825e-16      3.868008e-16      3.868008e-16      7.670915e-16
    394      4.074309e-16      5.422990e-16      5.422990e-16      5.414215e-16      5.414215e-16      7.470965e-16
    395      4.061293e-16      3.819859e-16      3.819859e-16      3.870493e-16      3.870493e-16      2.775116e-16
    397      4.546222e-16      4.477367e-16      4.477367e-16      4.569631e-16      4.569631e-16      6.484007e-16
    398      3.986398e-16      4.805902e-16      4.805902e-16      4.655313e-16      4.655313e-16      5.731040e-16
    399      2.179817e-16      4.749482e-16      4.749482e-16      4.852004e-16      4.852004e-16      2.551743e-16
    401      3.641570e-16      4.537890e-16      4.537890e-16      4.385749e-16      4.385749e-16      6.521790e-16
    402      2.547605e-16      4.095842e-16      4.095842e-16      4.197842e-16      4.197842e-16      2.759522e-16
    403      2.371265e-16      3.615626e-16      3.615626e-16      3.674748e-16      3.674748e-16      2.165406e-16
    404      3.578523e-16      3.773839e-16      3.773839e-16      4.041103e-16      4.041103e-16      3.172097e-16
    406      2.203063e-16      4.469431e-16      4.469431e-16      4.562960e-16      4.562960e-16      2.226624e-16
    407      3.407111e-16      4.455074e-16      4.455074e-16      4.370183e-16      4.370183e-16      2.203054e-16
    408      2.250566e-16      2.599335e-16      2.599335e-16      2.591405e-16      2.591405e-16      3.102238e-16
    409      4.459972e-16      4.405106e-16      4.405106e-16      4.180883e-16      4.180883e-16      6.066592e-16
    410      3.530948e-16      3.609879e-16      3.609879e-16      3.568410e-16      3.568410e-16      2.828219e-16
    411      4.406156e-16      4.032047e-16      4.032047e-16      4.073707e-16      4.073707e-16      7.131128e-16
    412      4.325561e-16      4.316762e-16      4.316762e-16      4.208343e-16      4.208343e-16      3.006191e-16
    413      3.795352e-16      2.991110e-16      2.991110e-16      3.027518e-16      3.027518e-16      2.550085e-16
    414      2.199854e-16      3.277582e-16      3.277582e-16      3.207243e-16      3.207243e-16      3.166114e-16
    415      4.333893e-16      2.745321e-16      2.745321e-16      2.740133e-16      2.740133e-16      2.712208e-16
    417      4.435986e-16      4.045203e-16      4.045203e-16      4.027409e-16      4.027409e-16      5.753028e-16
    418      2.300417e-16      4.063278e-16      4.063278e-16      4.121372e-16      4.121372e-16      2.977369e-16
    419      4.479669e-16      4.140895e-16      4.140895e-16      4.335225e-16      4.335225e-16      5.756406e-16
    421      4.505356e-16      5.226405e-16      5.226405e-16      5.048513e-16      5.048513e-16      5.995219e-16
    422      4.333064e-16      4.883745e-16      4.883745e-16      4.908159e-16      4.908159e-16      5.674477e-16
    423      2.347486e-16      2.307141e-16      2.307141e-16      2.340541e-16      2.340541e-16      2.601520e-16
    424      2.584542e-16      3.070476e-16      3.070476e-16      3.112950e-16      3.112950e-16      2.396436e-16
    425      2.405092e-16      2.926105e-16      2.926105e-16      2.782895e-16      2.782895e-16      2.372834e-16
    426      4.458562e-16      4.399177e-16      4.399177e-16      4.619101e-16      4.619101e-16      3.159384e-16
    427      3.178348e-16      3.724478e-16      3.724478e-16      3.778633e-16      3.778633e-16      2.542602e-16
    428      4.136415e-16      3.989878e-16      3.989878e-16      4.036388e-16      4.036388e-16      3.066449e-16
    430      2.454803e-16      4.369650e-16      4.369650e-16      4.423259e-16      4.423259e-16      2.677730e-16
    431      4.408204e-16      3.964946e-16      3.964946e-16      4.131919e-16      4.131919e-16      5.942937e-16
    433      4.223632e-16      4.709574e-16      4.709574e-16      4.557268e-16      4.557268e-16      6.293978e-16
    434      2.361074e-16      4.071958e-16      4.071958e-16      4.152329e-16      4.152329e-16      2.971950e-16
    435      2.295792e-16      4.423498e-16      4.423498e-16      4.203942e-16      4.203942e-16      3.597191e-16
    436      3.524861e-16      4.464411e-16      4.464411e-16      4.436311e-16      4.436311e-16      2.830192e-16
    437      2.186560e-16      5.058341e-16      5.058341e-16      5.150317e-16      5.150317e-16      2.187293e-16
    438      3.697465e-16      3.772502e-16      3.772502e-16      4.136676e-16      4.136676e-16      2.785747e-16
    439      4.699810e-16      4.185354e-16      4.185354e-16      4.123208e-16      4.123208e-16      5.543589e-16
    442      2.171584e-16      2.772936e-16      2.772936e-16      2.660441e-16      2.660441e-16      2.995558e-16
    443      4.958746e-16      4.152065e-16      4.152065e-16      4.345575e-16      4.345575e-16      5.932955e-16
    444      3.358226e-16      4.527715e-16      4.527715e-16      4.390338e-16      4.390338e-16      2.431729e-16
    445      4.428114e-16      3.419863e-16      3.419863e-16      3.365053e-16      3.365053e-16      2.933957e-16
    446      4.693574e-16      4.028203e-16      4.028203e-16      4.316694e-16      4.316694e-16      5.967187e-16
    447      4.166388e-16      4.107732e-16      4.107732e-16      4.177250e-16      4.177250e-16      5.684658e-16
    449      4.779489e-16      4.916625e-16      4.916625e-16      5.042315e-16      5.042315e-16      6.470232e-16
    451      3.731203e-16      3.454496e-16      3.454496e-16      3.367108e-16      3.367108e-16      2.623106e-16
    452      4.521838e-16      4.569107e-16      4.569107e-16      4.488081e-16      4.488081e-16      3.025968e-16
    453      3.752138e-16      4.027194e-16      4.027194e-16      3.970457e-16      3.970457e-16      6.161256e-16
    454      4.356927e-16      4.260680e-16      4.260680e-16      3.984764e-16      3.984764e-16      5.585017e-16
    456      2.223502e-16      4.377104e-16      4.377104e-16      4.436412e-16      4.436412e-16      2.845021e-16
    457      4.064637e-16      4.584909e-16      4.584909e-16      4.281186e-16      4.281186e-16      5.864570e-16
    458      4.311810e-16      4.265438e-16      4.265438e-16      4.381598e-16      4.381598e-16      6.110807e-16
    459      2.276173e-16      2.772439e-16      2.772439e-16      2.637197e-16      2.637197e-16      2.539484e-16
    460      2.270926e-16      2.984909e-16      2.984909e-16      3.008670e-16      3.008670e-16      2.697652e-16
    461      4.185490e-16      4.280652e-16      4.280652e-16      4.183564e-16      4.183564e-16      6.024082e-16
    463      4.402809e-16      5.206069e-16      5.206069e-16      5.226720e-16      5.226720e-16      5.757983e-16
    464      2.349617e-16      4.269825e-16      4.269825e-16      4.122941e-16      4.122941e-16      2.391928e-16
    465      2.381781e-16      3.601220e-16      3.601220e-16      3.504457e-16      3.504457e-16      2.914530e-16
    466      4.384737e-16      4.411578e-16      4.411578e-16      4.444572e-16      4.444572e-16      5.964198e-16
    467      4.315914e-16      4.267229e-16      4.267229e-16      4.314943e-16      4.314943e-16      6.298230e-16
    469      2.562792e-16      4.407079e-16      4.407079e-16      4.423371e-16      4.423371e-16      2.506215e-16
    470      2.611323e-16      2.400088e-16      2.400088e-16      2.418638e-16      2.418638e-16      2.498508e-16
    471      4.096718e-16      3.917565e-16      3.917565e-16      3.890869e-16      3.890869e-16      5.984302e-16
    472      3.797157e-16      2.456149e-16      2.456149e-16      2.428081e-16      2.428081e-16      2.884509e-16
    473      2.471074e-16      4.562845e-16      4.562845e-16      4.589379e-16      4.589379e-16      3.050556e-16
    474      3.863249e-16      4.130609e-16      4.130609e-16      4.061693e-16      4.061693e-16      2.861199e-16
    475      2.340054e-16      3.886565e-16      3.886565e-16      3.963861e-16      3.963861e-16      2.296002e-16
    476      2.203823e-16      3.145705e-16      3.145705e-16      3.129197e-16      3.129197e-16      2.407926e-16
    477      2.414661e-16      3.217269e-16      3.217269e-16      3.113420e-16      3.113420e-16      2.560642e-16
    478      4.329472e-16      4.196160e-16      4.196160e-16      4.421878e-16      4.421878e-16      6.428448e-16
    479      4.229005e-16      4.534632e-16      4.534632e-16      4.441220e-16      4.441220e-16      5.749513e-16
    481      3.545958e-16      4.484231e-16      4.484231e-16      4.408979e-16      4.408979e-16      2.334183e-16
    482      3.695478e-16      3.631799e-16      3.631799e-16      3.684506e-16      3.684506e-16      5.978246e-16
    483      2.241180e-16      3.706330e-16      3.706330e-16      3.729883e-16      3.729883e-16      3.142881e-16
    485      3.440491e-16      3.459042e-16      3.459042e-16      3.599548e-16      3.599548e-16      3.175534e-16
    487      4.574951e-16      5.798463e-16      5.798463e-16      5.628864e-16      5.628864e-16      7.188753e-16
    488      3.176733e-16      3.523134e-16      3.523134e-16      3.399705e-16      3.399705e-16      3.092616e-16
    489      4.577790e-16      5.279113e-16      5.279113e-16      5.164502e-16      5.164502e-16      5.745423e-16
    491      4.977715e-16      6.094328e-16      6.094328e-16      5.949345e-16      5.949345e-16      6.108383e-16
    492      3.569061e-16      3.539576e-16      3.539576e-16      3.609381e-16      3.609381e-16      3.406413e-16
    493      2.425425e-16      4.469755e-16      4.469755e-16      4.612446e-16      4.612446e-16      2.291785e-16
    494      2.486894e-16      4.041481e-16      4.041481e-16      4.052317e-16      4.052317e-16      3.406617e-16
    496      2.392158e-16      3.538974e-16      3.538974e-16      3.424186e-16      3.424186e-16      2.375339e-16
    497      4.404625e-16      5.009285e-16      5.009285e-16      5.123642e-16      5.123642e-16      2.594489e-16
    498      4.148436e-16      2.584082e-16      2.584082e-16      2.560726e-16      2.560726e-16      3.500145e-16
    499      4.773732e-16      4.351097e-16      4.351097e-16      4.489492e-16      4.489492e-16      7.118935e-16
    501      4.657366e-16      4.333194e-16      4.333194e-16      4.359787e-16      4.359787e-16      6.057919e-16
    502      4.116226e-16      4.585403e-16      4.585403e-16      4.567751e-16      4.567751e-16      5.798328e-16
    503      4.175961e-16      4.554120e-16      4.554120e-16      4.341163e-16      4.341163e-16      6.879124e-16
    505      3.674330e-16      4.165634e-16      4.165634e-16      4.020705e-16      4.020705e-16      2.992717e-16
    506      2.297001e-16      3.109442e-16      3.109442e-16      3.179573e-16      3.179573e-16      3.466802e-16
    508      3.876398e-16      5.034659e-16      5.034659e-16      5.076236e-16      5.076236e-16      3.145135e-16
    509      4.222445e-16      4.656826e-16      4.656826e-16      4.515350e-16      4.515350e-16      6.135904e-16
    510      2.379677e-16      2.697083e-16      2.697083e-16      2.756163e-16      2.756163e-16      3.826891e-16
    511      3.611723e-16      4.184942e-16      4.184942e-16      4.257446e-16      4.257446e-16      2.891353e-16
[15]:
# Figure 1: one panel per FFT backend (two panels per row) showing the L1, L2
# and L-infinity errors against the transform length, with linear fits of the
# L2 and L-infinity errors against log10 of the array size.
plt.figure(figsize=(13, 1 + (len(vl2) + 1) * 1.5))
vk = vl2.keys()

# Regression abscissa: ndim * log10(N), i.e. log10 of the number of elements.
x = np.array(vn, dtype=np.float32)
xl = d0.ndim * np.log10(x)

n_rows = (len(vl2) + 1) // 2
i = 1
for k in vk:
    # Fit both error norms first, then draw everything for this backend.
    r2 = stats.linregress(xl, np.array(vl2[k], dtype=np.float32))
    ri = stats.linregress(xl, np.array(vli[k], dtype=np.float32))
    label_l2 = r"$L2\approx %s+%s\log(size)$" % (latex_float(r2.intercept), latex_float(r2.slope))
    label_li = r"$L_{\infty}\approx %s+%s\log(size)$" % (latex_float(ri.intercept), latex_float(ri.slope))

    plt.subplot(n_rows, 2, i)
    # Measured errors (these three curves carry the legend entries).
    plt.semilogx(vn, vl1[k], '-ob', label="$L1$")
    plt.semilogx(vn, vl2[k], '-ok', label=label_l2)
    plt.semilogx(vn, vli[k], '-og', label=label_li)
    # Fitted lines, drawn without markers and without legend entries.
    plt.semilogx(x, r2.intercept + r2.slope * xl, "k-")
    plt.semilogx(x, ri.intercept + ri.slope * xl, "g-")

    plt.title(k)
    plt.grid(True)
    plt.legend(loc='upper left')
    plt.xlabel("N", loc='right')
    i += 1

plt.suptitle("1D FFT errors (double precision, Bluestein) - " + device_name)
plt.tight_layout()

# Figure 2: the L2 error of every backend overlaid on a single axis.
plt.figure()
ms = 3

# Line/marker/colour format string for each backend label.
clrs = {
    "fftw": '-og',
    "vkfft-cuda": "-^k",
    "vkfft-cuda-LUT": "-^b",
    "vkfft-opencl": "-vk",
    "vkfft-opencl-LUT": "-vb",
    "cufft": "-or",
}

for k, v in vl2.items():
    plt.semilogx(vn, v, clrs[k], markersize=ms, label=k)

plt.legend(loc='upper left')
plt.suptitle("1D FFT L2 error (double precision, Bluestein) - " + device_name)
plt.grid(True)
plt.xlabel("N", loc='right')
plt.tight_layout()

2D, non-radix (Bluestein) transforms, single precision

[17]:
# Largest edge length of the square 2D test arrays.
nmax = 101
shape_2d = (nmax, nmax)

# Complex samples whose real and imaginary parts are each drawn uniformly
# between -0.5 and 0.5 (real part drawn first, then the imaginary part).
real_part = np.random.uniform(-0.5, 0.5, shape_2d)
imag_part = np.random.uniform(-0.5, 0.5, shape_2d)
d0 = real_part + 1j * imag_part

# Same data at the two precisions used below: complex long double and complex64.
d0ld = d0.astype(np.clongdouble)
d0s = d0.astype(np.complex64)

def accu_2d(n, fft_dic):
    """Compare each transform in ``fft_dic`` against a long-double FFTW reference.

    The reference is ``fftwn`` applied to a copy of the top-left ``n`` x ``n``
    corner of the module-level ``d0ld``; every transform under test receives a
    copy of the same corner of ``d0s``.

    NOTE(review): ``d0ld`` is derived from the double-precision ``d0`` whereas
    ``d0s`` is its complex64 rounding, so the rounding of the input is included
    in the reported differences -- confirm this is intended.

    :param n: number of points kept along each of the two dimensions.
    :param fft_dic: mapping ``name -> callable(array)`` returning the transform.
    :return: dict ``name -> (l1(ref, res), l2(ref, res), li(ref, res))``
        (presumably the relative L1, L2 and L-infinity errors described in the
        notebook introduction -- the norm helpers are defined in another cell).
    """
    reference = fftwn(d0ld[:n, :n].copy())

    def _errors(result):
        # Same three norms, in the same order, for every backend
        return l1(reference, result), l2(reference, result), li(reference, result)

    return {
        name: _errors(transform(d0s[:n, :n].copy()))
        for name, transform in fft_dic.items()
    }

# Registry of the transforms to compare; fftw is always available and serves as
# the baseline column of the table.
fft_dic = {"fftw": fftwn}
if has_pycuda:
    fft_dic.update({
        "vkfft-cuda": fftnvcu,
        "vkfft-cuda-LUT": fftnvculut,
        "cufft": fftncu,
    })
if has_pyopencl:
    fft_dic.update({
        "vkfft-opencl": fftnvcl,
        "vkfft-opencl-LUT": fftnvcllut,
    })

# Tested sizes and, per backend, the list of L1 / L2 / L-infinity errors
vn, vl1, vl2, vli = [], {}, {}, {}

# Display order of the GPU columns (only those actually available are printed)
gpu_columns = ["vkfft-cuda", "vkfft-cuda-LUT", "vkfft-opencl", "vkfft-opencl-LUT", "cufft"]

# A first run on a small size tells which backends produce a result
r = accu_2d(8, fft_dic)
s = "%7s  %16s" % ("N", "fftw   ") + "".join("  %16s" % k for k in gpu_columns if k in r)
print(s)

for n in range(8, len(d0) + 1):
    # test only transforms with non-radix dimensions (largest prime factor > 13)
    if max(primes(n)) <= 13:
        continue

    r = accu_2d(n, fft_dic)
    vn.append(n)
    for k, (err_l1, err_l2, err_li) in r.items():
        vl1.setdefault(k, []).append(err_l1)
        vl2.setdefault(k, []).append(err_l2)
        vli.setdefault(k, []).append(err_li)

    fftw_l2 = vl2["fftw"][-1]
    s = "%7d  %16e" % (n, fftw_l2)
    for k in gpu_columns:
        if k in vl2:
            # Red level of the 24-bit ANSI foreground colour: 0 when the L2 error
            # equals fftw's, 255 when it is 100 times larger (log scale, clamped).
            red = int(np.log10(vl2[k][-1] / fftw_l2) / np.log10(100) * 255)
            red = min(max(red, 0), 255)
            s += "\x1b[38;2;%d;0;0m    %14e\x1b[0m" % (red, vl2[k][-1])
    print(s)

      N           fftw           vkfft-cuda    vkfft-cuda-LUT      vkfft-opencl  vkfft-opencl-LUT             cufft
     17      1.064493e-07      1.769288e-07      1.766706e-07      1.793724e-07      1.793724e-07      1.014472e-07
     19      1.138276e-07      3.462510e-07      1.990153e-07      2.157018e-07      2.157018e-07      1.085927e-07
     23      1.136545e-07      3.388370e-07      1.643277e-07      1.702715e-07      1.702715e-07      1.154740e-07
     29      1.254697e-07      3.026991e-07      2.325725e-07      2.373313e-07      2.373313e-07      1.174079e-07
     31      1.254689e-07      4.019228e-07      2.190677e-07      2.194561e-07      2.194561e-07      1.249641e-07
     34      1.234050e-07      3.149019e-07      1.895455e-07      1.884011e-07      1.884011e-07      1.258486e-07
     37      2.311431e-07      4.809977e-07      2.342342e-07      2.405885e-07      2.405885e-07      1.288560e-07
     38      1.206409e-07      4.302840e-07      2.073467e-07      2.172952e-07      2.172952e-07      1.275125e-07
     41      2.080894e-07      5.428678e-07      1.994068e-07      2.039316e-07      2.039316e-07      1.358058e-07
     43      1.431978e-07      4.030685e-07      2.609898e-07      2.555937e-07      2.555937e-07      1.380594e-07
     46      1.295018e-07      4.224610e-07      1.794655e-07      1.837880e-07      1.837880e-07      1.374309e-07
     47      2.617369e-07      1.403620e-07      1.403620e-07      1.403620e-07      1.403620e-07      1.429748e-07
     51      1.339754e-07      3.595894e-07      1.919911e-07      1.936782e-07      1.936782e-07      1.693986e-07
     53      2.746606e-07      4.255515e-07      2.005767e-07      2.030934e-07      2.030934e-07      1.482253e-07
     57      1.359685e-07      4.913498e-07      2.212892e-07      2.236271e-07      2.236271e-07      1.566432e-07
     58      1.383858e-07      4.088250e-07      2.431134e-07      2.458935e-07      2.458935e-07      1.628219e-07
     59      2.645646e-07      1.549736e-07      1.549736e-07      1.549736e-07      1.549736e-07      1.522756e-07
     61      2.119116e-07      4.415614e-07      2.265996e-07      2.320769e-07      2.320769e-07      1.567412e-07
     62      1.392797e-07      4.624509e-07      2.299051e-07      2.257486e-07      2.257486e-07      1.569656e-07
     67      2.745849e-07      5.424466e-07      2.225806e-07      2.148022e-07      2.148022e-07      1.661600e-07
     68      1.324792e-07      4.179806e-07      1.959072e-07      1.975950e-07      1.975950e-07      2.647303e-07
     69      1.424612e-07      5.226099e-07      1.860048e-07      1.855560e-07      1.855560e-07      1.751677e-07
     71      2.922181e-07      4.038621e-07      2.641228e-07      2.662511e-07      2.662511e-07      1.670022e-07
     73      2.385595e-07      6.906754e-07      2.697811e-07      2.658394e-07      2.658394e-07      1.695733e-07
     74      2.388433e-07      5.565384e-07      2.428012e-07      2.438724e-07      2.438724e-07      1.555605e-07
     76      1.335076e-07      4.982163e-07      2.132034e-07      2.166279e-07      2.166279e-07      3.091439e-07
     79      2.724505e-07      4.404951e-07      2.318821e-07      2.359840e-07      2.359840e-07      1.753737e-07
     82      2.201404e-07      6.103246e-07      2.083082e-07      2.043014e-07      2.043014e-07      1.831030e-07
     83      2.628864e-07      1.791628e-07      1.791628e-07      1.791628e-07      1.791628e-07      1.768091e-07
     85      1.430311e-07      4.365876e-07      1.982840e-07      1.986417e-07      1.986417e-07      2.757655e-07
     86      1.557046e-07      4.788947e-07      2.647328e-07      2.552991e-07      2.552991e-07      1.699690e-07
     87      1.505856e-07      4.641769e-07      2.485872e-07      2.510426e-07      2.510426e-07      1.591133e-07
     89      2.888630e-07      6.814944e-07      2.264877e-07      2.325447e-07      2.325447e-07      1.821610e-07
     92      1.394571e-07      4.746074e-07      1.871657e-07      1.867921e-07      1.867921e-07      3.775324e-07
     93      1.512917e-07      5.044389e-07      2.364509e-07      2.333803e-07      2.333803e-07      1.693592e-07
     94      2.684735e-07      1.508294e-07      1.508294e-07      1.508263e-07      1.508263e-07      1.771150e-07
     95      1.449898e-07      5.854121e-07      2.263481e-07      2.307058e-07      2.307058e-07      2.094239e-07
     97      2.562577e-07      6.076271e-07      3.061153e-07      3.136416e-07      3.136416e-07      1.910664e-07
    101      2.497812e-07      5.302290e-07      2.538472e-07      2.453008e-07      2.453008e-07      1.921079e-07
[18]:
# Grid of panels (two columns), one per FFT backend, showing the L1, L2 and
# L-infinity errors versus N and the linear fits of L2 / L-infinity in log10(size).
backend_count = len(vl2)
grid_rows = (backend_count + 1) // 2
plt.figure(figsize=(13, 1 + (backend_count + 1) * 1.5))

n_values = np.array(vn, dtype=np.float32)
# ndim * log10(N): log10 of the array size, taking N points along each dimension
log10_size = d0.ndim * np.log10(n_values)

for position, name in enumerate(vl2, start=1):
    plt.subplot(grid_rows, 2, position)

    # Linear regression: error ~ intercept + slope * log10(size), for both norms
    reg_l2 = stats.linregress(log10_size, np.array(vl2[name], dtype=np.float32))
    reg_li = stats.linregress(log10_size, np.array(vli[name], dtype=np.float32))
    legend_l2 = r"$L2\approx %s+%s\log(size)$" % (latex_float(reg_l2[1]), latex_float(reg_l2[0]))
    legend_li = r"$L_{\infty}\approx %s+%s\log(size)$" % (latex_float(reg_li[1]), latex_float(reg_li[0]))

    # Data series first, then the unlabeled fitted lines in the matching colour
    plt.semilogx(vn, vl1[name], '-ob', label="$L1$")
    plt.semilogx(vn, vl2[name], '-ok', label=legend_l2)
    plt.semilogx(vn, vli[name], '-og', label=legend_li)
    plt.semilogx(n_values, reg_l2[1] + reg_l2[0] * log10_size, "k-")
    plt.semilogx(n_values, reg_li[1] + reg_li[0] * log10_size, "g-")

    plt.title(name)
    plt.grid(True)
    plt.legend(loc='upper left')
    plt.xlabel("N", loc='right')

plt.suptitle("2D FFT errors (single precision, Bluestein) - " + device_name)
plt.tight_layout()

# Summary figure: L2 error of every backend on a common set of axes.
# Matplotlib format strings per backend: circles for fftw (green) and cufft (red),
# up-triangles for vkfft-cuda, down-triangles for vkfft-opencl; blue marks the LUT variants.
backend_fmt = {
    "fftw": '-og',
    "vkfft-cuda": "-^k",
    "vkfft-cuda-LUT": "-^b",
    "vkfft-opencl": "-vk",
    "vkfft-opencl-LUT": "-vb",
    "cufft": "-or",
}
marker_size = 3

plt.figure()
for name in vl2:
    plt.semilogx(vn, vl2[name], backend_fmt[name], markersize=marker_size, label=name)

plt.xlabel("N", loc='right')
plt.grid(True)
plt.legend(loc='upper left')
plt.suptitle("2D FFT L2 error (single precision, Bluestein) - " + device_name)
plt.tight_layout()


[ ]: