I am trying to run a simulation with CUDA backend on 12 GPUs. However I get the following error:
Traceback (most recent call last):
File "/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/util.py", line 33, in __call__
res = cache[key]
KeyError: (<function CUDAKernelProvider._build_kernel at 0x2b76ca4d9268>, b'\x80\x03X\x08\x00\x00\x00mpicfluxq\x00X3&\x00\x00\n\n// AoSoA macros\n#define SOA_SZ 32\n#define SOA_IX(a, v, nv) ((((a) / SOA_SZ)*(nv) + (v))*SOA_SZ + (a) % SOA_SZ)\n\n// Typedefs\ntypedef double fpdtype_t;\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n__global__ void mpicflux(int _nx, fpdtype_t* __restrict__ gradul_v, const int* __restrict__ gradul_vix, const int* __restrict__ gradul_vrstri, const fpdtype_t* __restrict__ magnl_v, const fpdtype_t* __restrict__ nl_v, int ldnl, fpdtype_t* __restrict__ ul_v, const int* __restrict__ ul_vix, fpdtype_t* __restrict__ ur_v)\n {\n int _x = blockIdx.x*blockDim.x + threadIdx.x;\n #define X_IDX (_x)\n #define X_IDX_AOSOA(v, nv) SOA_IX(X_IDX, v, nv)\n if (_x < _nx)\n {\n \n // Perform the Riemann solve\n fpdtype_t ficomm[5], fvcomm;\n {\n\n // Compute the left and right fluxes + velocities and pressures\n fpdtype_t fl_[3][5], fr_[3][5];\n fpdtype_t vl_[3], vr_[3];\n fpdtype_t pl_, pr_;\n\n {\n\n fpdtype_t invrho__ = 1.0/ul_v[ul_vix[X_IDX] + SOA_SZ*(0)], E__ = ul_v[ul_vix[X_IDX] + SOA_SZ*(4)];\n\n // Compute the velocities\n fpdtype_t rhov__[3];\n rhov__[0] = ul_v[ul_vix[X_IDX] + SOA_SZ*(1)];\n vl_[0] = invrho__*rhov__[0];\n rhov__[1] = ul_v[ul_vix[X_IDX] + SOA_SZ*(2)];\n vl_[1] = invrho__*rhov__[1];\n rhov__[2] = ul_v[ul_vix[X_IDX] + SOA_SZ*(3)];\n vl_[2] = invrho__*rhov__[2];\n\n // Compute the pressure\n pl_ = 0.3999999999999999*(E__ - 0.5*invrho__*((rhov__[0])*(rhov__[0]) + (rhov__[1])*(rhov__[1]) + (rhov__[2])*(rhov__[2])));\n\n // Density and energy fluxes\n fl_[0][0] = rhov__[0];\n fl_[0][4] = (E__ + pl_)*vl_[0];\n fl_[1][0] = rhov__[1];\n fl_[1][4] = (E__ + pl_)*vl_[1];\n fl_[2][0] = rhov__[2];\n fl_[2][4] = (E__ + pl_)*vl_[2];\n\n // Momentum fluxes\n fl_[0][1] = rhov__[0]*vl_[0] + pl_;\n fl_[0][2] = rhov__[0]*vl_[1];\n fl_[0][3] = rhov__[0]*vl_[2];\n fl_[1][1] = rhov__[1]*vl_[0];\n fl_[1][2] = rhov__[1]*vl_[1] + pl_;\n fl_[1][3] = rhov__[1]*vl_[2];\n fl_[2][1] = rhov__[2]*vl_[0];\n fl_[2][2] = rhov__[2]*vl_[1];\n fl_[2][3] = rhov__[2]*vl_[2] + pl_;\n\n};\n {\n\n fpdtype_t invrho__ = 1.0/ur_v[_nx*(0) + X_IDX], E__ = ur_v[_nx*(4) + X_IDX];\n\n // Compute the velocities\n fpdtype_t rhov__[3];\n rhov__[0] = ur_v[_nx*(1) + X_IDX];\n vr_[0] = invrho__*rhov__[0];\n rhov__[1] = ur_v[_nx*(2) + X_IDX];\n vr_[1] = invrho__*rhov__[1];\n rhov__[2] = ur_v[_nx*(3) + X_IDX];\n vr_[2] = invrho__*rhov__[2];\n\n // Compute the pressure\n pr_ = 0.3999999999999999*(E__ - 0.5*invrho__*((rhov__[0])*(rhov__[0]) + (rhov__[1])*(rhov__[1]) + (rhov__[2])*(rhov__[2])));\n\n // Density and energy fluxes\n fr_[0][0] = rhov__[0];\n fr_[0][4] = (E__ + pr_)*vr_[0];\n fr_[1][0] = rhov__[1];\n fr_[1][4] = (E__ + pr_)*vr_[1];\n fr_[2][0] = rhov__[2];\n fr_[2][4] = (E__ + pr_)*vr_[2];\n\n // Momentum fluxes\n fr_[0][1] = rhov__[0]*vr_[0] + pr_;\n fr_[0][2] = rhov__[0]*vr_[1];\n fr_[0][3] = rhov__[0]*vr_[2];\n fr_[1][1] = rhov__[1]*vr_[0];\n fr_[1][2] = rhov__[1]*vr_[1] + pr_;\n fr_[1][3] = rhov__[1]*vr_[2];\n fr_[2][1] = rhov__[2]*vr_[0];\n fr_[2][2] = rhov__[2]*vr_[1];\n fr_[2][3] = rhov__[2]*vr_[2] + pr_;\n\n};\n\n // Sum the left and right velocities and take the normal\n fpdtype_t nv_ = ((nl_v[ldnl*(0) + X_IDX])*(vl_[0] + vr_[0]) + (nl_v[ldnl*(1) + X_IDX])*(vl_[1] + vr_[1]) + (nl_v[ldnl*(2) + X_IDX])*(vl_[2] + vr_[2]));\n\n // Estimate the maximum wave speed / 2\n fpdtype_t a_ = sqrt(0.35*(pl_ + pr_)/(ul_v[ul_vix[X_IDX] + SOA_SZ*(0)] + ur_v[_nx*(0) + X_IDX]))\n + 0.25*fabs(nv_);\n\n // Output\n ficomm[0] = 0.5*(nl_v[ldnl*(0) + X_IDX]*(fl_[0][0] + fr_[0][0]) + nl_v[ldnl*(1) + X_IDX]*(fl_[1][0] + fr_[1][0]) + nl_v[ldnl*(2) + X_IDX]*(fl_[2][0] + fr_[2][0]))\n + a_*(ul_v[ul_vix[X_IDX] + SOA_SZ*(0)] - ur_v[_nx*(0) + X_IDX]);\n ficomm[1] = 0.5*(nl_v[ldnl*(0) + X_IDX]*(fl_[0][1] + fr_[0][1]) + nl_v[ldnl*(1) + X_IDX]*(fl_[1][1] + fr_[1][1]) + nl_v[ldnl*(2) + X_IDX]*(fl_[2][1] + fr_[2][1]))\n + a_*(ul_v[ul_vix[X_IDX] + SOA_SZ*(1)] - ur_v[_nx*(1) + X_IDX]);\n ficomm[2] = 0.5*(nl_v[ldnl*(0) + X_IDX]*(fl_[0][2] + fr_[0][2]) + nl_v[ldnl*(1) + X_IDX]*(fl_[1][2] + fr_[1][2]) + nl_v[ldnl*(2) + X_IDX]*(fl_[2][2] + fr_[2][2]))\n + a_*(ul_v[ul_vix[X_IDX] + SOA_SZ*(2)] - ur_v[_nx*(2) + X_IDX]);\n ficomm[3] = 0.5*(nl_v[ldnl*(0) + X_IDX]*(fl_[0][3] + fr_[0][3]) + nl_v[ldnl*(1) + X_IDX]*(fl_[1][3] + fr_[1][3]) + nl_v[ldnl*(2) + X_IDX]*(fl_[2][3] + fr_[2][3]))\n + a_*(ul_v[ul_vix[X_IDX] + SOA_SZ*(3)] - ur_v[_nx*(3) + X_IDX]);\n ficomm[4] = 0.5*(nl_v[ldnl*(0) + X_IDX]*(fl_[0][4] + fr_[0][4]) + nl_v[ldnl*(1) + X_IDX]*(fl_[1][4] + fr_[1][4]) + nl_v[ldnl*(2) + X_IDX]*(fl_[2][4] + fr_[2][4]))\n + a_*(ul_v[ul_vix[X_IDX] + SOA_SZ*(4)] - ur_v[_nx*(4) + X_IDX]);\n\n};\n\n fpdtype_t fvl[3][5] = {{0}};\n {\n\n fpdtype_t rho_ = ul_v[ul_vix[X_IDX] + SOA_SZ*(0)];\n fpdtype_t rhou_ = ul_v[ul_vix[X_IDX] + SOA_SZ*(1)], rhov_ = ul_v[ul_vix[X_IDX] + SOA_SZ*(2)], rhow_ = ul_v[ul_vix[X_IDX] + SOA_SZ*(3)];\n fpdtype_t E_ = ul_v[ul_vix[X_IDX] + SOA_SZ*(4)];\n\n fpdtype_t rcprho_ = 1.0/rho_;\n fpdtype_t u_ = rcprho_*rhou_, v_ = rcprho_*rhov_, w_ = rcprho_*rhow_;\n\n fpdtype_t rho_x_ = gradul_v[gradul_vix[X_IDX] + gradul_vrstri[X_IDX]*(0) + SOA_SZ*(0)];\n fpdtype_t rho_y_ = gradul_v[gradul_vix[X_IDX] + gradul_vrstri[X_IDX]*(1) + SOA_SZ*(0)];\n fpdtype_t rho_z_ = gradul_v[gradul_vix[X_IDX] + gradul_vrstri[X_IDX]*(2) + SOA_SZ*(0)];\n\n // Velocity derivatives (rho_*grad[u_,v_,w_])\n fpdtype_t u_x_ = gradul_v[gradul_vix[X_IDX] + gradul_vrstri[X_IDX]*(0) + SOA_SZ*(1)] - u_*rho_x_;\n fpdtype_t u_y_ = gradul_v[gradul_vix[X_IDX] + gradul_vrstri[X_IDX]*(1) + SOA_SZ*(1)] - u_*rho_y_;\n fpdtype_t u_z_ = gradul_v[gradul_vix[X_IDX] + gradul_vrstri[X_IDX]*(2) + SOA_SZ*(1)] - u_*rho_z_;\n fpdtype_t v_x_ = gradul_v[gradul_vix[X_IDX] + gradul_vrstri[X_IDX]*(0) + SOA_SZ*(2)] - v_*rho_x_;\n fpdtype_t v_y_ = gradul_v[gradul_vix[X_IDX] + gradul_vrstri[X_IDX]*(1) + SOA_SZ*(2)] - v_*rho_y_;\n fpdtype_t v_z_ = gradul_v[gradul_vix[X_IDX] + gradul_vrstri[X_IDX]*(2) + SOA_SZ*(2)] - v_*rho_z_;\n fpdtype_t w_x_ = gradul_v[gradul_vix[X_IDX] + gradul_vrstri[X_IDX]*(0) + SOA_SZ*(3)] - w_*rho_x_;\n fpdtype_t w_y_ = gradul_v[gradul_vix[X_IDX] + gradul_vrstri[X_IDX]*(1) + SOA_SZ*(3)] - w_*rho_y_;\n fpdtype_t w_z_ = gradul_v[gradul_vix[X_IDX] + gradul_vrstri[X_IDX]*(2) + SOA_SZ*(3)] - w_*rho_z_;\n\n fpdtype_t E_x_ = gradul_v[gradul_vix[X_IDX] + gradul_vrstri[X_IDX]*(0) + SOA_SZ*(4)];\n fpdtype_t E_y_ = gradul_v[gradul_vix[X_IDX] + gradul_vrstri[X_IDX]*(1) + SOA_SZ*(4)];\n fpdtype_t E_z_ = gradul_v[gradul_vix[X_IDX] + gradul_vrstri[X_IDX]*(2) + SOA_SZ*(4)];\n\n // Compute the temperature and viscosity\n fpdtype_t cpT_ = 1.4*(rcprho_*E_ - 0.5*(u_*u_ + v_*v_ + w_*w_));\n fpdtype_t Trat_ = 0.4*cpT_;\n fpdtype_t mu_c_ = 4.59985915432e-05*Trat_*sqrt(Trat_)\n / (cpT_ + 1.270376356);\n\n // Compute temperature derivatives (c_v*dT/d[x,y,z])\n fpdtype_t T_x_ = rcprho_*(E_x_ - (rcprho_*rho_x_*E_ + u_*u_x_ + v_*v_x_ + w_*w_x_));\n fpdtype_t T_y_ = rcprho_*(E_y_ - (rcprho_*rho_y_*E_ + u_*u_y_ + v_*v_y_ + w_*w_y_));\n fpdtype_t T_z_ = rcprho_*(E_z_ - (rcprho_*rho_z_*E_ + u_*u_z_ + v_*v_z_ + w_*w_z_));\n\n // Negated stress tensor elements\n fpdtype_t t_xx_ = -2*mu_c_*rcprho_*(u_x_ - 0.3333333333333333*(u_x_ + v_y_ + w_z_));\n fpdtype_t t_yy_ = -2*mu_c_*rcprho_*(v_y_ - 0.3333333333333333*(u_x_ + v_y_ + w_z_));\n fpdtype_t t_zz_ = -2*mu_c_*rcprho_*(w_z_ - 0.3333333333333333*(u_x_ + v_y_ + w_z_));\n fpdtype_t t_xy_ = -mu_c_*rcprho_*(v_x_ + u_y_);\n fpdtype_t t_xz_ = -mu_c_*rcprho_*(u_z_ + w_x_);\n fpdtype_t t_yz_ = -mu_c_*rcprho_*(w_y_ + v_z_);\n\n fvl[0][1] += t_xx_; fvl[1][1] += t_xy_; fvl[2][1] += t_xz_;\n fvl[0][2] += t_xy_; fvl[1][2] += t_yy_; fvl[2][2] += t_yz_;\n fvl[0][3] += t_xz_; fvl[1][3] += t_yz_; fvl[2][3] += t_zz_;\n\n fvl[0][4] += u_*t_xx_ + v_*t_xy_ + w_*t_xz_ + -mu_c_*1.9444444444444444*T_x_;\n fvl[1][4] += u_*t_xy_ + v_*t_yy_ + w_*t_yz_ + -mu_c_*1.9444444444444444*T_y_;\n fvl[2][4] += u_*t_xz_ + v_*t_yz_ + w_*t_zz_ + -mu_c_*1.9444444444444444*T_z_;\n\n};\n {\n\n\n};\n\n\n fvcomm = nl_v[ldnl*(0) + X_IDX]*fvl[0][0] + nl_v[ldnl*(1) + X_IDX]*fvl[1][0] + nl_v[ldnl*(2) + X_IDX]*fvl[2][0];\n fvcomm += 0.1*(ul_v[ul_vix[X_IDX] + SOA_SZ*(0)] - ur_v[_nx*(0) + X_IDX]);\n\n ul_v[ul_vix[X_IDX] + SOA_SZ*(0)] = magnl_v[X_IDX]*(ficomm[0] + fvcomm);\n fvcomm = nl_v[ldnl*(0) + X_IDX]*fvl[0][1] + nl_v[ldnl*(1) + X_IDX]*fvl[1][1] + nl_v[ldnl*(2) + X_IDX]*fvl[2][1];\n fvcomm += 0.1*(ul_v[ul_vix[X_IDX] + SOA_SZ*(1)] - ur_v[_nx*(1) + X_IDX]);\n\n ul_v[ul_vix[X_IDX] + SOA_SZ*(1)] = magnl_v[X_IDX]*(ficomm[1] + fvcomm);\n fvcomm = nl_v[ldnl*(0) + X_IDX]*fvl[0][2] + nl_v[ldnl*(1) + X_IDX]*fvl[1][2] + nl_v[ldnl*(2) + X_IDX]*fvl[2][2];\n fvcomm += 0.1*(ul_v[ul_vix[X_IDX] + SOA_SZ*(2)] - ur_v[_nx*(2) + X_IDX]);\n\n ul_v[ul_vix[X_IDX] + SOA_SZ*(2)] = magnl_v[X_IDX]*(ficomm[2] + fvcomm);\n fvcomm = nl_v[ldnl*(0) + X_IDX]*fvl[0][3] + nl_v[ldnl*(1) + X_IDX]*fvl[1][3] + nl_v[ldnl*(2) + X_IDX]*fvl[2][3];\n fvcomm += 0.1*(ul_v[ul_vix[X_IDX] + SOA_SZ*(3)] - ur_v[_nx*(3) + X_IDX]);\n\n ul_v[ul_vix[X_IDX] + SOA_SZ*(3)] = magnl_v[X_IDX]*(ficomm[3] + fvcomm);\n fvcomm = nl_v[ldnl*(0) + X_IDX]*fvl[0][4] + nl_v[ldnl*(1) + X_IDX]*fvl[1][4] + nl_v[ldnl*(2) + X_IDX]*fvl[2][4];\n fvcomm += 0.1*(ul_v[ul_vix[X_IDX] + SOA_SZ*(4)] - ur_v[_nx*(4) + X_IDX]);\n\n ul_v[ul_vix[X_IDX] + SOA_SZ*(4)] = magnl_v[X_IDX]*(ficomm[4] + fvcomm);\n\n }\n #undef X_IDX\n #undef X_IDX_AOSOA\n }\n\nq\x01]q\x02(cnumpy\nint32\nq\x03cnumpy\nint64\nq\x04h\x04h\x04h\x04h\x04h\x03h\x04h\x04h\x04e\x87q\x05.', b'\x80\x03}q\x00.')
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/bin/pyfr", line 11, in <module>
sys.exit(main())
File "/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/__main__.py", line 112, in main
args.process(args)
File "/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/__main__.py", line 263, in process_restart
_process_common(args, mesh, soln, cfg)
File "/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/__main__.py", line 226, in _process_common
solver = get_solver(backend, rallocs, mesh, soln, cfg)
File "/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/solvers/__init__.py", line 16, in get_solver
return get_integrator(backend, systemcls, rallocs, mesh, initsoln, cfg)
File "/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/integrators/__init__.py", line 36, in get_integrator
return integrator(backend, systemcls, rallocs, mesh, initsoln, cfg)
File "/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/integrators/std/controllers.py", line 80, in __init__
super().__init__(*args, **kwargs)
File "/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/integrators/std/controllers.py", line 14, in __init__
super().__init__(*args, **kwargs)
File "/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/integrators/std/steppers.py", line 159, in __init__
super().__init__(*args, **kwargs)
File "/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/integrators/std/base.py", line 19, in __init__
nregs=self.nregs, cfg=cfg)
File "/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/solvers/base/system.py", line 64, in __init__
self._gen_kernels(eles, int_inters, mpi_inters, bc_inters)
File "/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/solvers/base/system.py", line 193, in _gen_kernels
kernels[pn, kn].append(kgetter())
File "/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/solvers/navstokes/inters.py", line 69, in <lambda>
magnl=self._mag_pnorm_lhs, nl=self._norm_pnorm_lhs
File "/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/backends/base/backend.py", line 163, in kernel
return kern(*args, **kwargs)
File "/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/backends/base/kernels.py", line 162, in kernel_meth
fun = self._build_kernel(name, src, list(it.chain(*argt)))
File "/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/util.py", line 35, in __call__
res = cache[key] = self.func(*args, **kwargs)
File "/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/backends/cuda/provider.py", line 20, in _build_kernel
fun = compiler.SourceModule(src).get_function(name)
File "/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pycuda/compiler.py", line 294, in __init__
self.module = module_from_buffer(cubin)
pycuda._driver.LogicError: cuModuleLoadDataEx failed: device kernel image is invalid - error : Binary format for key='0', ident='' is not recognized
application called MPI_Abort(MPI_COMM_WORLD, 1) - process 7
The same case works just fine with OpenMP backend. I was wondering if you could help me with the error.