Something along these lines should work. Each thread derives its angle values from its thread indices (each index multiplied by 15). I haven't actually run this code, so it may contain bugs.
@cuda.jit(device=True)
def func(psi, thetam, phi):
    """Return the product of the three arguments.

    Compiled as a CUDA device function, so it is meant to be called from
    inside a kernel rather than launched directly from the host.
    """
    partial = psi * thetam
    return partial * phi
@cuda.jit
def kernel(d_out):
    """Fill ``d_out`` with ``func`` evaluated at each thread's scaled indices.

    Each thread takes its absolute 3-D grid position, multiplies every
    index by 15, and stores ``func`` of those three values at the matching
    element of the three-dimensional array ``d_out``.
    """
    ix, iy, iz = cuda.grid(3)
    nx, ny, nz = d_out.shape

    # The grid is rounded up to whole blocks, so threads that fall outside
    # the array must not write anything.
    if ix >= nx or iy >= ny or iz >= nz:
        return

    d_out[ix, iy, iz] = func(ix * 15, iy * 15, iz * 15)
# Number of samples along each axis (range divided by a step of 15); these
# define the shape of the output array.
psi_incitements = 360 // 15
theta_incitements = 180 // 15
phi_incitements = 360 // 15

# Device-side output buffer. It is left uninitialised because the kernel
# writes every in-bounds element.
d_out = cuda.device_array(
    (psi_incitements, theta_incitements, phi_incitements),
    dtype=np.float32,
)

# Threads per block along each axis. CUDA limits a block to 1024 threads in
# total, so a cubic block may be at most 10 per side; 8 ** 3 = 512 is safe,
# whereas 32 ** 3 = 32768 would make the launch fail.
TPB = 8

# Ceiling division so the grid covers the whole array even when an axis
# length is not a multiple of TPB.
gridDim = (
    (psi_incitements + TPB - 1) // TPB,
    (theta_incitements + TPB - 1) // TPB,
    (phi_incitements + TPB - 1) // TPB,
)
blockDim = (TPB, TPB, TPB)

kernel[gridDim, blockDim](d_out)

# Copy the results back to a host-side NumPy array.
out = d_out.copy_to_host()