import numpy as np
import matplotlib.pyplot as plt
import math
N = 128*2
def main():
x = np.linspace(0,1,N, dtype = np.float32)
#from parallel import pArray, derivativeArray
from shared import pArray, derivativeArray
f = pArray(x)
plt.plot(x, f, 'o', label = 'f(x) = x^2')
dfdx = derivativeArray(f, 1)
plt.plot(x, dfdx, 'o', label ='First derivative')
#print dfdx
#d2fdx2 = derivativeArray(f, 2)
#plt.plot(x, d2fdx2, 'o', label = 'Second derivative')
plt.legend()
plt.show()
if __name__ == '__main__':
main()
import numpy as np
from numba import cuda, float32
TPB = 128
NSHARED = 130 # This value must agree with TPB + 2*RAD
@cuda.jit(device = True)
def parab(x0):
return x0**2
@cuda.jit
def pKernel(d_f, d_x):
i = cuda.grid(1)
n = d_x.size
if i < n:
d_f[i] = parab(d_x[i])
def pArray(x):
n = x.size
d_x = cuda.to_device(x)
d_f = cuda.device_array(n, dtype = np.float32)
pKernel[(n + TPB - 1)/TPB, TPB](d_f, d_x)
return d_f.copy_to_host()
@cuda.jit
def derivativeKernel(d_deriv, d_f, d_stencil):
i = cuda.grid(1)
n = d_f.size
radius = len(d_stencil)/2
sh_f = cuda.shared.array(NSHARED, dtype = float32)
if i >= n:
return
tIdx = cuda.threadIdx.x
shIdx = tIdx + radius
#Load Regular Cells
sh_f[shIdx] = d_f[i]
#Load Left Halo Cells
if tIdx < radius:
if i == 0:
sh_f[tIdx] = 0
else:
sh_f[tIdx] = d_f[i - radius]
#Load Right Halo Cells
elif tIdx > cuda.blockDim.x - 1 - radius:
if i == n - 1:
sh_f[tIdx + 2*radius] = 0
else:
sh_f[tIdx + 2*radius] = d_f[i + 2*radius]
cuda.syncthreads()
#Only write values where the full stencil is "in bounds"
if radius <= i <= n - 1 - radius:
for j in range(len(d_stencil)):
d_deriv[i] += 1 #sh_f[shIdx + j - radius]*d_stencil[j]
def derivativeArray(f, order):
n = f.size
h = 1./(n - 1.)
if order == 1:
stencil = (1./(2.*h))*np.array([-1., 0., 1.], dtype = np.float32)
elif order == 2:
stencil = (1./h**2)*np.array([1., -2., 1.], dtype = np.float32)
d_f = cuda.to_device(f)
#d_deriv = cuda.to_device(np.zeros(n, dtype = np.float32))
d_deriv = cuda.device_array(n, dtype = np.float32)
d_stencil = cuda.to_device(stencil)
derivativeKernel[(n + TPB - 1)/TPB, TPB](d_deriv, d_f, d_stencil)
return d_deriv.copy_to_host()