import math
import time
import numpy as np
import numba
from numba import cuda
def generate_iteration_operator(lin1, lin2, cub1, cub2, coupling, order):
exponent = 2**order + 1
def iteration_operator(point, output):
def boundary_condition(x):
temp_mod = (x + 0.5) % 1.0
if temp_mod < 0.0:
return temp_mod + 0.5
else:
return temp_mod - 0.5
p1 = point[0]
p2 = point[1]
q1 = point[2]
q2 = point[3]
new_q1 = boundary_condition(q1 + p1)
new_q2 = boundary_condition(q2 + p2)
coupling_func = coupling * (1.0 - math.cos(2*math.pi*(new_q1+new_q2)))
arg1 = 2 * new_q1
arg2 = 2 * new_q2
dV1 = (lin1*arg1 - (lin1+cub1)*arg1**exponent + cub1*arg1*arg1*arg1)
dV2 = (lin2*arg2 - (lin2+cub2)*arg2**exponent + cub2*arg2*arg2*arg2)
new_p1 = boundary_condition(p1 - dV1 + coupling_func)
new_p2 = boundary_condition(p2 - dV2 + coupling_func)
output[0] = new_p1
output[1] = new_p2
output[2] = new_q1
output[3] = new_q2
return iteration_operator
def generate_cuda_kernel(py_iter_operator, n_init_conds):
dev_operator = cuda.jit(py_iter_operator, device=True, inline=True)
threadsperblock = 32
blockspergrid = (n_init_conds + (threadsperblock - 1)) // threadsperblock
chunk_size = 6
@cuda.jit
def parallel_computation(mem_array):
i = cuda.grid(1)
if i < mem_array.shape[0]:
buffer = cuda.local.array((chunk_size, 4), numba.float64)
n_chunks = mem_array.shape[1] // chunk_size
n_remainder = mem_array.shape[1] % chunk_size
for k in range(4):
buffer[0, k] = mem_array[i, 0, k]
for chunk_id in range(n_chunks):
for iter_idx in range(chunk_size-1):
dev_operator(buffer[iter_idx], buffer[iter_idx+1])
chunk_idx = chunk_id*chunk_size
for j in range(chunk_size):
for k in range(4):
mem_array[i, chunk_idx+j, k] = buffer[j, k]
dev_operator(buffer[chunk_size-1], buffer[0])
for iter_idx in range(n_remainder-1):
dev_operator(buffer[iter_idx],
buffer[iter_idx+1])
remainder_offset = n_chunks*chunk_size
for j in range(n_remainder):
for k in range(4):
mem_array[i, remainder_offset+j, k] = buffer[j, k]
kernel = parallel_computation[blockspergrid, threadsperblock]
return kernel
def meas_time(method, n, *args):
start = time.time()
for _ in range(n):
method(*args)
end = time.time()
duration = (end - start) / n
return "execution time: {} ms".format(round(100*duration))
mapping = generate_iteration_operator(1.04, 1.35, 0.1, 0.1, 0.3, 5)
init_conds = 32*32
orbit_length = 10000
mem_array = np.zeros((init_conds, orbit_length, 4))
mem_array[:,0,:] = np.array([0.001, 0.009, 0.006, 0.003])
cuda_kernel = generate_cuda_kernel(mapping, init_conds)
cuda_kernel(mem_array)
print("computed: {} orbits with length {}".format(init_conds, orbit_length))
print(meas_time(cuda_kernel, 10, mem_array))