It seems that my code does not run all the time. I'm not sure why this is, I think I used synchronize correctly but apparently not, could someone check the highlighted code to make sure I am using synchronization correctly? The values that are printed at the end should be pretty small, definitely under 1000.
Thank You! If you want to run the code I'll include the file I am using.
import time
import numpy as np
from numba import cuda
def main():
start_time = time.time()
encrypted_asimov = []
with open("theLastQuestion.txt") as f:
fi = ([word for line in f for word in line.split()])
for i in range(0, len(fi)):
word = fi[i]
for j in range(0,len(word)):
encrypted_asimov.append(ord(word[j]))
if j == len(fi[i]):
j = 0
break
short_story = np.asanyarray(encrypted_asimov)
blockdim = 128 #nubmer of threads per block
griddim = 30 #number of blocks in the grid
cooper = np.zeros_like(encrypted_asimov)
stream = cuda.stream()
with stream.auto_synchronize():
output_device = cuda.device_array_like(short_story, stream = stream)
short_story_device = cuda.to_device(short_story,stream = stream)
encrypt[griddim, blockdim](short_story_device, output_device)
output_device.copy_to_host(cooper)
coop = cooper.tolist()
print(coop)
#en = open("encrypted story.txt", 'w')
#en.write(str(coop))
print("--- %s seconds ---" % (time.time() - start_time))
@cuda.jit(inline = True)
def encrypt(list_of_vals, out_vals):
max_iterate = 65532
tx = cuda.threadIdx.x
ty = cuda.blockIdx.x
block_size = cuda.blockDim.x
grid_size = cuda.gridDim.x
start = tx + ty * block_size
stride = block_size * grid_size
r = np.float32(3.8)
s = np.int32(256)
result = np.float32(0)
j = np.int32(0)
random_val = np.float32(.3)
for i in range(0, max_iterate):
a = np.float32(list_of_vals[j] / s)
b = np.float32((list_of_vals[j] + 1) / s)
result = r * random_val * (1 - random_val)
if result >= a and result < b:
random_val = .3
out_vals[j] = i
j = j + 1
if __name__ == '__main__':
main()