Problems with Synchronization

0 views

Skip to first unread message

Bobby Garza

unread,

Jun 22, 2017, 11:33:13 AM6/22/17

to Numba Public Discussion - Public

It seems that my code does not run all the time. I'm not sure why this is, I think I used synchronize correctly but apparently not, could someone check the highlighted code to make sure I am using synchronization correctly? The values that are printed at the end should be pretty small, definitely under 1000.

Thank You! If you want to run the code I'll include the file I am using.

import time
import numpy as np
from numba import cuda

def main():
    start_time = time.time()
    encrypted_asimov = []
    with open("theLastQuestion.txt") as f:
        fi = ([word for line in f for word in line.split()])
    for i in range(0, len(fi)):
        word = fi[i]
        for j in range(0,len(word)):
            encrypted_asimov.append(ord(word[j]))
            if j == len(fi[i]):
                j = 0
                break
    short_story = np.asanyarray(encrypted_asimov)
    blockdim = 128 #nubmer of threads per block
    griddim = 30     #number of blocks in the grid
    cooper = np.zeros_like(encrypted_asimov)
    stream = cuda.stream()
    with stream.auto_synchronize():
        output_device = cuda.device_array_like(short_story, stream = stream)
        short_story_device = cuda.to_device(short_story,stream = stream)
        encrypt[griddim, blockdim](short_story_device, output_device)
        output_device.copy_to_host(cooper)
    coop = cooper.tolist()
    print(coop)

    #en = open("encrypted story.txt", 'w')
    #en.write(str(coop))
    print("--- %s seconds ---" % (time.time() - start_time))

@cuda.jit(inline = True)
def encrypt(list_of_vals, out_vals):
    max_iterate = 65532
    tx = cuda.threadIdx.x
    ty = cuda.blockIdx.x
    block_size = cuda.blockDim.x
    grid_size = cuda.gridDim.x
    start = tx + ty * block_size
    stride = block_size * grid_size
    r = np.float32(3.8)
    s = np.int32(256)
    result = np.float32(0)
    j = np.int32(0)
    random_val = np.float32(.3)
    for i in range(0, max_iterate):
        a = np.float32(list_of_vals[j] / s)
        b = np.float32((list_of_vals[j] + 1) / s)
        result = r * random_val * (1 - random_val)
        if result >= a and result < b:
            random_val = .3
            out_vals[j] = i
            j = j + 1












if __name__ == '__main__':
    main()