import functools, datetime
import numpy as np
def run():
"""
Create fake-data variable `grouped_data` which is a list of 100k entries.
Each element has 3 numpy arrays that are UNIQUE AND SORTED.
Goal: iterate through `grouped_data` to find intersecting values per element.
Ie, length of output equals length of input, `grouped_data`.
In each element, these common values will be used to slice another numpy
array which is not included here.
"""
grouped_data = create_data() # 9% of runtime
overlap = loop_through_intersections(grouped_data) # 91% of runtime
def create_data():
""" Return `grouped_data`, list of 100k entries. Each element has 3 numpy arrays
kern profiler shows this function takes ~ 9% of runtime """
array = np.array(range(2000))
grouped_data = []
for i in range(100000):
ar1 = array[::np.random.randint(1,9)]
ar2 = array[::np.random.randint(1,9)]
ar3 = array[::np.random.randint(1,9)]
grouped_data.append( [ar1, ar2, ar3] )
return grouped_data
def loop_through_intersections(grouped_data):
""" for each element in grouped_data (3 numpy arrays), find the intersecting values
kern profiler shows this function takes ~ 91% of runtime
"""
overlap = []
for f in grouped_data:
overlap.append( functools.reduce(intersect1d, f) )
return overlap
def intersect1d(ar1, ar2):
"""
Find the intersection of two arrays.
Return the sorted, unique values that are in both of the input arrays.
Taken from NumPy. https://github.com/numpy/numpy/blob/v1.14.0/numpy/lib/arraysetops.py#L297-L338
"""
aux = np.concatenate((ar1, ar2))
aux.sort()
return aux[:-1][aux[1:] == aux[:-1]]
####################################################
# Runtime takes ~6s
####################################################
st = datetime.datetime.now()
run(); print datetime.datetime.now() - st
--
You received this message because you are subscribed to the Google Groups "Numba Public Discussion - Public" group.
To unsubscribe from this group and stop receiving emails from it, send an email to numba-users+unsubscribe@continuum.io.
To post to this group, send email to numba...@continuum.io.
To view this discussion on the web visit https://groups.google.com/a/continuum.io/d/msgid/numba-users/77eb9c82-b65f-4152-8ac2-bc85611ee694%40continuum.io.
For more options, visit https://groups.google.com/a/continuum.io/d/optout.
import datetime
import numpy as np
from numba import jit
from numba import cuda
def run():
"""
var `matrix` now stores all index sets in groups of threes (but is N outside of this example).
var `bools` stores overlapping indexes.
"""
rows = 2000
iterations = 10000
matrix = create_data(rows, iterations)
bools = np.zeros((rows, iterations), dtype=bool, order='F')
iterate_over_matrix(matrix, iterations, rows, bools)
def create_data(rows, iterations):
""" Return `grouped_data`, list of 100k entries. Each element has 3 numpy arrays """
grouped_data = [None]*(iterations*3-1)
array = np.array(range(rows))
grouped_data = []
for i in range(iterations):
for j in range(3):
index = np.zeros(rows, dtype=bool)
index[ array[::np.random.randint(1,9)] ] = True
grouped_data.append( index )
matrix = np.array( np.array(grouped_data).T, order='F')
return matrix
@jit
#@jit(nopython=True, nogil=True, cache=True)
def iterate_over_matrix(matrix, iterations, rows, bools):
"""
Since N sets = 3, take three columns at a time from matrix.
Params:
:matrix, (bool np.array) matrix of CCS indexes.
rows = timeseries rows. cols=# exhaustive * 3.
:iterations, (int) number of iterations to run through.
:rows, (int) length of data
:bools, (bool np.array) empty matrix as placeholder
"""
for i in range(iterations):
arr = matrix[:, i*3:(i*3+3)]
check_intersection(bools[:, i], arr[:, 0], arr[:, 1], arr[:, 2], rows)
@jit
#@jit(target='gpu')
#@cuda.jit
#@jit(nopython=True, nogil=True, cache=True)
def check_intersection(index, ar1, ar2, ar3, rows):
"""
Iterate through array indexes. If all are true, set to True.
"""
for i in range(rows):
if ar1[i] == ar2[i] == ar3[i] == True:
index[i] = True
####################################################
# Runtime
####################################################
st = datetime.datetime.now()
run(); print datetime.datetime.now() - st
To unsubscribe from this group and stop receiving emails from it, send an email to numba-users...@continuum.io.
import functools import logging import time import numpy as np from numba import njit def timeit(comment): def timed(fn): def wrapper(*args, **kw): start = time.perf_counter() result = fn(*args, **kw) end = time.perf_counter() logging.info('{0}: {1:.2f}s'.format(comment, end - start)) return result return wrapper return timed @timeit('run') def run(grouped_data): overlap = loop_through_intersections(grouped_data) return overlap @timeit('run_version2') def run_version2(grouped_data): overlap = [] for array_collection in grouped_data: overlap.append(fast_intersect(array_collection)) return overlap @timeit('create_data') def create_data(rng): array = np.array(range(2000)) grouped_data = [] for i in range(100000): ar1 = np.ascontiguousarray(array[::rng.randint(1, 9)]) ar2 = np.ascontiguousarray(array[::rng.randint(1, 9)]) ar3 = np.ascontiguousarray(array[::rng.randint(1, 9)]) grouped_data.append(tuple((ar1, ar2, ar3))) return grouped_data def loop_through_intersections(grouped_data): overlap = []
for f in grouped_data: overlap.append(functools.reduce(intersect1d, f)) return overlap def intersect1d(ar1, ar2
):
aux = np.concatenate((ar1, ar2))
aux.sort()
return aux[:-1][aux[1:] == aux[:-1]]
@njit def fast_intersect(array_collection): for idx in range(len(array_collection)): arr_set = set(array_collection[idx]) if idx == 0: res = arr_set else: res = res.intersection(arr_set) return np.array(list(res)) if __name__ == '__main__': # set logging level to INFO logging.basicConfig(level=logging.INFO) # generate one lot of grouped_data rng = np.random.RandomState(0) grouped_data = create_data(rng) # generate output via original approach output = run(grouped_data) # jit compile _ = fast_intersect(grouped_data[0]) # generate output via second approach output_v2 = run_version2(grouped_data) # some check and balances assert len(output) == len(output_v2) for idx in range(len(output)): s0 = set(output[idx]) s1 = set(output_v2[idx]) assert s0 == s1
import numpy as np
import datetime
from numba import njit
from pycuda import driver, compiler, gpuarray, tools
import pycuda.autoinit
def compare(rows, iterations):
"""
Run CPU & GPU Version. Compare output.
Creates binary matrix called a_cpu which represents a dataset.
The goal is to take 3 columns at a time and if all are 1, pass 1
to the output matrix.
"""
np.random.seed(42)
a_cpu = np.random.randint(0,2, (rows, iterations*3)).astype(np.float32)
st = datetime.datetime.now()
cpu = np.zeros((rows, iterations), dtype=int)
iterate_over_matrix(a_cpu, iterations, rows, cpu)
print '\n\t CPU runtime: ', datetime.datetime.now() - st
st = datetime.datetime.now()
gpu = cuda_attempt(rows, iterations, a_cpu)
print '\n\t GPU runtime: ', datetime.datetime.now() - st
print "cpu.sum(): {:,}".format(cpu.sum())
print "gpu.sum(): {:,}".format(int(gpu.sum()))
def get_kernel_code(iterations):
kernel_code = """
__global__ void MatrixMulKernel(int ROWS, float *A, float *C)
{
const int wC = %(C_SIZE)s;
const int blockId = blockIdx.y * gridDim.x + blockIdx.x;
const int thread = blockId * blockDim.x + threadIdx.x;
if ( thread < (ROWS * wC) ) {
float Aele = A[3*thread] * A[3*thread +1] * A[3*thread +2];
C[thread] = Aele;
}
}
"""
kernel_code = kernel_code % {
'A_SIZE': 3*iterations,
'C_SIZE': iterations,
}
return kernel_code
def cuda_attempt(rows, iterations, a_cpu):
"""
Create data, use gpuarray, get pycuda result.
"""
a_gpu = gpuarray.to_gpu(a_cpu)
c_gpu = gpuarray.empty((rows, iterations), np.float32)
kernel_code = get_kernel_code(iterations)
mod = compiler.SourceModule(kernel_code)
matrixmul = mod.get_function("MatrixMulKernel")
# 2D Grid of 1D Blocks
needed_threads = rows * iterations
threads = 1024
number_blocks = needed_threads // threads + 1
number_blocks = int(np.sqrt(number_blocks)) + 1
assert (number_blocks <= 65535), "number of blocks exceeds allowed limit in 1 dimension"
grid = (number_blocks, number_blocks)
block = (threads, 1, 1)
matrixmul(
np.int32(rows), a_gpu, c_gpu,
grid = grid,
block = block,
)
return c_gpu.get()
#===============================================================================
# CPU CALCULATTIONS
#===============================================================================
@njit
def iterate_over_matrix(matrix, iterations, rows, bools):
for i in range(iterations):
arr = matrix[:, i*3:(i*3+3)]
check_intersection(bools[:, i], arr[:, 0], arr[:, 1], arr[:, 2], rows)
@njit
def check_intersection(index, ar1, ar2, ar3, rows):
for i in range(rows):
if ar1[i] == ar2[i] == ar3[i] == True:
index[i] = True
#===============================================================================
# RUN
#===============================================================================
rows=5
iterations=2
rows=2000
iterations=100000
compare(rows, iterations)
--
You received this message because you are subscribed to the Google Groups "Numba Public Discussion - Public" group.
To unsubscribe from this group and stop receiving emails from it, send an email to numba-users+unsubscribe@continuum.io.
To view this discussion on the web visit https://groups.google.com/a/continuum.io/d/msgid/numba-users/1953ec61-bd73-4816-9cb6-9eba41c2d3c6%40continuum.io.