Cyrus Omar
unread,Jun 17, 2010, 5:39:11 PM6/17/10Sign in to reply to author
Sign in to forward
You do not have permission to delete messages in this group
Either email addresses are anonymous for this group or you need the view member email addresses permission to view the original message
to ahh-d...@googlegroups.com
The latest changesets fixed most outstanding issues. I would be comfortable with people writing code in cl.oquence now (but there may still be bugs of course, just not super obvious ones since cl.egans uses a lot of features and now works.)
cl.egans produces the following cl.oquence code for a spiking network step function (that can run multiple realizations in parallel and group them into divisions and so on, as in SpikeStream of yore):
def step_fn(timestep, realization_start):
exec "#pragma extension cl_khr_global_int32_base_atomics : enable;"
gid = get_global_id(0)
gsize = get_global_size(0)
first_idx_sim = realization_start * 4000
last_idx_sim = min(first_idx_sim + 4000, 4000)
for idx_sim in (first_idx_sim + gid, last_idx_sim, gsize):
realization_num = idx_sim / 4000
realization_first_idx_sim = realization_num * 4000
realization_first_idx_div = (realization_num - realization_start)*4000
idx_realization = idx_sim - realization_first_idx_sim
idx_division = idx_sim - first_idx_sim
idx_model = idx_realization - 0
idx_state = idx_model + (realization_num - realization_start)*4000
LIF_ge_AtomicReceiver = LIF_ge_AtomicReceiver_in[idx_state]
LIF_ge_AtomicReceiver_in[idx_state] = 0
LIF_gi_AtomicReceiver = LIF_gi_AtomicReceiver_in[idx_state]
LIF_gi_AtomicReceiver_in[idx_state] = 0
LIF_ge_g = (LIF_ge_g_buffer[idx_state]) + (LIF_ge_AtomicReceiver)
LIF_gi_g = (LIF_gi_g_buffer[idx_state]) + (LIF_gi_AtomicReceiver)
LIF_v = LIF_v_buffer[idx_state]
LIF_abs_refractory_t_release = LIF_abs_refractory_t_release_buffer[idx_state]
v_new = LIF_v + 0.1/10.0*(((-LIF_v) + LIF_gi_g*(-20.0 - LIF_v)) + LIF_gi_g*(-20.0 - LIF_v)) if not 0.1*timestep < LIF_abs_refractory_t_release else 0.0
LIF_ge_g_buffer[idx_state] = LIF_ge_g - 0.1/5.0*LIF_ge_g
LIF_gi_g_buffer[idx_state] = LIF_gi_g - 0.1/10.0*LIF_gi_g
if v_new >= 10.0:
if (timestep < 10000 and idx_realization < 4000):
LIF_raster_probe_buffer[((timestep-0)/1*1 + realization_num)*4000 + (idx_realization - 0)/1] = 1
LIF_v_buffer[idx_state] = 0.0
LIF_abs_refractory_t_release_buffer[idx_state] = 0.1*timestep + 5.0
target = LIF_ge_AtomicReceiver_out if idx_model < 3200 else LIF_gi_AtomicReceiver_out
neighbors_offset = neighbor_data[idx_realization]
neighbor_size = neighbor_data[neighbors_offset]
neighbors = neighbor_data + neighbors_offset + 1
for i in (0, neighbor_size, 1):
atom_add(target + realization_first_idx_div + neighbors[i], 1)
pass # in case no one writes out any code in this branch
else:
if (timestep < 10000 and idx_realization < 4000):
LIF_raster_probe_buffer[((timestep-0)/1*1 + realization_num)*4000 + (idx_realization - 0)/1] = 0
pass # in case no one writes out any code in this branch
And that produces the following OpenCL code which compiles (all the buffer allocations are made into "constants" -- really implicit variables but if we use a hack or make a CUDA backend they'd be pointer literals in the future):
__kernel void step_fn___0(int timestep, int realization_start, __global int* __implicit__0, __global int* __implicit__1, __global float* __implicit__2, __global float* __implicit__3, __global float* __implicit__4, __global float* __implicit__5, __global int* __implicit__6, __global int* __implicit__7, __global int* __implicit__8, __global int* __implicit__9) {
// Automatically generated variable declarations
__global int* neighbors;
size_t realization_num;
size_t realization_first_idx_sim;
size_t realization_first_idx_div;
size_t idx_realization;
float LIF_v;
size_t idx_state;
size_t gid;
float v_new;
size_t gsize;
float LIF_gi_g;
size_t idx_model;
float LIF_abs_refractory_t_release;
int neighbors_offset;
int LIF_gi_AtomicReceiver;
__global int* target;
int i;
size_t idx_sim;
float LIF_ge_g;
int neighbor_size;
size_t idx_division;
int last_idx_sim;
int LIF_ge_AtomicReceiver;
int first_idx_sim;
#pragma extension cl_khr_global_int32_base_atomics : enable;
gid = get_global_id(0);
gsize = get_global_size(0);
first_idx_sim = (realization_start * 4000);
last_idx_sim = min((first_idx_sim + 4000), 4000);
for (idx_sim = (first_idx_sim + gid); (idx_sim < last_idx_sim); idx_sim += gsize) {
realization_num = (idx_sim / 4000);
realization_first_idx_sim = (realization_num * 4000);
realization_first_idx_div = ((realization_num - realization_start) * 4000);
idx_realization = (idx_sim - realization_first_idx_sim);
idx_division = (idx_sim - first_idx_sim);
idx_model = (idx_realization - 0);
idx_state = (idx_model + ((realization_num - realization_start) * 4000));
LIF_ge_AtomicReceiver = __implicit__0[idx_state];
__implicit__0[idx_state] = 0;
LIF_gi_AtomicReceiver = __implicit__1[idx_state];
__implicit__1[idx_state] = 0;
LIF_ge_g = (__implicit__2[idx_state] + LIF_ge_AtomicReceiver);
LIF_gi_g = (__implicit__3[idx_state] + LIF_gi_AtomicReceiver);
LIF_v = __implicit__4[idx_state];
LIF_abs_refractory_t_release = __implicit__5[idx_state];
v_new = !((0.1f * timestep) < LIF_abs_refractory_t_release) ? (LIF_v + ((0.1f / 10.0f) * ((-LIF_v + (LIF_gi_g * (-20.0f - LIF_v))) + (LIF_gi_g * (-20.0f - LIF_v))))) : 0.0f;
__implicit__2[idx_state] = (LIF_ge_g - ((0.1f / 5.0f) * LIF_ge_g));
__implicit__3[idx_state] = (LIF_gi_g - ((0.1f / 10.0f) * LIF_gi_g));
if ((v_new >= 10.0f)) {
if (((timestep < 10000) && (idx_realization < 4000))) {
__implicit__6[((((((timestep - 0) / 1) * 1) + realization_num) * 4000) + ((idx_realization - 0) / 1))] = 1;
}
__implicit__4[idx_state] = 0.0f;
__implicit__5[idx_state] = ((0.1f * timestep) + 5.0f);
target = (idx_model < 3200) ? __implicit__7 : __implicit__8;
neighbors_offset = __implicit__9[idx_realization];
neighbor_size = __implicit__9[neighbors_offset];
neighbors = ((__implicit__9 + neighbors_offset) + 1);
for (i = 0; (i < neighbor_size); i += 1) {
atom_add(((target + realization_first_idx_div) + neighbors[i]), 1);
}
;
}else {
if (((timestep < 10000) && (idx_realization < 4000))) {
__implicit__6[((((((timestep - 0) / 1) * 1) + realization_num) * 4000) + ((idx_realization - 0) / 1))] = 0;
}
;
}
}
}