Hi, I have been using CUSP and noticed the behavior below. The example gives the correct output only when the dense copy B is passed to multiply. When the CSR matrix A is passed instead, the result is all zeros. Any ideas?
int main()
{
int state_size = 15;
int batch_size = 2;
// allocate storage for (10,10) matrix with 5 nonzeros
cusp::csr_matrix<int,float,cusp::device_memory> A(state_size, state_size, 5);
// initialize matrix entries on host
A.row_offsets[0] = 0; // first offset is always zero
A.row_offsets[1] = 0;
A.row_offsets[2] = 1;
A.row_offsets[3] = 2;
A.row_offsets[4] = 3;
A.row_offsets[5] = 4;
A.row_offsets[6] = 5; A.row_offsets[7] = 5; A.row_offsets[8] = 5; A.row_offsets[9] = 5; A.row_offsets[10] = 5; A.row_offsets[11] = 5;
A.row_offsets[12] = 5; A.row_offsets[13] = 5; A.row_offsets[14] = 5; A.row_offsets[15] = 5;
A.column_indices[0] = 0; A.values[0] = 0.19f;
A.column_indices[1] = 0; A.values[1] = 0.2f;
A.column_indices[2] = 0; A.values[2] = 0.21f;
A.column_indices[3] = 0; A.values[3] = 0.22f;
A.column_indices[4] = 0; A.values[4] = 0.18f;
cusp::array2d<float,cusp::device_memory> B(A);
cusp::print(B);
cusp::array2d<float, cusp::device_memory> prev_mat(state_size, batch_size, 0.0f);
cusp::array2d<float, cusp::device_memory> next_mat(state_size, batch_size, 0.0f);
for(int i = 0; i < batch_size; i++){
prev_mat(0,i) = 1.0f;
}
multiply(A, prev_mat, next_mat); // this one gives the wrong answer
// multiply(B, prev_mat, next_mat); // this is fine.
cout << "Previous matrix:" << endl;
cusp::print(prev_mat);
cout << "Next matrix:" << endl;
cusp::print(next_mat);
}