Hello !
I have the following function which inverses a matrix:
void matrix_inverse_magma(vector<vector<double>> const &F_matrix, vector<vector<double>> &F_output) {
// Index for loop and arrays
int i, j, ip, idx;
// Start magma part
magma_init (); // initialize Magma
magma_queue_t queue=NULL;
magma_int_t dev=0;
magma_queue_create(dev ,&queue );
double gpu_time , *dwork; // dwork - workspace
magma_int_t ldwork; // size of dwork
magma_int_t *piv, info; // piv - array of indices of inter -
magma_int_t m = F_matrix.size();
cout << "m = " << m << endl;
printf("m = %d\n", m);
// changed rows; a - mxm matrix
magma_int_t mm=m*m; // size of a, r, c
double *a; // a- mxm matrix on the host
double *d_a; // d_a - mxm matrix a on the device
double *d_r; // d_r - mxm matrix r on the device
double *d_c; // d_c - mxm matrix c on the device
magma_int_t ione = 1;
magma_int_t ISEED [4] = { 0,0,0,1 }; // seed
magma_int_t err;
const double alpha = 1.0; // alpha =1
const double beta = 0.0; // beta=0
ldwork = m * magma_get_dgetri_nb( m ); // optimal block size
// allocate matrices
err = magma_dmalloc_cpu( &a , mm ); // host memory for a
err = magma_dmalloc( &d_a , mm ); // device memory for a
err = magma_dmalloc( &d_r , mm ); // device memory for r
err = magma_dmalloc( &d_c , mm ); // device memory for c
err = magma_dmalloc( &dwork , ldwork );// dev. mem. for ldwork
piv=( magma_int_t *) malloc(m*sizeof(magma_int_t ));// host mem.
// Convert matrix to *a double pointer
for (i = 0; i<m; i++){
for (j = 0; j<m; j++){
idx = i*m + j;
a[idx] = F_matrix[i][j];
}
}
// TEST working : generate random matrix a // for piv
//lapackf77_dlarnv (&ione ,ISEED ,&mm ,a); // randomize a
magma_dsetmatrix( m, m, a, m, d_a, m, queue); // copy a -> d_a
magmablas_dlacpy(MagmaFull, m, m, d_a , m, d_r, m, queue);//d_a ->d_r
// find the inverse matrix: d_a*X=I using the LU factorization
// with partial pivoting and row interchanges computed by
// magma_dgetrf_gpu; row i is interchanged with row piv(i);
// d_a -mxm matrix; d_a is overwritten by the inverse
gpu_time = magma_sync_wtime(NULL);
magma_dgetrf_gpu( m, m, d_a, m, piv, &info);
magma_dgetri_gpu(m, d_a, m, piv, dwork, ldwork, &info);
gpu_time = magma_sync_wtime(NULL)-gpu_time;
magma_dgemm(MagmaNoTrans ,MagmaNoTrans ,m, m, m, alpha, d_a ,m,
d_r ,m,beta ,d_c ,m,queue); // multiply a^-1*a
printf("magma_dgetrf_gpu + magma_dgetri_gpu time: %7.5f sec.\
\n",gpu_time );
magma_dgetmatrix( m, m, d_c , m, a, m, queue); // copy d_c ->a
printf("upper left corner of a^-1*a:\n");
magma_dprint( 4, 4, a, m ); // part of a^-1*a
// Save Final matrix
for (i = 0; i<m; i++){
for (j = 0; j<m; j++){
idx = i*m + j;
F_output[i][j] = a[idx];
}
}
free(a); // free host memory
free(piv); // free host memory
magma_free(d_a); // free device memory
magma_free(d_r); // free device memory
magma_free(d_c); // free device memory
magma_queue_destroy(queue); // destroy queue
magma_finalize ();
// End magma part
}
At the execution, I have the following error on this function (project_magma.cpp:192):
magma_dsetmatrix( m, m, a, m, d_a, m, queue); // copy a -> d_a
**Error:**
CUBLAS error: invalid value (7) in matrix_inverse_magma at project_magma.cpp:192
On entry to magmablas_dlacpy, parameter 5 had an illegal value (info = -5)
On entry to magma_dgetrf_gpu_expert, parameter 4 had an illegal value (info = -4)
On entry to magma_dgetri_gpu, parameter 3 had an illegal value (info = -3)
** On entry to DGEMM parameter number 8 had an illegal value
magma_dgetrf_gpu + magma_dgetri_gpu time: 0.00001 sec.
CUBLAS error: invalid value (7) in matrix_inverse_magma at project_magma.cpp:209
upper left corner of a^-1*a:
On entry to magma_dprint, parameter 4 had an illegal value (info = -4)
**Important remark:** If I am using the filling of `a` array:
lapackf77_dlarnv (&ione ,ISEED ,&mm ,a); // randomize a
Code works fine and correct inversion is performed.
If I am using the filling of `a` array by doing:
// Convert matrix to *a double pointer
for (i = 0; i<m; i++){
for (j = 0; j<m; j++){
idx = i*m + j;
a[idx] = F_matrix[i][j];
}
}
Then, I will get the errors mentioned above.
I don't really understand where this error could come from since with the LAPACKE routines version, I have no problems.
How can I fix this error?