I have found an example of matrix inversion from this
[link Inverse matrix], using `pdgetrf` and `pdgetri`:
#include <mpi.h>
#include <cstdio>
#include <cassert>
//#include "block_cyclic_mat.h"
#include <mkl.h>
#include "mkl_scalapack.h"
#include <vector>
#include <memory>
using namespace std;
static double getri_flops(int N)
{
// From:
// https://icl.cs.utk.edu/svn/scalapack-dev/scalapack/trunk/TESTING/LIN/pdinvdriver.f
// Factorization: 2/3 N^3 - 1/2 N^2
// Inverse : 4/3 N^3 - N^2
return ((2.0/3.0 * N * N * N) - (1.0/2.0 * N * N) +
(4.0/3.0 * N * N * N) - (N * N))/1024.0/1024.0/1024.0;
}
/// n_global: the order of the matrix
static void inv_driver(int n_global)
{
auto grid = make_shared<blacs_grid_t>();
//// self code
//n_global = 3;
//double *aaa = new double(n_global*n_global);
//for (int i = 0; i < 9; i++)
//{
// aaa[i] = i + 1;
//}
//aaa[8] = 10;
//auto a = block_cyclic_mat_t::createWithArray(grid, n_global, n_global, aaa);
// Create an NxN random matrix A
auto a = block_cyclic_mat_t::random(grid, n_global, n_global);
// Create an NxN matrix to hold A^{-1}
auto ai = block_cyclic_mat_t::constant(grid, n_global, n_global);
// Copy A to A^{-1} since it will be overwritten during factorization
copy_n(a->local_data(), a->local_size(), ai->local_data());
MPI_Barrier (MPI_COMM_WORLD);
double t0 = MPI_Wtime();
// Factorize A
int ia = 1, ja = 1;
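// pdgetrf requires ipiv to have length LOCr(M_A) + MB_A, i.e.
// local_rows() + row_block_size(); the extra +100 looks like slack.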
vector<int> ipiv(a->local_rows() + a->row_block_size() + 100);
int info;
//The name should be read as D-GE-TRF:
//the leading D means the matrix entries are of type double,
//GE means the matrix is of General form,
//and TRF means a triangular factorization, i.e. the usual LU decomposition.
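//(By the same naming scheme, the extra P in PDGETRF below marks the
//parallel, distributed-memory version of DGETRF.)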
pdgetrf_(n_global, n_global,
ai->local_data(), ia, ja, ai->descriptor(),
ipiv.data(),
info);
assert(info == 0);
double t_factor = MPI_Wtime() - t0;
// Compute A^{-1} based on the LU factorization
// Compute workspace for double and integer work arrays on each process
int lwork = 10;
int liwork = 10;
vector<double> work (lwork);
vector<int> iwork(liwork);
lwork = liwork = -1;
// Query the optimal lwork and liwork: with lwork = liwork = -1, pdgetri
// only writes the required sizes into work[0] and iwork[0].
pdgetri_(n_global,
ai->local_data(), ia, ja, ai->descriptor(),
ipiv.data(),
work.data(), lwork, iwork.data(), liwork, info);
assert(info == 0);
lwork = static_cast<int>(work[0]);
liwork = static_cast<int>(iwork[0]);
work.resize(lwork);
iwork.resize(liwork);
// Now compute the inverse
t0 = MPI_Wtime();
pdgetri_(n_global,
ai->local_data(), ia, ja, ai->descriptor(),
ipiv.data(),
work.data(), lwork, iwork.data(), liwork, info);
assert(info == 0);
double t_solve = MPI_Wtime() - t0;
// Verify that the inverse is correct using A*A^{-1} = I
auto identity = block_cyclic_mat_t::diagonal(grid, n_global, n_global);
// Compute E = A * A^{-1} - I and verify that ||E|| is small
char nein = 'N';
double alpha = 1.0, beta = -1.0;
pdgemm_(nein, nein, n_global, n_global, n_global, alpha,
a->local_data() , ia, ja, a->descriptor(),
ai->local_data(), ia, ja, ai->descriptor(),
beta,
identity->local_data(), ia, ja, identity->descriptor());
// Compute 1-norm of the result
char norm='1';
work.resize(identity->local_cols());
double err = pdlange_(norm, n_global, n_global,
identity->local_data(), ia, ja, identity->descriptor(), work.data());
double t_total = t_factor + t_solve;
double t_glob;
MPI_Reduce(&t_total, &t_glob, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
if (grid->iam() == 0)
{
double gflops = getri_flops(n_global)/t_glob/grid->nprocs();
printf("\n"
"MATRIX INVERSE BENCHMARK SUMMARY\n"
"================================\n"
"N = %d\tNP = %d\tNP_ROW = %d\tNP_COL = %d\n"
"Time for PxGETRF + PxGETRI = %10.7f seconds\tGflops/Proc = %10.7f, Error = %f\n",
n_global, grid->nprocs(), grid->nprows(), grid->npcols(),
t_glob, gflops, err);
fflush(stdout);
}
}
int main(int argc, char** argv)
{
MPI_Init(&argc, &argv);
int n_global = 4096;
if (argc > 1)
{
n_global = int(atol(argv[1])); // atol converts the string argument to a long integer
}
inv_driver(n_global);
MPI_Finalize();
}
I tried to compile this source as follows:
mpicxx -I/opt/intel/oneapi/mpi/latest/include -L${MKLROOT}/lib/intel64 -lmkl_scalapack_ilp64 -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core -lmkl_blacs_intelmpi_ilp64 -liomp5 -lpthread -lm -ldl main.cpp
But I get the following errors:
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
main.cpp: In function ‘void inv_driver(int)’:
main.cpp:27:29: error: ‘blacs_grid_t’ was not declared in this scope
auto grid = make_shared<blacs_grid_t>();
^~~~~~~~~~~~
main.cpp:27:29: note: suggested alternative: ‘clockid_t’
auto grid = make_shared<blacs_grid_t>();
^~~~~~~~~~~~
clockid_t
main.cpp:27:43: error: no matching function for call to ‘make_shared<<expression error> >()’
auto grid = make_shared<blacs_grid_t>();
^
In file included from /usr/include/c++/8/memory:81,
from main.cpp:8:
/usr/include/c++/8/bits/shared_ptr.h:718:5: note: candidate: ‘template<class _Tp, class ... _Args> std::shared_ptr<_Tp> std::make_shared(_Args&& ...)’
make_shared(_Args&&... __args)
^~~~~~~~~~~
/usr/include/c++/8/bits/shared_ptr.h:718:5: note: template argument deduction/substitution failed:
main.cpp:27:43: error: template argument 1 is invalid
auto grid = make_shared<blacs_grid_t>();
^
main.cpp:41:14: error: ‘block_cyclic_mat_t’ has not been declared
auto a = block_cyclic_mat_t::random(grid, n_global, n_global);
^~~~~~~~~~~~~~~~~~
main.cpp:44:15: error: ‘block_cyclic_mat_t’ has not been declared
auto ai = block_cyclic_mat_t::constant(grid, n_global, n_global);
^~~~~~~~~~~~~~~~~~
main.cpp:47:5: error: ‘copy_n’ was not declared in this scope
copy_n(a->local_data(), a->local_size(), ai->local_data());
^~~~~~
main.cpp:47:5: note: suggested alternative: ‘ctpcon’
copy_n(a->local_data(), a->local_size(), ai->local_data());
^~~~~~
ctpcon
main.cpp:100:21: error: ‘block_cyclic_mat_t’ has not been declared
auto identity = block_cyclic_mat_t::diagonal(grid, n_global, n_global);
^~~~~~~~~~~~~~~~~~
main.cpp:105:5: error: ‘pdgemm_’ was not declared in this scope
pdgemm_(nein, nein, n_global, n_global, n_global, alpha,
^~~~~~~
main.cpp:105:5: note: suggested alternative: ‘pdgels_’
pdgemm_(nein, nein, n_global, n_global, n_global, alpha,
^~~~~~~
pdgels_
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
I couldn't find the header `"block_cyclic_mat.h"` anywhere in my oneAPI installation.
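From the member functions the example calls, I assume the missing header must declare roughly the following interface (my own reconstruction from the call sites, not the actual header):

#include <memory>
#include <cstddef>
// Hypothetical sketch of "block_cyclic_mat.h", inferred from how main.cpp uses it
class blacs_grid_t
{
public:
    blacs_grid_t();                  // presumably sets up the BLACS process grid
    int iam() const;                 // rank of the calling process
    int nprocs() const;              // total number of processes
    int nprows() const;              // process rows of the 2D grid
    int npcols() const;              // process columns of the 2D grid
};
class block_cyclic_mat_t
{
public:
    // factory functions used by the example
    static std::shared_ptr<block_cyclic_mat_t> random  (std::shared_ptr<blacs_grid_t> g, int m, int n);
    static std::shared_ptr<block_cyclic_mat_t> constant(std::shared_ptr<blacs_grid_t> g, int m, int n);
    static std::shared_ptr<block_cyclic_mat_t> diagonal(std::shared_ptr<blacs_grid_t> g, int m, int n);
    double* local_data();            // pointer to the locally stored elements
    size_t  local_size() const;      // number of locally stored elements
    int     local_rows() const;      // rows stored on this process
    int     local_cols() const;      // columns stored on this process
    int     row_block_size() const;  // the row blocking factor (MB)
    int*    descriptor();            // the 9-element ScaLAPACK array descriptor
};

If that reconstruction is right, these look like helper classes from the example's own source tree rather than anything shipped with oneMKL, which would explain why the header is nowhere in my installation.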
Moreover, I get many occurrences of the same error about `block_cyclic_mat_t`: `error: ‘block_cyclic_mat_t’ has not been declared`.
I also get errors such as `‘blacs_grid_t’ was not declared in this scope` and `error: no matching function for call to ‘make_shared<<expression error> >()’`.
Does anyone see what's wrong and, if so, how to work around these compilation errors?
Best regards.