#include <slate/slate.hh>
#include <iostream>
#include <iomanip>
/// Print a grid of A.tileExists(i, j, device) results to std::cout.
///
/// Only the process whose A.mpiRank() is 0 prints anything; every other
/// rank returns without output. The grid has A.mt() lines with A.nt()
/// entries each, and entries on a line are separated by a single space.
///
/// @param A       Matrix whose tiles are queried.
/// @param device  Device id forwarded to A.tileExists() and echoed in the header.
template <typename scalar_t>
void printTileExists(slate::Matrix<scalar_t> &A, int device) {
    if (A.mpiRank() == 0) {
        const int64_t tile_rows = A.mt();
        const int64_t tile_cols = A.nt();

        std::cout << "Exists (device = " << device << "):" << std::endl;

        for (int64_t row = 0; row < tile_rows; ++row) {
            for (int64_t col = 0; col < tile_cols; ++col) {
                // Separator goes before every entry except the first,
                // so a line never ends with a trailing space.
                if (col > 0) {
                    std::cout << " ";
                }
                std::cout << A.tileExists(row, col, device);
            }
            std::cout << std::endl;
        }
    }
}
int main(int argc, char **argv) {
slate::gpu_aware_mpi(true);
using scalar_t = double;
int64_t m = 128;
int64_t n = 128;
int64_t tilesize = 16;
int64_t p = 2, q = 2;
int err=0, mpi_provided=0;
err = MPI_Init_thread( &argc, &argv, MPI_THREAD_MULTIPLE, &mpi_provided );
assert( err == 0 && mpi_provided == MPI_THREAD_MULTIPLE );
slate::Options opts = {{slate::Option::Target, slate::Target::Devices}};
slate::Matrix<scalar_t> A(m, n, tilesize, p, q, MPI_COMM_WORLD);
A.insertLocalTiles(slate::Target::Devices);
slate::Matrix<scalar_t> B = A.emptyLike();
B.insertLocalTiles(slate::Target::Devices);
slate::copy(A, B, opts);
/////////////////////////////////////////////////////////
// GPU2GPU COMM TEST ////////////////////////////////////