The following code gathers NumPy arrays of varying size from different processes. When I run it on a single node (a PBS node with 48 cores), it works fine. However, when I run it on multiple nodes, the gathered data is incorrect. Can you please help me solve this problem?
#--------------------------------------------------------------------------------------------------------
import numpy as np
from numpy.linalg import norm
from mpi4py import MPI
# Gatherv self-check: every rank sends a vector whose length depends on its
# rank, rank 0 gathers all of them into one contiguous buffer and prints the
# norm of the difference from a locally rebuilt reference (0.0 == success).
Comm = MPI.COMM_WORLD
N_Workers = Comm.Get_size()
Rank = Comm.Get_rank()

RefDataLen = int(1e4)

# Rank r contributes RefDataLen * (r + 1) elements.
VecLenList = RefDataLen * np.arange(1, N_Workers + 1)
# Exclusive prefix sum of the counts: the element offset at which each rank's
# chunk starts inside the gathered buffer (first entry is 0).
VecDisplList = np.cumsum(VecLenList) - VecLenList
N_GatheredVec = np.sum(VecLenList)

# Every rank builds the payload of *every* rank, so the root can reconstruct
# the expected result without any additional communication.
DataList = [np.arange(VecLen) * 1e-3 for VecLen in VecLenList]

# The payload is float64 (integer arange times a Python float), so the send
# side is declared as MPI.DOUBLE, the same type the receive side uses.
SendSpec = [DataList[Rank], MPI.DOUBLE]

if Rank == 0:
    # Loop-invariant objects are created once instead of once per iteration.
    GatheredVec = np.zeros(N_GatheredVec)
    ExpectedVec = np.hstack(DataList)
    RecvSpec = (GatheredVec, VecLenList, VecDisplList, MPI.DOUBLE)
else:
    # The receive buffer is only significant at the root.
    RecvSpec = None

# NOTE(review): the counts, displacements and datatypes above look mutually
# consistent; if a non-zero norm shows up only for multi-node runs, the MPI
# library / interconnect layer is the more likely suspect -- TODO confirm.
for _ in range(10):
    if Rank == 0:
        # Clear the previous round's result so that a failed gather cannot be
        # hidden by stale (correct) data left in the buffer.
        GatheredVec.fill(0.0)
    Comm.Gatherv(SendSpec, RecvSpec, root=0)
    if Rank == 0:
        print(norm(GatheredVec - ExpectedVec))