#!/usr/bin/env python
"""
Example adapted from user lebedov:
https://gist.github.com/lebedov/8514d3456a94a6c73e6d
Demo of how to pass GPU memory managed by numba to mpi4py.
Notes
-----
This code can be used to perform peer-to-peer communication of data via
NVIDIA's GPUDirect technology if mpi4py has been built against a
CUDA-enabled MPI implementation.
"""
import numpy as np
from mpi4py import MPI
from numba import cuda

comm = MPI.COMM_WORLD
size = comm.Get_size()
rank = comm.Get_rank()

if rank == 0:
    # 10 doubles: 100, 110, ..., 190.
    ary = np.arange(100, 200, 10, dtype=np.double)
    gpu_ary = cuda.to_device(ary)
    print('before (%i): %s' % (rank, ary))
    # Wrap the raw device pointer so mpi4py can use the GPU allocation as a
    # send buffer:
    comm.Send(
        [MPI.memory.fromaddress(
            gpu_ary.device_ctypes_pointer.value,
            gpu_ary.alloc_size),
         MPI.DOUBLE], dest=1)
    print('sent')
    print('after (%i): %s' % (rank, gpu_ary.copy_to_host()))
elif rank == 1:
    ary = np.zeros(10, dtype=np.double)
    gpu_ary = cuda.to_device(ary)
    print('before (%i): %s' % (rank, ary))
    # Receive directly into the device allocation:
    comm.Recv(
        [MPI.memory.fromaddress(
            gpu_ary.device_ctypes_pointer.value,
            gpu_ary.alloc_size),
         MPI.DOUBLE], source=0)
    print('received')
    print('after (%i): %s' % (rank, gpu_ary.copy_to_host()))
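# Alternative sketch (an assumption, not part of the original gist): numba
# device arrays expose __cuda_array_interface__, and mpi4py >= 3.1 can consume
# such objects directly as message buffers, so the explicit pointer wrapping
# above could be replaced with:
#
#     if rank == 0:
#         comm.Send(gpu_ary, dest=1)
#     elif rank == 1:
#         comm.Recv(gpu_ary, source=0)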
#!/usr/bin/env python
"""
Example adapted from user lebedov:
https://gist.github.com/lebedov/8514d3456a94a6c73e6d
Demo of how to pass GPU memory managed by pycuda to mpi4py.
Notes
-----
This code can be used to perform peer-to-peer communication of data via
NVIDIA's GPUDirect technology if mpi4py has been built against a
CUDA-enabled MPI implementation.
"""
import atexit
import sys

import numpy as np
from mpi4py import MPI
import pycuda.driver as drv
import pycuda.gpuarray as gpuarray

drv.init()
comm = MPI.COMM_WORLD
size = comm.Get_size()
rank = comm.Get_rank()

N_gpu = drv.Device.count()
if N_gpu < 2:
    sys.stdout.write('at least 2 GPUs are required\n')
else:
    # Bind each rank to its own GPU. atexit handlers run last-in, first-out,
    # so MPI is finalized before the CUDA context is popped.
    dev = drv.Device(rank)
    ctx = dev.make_context()
    atexit.register(ctx.pop)
    atexit.register(MPI.Finalize)
    if rank == 0:
        # 10 doubles: 100, 110, ..., 190.
        x_gpu = gpuarray.arange(100, 200, 10, dtype=np.double)
        print('before (%i): %s' % (rank, x_gpu))
        # Wrap the raw device pointer so mpi4py can use the GPU allocation as
        # a send buffer:
        comm.Send(
            [MPI.memory.fromaddress(x_gpu.ptr, x_gpu.nbytes),
             MPI.DOUBLE], dest=1)
        print('sent')
        print('after (%i): %s' % (rank, x_gpu))
    elif rank == 1:
        x_gpu = gpuarray.zeros(10, dtype=np.double)
        print('before (%i): %s' % (rank, x_gpu))
        # Receive directly into the device allocation:
        comm.Recv(
            [MPI.memory.fromaddress(x_gpu.ptr, x_gpu.nbytes),
             MPI.DOUBLE], source=0)
        print('received')
        print('after (%i): %s' % (rank, x_gpu))
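# This variant binds rank i to GPU i, so it needs at least one GPU per rank
# on the node; a typical launch (the script name here is hypothetical) is:
#     mpiexec -n 2 python pycuda_mpi_demo.py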