-
Notifications
You must be signed in to change notification settings - Fork 10
/
cuda_mpi_sendrecv.f90
76 lines (67 loc) · 2.22 KB
/
cuda_mpi_sendrecv.f90
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
program main
use cudafor
use mpi
implicit none
integer mpi_rank, mpi_size, tag, n, ierr, i
integer, allocatable :: sendbuf(:), recvbuf(:)
integer dst, src, p2p
real start, finish
call MPI_Init(ierr)
call MPI_Comm_rank(MPI_COMM_WORLD, mpi_rank, ierr)
call MPI_Comm_size(MPI_COMM_WORLD, mpi_size, ierr)
print *, 'process ', mpi_rank, ' of ', mpi_size, ' is alive'
ierr = cudaSetDevice(mpi_rank)
ierr = cudaGetDeviceCount(n)
! enable GPU Direct P2P
do i = 0, (n - 1)
ierr = cudaDeviceCanAccessPeer(p2p, mpi_rank, i)
if (p2p .eq. 1) then
ierr = cudaDeviceEnablePeerAccess(i, 0)
end if
end do
n = 1000000
allocate(sendbuf(n))
allocate(recvbuf(n))
!$acc enter data create(sendbuf, recvbuf)
!$acc kernels present(sendbuf, recvbuf)
recvbuf = 0
sendbuf = mpi_rank + 1
!$acc end kernels
tag = 0
src = mod(mpi_rank + 1, mpi_size)
dst = mpi_rank - 1
if (dst < 0) then
dst = mpi_size - 1
end if
if (mpi_rank .eq. 0) then
print*, 'send \"1\" to destination process'
end if
!$acc host_data use_device(sendbuf, recvbuf)
call cpu_time(start)
call MPI_Sendrecv(sendbuf, n, MPI_INTEGER, &
dst, tag, &
recvbuf, n, MPI_INTEGER, &
src, tag, &
MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
call cpu_time(finish)
if (mpi_rank .eq. 0) then
print '("CUDA-Aware MPI Sendrecv 1st time = ",f6.3," seconds.")', (finish - start)
end if
call cpu_time(start)
call MPI_Sendrecv(sendbuf, n, MPI_INTEGER, &
dst, tag, &
recvbuf, n, MPI_INTEGER, &
src, tag, &
MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
call cpu_time(finish)
if (mpi_rank .eq. 0) then
print '("CUDA-Aware MPI Sendrecv 2nd time = ",f6.3," seconds.")', (finish - start)
end if
!$acc end host_data
!$acc exit data copyout(recvbuf) delete(sendbuf)
if (mpi_rank .eq. mpi_size - 1) then
print *, 'after MPI Sendrecv:\n', recvbuf(1)
endif
deallocate(sendbuf, recvbuf)
call MPI_Finalize(ierr)
end program main