I am running the following MPI Fortran code, in which each processor generates its block of a matrix. Each matrix value is then incremented by one and the updated block is sent to the root processor. Finally, the full matrix is assembled and printed. I am facing a problem on the root processor, where part of the matrix does not get updated. Why is that? Run the code with four processors to see the problem clearly.
PROGRAM MAIN
include "mpif.h"
parameter (nx = 4)
parameter (ny = 4)
parameter (tsteps = 5)
real*8 a(nx,ny),b(nx,ny)
integer rows,cols
integer myid, myid1,Root,source,numprocs
integer it,comm2d,ierr,req
integer sx, ex, sy, ey
integer dims(2),coord(2)
logical periods(2)
integer status(MPI_STATUS_SIZE)
data periods/2*.false./
Root = 0
CALL MPI_INIT( ierr )
CALL MPI_COMM_RANK(MPI_COMM_WORLD,myid1,ierr)
CALL MPI_COMM_SIZE(MPI_COMM_WORLD,numprocs,ierr)
c Get a new communicator for a decomposition of the domain.
c Let MPI find a "good" decomposition
dims(1) = 0
dims(2) = 0
CALL MPI_DIMS_CREATE(numprocs,2,dims,ierr)
if (myid1.EQ.Root) then
print *,'dimensions:',dims(1),dims(2)
endif
CALL MPI_CART_CREATE(MPI_COMM_WORLD,2,dims,periods,.true.,
& comm2d,ierr)
c Get my position in this communicator
c CALL MPI_COMM_RANK(comm2d,myid,ierr)
c Compute the decomposition
CALL fnd2ddecomp(comm2d,nx,ny,sx,ex,sy,ey)
rows = ex-sx+1
cols = ey-sy+1
c Initialize the a matrix
do i= sx,ex
do j=sy,ey
a(i,j) = (i-1)+(j-1)
enddo
enddo
do it = 1,tsteps
do i= sx,ex
do j=sy,ey
a(i,j) = a(i,j)+1
enddo
enddo
C Send the local block and its indices to Root
call MPI_ISEND(sx,1,MPI_INTEGER,Root,1,
& comm2d,req,ierr)
call MPI_ISEND(ex,1,MPI_INTEGER,Root,1,
& comm2d,req,ierr)
call MPI_ISEND(sy,1,MPI_INTEGER,Root,1,
& comm2d,req,ierr)
call MPI_ISEND(ey,1,MPI_INTEGER,Root,1,
& comm2d,req,ierr)
call MPI_ISEND(a(sx:ex,sy:ey),cols*rows,MPI_DOUBLE_PRECISION,
& Root,1,comm2d,req,ierr )
c Receive the results from the other processors
if (myid1.EQ.Root) then
do source = 0,numprocs-1
call MPI_RECV(sx,1,MPI_INTEGER,source,
& 1,comm2d,status,ierr )
call MPI_RECV(ex,1,MPI_INTEGER,source,
& 1,comm2d,status,ierr )
call MPI_RECV(sy,1,MPI_INTEGER,source,
& 1,comm2d,status,ierr )
call MPI_RECV(ey,1,MPI_INTEGER,source,
& 1,comm2d,status,ierr )
call MPI_RECV(a(sx:ex,sy:ey),cols*rows,MPI_DOUBLE_PRECISION,
& source,1,comm2d,status,ierr)
a(sx:ex,sy:ey) = a(sx:ex,sy:ey)
call MPI_Wait(req, status, ierr)
enddo
endif
if (myid1.EQ.Root) then
c print the results
print *, 'time step=',it
do 90 i=1,nx
do 80 j = 1,ny
write(*,70)a(i,j)
70 format(2x,f8.2,$)
80 continue
print *, ' '
90 continue
endif
enddo
C Cleanup goes here.
CALL MPI_Comm_free( comm2d, ierr )
30 CALL MPI_FINALIZE(ierr)
STOP
END
C*******************************************************
subroutine fnd2ddecomp(comm2d,nx,ny,sx,ex,sy,ey)
integer comm2d
integer nx,ny,sx,ex,sy,ey
integer dims(2),coords(2),ierr
logical periods(2)
c Get (i,j) position of a processor from Cartesian topology.
CALL MPI_Cart_get(comm2d,2,dims,periods,coords,ierr)
C Decomposition in first (i.e. X) direction
CALL MPE_DECOMP1D(nx,dims(1),coords(1),sx,ex)
C Decomposition in second (i.e. Y) direction
CALL MPE_DECOMP1D(ny,dims(2),coords(2),sy,ey)
return
end
c*********************************************************************
SUBROUTINE MPE_DECOMP1D(n,numprocs,myid,s,e)
integer n,numprocs,myid,s,e,nlocal,deficit
nlocal = n / numprocs
s = myid * nlocal + 1
deficit = mod(n,numprocs)
s = s + min(myid,deficit)
C Give one extra element to the first 'deficit' processors
if (myid .lt. deficit) then
nlocal = nlocal + 1
endif
e = s + nlocal - 1
if (e .gt. n .or. myid .eq. numprocs-1) e = n
return
end
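For reference, with four processes MPI_DIMS_CREATE returns dims = (2,2), and MPE_DECOMP1D (n = 4, two processes per direction, deficit = 0) gives every rank a 2x2 block: coordinates (0,0) own A(1:2,1:2), (0,1) own A(1:2,3:4), (1,0) own A(3:4,1:2), and (1,1) own A(3:4,3:4). Assuming the root keeps coordinates (0,0), the block that fails to update is exactly the root's own.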
I am generating the following matrix:
A=[0 1 2 3
1 2 3 4
2 3 4 5
3 4 5 6] ;
I update matrix A by adding 1 to every element in a loop over timesteps, send and receive A in parts, and print it to the screen. A(1:2,1:2) shows no update when the matrix is printed.
I found the mistake in the code. The indices sx, ex, sy, ey are overwritten on the root processor by the receives, so after the receive loop they point at the last sender's block; on the following timesteps the root increments and sends the wrong block, and its own block never gets updated. I have corrected the code and am posting the fix below.
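The essential change is sketched below (the temporary names tsx, tex, tsy, tey are just my choice and must be added to the integer declarations): the root receives each sender's indices into temporaries, so its own sx, ex, sy, ey survive the loop, and the receive count is computed from the received indices so it always matches the sender's block size.

c Receive the results from the other processors into
c temporaries, so the local indices sx,ex,sy,ey survive
      if (myid1.EQ.Root) then
        do source = 0,numprocs-1
          call MPI_RECV(tsx,1,MPI_INTEGER,source,
     &                  1,comm2d,status,ierr)
          call MPI_RECV(tex,1,MPI_INTEGER,source,
     &                  1,comm2d,status,ierr)
          call MPI_RECV(tsy,1,MPI_INTEGER,source,
     &                  1,comm2d,status,ierr)
          call MPI_RECV(tey,1,MPI_INTEGER,source,
     &                  1,comm2d,status,ierr)
c The count is computed from the received indices, so it
c matches the sender's block size
          call MPI_RECV(a(tsx:tex,tsy:tey),
     &                  (tex-tsx+1)*(tey-tsy+1),
     &                  MPI_DOUBLE_PRECISION,source,1,
     &                  comm2d,status,ierr)
        enddo
      endif

Note that the single MPI_Wait on req still completes only the last MPI_ISEND; giving every send its own request handle and completing them with MPI_WAITALL would be the cleaner pattern, but it is the index overwrite above that caused the missing update.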