Updating a matrix in MPI Fortran

Posted 2019-09-05 14:51

I am running the following MPI Fortran code, in which each processor generates its own block of a matrix. Every element of the block is incremented by one and the updated block is sent to the root processor, where the full matrix is assembled and printed. My problem is that the root processor's part of the matrix never gets updated. Why is that? Run the code with four processors to see the problem clearly.

      PROGRAM MAIN
      include "mpif.h"
      parameter (nx = 4)
      parameter (ny = 4)
      parameter (tsteps = 5)
      real*8    a(nx,ny),b(nx,ny)
      integer   rows,cols
      integer   myid, myid1,Root,source,numprocs
      integer   it,comm2d,ierr,req
      integer   sx, ex, sy, ey
      integer   dims(2),coord(2)
      logical   periods(2)
      integer   status(MPI_STATUS_SIZE)
      data periods/2*.false./

      Root = 0
      CALL MPI_INIT( ierr )
      CALL MPI_COMM_RANK(MPI_COMM_WORLD,myid1,ierr)
      CALL MPI_COMM_SIZE(MPI_COMM_WORLD,numprocs,ierr)
c     Get a new communicator for a decomposition of the domain.
c     Let MPI find a "good" decomposition
      dims(1) = 0
      dims(2) = 0
      CALL MPI_DIMS_CREATE(numprocs,2,dims,ierr)
      if (myid1.EQ.Root) then
         print *,'dimensions:',dims(1),dims(2)
      endif
      CALL MPI_CART_CREATE(MPI_COMM_WORLD,2,dims,periods,.true.,
     &                     comm2d,ierr)
c     Get my position in this communicator
c     CALL MPI_COMM_RANK(comm2d,myid,ierr)
c     Compute the decomposition
      CALL fnd2ddecomp(comm2d,nx,ny,sx,ex,sy,ey)
      rows = ex-sx+1
      cols = ey-sy+1
c     Initialize the a matrix
      do i = sx,ex
         do j = sy,ey
            a(i,j) = (i-1)+(j-1)
         enddo
      enddo
      do it = 1,tsteps
         do i = sx,ex
            do j = sy,ey
               a(i,j) = a(i,j)+1
            enddo
         enddo
C     Each processor sends its block and index range to the root
         call MPI_ISEND(sx,1,MPI_INTEGER,Root,1,
     &                  comm2d,req,ierr)
         call MPI_ISEND(ex,1,MPI_INTEGER,Root,1,
     &                  comm2d,req,ierr)
         call MPI_ISEND(sy,1,MPI_INTEGER,Root,1,
     &                  comm2d,req,ierr)
         call MPI_ISEND(ey,1,MPI_INTEGER,Root,1,
     &                  comm2d,req,ierr)
         call MPI_ISEND(a(sx:ex,sy:ey),cols*rows,MPI_DOUBLE_PRECISION,
     &                  Root,1,comm2d,req,ierr)
c     The root receives the blocks from all processors
         if (myid1.EQ.Root) then
            do source = 0,numprocs-1
               call MPI_RECV(sx,1,MPI_INTEGER,source,
     &                       1,comm2d,status,ierr)
               call MPI_RECV(ex,1,MPI_INTEGER,source,
     &                       1,comm2d,status,ierr)
               call MPI_RECV(sy,1,MPI_INTEGER,source,
     &                       1,comm2d,status,ierr)
               call MPI_RECV(ey,1,MPI_INTEGER,source,
     &                       1,comm2d,status,ierr)
               call MPI_RECV(a(sx:ex,sy:ey),cols*rows,
     &                       MPI_DOUBLE_PRECISION,source,1,comm2d,
     &                       status,ierr)
               a(sx:ex,sy:ey) = a(sx:ex,sy:ey)
               call MPI_Wait(req, status, ierr)
            enddo
         endif
         if (myid1.EQ.Root) then
c     Print the results
            print *, 'time step=',it
            do 90 i = 1,nx
               do 80 j = 1,ny
                  write(*,70) a(i,j)
   70             format(2x,f8.2,$)
   80          continue
               print *, ' '
   90       continue
         endif
      enddo
C      Cleanup goes here.
      CALL MPI_Comm_free( comm2d, ierr )
30    CALL MPI_FINALIZE(ierr)

      STOP
      END
C******************************************************* 
      subroutine fnd2ddecomp(comm2d,nx,ny,sx,ex,sy,ey)
      integer   comm2d
      integer   nx,ny,sx,ex,sy,ey
      integer   dims(2),coords(2),ierr
      logical   periods(2)
c Get (i,j) position of a processor from Cartesian topology.
      CALL MPI_Cart_get(comm2d,2,dims,periods,coords,ierr)
C Decomposition in first (ie. X) direction
      CALL MPE_DECOMP1D(nx,dims(1),coords(1),sx,ex)
C Decomposition in second (ie. Y) direction
      CALL MPE_DECOMP1D(ny,dims(2),coords(2),sy,ey)
      return
      end
c********************************************************************* 
      SUBROUTINE MPE_DECOMP1D(n,numprocs,myid,s,e)
      integer n,numprocs,myid,s,e,nlocal,deficit
      nlocal  = n / numprocs
      s       = myid * nlocal + 1
      deficit = mod(n,numprocs)
      s       = s + min(myid,deficit)
C Give one more slice to processors
      if (myid .lt. deficit) then
          nlocal = nlocal + 1
      endif
      e = s + nlocal - 1
      if (e .gt. n .or. myid .eq. numprocs-1) e = n
      return
      end

I am generating the following matrix:

    A = [0     1     2     3
         1     2     3     4
         2     3     4     5
         3     4     5     6];

In each time step I add 1 to every element of A, send the local blocks to the root processor, receive and assemble them there, and print the full matrix. In the printed output the block A(1:2,1:2), which belongs to the root processor, never shows any update.
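
For reference, after time step it every entry should be a(i,j) = (i-1)+(j-1)+it. The following is a minimal serial sketch (not part of the MPI code above, but using the same initialization formula and the same +1 update) that prints the expected fully updated matrix after each step; the program name EXPECTED is just illustrative:

      PROGRAM EXPECTED
c     Standalone serial reference: same initialization
c     a(i,j) = (i-1)+(j-1) and the same +1 per time step
c     as the MPI code, printed for comparison.
      integer nx, ny, tsteps
      parameter (nx = 4, ny = 4, tsteps = 5)
      real*8  a(nx,ny)
      integer i, j, it
      do i = 1,nx
         do j = 1,ny
            a(i,j) = (i-1)+(j-1)
         enddo
      enddo
      do it = 1,tsteps
         do i = 1,nx
            do j = 1,ny
               a(i,j) = a(i,j)+1
            enddo
         enddo
         print *, 'time step=',it
         do i = 1,nx
            write(*,'(4(2x,f8.2))') (a(i,j), j=1,ny)
         enddo
      enddo
      END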


1 Answer

干净又极端 · answered 2019-09-05 15:34

I found the mistake in the code. On the root processor the indices sx, ex, sy, ey were being overwritten by the receives, so the root's own block was no longer addressed correctly and appeared not to update. The corrected code below receives the indices into separate variables sx0, ex0, sy0, ey0.
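
The essential change, excerpted from the corrected code that follows, is the root's receive loop: the incoming index ranges go into the new variables sx0, ex0, sy0, ey0 instead of overwriting the root's own sx, ex, sy, ey.

      do source = 0,numprocs-1
c        receive each block's index range into sx0,ex0,sy0,ey0 so the
c        root's own sx,ex,sy,ey stay intact
         call MPI_RECV(sx0,1,MPI_INTEGER,source,1,comm2d,status,ierr)
         call MPI_RECV(ex0,1,MPI_INTEGER,source,1,comm2d,status,ierr)
         call MPI_RECV(sy0,1,MPI_INTEGER,source,1,comm2d,status,ierr)
         call MPI_RECV(ey0,1,MPI_INTEGER,source,1,comm2d,status,ierr)
         call MPI_RECV(a(sx0:ex0,sy0:ey0),cols*rows,
     &                 MPI_DOUBLE_PRECISION,source,1,comm2d,
     &                 status,ierr)
      enddo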

      PROGRAM MAIN
      include "mpif.h"
      parameter (nx = 4)
      parameter (ny = 4)
      parameter (tsteps = 5)
      real*8    a(nx,ny),b(nx,ny)
      integer   rows,cols
      integer   myid, myid1,Root,source,numprocs
      integer   it,comm2d,ierr,req
      integer   sx, ex, sy, ey
      integer   sx0, ex0, sy0, ey0
      integer   dims(2),coord(2)
      logical   periods(2)
      integer   status(MPI_STATUS_SIZE)
      data periods/2*.false./

      Root = 0
      CALL MPI_INIT( ierr )
      CALL MPI_COMM_RANK(MPI_COMM_WORLD,myid1,ierr)
      CALL MPI_COMM_SIZE(MPI_COMM_WORLD,numprocs,ierr)
c     Get a new communicator for a decomposition of the domain.
c     Let MPI find a "good" decomposition
      dims(1) = 0
      dims(2) = 0
      CALL MPI_DIMS_CREATE(numprocs,2,dims,ierr)
      if (myid1.EQ.Root) then
         print *,'dimensions:',dims(1),dims(2)
      endif
      CALL MPI_CART_CREATE(MPI_COMM_WORLD,2,dims,periods,.true.,
     &                     comm2d,ierr)
c     Get my position in this communicator
c     CALL MPI_COMM_RANK(comm2d,myid,ierr)
c     Compute the decomposition
      CALL fnd2ddecomp(comm2d,nx,ny,sx,ex,sy,ey)
      rows = ex-sx+1
      cols = ey-sy+1
c     Initialize the a matrix
      do i = sx,ex
         do j = sy,ey
            a(i,j) = (i-1)+(j-1)
         enddo
      enddo
      do it = 1,tsteps
         do i = sx,ex
            do j = sy,ey
               a(i,j) = a(i,j)+1
            enddo
         enddo
C     Each processor sends its block and index range to the root
         call MPI_ISEND(sx,1,MPI_INTEGER,Root,1,
     &                  comm2d,req,ierr)
         call MPI_ISEND(ex,1,MPI_INTEGER,Root,1,
     &                  comm2d,req,ierr)
         call MPI_ISEND(sy,1,MPI_INTEGER,Root,1,
     &                  comm2d,req,ierr)
         call MPI_ISEND(ey,1,MPI_INTEGER,Root,1,
     &                  comm2d,req,ierr)
         call MPI_ISEND(a(sx:ex,sy:ey),cols*rows,MPI_DOUBLE_PRECISION,
     &                  Root,1,comm2d,req,ierr)
c     The root receives the blocks from all processors, using
c     sx0,ex0,sy0,ey0 so its own sx,ex,sy,ey are not overwritten
         if (myid1.EQ.Root) then
            do source = 0,numprocs-1
               call MPI_RECV(sx0,1,MPI_INTEGER,source,
     &                       1,comm2d,status,ierr)
               call MPI_RECV(ex0,1,MPI_INTEGER,source,
     &                       1,comm2d,status,ierr)
               call MPI_RECV(sy0,1,MPI_INTEGER,source,
     &                       1,comm2d,status,ierr)
               call MPI_RECV(ey0,1,MPI_INTEGER,source,
     &                       1,comm2d,status,ierr)
               call MPI_RECV(a(sx0:ex0,sy0:ey0),cols*rows,
     &                       MPI_DOUBLE_PRECISION,source,1,comm2d,
     &                       status,ierr)
               a(sx0:ex0,sy0:ey0) = a(sx0:ex0,sy0:ey0)
               call MPI_Wait(req, status, ierr)
            enddo
         endif
         if (myid1.EQ.Root) then
c     Print the results
            print *, 'time step=',it
            do 90 i = 1,nx
               do 80 j = 1,ny
                  write(*,70) a(i,j)
   70             format(2x,f8.2,$)
   80          continue
               print *, ' '
   90       continue
         endif
      enddo
C      Cleanup goes here.
      CALL MPI_Comm_free( comm2d, ierr )
30    CALL MPI_FINALIZE(ierr)

      STOP
      END
C******************************************************* 
      subroutine fnd2ddecomp(comm2d,nx,ny,sx,ex,sy,ey)
      integer   comm2d
      integer   nx,ny,sx,ex,sy,ey
      integer   dims(2),coords(2),ierr
      logical   periods(2)
c Get (i,j) position of a processor from Cartesian topology.
      CALL MPI_Cart_get(comm2d,2,dims,periods,coords,ierr)
C Decomposition in first (ie. X) direction
      CALL MPE_DECOMP1D(nx,dims(1),coords(1),sx,ex)
C Decomposition in second (ie. Y) direction
      CALL MPE_DECOMP1D(ny,dims(2),coords(2),sy,ey)
      return
      end
c********************************************************************* 
      SUBROUTINE MPE_DECOMP1D(n,numprocs,myid,s,e)
      integer n,numprocs,myid,s,e,nlocal,deficit
      nlocal  = n / numprocs
      s       = myid * nlocal + 1
      deficit = mod(n,numprocs)
      s       = s + min(myid,deficit)
C Give one more slice to processors
      if (myid .lt. deficit) then
          nlocal = nlocal + 1
      endif
      e = s + nlocal - 1
      if (e .gt. n .or. myid .eq. numprocs-1) e = n
      return
      end
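
To reproduce the behaviour described in the question, compile with an MPI Fortran wrapper (for example mpif77 or mpifort, depending on the MPI installation) and launch with mpirun -np 4, so that MPI_DIMS_CREATE yields a 2x2 process grid and each processor owns a 2x2 block of the 4x4 matrix.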