recursive SUBROUTINE main
  USE mpi
  IMPLICIT NONE

  INTEGER :: i,j
  INTEGER, PARAMETER :: width=10
  INTEGER, PARAMETER :: height=100
  INTEGER :: iter, left, right
  INTEGER :: tag, tagLeft, tagRight
  INTEGER, DIMENSION(MPI_STATUS_SIZE) :: status
  DOUBLE PRECISION, DIMENSION(height, width+2) :: temp
  DOUBLE PRECISION :: error, tval, maxerr
  INTEGER :: niter

  INTEGER :: thisIndex, ierr, nblocks

  CALL MPI_Init(ierr)
  CALL MPI_Comm_rank(MPI_COMM_WORLD, thisIndex, ierr)
  CALL MPI_Comm_size(MPI_COMM_WORLD, nblocks, ierr)

  maxerr = 0.0

  if(thisIndex .eq. 0) then
    niter = 3                ! some dummy proc 0 only initialization
  end if

  call MPI_Bcast(niter, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)

  DO i = 1, width
    DO j = 0, height-1
      temp(j+1, i+1) = 100*(i-1) + j
    ENDDO
  ENDDO

  call MPI_Barrier(MPI_COMM_WORLD, ierr)

  left = mod((thisIndex-1+nblocks), nblocks)
  right = mod((thisIndex+1), nblocks)
  DO iter = 0,niter-1 
    tag = iter*nblocks+thisIndex
    tagLeft = iter*nblocks+left
    tagRight = iter*nblocks+right

    call MPI_Send(temp(1,2), height, MPI_DOUBLE_PRECISION, left, tag, &
&                 MPI_COMM_WORLD, ierr)
    call MPI_Recv(temp(1, width+2), height, MPI_DOUBLE_PRECISION, right, &
&                 tagRight, MPI_COMM_WORLD, status, ierr)
    call MPI_Send(temp(1,width+1), height, MPI_DOUBLE_PRECISION, right, tag, &
&                 MPI_COMM_WORLD, ierr)
    call MPI_Recv(temp(1, 1), height, MPI_DOUBLE_PRECISION, left, tagLeft, &
&                 MPI_COMM_WORLD, status, ierr)

    DO i = 2, 11
      DO j = 2, 99
        tval=(temp(j,i)+temp(j,i+1)+temp(j,i-1)+temp(j+1,i)+temp(j-1,i))/5.0
        error = abs(tval-temp(j,i))
        temp(j,i) = tval
        if(error > maxerr) maxerr = error
      END DO
    END DO

    call MPI_AllReduce(maxerr, maxerr, 1, MPI_DOUBLE_PRECISION, MPI_MAX, &
&                      MPI_COMM_WORLD, ierr)
    call Migrate_Ready
  END DO
  CALL MPI_Finalize(ierr)
END SUBROUTINE
