#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <mpi.h>

// Flat offset of element (row, col) in a row-major 2D array with `ncols` columns.
#define indexof(row,col,ncols) ((row)*(ncols)+(col))

int main(int argc, char* argv[]){
  int nStep,NSTEPS;
  int M,N,numChunks,sectionSize;
  double *data, *tmpData;         // 2D (sectionSize+2)*N
  double starttime,endtime,durtime;

  int thisIndex,worldSize;
  MPI_Status sts;
  MPI_Request req[2];

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD,&thisIndex);
  MPI_Comm_size(MPI_COMM_WORLD,&worldSize);

  //initialization
  NSTEPS=10;
  N=1024; M=1024;
  numChunks=worldSize;

  sectionSize=M/numChunks;
  data=(double *)malloc(sizeof(double)*(sectionSize+2)*N);
  tmpData=(double *)malloc(sizeof(double)*(sectionSize+2)*N);
  bzero(data,(sectionSize+2)*N*sizeof(double));

  for(int i=0;i<sectionSize;i++){
    data[indexof(1+i,0,N)]=1.0;
    data[indexof(1+i,N-1,N)]=1.0;
  }
  if(thisIndex==0)
    for(int i=0;i<N;i++)
      data[indexof(1,i,N)]=1.0;
  if(thisIndex==numChunks-1)
    for(int i=0;i<N;i++)
      data[indexof(sectionSize,i,N)]=1.0;
  
  if(thisIndex==0) {
   printf("[0]Starting Jacobi relaxation with %d VP\n",worldSize);
    starttime = MPI_Wtime();
  }

  // computation iterations
  for(nStep=0; nStep<NSTEPS; nStep++){
    //exchange ghosts
    if(thisIndex!=0){
	MPI_Irecv(data+(indexof(0,0,N)),N, MPI_DOUBLE,thisIndex-1,0,MPI_COMM_WORLD,&req[0]);
	MPI_Send(data+(indexof(1,0,N)),N, MPI_DOUBLE,thisIndex-1,0,MPI_COMM_WORLD);
    }
    if(thisIndex!=numChunks-1){
	MPI_Irecv(data+(indexof(sectionSize+1,0,N)),N, MPI_DOUBLE,thisIndex+1,0,MPI_COMM_WORLD,&req[1]);
	MPI_Send(data+(indexof(sectionSize,0,N)),N, MPI_DOUBLE,thisIndex+1,0,MPI_COMM_WORLD);
    }
    if(thisIndex!=0)
	MPI_Wait(&req[0],&sts);
    if(thisIndex!=numChunks-1)
	MPI_Wait(&req[1],&sts);

    //compute next value
    double tmpChange,maxChange=0.0;
    memcpy(tmpData,data,sizeof(double)*(sectionSize+2)*N);
    int start,end;
    start=1,end=sectionSize;
    if(thisIndex==0) start=2;
    else if(thisIndex==numChunks-1) end=sectionSize-1;
    for(int i=start;i<=end;i++){
	for(int j=1;j<N-1;j++){
	  data[indexof(i,j,N)] = 0.2 * (tmpData[indexof(i,  j-1,N)] +\
					tmpData[indexof(i,  j,  N)] +\
					tmpData[indexof(i,  j+1,N)] +\
					tmpData[indexof(i-1,j,  N)] +\
					tmpData[indexof(i+1,j,  N)]);
	}
    }
    if(thisIndex==0) printf("[0]Step=%d\n",nStep);
  }

  if (thisIndex == 0){
    endtime = MPI_Wtime();
    durtime = endtime - starttime;
    printf("[0]Finished. time = %lf sec.\n", durtime/nStep);
  }

  // finalization 
  delete [] tmpData;
  delete [] data;
  MPI_Finalize();
  return 0;
}

