#include <stdlib.h>
#include <converse.h>

/* Number of arrivals node0HandlerFunc counts for each message size before
   the round is reported; the reported times are divided by 2*nCycles. */
enum {nCycles = 1 << 8 };
/* ringFinished keeps starting new rounds while msgSize is below this value;
   mymain also uses it (plus 1024) when creating the persistent channel. */
enum { maxMsgSize = 1 << 13 };

/* Total size in bytes of the message currently being exchanged, including
   CmiMsgHeaderSizeBytes. */
CpvDeclare(int,msgSize);
/* Arrivals counted by node0HandlerFunc in the current round. */
CpvDeclare(int,cycleNum);
/* Set to 1 in mymain and incremented by ringFinished after each size; not
   read anywhere else in this file. */
CpvDeclare(int,sizeNum);
/* Handler indices returned by CmiRegisterHandler in mymain. */
CpvDeclare(int,exitHandler);
CpvDeclare(int,node0Handler);
CpvDeclare(int,node1Handler);
/* CmiWallTimer() readings taken at the start and end of a round. */
CpvStaticDeclare(double,startTime);
CpvStaticDeclare(double,endTime);

/* Persistent handle created in mymain and selected around each send in the
   two ping-pong handlers.
   NOTE(review): this is an ordinary global rather than a Cpv variable, so it
   looks like it would be shared by all PEs living in one address space --
   confirm whether that is intended. */
PersistentHandle h;

/* CmiWallTimer() reading at which the current idle period began; ApplIdleEnd
   resets it to -1 once the period has been accounted for. */
CpvDeclare(double, IdleStartTime);
/* Idle time accumulated by ApplIdleEnd since startRing last zeroed it. */
CpvDeclare(double, IdleTime);

/*
 * Callback registered in mymain for CcdPROCESSOR_BEGIN_IDLE.
 * Stamps the current wall-clock time into IdleStartTime; the argument is
 * not used.
 */
void ApplIdleStart(void *)
{
  const double now = CmiWallTimer();
  CpvAccess(IdleStartTime) = now;
}

/*
 * Callback registered in mymain for CcdPROCESSOR_END_IDLE.
 * Adds the time elapsed since IdleStartTime to IdleTime and then marks the
 * idle period as closed by storing -1. A negative IdleStartTime means no
 * idle period is open, in which case nothing is changed. The argument is
 * not used.
 */
void ApplIdleEnd(void *)
{
  const double idleBegan = CpvAccess(IdleStartTime);

  if (!(idleBegan < 0)) {
    CpvAccess(IdleTime) += CmiWallTimer() - idleBegan;
    CpvAccess(IdleStartTime) = -1;
  }
}

/*
 * Begins a timed round at the next message size.
 *
 * The payload (msgSize minus the Converse header) is doubled, the cycle
 * counter is cleared, and a freshly allocated message carrying the new total
 * size in its first payload word is sent to this PE's own node0 handler.
 * The round's start time is taken, and the idle accumulator cleared, right
 * after that send has been issued.
 */
void startRing()
{
  CpvAccess(cycleNum) = 0;

  /* Double the payload while keeping exactly one header's worth of bytes.
     An earlier, finer-grained progression (+16 below 320, +32 below 512,
     +256 below 4096, doubling above that) is no longer used. */
  const int nextSize =
      (CpvAccess(msgSize) - CmiMsgHeaderSizeBytes) * 2 + CmiMsgHeaderSizeBytes;
  CpvAccess(msgSize) = nextSize;

  char *msg = (char *)CmiAlloc(CpvAccess(msgSize));

  /* The first payload word tells the receiving side how big the message is. */
  int *sizeField = (int *)(msg + CmiMsgHeaderSizeBytes);
  *sizeField = CpvAccess(msgSize);

  CmiSetHandler(msg, CpvAccess(node0Handler));
  CmiSyncSendAndFree(CmiMyPe(), CpvAccess(msgSize), msg);

  CpvAccess(startTime) = CmiWallTimer();
  CpvAccess(IdleTime) = 0.0;
}

void ringFinished(char *msg)
{
  //CmiFree(msg);
  double cycle_time = 
      (1e6*(CpvAccess(endTime)-CpvAccess(startTime)))/(2.*nCycles);
  double compute_time = cycle_time - 
      (1e6*(CpvAccess(IdleTime)))/(2.*nCycles);

  CmiPrintf("[%d] %d \t %5.3lfus \t %5.3lfus\n", CmiMyPe(),
            CpvAccess(msgSize) - CmiMsgHeaderSizeBytes, cycle_time, compute_time);
  
  CpvAccess(sizeNum)++;

  if (CpvAccess(msgSize) < maxMsgSize)
    startRing();
  else 
  {
    void *sendmsg = CmiAlloc(CmiMsgHeaderSizeBytes);
    CmiSetHandler(sendmsg,CpvAccess(exitHandler));
    CmiSyncBroadcastAllAndFree(CmiMsgHeaderSizeBytes,sendmsg);
  }
}

/*
 * Handler for the exit message broadcast by ringFinished.
 * Releases the message and asks this PE's scheduler to stop.
 * The return value is always 0.
 */
CmiHandler exitHandlerFunc(char *msg)
{
  /* The handler owns the incoming message, so free it before leaving. */
  CmiFree(msg);

  CsdExitScheduler();

  return 0;
}

/*
 * Handler run on the PE that started the round each time the message
 * arrives there.
 *
 * Counts the arrival. On the nCycles-th arrival the end time is recorded and
 * the message is passed on to ringFinished; otherwise the message is
 * retargeted at the node1 handler, stamped with the current size, and sent
 * to the mirrored PE (CmiNumPes() - CmiMyPe() - 1) with the persistent
 * handle selected for the duration of the send.
 *
 * The return value is always 0.
 */
CmiHandler node0HandlerFunc(char *msg)
{
  CpvAccess(cycleNum) += 1;

  /* Round complete: stop the clock and hand the message on. */
  if (CpvAccess(cycleNum) == nCycles) {
    CpvAccess(endTime) = CmiWallTimer();
    ringFinished(msg);
    return 0;
  }

  CmiSetHandler(msg, CpvAccess(node1Handler));

  int *sizeField = (int *)(msg + CmiMsgHeaderSizeBytes);
  *sizeField = CpvAccess(msgSize);

  const int partner = CmiNumPes() - CmiMyPe() - 1;

  /* Select the persistent handle only around this one send. */
  CmiUsePersistentHandle(&h, 1);
  CmiSyncSendAndFree(partner, CpvAccess(msgSize), msg);
  CmiUsePersistentHandle(NULL, 0);

  return 0;
}

/*
 * Handler run on the partner PE: bounces the message straight back.
 *
 * The message size is read from the first payload word and stored in this
 * PE's msgSize, the message is retargeted at the node0 handler, and it is
 * sent to the mirrored PE (CmiNumPes() - CmiMyPe() - 1) with the persistent
 * handle selected for the duration of the send.
 *
 * The return value is always 0.
 */
CmiHandler node1HandlerFunc(char *msg)
{
  const int *sizeField = (const int *)(msg + CmiMsgHeaderSizeBytes);
  const int partner = CmiNumPes() - CmiMyPe() - 1;

  /* This PE learns the current size from the message itself. */
  CpvAccess(msgSize) = *sizeField;
  CmiSetHandler(msg, CpvAccess(node0Handler));

  /* Select the persistent handle only around this one send. */
  CmiUsePersistentHandle(&h, 1);
  CmiSyncSendAndFree(partner, CpvAccess(msgSize), msg);
  CmiUsePersistentHandle(NULL, 0);

  return 0;
}

/*
 * Start function passed to ConverseInit by main().
 *
 * Initialises every Cpv variable used by the benchmark, registers the three
 * message handlers, hooks the idle-time callbacks, creates the persistent
 * channel towards this PE's partner, and, on the lower half of the PEs,
 * starts the first round.
 *
 * The return value is always 0.
 */
CmiStartFn mymain()
{
  CpvInitialize(int,msgSize);
  CpvInitialize(int,cycleNum);
  CpvInitialize(int,sizeNum);
  CpvAccess(sizeNum) = 1;
  /* Start from an 8-byte payload; startRing() doubles it before first use. */
  CpvAccess(msgSize)= CmiMsgHeaderSizeBytes + 8;

  CpvInitialize(int,exitHandler);
  CpvAccess(exitHandler) = CmiRegisterHandler((CmiHandler) exitHandlerFunc);
  CpvInitialize(int,node0Handler);
  CpvAccess(node0Handler) = CmiRegisterHandler((CmiHandler) node0HandlerFunc);
  CpvInitialize(int,node1Handler);
  CpvAccess(node1Handler) = CmiRegisterHandler((CmiHandler) node1HandlerFunc);

  CpvInitialize(double,startTime);
  CpvInitialize(double,endTime);

  /* The idle accounting variables are accessed by the idle callbacks and by
     startRing()/ringFinished(), so they need initialising like every other
     Cpv variable. -1 marks "no idle period open", matching ApplIdleEnd. */
  CpvInitialize(double,IdleStartTime);
  CpvInitialize(double,IdleTime);
  CpvAccess(IdleStartTime) = -1;
  CpvAccess(IdleTime) = 0.0;

  /* Both handlers send to the mirrored PE, so the persistent channel has to
     point at that same PE. The two only coincide with "CmiMyPe() ^ 1" when
     exactly two PEs are running, and the XOR form can name a PE that does
     not exist when the PE count is odd. */
  int otherPe = CmiNumPes() - CmiMyPe() - 1;

  CcdCallOnConditionKeep(CcdPROCESSOR_BEGIN_IDLE, ApplIdleStart, NULL);
  CcdCallOnConditionKeep(CcdPROCESSOR_END_IDLE, ApplIdleEnd, NULL);

  /* A PE that mirrors onto itself (the middle PE of an odd count) neither
     starts a round nor is any other PE's partner, so it needs no channel. */
  if (otherPe != CmiMyPe())
    h = CmiCreatePersistent(otherPe, maxMsgSize+1024);

  /* Only the lower half initiates; each of those PEs plays against its
     mirror in the upper half. */
  if (CmiMyPe() < CmiNumPes()/2)
    startRing();

  return 0;
}

/*
 * Program entry point: hands the command line to Converse together with
 * mymain as the start function, then returns 0.
 */
int main(int argc, char *argv[])
{
  ConverseInit(argc, argv, (CmiStartFn)mymain, 0, 0);

  return 0;
}
