// BOC: load balancing strategy

#include <charm++.h>

#include "jacobi.h"

#include "JacobiLB.h"
#include "JacobiLB.def.h"

// Group ID of the JacobiLB group. Assigned in CreateJacobiLB() and used by
// JacobiLB::QuiesResume() to build a proxy to the group.
CkGroupID loadbalancer;

void CreateJacobiLB()
{
  loadbalancer = CProxy_JacobiLB::ckNew();
}

void  JacobiLB::staticMigrated(void* data, LDObjHandle h)
{
  JacobiLB* me = static_cast<JacobiLB*>(data);
  me->Migrated(h);
}

void  JacobiLB::staticAtSync(void* data)
{
  JacobiLB* me = static_cast<JacobiLB*>(data);
  me->AtSync();
}

void  JacobiLB::QuiesResume()
{
  (CProxy_JacobiLB(loadbalancer)).resumeLocalCells();
}

// Constructs one branch of the load balancer group.
//
//  * On PE 0 only: remembers the QuiesResume entry index and this branch's
//    handle (both are later passed to CkStartQD in collectStatis), and
//    allocates the per-PE statistics table `array`.
//  * On every PE: looks up the local LBDatabase branch, registers
//    staticAtSync as a local barrier receiver, turns statistics collection
//    on, and resets the balancing-step bookkeeping.
//
// When DO_INITIAL_BALANCE is defined the step counters start at 1 with a
// balance already marked as in progress.
JacobiLB::JacobiLB()
{
  const bool onRootPe = (CkMyPe() == 0);

  // Saved for quiescence detection; only PE 0 ever starts it.
  if (onRootPe) {
    funcIdx = CProxy_JacobiLB::ckIdx_QuiesResume();
    mainhandle = thishandle;
  }

  // Hook this branch into the local load balancing database.
  // (Older LBDBCreate / PESync based setup is no longer used here.)
  theLbdb = CProxy_LBDatabase(lbdb).ckLocalBranch();
  theLbdb->AddLocalBarrierReceiver((LDBarrierFn)staticAtSync,
                                   static_cast<void*>(this));
  theLbdb->CollectStatsOn();

  // Statistics are gathered on PE 0 only, one slot per processor.
  if (onRootPe) {
    count = 0;
    array = new LoadData[CkNumPes()];
  }

#ifdef DO_INITIAL_BALANCE
  // gzheng: start out with a balance scheduled for step 1.
  curBalStep = 1;
  nextBalStep = 1;
  loadBalanceInProgress = 1;
#else
  curBalStep = 0;
  nextBalStep = -1;
  loadBalanceInProgress = 0;
#endif
  maxBalStep = -1;
  balanceCount = 0;
  loadBalanceSoon = 0;
}

// Releases the per-PE statistics table, which the constructor allocates on
// PE 0 only. (The older PESync unregister/delete calls are no longer used.)
JacobiLB::~JacobiLB()
{
   if (CkMyPe() != 0)
     return;

   delete [] array;
}


void JacobiLB::balanceNow(void)
{
//  CkPrintf("[%d] balanceNow on step curBalStep:%d nextBalStep:%d\n",CkMyPe(),curBalStep, nextBalStep);
  if (curBalStep == nextBalStep) {
//    CkPrintf("[%d] Balancing on step %d\n",CkMyPe(),curBalStep);

//     lbdata = ObjLoads(lbdb, &numObj);
    const int osz = theLbdb->GetObjDataSz();
    numObj = osz;
    lbdata = new LDObjData[osz];
    theLbdb->GetObjData(lbdata);
    theLbdb->ClearLoads();

     // bug in Lbdb here
    if (numObj<0) 
      CkPrintf("Illegal numObj %d\n",numObj);

    objDataMsg *msg = new objDataMsg;
    msg->cputime = 0.0;
    msg->walltime = 0.0;
    msg->pe = CkMyPe();
    // sum up all the statistics
    for (int i=0; i<numObj; i++) {
      // debug
      // CkPrintf("lbdata[%d] cpu=%f wall=%f\n",
      // i,lbdata[i].cpuTime,lbdata[i].wallTime);

      msg->cputime += lbdata[i].cpuTime;
      msg->walltime += lbdata[i].wallTime;
    }

    CkPrintf("PE %d had %d objects\n",CkMyPe(),numObj);
    (CProxy_JacobiLB(thisgroup)).collectStatis(msg, 0);
    loadBalanceInProgress++;
    if (loadBalanceInProgress > N_BALANCING_STEPS)
      loadBalanceInProgress = 0;
    else 
      nextBalStep = curBalStep + 1;

  } else if (loadBalanceSoon && !loadBalanceInProgress) {
    loadBalanceInProgress = 1;
    loadBalanceSoon = 0;
    BalTriggerMsg *msg = new BalTriggerMsg;
    msg->step = curBalStep;
    (CProxy_JacobiLB(thisgroup)).BalanceSync(msg,0);
  } else {
//    CkPrintf("[%d] not Balancing on step %d\n",CkMyPe(),curBalStep);
    
    resumeLocalCells();
  }
  curBalStep++;
}

// Records a request to balance; balanceNow() acts on the flag the next time
// it runs with no balance in progress. The message carries no data that is
// read here and is simply freed.
void JacobiLB::BalanceSoon(BalSoonMsg *m)
{
  delete m;
  loadBalanceSoon = 1;
}

// Receives one balance trigger per PE (balanceNow() sends them to PE 0).
// Tracks the largest step reported; once CkNumPes() triggers have arrived,
// tells every PE, via NextBalance(), to balance on the step after that
// maximum, then resets the counters for the next round.
void JacobiLB::BalanceSync(BalTriggerMsg *m)
{
  const int numPes = CkNumPes();

  ++balanceCount;
  if (maxBalStep < m->step)
    maxBalStep = m->step;
  delete m;

  // Still waiting for triggers from the remaining PEs.
  if (balanceCount != numPes)
    return;

  CProxy_JacobiLB group(thisgroup);
  for (int pe = 0; pe < numPes; ++pe) {
    BalTriggerMsg *notice = new BalTriggerMsg;
    notice->step = maxBalStep + 1;
    CkPrintf("[%d] Suggesting balance on step %d\n",CkMyPe(),notice->step);
    group.NextBalance(notice,pe);
  }

  balanceCount = 0;
  maxBalStep = -1;
}

// Receives the step chosen in BalanceSync(): stores it as the next
// balancing step, frees the message, and lets the local clients continue.
void JacobiLB::NextBalance(BalTriggerMsg *m)
{
  nextBalStep = m->step;
  CkPrintf("[%d] I will balance on step %d\n",CkMyPe(),m->step);
  delete m;

  resumeLocalCells();
}

// Resumes the clients of the local LB database branch.
// Called when no balancing happens on a step (balanceNow), after the next
// balancing step has been agreed (NextBalance), and on the whole group from
// QuiesResume().
void JacobiLB::resumeLocalCells()
{
  // Legacy implementation, kept for reference: it sent an explicit resume
  // message to each local Cell and freed lbdata here.
//   int i;
//   CProxy_Cell cr(arrayGroup);
//   for(i=0;i<numObj;i++) {
//     DummyMsg2 *m2 = new DummyMsg2;
//     cr[lbdata[i].id[0]].resume(m2);
//   }
//   delete [] lbdata;
  theLbdb->ResumeClients();
}

// only called by processor 0
// Gathers one statistics message per PE (balanceNow() addresses them to
// PE 0, the only PE on which `array` is allocated).
//
// Each message's CPU and wall time is stored in the slot for its sender.
// When all CkNumPes() reports are in, the totals and averages are printed,
// each PE's deviation of wall time from the average wall time is packed
// into a stgyMsg, that message is sent to the whole group through
// migrate(), the counter is reset, and quiescence detection is started so
// that QuiesResume() runs afterwards.
//
// Takes ownership of dataMsg and deletes it.
void JacobiLB::collectStatis(objDataMsg *dataMsg)
{
  const int numPe = CkNumPes();

  // debug
  CkPrintf("JacobiLB collecting statistics(count %d) from %d: %f %f.\n",
	   count,dataMsg->pe,dataMsg->cputime, dataMsg->walltime);

  // Record this PE's report, then the message is no longer needed.
  LoadData &slot = array[dataMsg->pe];
  slot.cputime = dataMsg->cputime;
  slot.walltime = dataMsg->walltime;
  delete dataMsg;

  ++count;
  if (count != numPe)
    return;   // still waiting for other PEs

  // Totals over all PEs, for both CPU and wall time.
  double totalCPU = 0.0;
  double totalWall = 0.0;
  for (int pe = 0; pe < numPe; ++pe) {
    totalCPU += array[pe].cputime;
    totalWall += array[pe].walltime;
  }
  CkPrintf("Total load CPU:%f Wall:%f\n", totalCPU,totalWall);

  const double averageCPU = totalCPU / numPe;
  const double averageWall = totalWall / numPe;

  // for debug
  CkPrintf("Average load CPU:%f Wall:%f\n", averageCPU,averageWall);

  // Positive entries are overloaded PEs, negative entries underloaded ones;
  // the difference is measured in wall time.
  stgyMsg *plan = new stgyMsg;
  plan->averageCPU = averageCPU;
  plan->averageWall = averageWall;
  plan->num = numPe;
  for (int pe = 0; pe < numPe; ++pe) {
    plan->load[pe] = array[pe].walltime - averageWall;
    // for debug
    CkPrintf("load difference in (%d): %f\n", pe, plan->load[pe]);
  }

  // debug
  CkPrintf("JacobiLB - broadcast migrate.\n");

  // broadcast to all BOCs
  CProxy_JacobiLB group(thisgroup);
  group.migrate(plan);

  // reset for the next round
  count = 0;

  // activate the quiescence function of main
  CkStartQD(funcIdx, &mainhandle);
}

// Fraction of the average wall time by which a PE must exceed the average
// before RandomMigrate() treats it as overloaded.
#define PRECISION  0.1

// Dispatches the balancing plan to the strategy selected by `lbstrategy`:
//   1 -> RandomMigrate, 2 -> BetterRandomMigrate.
//
// Both strategies take ownership of dataMsg and release the `lbdata`
// snapshot taken in balanceNow(). For any other strategy value nothing is
// migrated, so both are released here instead of being leaked.
void JacobiLB::migrate(stgyMsg *dataMsg)
{
  //  CkPrintf("[%d] starting migrate\n",CkMyPe());
  if (lbstrategy == 1) {
    RandomMigrate(dataMsg);
  } else if (lbstrategy == 2) {
    BetterRandomMigrate(dataMsg);
  } else {
    // No strategy ran: free what a strategy would otherwise have freed.
    delete [] lbdata;
    delete dataMsg;
  }
  //  CkPrintf("[%d] ending migrate\n",CkMyPe());
}

// Deterministic donor/receiver matching strategy.
//
// dataMsg->load[pe] is each PE's wall-time deviation from the average
// (positive = overloaded). Every PE walks the same donor/receiver pairing
// over that table, in PE order, updating the table as it goes; a PE only
// performs real migrations (via SendTo) for the pairs in which it is the
// donor, and stops once its own turn as donor has been processed.
//
// A PE whose overload is below 5% of the average wall time does nothing.
//
// Takes ownership of dataMsg and releases the `lbdata` snapshot taken in
// balanceNow() on every path, including the early exit.
void JacobiLB::BetterRandomMigrate(stgyMsg *dataMsg)
{
  double *overload = dataMsg->load;
  const double epsilon = 0.05 * dataMsg->averageWall;
  const int me = CkMyPe();

  // Nothing to do for this PE. A non-positive epsilon (zero average wall
  // time) is also treated as "nothing to do": with epsilon == 0 the pairing
  // loop below can stop making progress and never terminate.
  if (epsilon <= 0 || overload[me] < epsilon) {
    delete [] lbdata;
    delete dataMsg;
    return;
  }

  // Marks local objects already handed to some receiver in this round.
  int *already_migrated = new int[numObj];
  int i;
  for(i=0; i < numObj; i++)
    already_migrated[i] = 0;

  double my_overload = overload[me];
  int P = CkNumPes();
  int done = 0;
  int donor = 0;
#ifdef AVOID_PE_ZERO
  int receiver = 1;
#else
  int receiver = 0;
#endif
  double really_sent = 0;

  while (!done) {
    // Next PE that is overloaded by at least epsilon.
    while ((donor  < P) && (overload[donor] < epsilon)) donor++;

    if (donor == P) break;

    while (overload[donor] >= 0) {
      // Next PE that is underloaded by at least epsilon.
      while ((receiver < P) && (overload[receiver] > -epsilon))
	receiver++;

      if (receiver == P) {
	// No capacity left anywhere.
	done = 1;
	break;
      }

      // Move as much as the receiver can take, or all the donor has,
      // whichever is smaller.
      double transfer;
      int receiver_not_full;

      if (-overload[receiver] < overload[donor]) {
	transfer = -overload[receiver];
	receiver_not_full = 0;
      } else {
	transfer = overload[donor];
	receiver_not_full = 1;
      }

      if (donor == me)
	really_sent += SendTo(transfer, receiver, already_migrated);

      // The table is updated with the planned amount so all PEs stay in
      // step; the actual transfer may have been less due to quantization.
      overload[donor] -= transfer;
      overload[receiver] += transfer;

      // Donor is drained; move on to the next donor.
      if (receiver_not_full) break;
    }
    if (donor == me)
      done = 1;
  }
  // debug
  CkPrintf("[%d] Load %f %f %f\n", CkMyPe(),
	   my_overload,really_sent,my_overload-really_sent);

  delete [] already_migrated;
  delete [] lbdata;
  delete dataMsg;
}

// Migrates local objects to `receiver` until roughly `transfer` seconds of
// wall time have been moved.
//
//   transfer          amount of wall time the caller wants to move
//   receiver          destination PE
//   already_migrated  per-object flags (indexed like lbdata); objects with a
//                     non-zero flag are skipped, and objects migrated here
//                     are flagged
//
// Candidates are visited in descending order of wall time; an object is
// migrated only if its wall time is strictly less than what is still left
// to transfer. At least one not-yet-migrated object is always kept locally.
//
// Returns the total wall time of the objects actually migrated.
double JacobiLB::SendTo(double transfer, int receiver, int *already_migrated)
{
  CkPrintf("[%d] SendTo transferring %f of %d to %d\n",
	   CkMyPe(),transfer,numObj,receiver);

  // Count the candidates first so the early-out below allocates nothing
  // (the index array used to be leaked on that path).
  int i;
  int n_migratable=0;
  for(i=0; i < numObj; i++) {
    if (!already_migrated[i])
      n_migratable++;
  }

  // First, never send off your last object
  if (n_migratable<=1) return 0;

  // Indices (into lbdata) of the objects that may still be migrated.
  int *object = new int[n_migratable];
  int filled = 0;
  for(i=0; i < numObj; i++) {
    if (!already_migrated[i])
      object[filled++] = i;
  }

#if 0
  // Shuffle objects
  for(i=0; i < n_migratable-1; i++) {
    const int swapwith = (rand() % (n_migratable-i)) + i;
    if (swapwith != i) {
      const int tmp = object[swapwith];
      object[swapwith] = object[i];
      object[i] = tmp;
    }
  }
#endif

#if 1
  // Sort objects, descending order of wall time (insertion sort)
  for(int j = 1; j < n_migratable; j++) {
    int k = j-1;
    int a = object[j];

    while (k >= 0 && lbdata[object[k]].wallTime < lbdata[a].wallTime) {
      object[k+1] = object[k];
      k--;
    }
    object[k+1] = a;
  }
#endif

  double transferred = 0;
  int n_transferred=0;

  // Deal out objects, migrating ones that fit
  for(i=0; i < n_migratable; i++) {
    const double objtime = lbdata[object[i]].wallTime;
    if (objtime < transfer) {
      transfer -= objtime;
      transferred += objtime;
      n_transferred++;
      theLbdb->Migrate(lbdata[object[i]].handle, receiver);
      // NOTE(review): `id` is passed to a %d conversion; confirm that its
      // type really is an int, otherwise this format string is mismatched.
      CkPrintf("Migrating %d[%d] %f from %d to %d\n",
	       lbdata[object[i]].id,object[i],lbdata[object[i]].wallTime,
	       CkMyPe(), receiver);
      already_migrated[object[i]] = 1;
    }

    // Keep at least one candidate on this PE.
    if (n_transferred == n_migratable-1) break;
  }

  delete [] object;
  return transferred;
}

// Random migration strategy.
//
// If this PE's wall-time overload (dataMsg->load[myPe]) exceeds
// PRECISION * averageWall, it walks its objects starting just after a
// random index and examines at most numObj-1 of them. Each object whose
// wall time is smaller than the remaining overload is migrated to a
// randomly chosen underloaded PE (load < 0, not this PE), if one is found
// within CkNumPes()*10 random draws.
//
// Takes ownership of dataMsg and releases the `lbdata` snapshot taken in
// balanceNow().
void JacobiLB::RandomMigrate(stgyMsg *dataMsg)
{
  const int myPe = CkMyPe();

  // overloaded
  const double threshold = PRECISION * dataMsg->averageWall;

  // numObj > 0 guards the modulo operations below.
  if (numObj > 0 && dataMsg->load[myPe] > threshold) {
    double over = dataMsg->load[myPe];
    // decide which obj should be migrated first
    int objtomigrate = rand() % numObj;

    for (int i = 0; over > threshold && i < numObj-1; i++) {
      objtomigrate = (objtomigrate + 1) % numObj;
      const double timeUsed = lbdata[objtomigrate].wallTime;

      // Only objects that fit inside the remaining overload are moved.
      if (timeUsed >= over)
	continue;

      // Pick randomly an underloaded processor. An explicit flag records
      // success so that a hit on the very last draw is not thrown away.
      int dest = -1;
      bool found = false;
      for (int tries = CkNumPes() * 10; tries > 0 && !found; tries--) {
	dest = rand() % dataMsg->num;
	found = (dataMsg->load[dest] < 0 && dest != myPe);
      }

      if (found) {
	// debug
	CkPrintf("Object %d migrating from %d to %d\n",
		 objtomigrate, CkMyPe(), dest);

	// migrate to dest
	theLbdb->Migrate(lbdata[objtomigrate].handle, dest);

	over -= timeUsed;
      }
    }
  }

  // debug
  CkPrintf("On %d Strategy migrate complete. \n", CkMyPe());

  delete [] lbdata;
  delete dataMsg;
}

// Target of the staticMigrated() trampoline. Intentionally a no-op: the
// handle `h` is ignored.
void JacobiLB::Migrated(LDObjHandle h)
{
}

void JacobiLB::AtSync()
{
  this->balanceNow();
}

