/*
Tiny program to demonstrate speed of array broadcasts
for [nokeep] entry methods.

For large broadcasts to lots of elements per processor,
this can be much much faster than copying the message 
for each element.
*/
#include <stdio.h>
#include <stdlib.h>
#include <vector>
#include "hello.decl.h"

/* Proxy to the Main chare; assigned in Main's constructor and used by
   the array elements to report back.  (Marked readonly -- presumably
   declared as such in the interface file; confirm there.) */
/*readonly*/ CProxy_Main mainProxy;
/* Number of array elements; defaults to 5 and may be overridden by the
   first command-line argument in Main's constructor. */
/*readonly*/ int nElements;
const int nReps=100; /* Number of broadcasts to do */
const int nData=10000; /* Number of doubles to send */

/*mainchare*/
/*
Driver chare: creates the Hello array, broadcasts nData doubles to it
nReps times, and prints timing figures when done() is invoked.
*/
class Main : public Chare
{
  double startTime; /* Wall-clock time sampled just before the first broadcast */
public:
  /*
  Reads the optional element count from argv[1] (default 5), creates
  the array and issues all nReps broadcasts.
  */
  Main(CkArgMsg* m)
  {
    //Process command-line arguments
    const int defaultElements=5;
    nElements=defaultElements;
    if (m->argc>1) {
      nElements=atoi(m->argv[1]);
      if (nElements<1) {
        //atoi returns 0 for non-numeric text; in this file done() is only
        //triggered by array elements, so an empty array would never finish.
        CkPrintf("Ignoring invalid element count '%s'; using %d\n",
                 m->argv[1],defaultElements);
        nElements=defaultElements;
      }
    }
    delete m;

    //Start the computation
    CkPrintf("Running Hello on %d processors for %d elements\n",
             CkNumPes(),nElements);
    mainProxy = thishandle;

    CProxy_Hello arr = CProxy_Hello::ckNew(nElements);

    //Zero-filled payload: the contents are irrelevant to the timing, but
    //sending indeterminate values is best avoided.
    std::vector<double> data(nData);
    startTime=CkWallTimer();
    for (int i=0;i<nReps;i++)
      arr.SayHi(nData,&data[0]);
    //NOTE(review): the buffer is released when this constructor returns,
    //as the original did with delete[] right after the loop; this assumes
    //the proxy call copies the array at send time -- confirm.
  }

  /*
  Prints the elapsed time and the derived per-broadcast and
  per-processor figures, then shuts the program down.
  */
  void done(void)
  {
    double elapsed=CkWallTimer()-startTime;
    //Widen to double before multiplying so that large element counts
    //cannot overflow int arithmetic.
    double bytes=static_cast<double>(nElements)*nData*sizeof(double);
    CkPrintf("%f s for %d broadcasts to %d elements on %d processors (%g bytes total)\n",
             elapsed,nReps,nElements,CkNumPes(),
             nReps*bytes);
    CkPrintf("%f s / broadcast\n",elapsed/nReps);
    CkPrintf("%.3g bytes / second / processor\n",(nReps*bytes)/(elapsed*CkNumPes()));
    CkExit();
  }
};

/*array [1D]*/
/*
Array element that counts the SayHi broadcasts it receives and, after
the last one, joins a reduction whose callback is Main::done().
*/
class Hello : public CBase_Hello
{
  int nTimes; /* Number of SayHi broadcasts received so far */
public:
  Hello() : nTimes(0) {}

  /* Migration constructor.  No pup routine is visible in this file, so
     the counter is given a defined starting value rather than being
     left indeterminate. */
  Hello(CkMigrateMessage *m) : nTimes(0) {}

  /*
  Broadcast target.  The payload (n doubles at data) is deliberately
  not read; only the arrival is counted.
  */
  void SayHi(int n,const double *data)
  {
    //Contribute exactly once, after the final broadcast.  The reduction
    //completes only when every element has contributed, so Main::done()
    //runs a single time, after all elements have seen all nReps
    //broadcasts.  Contributing on every call (and additionally calling
    //mainProxy.done() directly) let done() -- and thus CkExit() -- fire
    //before the broadcasts being timed had all been delivered.
    if (++nTimes==nReps)
      contribute(0,0,CkReduction::sum_int,CkCallback(CkIndex_Main::done(),mainProxy));
  }
};

#include "hello.def.h"
