OpenAtom  Version1.5a
CP_State_ParticlePlane.C
Go to the documentation of this file.
1 ///////////////////////////////////////////////////////////////////////////
2 ///////////////////////////////////////////////////////////////////////////
3 ///////////////////////////////////////////////////////////////////////////
4 /** \file CP_State_ParticlePlane.C
5  * @defgroup Particle
6  * @{
7  *
8  * \brief Compute kinetic energy of the non-interacting electrons and non-local forces based on the particle view of the system, triggered by \ref GSpaceState and overlaps with those computations when possible.
9  *
10  * The kinetic energy of non-interacting electrons is expressed (in
11  * the computer science parlance) as a ``point by point multiply'' and
12  * reduction operation.
13  * \f$$(\hbar^2/2m_e)\sum_{g_x,g_y,g_z\epsilon
14  * |{\bf g}|<g_{cut}}\sum_s f_sg^2 |\Psi(s,g_x,g_y,g_z)|^2$\f$
15  *
16  * Life-cycle of a CP_State_ParticlePlane:
17  *
18  * The particle-plane array is a shadow of the g-space planes. This means
19  * that UparticlePlaneProxy[thisInstance.proxyOffset](s, p) exists on the same processor as
20  * UgSpacePlaneProxy[thisInstance.proxyOffset](s, p). Also, both chare arrays have the same number of
21  * objects.
22  *
23  * The life-cycle of this object involves computation of a portion of
24  * the Z-matrix, reduction of the Z matrix over g-space and computing
25  * forces using the particle data. ComputeZ is triggered by the
26  * arrival of the structure factor for each atom group at the local
27  * sfcache.
28  *
29  * The computation of the Z-matrix is done in the method computeZ().
30  * Immediately after this, the matrix is reduced using the reduceZ() method.
31  * Through the reduceZ() method, a portion of the reduced matrix is
32  * available at the zeroth plane of each state.
33  *
34  * The reduced matrix is sent from the particle-plane 0 to all the particle
35  * planes in the state using the getForces() method, which computes
36  * forces and adds these forces to the forces compputed using electron
37  * density in the g-space planes.
38  *
39  * Important information:
40  * 1. The dimensionality of the Z-matrix depends upon how may orbital levels
41  * are considered (l = 0,1...). The current code assumes that only l = 0
42  * is allowed. This has to be changed.
43  *
44  * 2. The special point gx=gy=gz=0 is not dealt with.
45  */
46 ///////////////////////////////////////////////////////////////////////////
47 
48 #include "CP_State_ParticlePlane.h"
49 #include "CP_State_GSpacePlane.h"
50 #include "CP_State_Plane.h"
52 #include "structure_factor/StructFactorCache.h"
53 #include "main/cpaimd.h"
54 #include "main/AtomsCache.h"
55 #include "main/eesCache.h"
56 #include "utility/util.h"
57 
58 #include "ckmulticast.h"
59 #include "charm++.h"
60 #include "src_piny_physics_v1.0/include/class_defs/CP_OPERATIONS/class_cpnonlocal.h"
61 #include "src_piny_physics_v1.0/include/class_defs/CP_OPERATIONS/class_cplocal.h"
62 extern CProxy_ENL_EKE_Collector ENLEKECollectorProxy;
63 ///////////////////////////////////////////////////////////////////////////
64 
65 extern CProxy_main mainProxy;
66 extern CProxy_InstanceController instControllerProxy;
67 extern CkVec <CProxy_CP_State_GSpacePlane> UgSpacePlaneProxy;
68 extern CkVec <CProxy_GSpaceDriver> UgSpaceDriverProxy;
69 extern CkVec <CProxy_AtomsCache> UatomsCacheProxy;
70 extern CkVec <CProxy_CP_State_ParticlePlane> UparticlePlaneProxy;
71 extern CkVec <CProxy_CP_State_RealParticlePlane> UrealParticlePlaneProxy;
72 extern CkVec <CProxy_StructFactCache> UsfCacheProxy;
73 extern CkVec <CProxy_eesCache> UeesCacheProxy;
74 extern CkVec <CProxy_FFTcache> UfftCacheProxy;
75 
76 extern CkGroupID mCastGrpId;
77 extern ComlibInstanceHandle gssPInstance;
78 
79 extern int nstates;
80 extern int nchareG;
81 extern Config config;
82 
83 //#define _CP_DEBUG_STATE_GPP_VERBOSE_
84 
85 ///////////////////////////////////////////////////////////////////////////
86 
87 
88 
89 
90 //////////////////////////////////////////////////////////////////////////////
91 /// Reduction client for N^3 nonlocal energy computation
92 //////////////////////////////////////////////////////////////////////////////
93 ///////////////////////////////////////////////////////////////////////////cc
94 //////////////////////////////////////////////////////////////////////////////
95 void CP_State_ParticlePlane::CP_State_ParticlePlane::printEnl(CkReductionMsg *msg){
96 
97  double d = ((double *)msg->getData())[0];
98  delete msg;
99 
100  ENLEKECollectorProxy[thisInstance.idxU.z].acceptENL(d);
101  // CkPrintf("{%d} ENL = %5.8lf\n", thisInstance.proxyOffset, d); // tell the world
102  // CkAbort("fix CP_StateParticlePlane printEnl to be instance aware");
103  UgSpacePlaneProxy[thisInstance.proxyOffset](0,0).computeEnergies(ENERGY_ENL, d); //store it
104 }
105 //////////////////////////////////////////////////////////////////////////////
106 
107 
108 
109 //////////////////////////////////////////////////////////////////////////////
110 /// The glorious constructor
111 //////////////////////////////////////////////////////////////////////////////
112 //////////////////////////////////////////////////////////////////////////////
113 //////////////////////////////////////////////////////////////////////////////
114 CP_State_ParticlePlane::CP_State_ParticlePlane(
115  int x, int y, int z, int xNL, int yNL, int zNL,
116  int _gSpacePPC, int numSfGrps_in,int natm_nl_in, int natm_nl_grp_max_in,
117  int _nstates, int _nchareG, int _Gstates_per_pe, int _numNLiter,
118  int _ees_nonlocal, UberCollection _instance) :
119  sizeX(x), sizeY(y), sizeZ(z), ngridaNL(xNL), ngridbNL(yNL), ngridcNL(zNL),
120  ees_nonlocal(_ees_nonlocal), nstates(_nstates), nchareG(_nchareG),
121  Gstates_per_pe(_Gstates_per_pe), numNLiter(_numNLiter),
122  gSpacePlanesPerChare(_gSpacePPC), thisInstance(_instance)
123 //////////////////////////////////////////////////////////////////////////////
124  {//begin routine
125 //////////////////////////////////////////////////////////////////////////////
126 
127  myChareG = thisIndex.y;
128  ibead_ind = thisInstance.idxU.x;
129  kpoint_ind = thisInstance.idxU.y;
130  itemper_ind = thisInstance.idxU.z;
131  iteration = 0;
132  iterNL = 0;
133  countNLIFFT = 0;
134  doneEnl = 0;
135  doneForces = 0;
136  enl = 0.0;
137  energy_count = 0;
138  totalEnergy = 0.0;
139  sendDone = 0;
140  istate_ind = thisIndex.x;
141 
142  numSfGrps = numSfGrps_in;
143  natm_nl = natm_nl_in;
144  natm_nl_grp_max = natm_nl_grp_max_in;
145  count = new int[numSfGrps];
146  haveSFAtmGrp = new int[numSfGrps];
147  memset(haveSFAtmGrp,-1,numSfGrps*sizeof(int));
148  bzero(count,numSfGrps*sizeof(int));
149 
150  zsize = numSfGrps*natm_nl_grp_max;
151 
152  zmatrixSum = NULL;
153  zmatrix = NULL;
154  zmatrix_fx = NULL; zmatrix_fy = NULL; zmatrix_fz = NULL;
155  zmatrixSum_fx = NULL; zmatrixSum_fy = NULL; zmatrixSum_fz = NULL;
156 
157 
158 
159 //////////////////////////////////////////////////////////////////////////////
160 /// No load balancing and no atsyncing either!
161 
162  setMigratable(false);
163  usesAtSync = false;
164 
165 //////////////////////////////////////////////////////////////////////////////
166 /// report your status to main
167 
168  int constructed=1;
169  contribute(sizeof(int), &constructed, CkReduction::sum_int,
170  CkCallback(CkIndex_InstanceController::doneInit(NULL),CkArrayIndex1D(thisInstance.proxyOffset),instControllerProxy), thisInstance.proxyOffset);
171 #ifdef _CP_GS_DEBUG_COMPARE_VKS_
172  savedprojpsiBf=NULL;
173  savedprojpsiBfsend=NULL;
174  savedprojpsiGBf=NULL;
175 #endif
176 //---------------------------------------------------------------------------
177  }//end routine
178 //////////////////////////////////////////////////////////////////////////////
179 
180 
181 /** It is invoked from CP_State_GSpacePlane::initGSpace(). It also takes care of
182  * the array section creation which needs to wait for the doneInsertion phase
183  * @todo: Made this public to remove friendship. Check if this can go back to being private.
184  */
186 {
187 //////////////////////////////////////////////////////////////////////////////
188 /// Comlib to talk to realPP : used when ees_enl_on==1
189 
190  /// realPP proxies are created only if ees_nonlocal is turned on
191  if(ees_nonlocal == 1)
192  {
193  realPP_proxy = UrealParticlePlaneProxy[thisInstance.proxyOffset];
194  #ifdef USE_COMLIB
195  if (config.useGssInsRealPP)
196  ComlibAssociateProxy(gssPInstance,realPP_proxy);
197  #endif
198  }
199 
200 //////////////////////////////////////////////////////////////////////////////
201 /// Register with the SFCache
202 
203  if(ees_nonlocal==0){
204 #ifdef _CP_DEBUG_SF_CACHE_
205  CkPrintf("PP [%d,%d] has %d numSfGrps\n",thisIndex.x, thisIndex.y, numSfGrps);
206 #endif
207  StructFactCache *sfcache = UsfCacheProxy[thisInstance.proxyOffset].ckLocalBranch();
208  for(int i=0;i<numSfGrps;i++){
209 #ifdef _CP_DEBUG_SF_CACHE_
210  CkPrintf("PP [%d,%d] registers grp %i with SFC[%d]\n",
211  thisIndex.x, thisIndex.y,i, CkMyPe());
212 #endif
213  sfcache->registerPP(thisIndex.x, thisIndex.y,i);
214  }//endfor
215  }//endif
216 
217 //////////////////////////////////////////////////////////////////////////////
218 /// Create section proxy for ENL reduction ParticlePlane (any state#, reductionPlaneNum)
219 /// This is used when ees_enl_on ==0
220 
221  //--------------------------------------------------------------------------
222  // Compute all reduction planes for all chares
223 
224 
225 
226  int *red_pl = new int[nstates];
227  int numProcs=CkNumPes();
228  int *usedProc= new int[numProcs];
229  memset(usedProc,0,sizeof(int)*numProcs);
230  int charperpe=nstates/numProcs;
231  if(nstates%numProcs!=0) charperpe++;
232  if(charperpe<1) charperpe=1;
233  for(int state=0; state<nstates;state++){
234  int plane=nchareG-1;
235  while(plane>=0)
236  {
237  bool used=false;
238  int thisstateplaneproc = GSImaptable[thisInstance.proxyOffset].get(state, plane)%numProcs;
239  if(usedProc[thisstateplaneproc]>=charperpe);
240  {
241  used=true;
242  }
243  if(!used || plane==0)
244  {
245  red_pl[state]=plane;
246  (usedProc[thisstateplaneproc])++;
247  plane=-1;
248  }
249  plane--;
250  }
251  }
252  for(int state=0; state<nstates;state++){
253  int plane=0;
254  while(plane<nchareG)
255  {
256  bool used=false;
257  int thisstateplaneproc = GSImaptable[thisInstance.proxyOffset].get(state, plane)%CkNumPes();
258  if(usedProc[thisstateplaneproc]>=charperpe);
259  {
260  used=true;
261  }
262  if(!used || (plane+1==nchareG))
263  {
264  usedProc[thisstateplaneproc]++;
265  red_pl[state]=plane;
266  plane=nchareG;
267  }
268  plane++;
269  }
270  }
271  reductionPlaneNum = red_pl[thisIndex.x];
272  delete [] usedProc;
273  //--------------------------------------------------------------------------
274  // If you are a reduction plane, set up your comm with your guys and the
275  // the other reduction planes. Store your cookie to eat later.
276 
277  if(thisIndex.x==0 && thisIndex.y==reductionPlaneNum){
278 
279  CkArrayIndexMax *elems = new CkArrayIndexMax[nstates];
280 
281  CkArrayIndex2D idx(0, reductionPlaneNum); // plane# = this plane#
282  for (int j = 0; j < nstates; j++) {
283  idx.index[0] = j;
284  idx.index[1] = red_pl[j];
285  // idx.index[1] = calcReductionPlaneNum(j); ifndef USE_TOPOMAP
286  elems[j] = idx;
287  }//endfor
288 
289  particlePlaneENLProxy =
290  CProxySection_CP_State_ParticlePlane::ckNew(UparticlePlaneProxy[thisInstance.proxyOffset].ckGetArrayID(),
291  elems, nstates);
292  CkMulticastMgr *mcastGrp = CProxy_CkMulticastMgr(mCastGrpId).ckLocalBranch();
293  particlePlaneENLProxy.ckDelegate(mcastGrp);
294  EnlCookieMsg *emsg= new EnlCookieMsg;
295  mcastGrp->setSection(particlePlaneENLProxy);
296  particlePlaneENLProxy.setEnlCookie(emsg);
297  delete [] elems;
298 
299  }//endif
300  delete [] red_pl;
301 
302  GStateSlab *gss = &( UgSpacePlaneProxy[thisInstance.proxyOffset](thisIndex.x, thisIndex.y).ckLocal()->gs );
303  numLines = gss->numLines;
304  numFullNL = gss->numFullNL;
305  ees_nonlocal = gss->ees_nonlocal;
306  gSpaceNumPoints = gss->numPoints;
307 
308  myForces = (complex *)fftw_malloc(gSpaceNumPoints*sizeof(complex)); // forces on coefs
309  bzero(myForces,gSpaceNumPoints * sizeof(complex));
310 
311  if(ees_nonlocal==1){
312  dyp_re = (double *)fftw_malloc(gSpaceNumPoints*sizeof(double));
313  dyp_im = (double *)fftw_malloc(gSpaceNumPoints*sizeof(double));
314  projPsiG = (complex *)fftw_malloc(numFullNL *sizeof(complex)); // fft projector
315  bzero(projPsiG, numFullNL * sizeof(complex));
316  }//endif
317 
318  // This occurs AFTER GSP has registered
319  registrationFlag = 0;
320  if(ees_nonlocal==1){
321  eesCache *eesData = UeesCacheProxy[thisInstance.proxyOffset].ckLocalBranch ();
322  int ncoef = gSpaceNumPoints;
323  int *k_x = eesData->GspData[myChareG]->ka;
324  int *k_y = eesData->GspData[myChareG]->kb;
325  int *k_z = eesData->GspData[myChareG]->kc;
326  int mycoef= eesData->GspData[myChareG]->ncoef;
327  if(eesData->allowedGspChares[myChareG]==0 || mycoef != ncoef){
328  CkPrintf("Plane %d of state %d toasy %d %d\n",myChareG,thisIndex.x,mycoef,ncoef);
329  CkExit();
330  }//endif
331  eesData->registerCacheGPP(thisIndex.y,ncoef,k_x,k_y,k_z);
332  int i=1;
333  CkCallback cb(CkIndex_CP_State_ParticlePlane::registrationDone(NULL),UparticlePlaneProxy[thisInstance.proxyOffset]);
334  contribute(sizeof(int),&i,CkReduction::sum_int,cb);
335  }//endif
336 }//end routine
337 
338 
339 //////////////////////////////////////////////////////////////////////////////
340 /// The destructor : Carefully about what it malloced and what is not
341 //////////////////////////////////////////////////////////////////////////////
342 //////////////////////////////////////////////////////////////////////////////
343 //////////////////////////////////////////////////////////////////////////////
345 
346  fftw_free(myForces);
347 
348  if(ees_nonlocal==1){
349  fftw_free(projPsiG);
350  fftw_free(dyp_re);
351  fftw_free(dyp_im);
352  }//endif
353 
354  if(ees_nonlocal==0){
355  fftw_free(zmatrix); //FOO-BAR should i delete this?
356  fftw_free(zmatrix_fx);
357  fftw_free(zmatrix_fy);
358  fftw_free(zmatrix_fz);
359  delete [] count;
360  delete [] haveSFAtmGrp;
361  }//endif
362 
363 }
364 //////////////////////////////////////////////////////////////////////////////
365 
366 
367 //////////////////////////////////////////////////////////////////////////////
368 /// pup for migration
369 //////////////////////////////////////////////////////////////////////////////
370 //////////////////////////////////////////////////////////////////////////////
371 //////////////////////////////////////////////////////////////////////////////
373  ArrayElement2D::pup(p);
374  p|istate_ind;
375  p|ibead_ind; p|kpoint_ind; p|itemper_ind;
376  p|registrationFlag;
377  p|myChareG;
378  p|iteration;
379  p|iterNL;
380  p|numNLiter;
381  p|ees_nonlocal;
382  p|ngridaNL;
383  p|ngridbNL;
384  p|ngridcNL;
385  p|numFullNL;
386  p|numLines; // NL has same number as states but a copy is nice
387  p|gSpaceNumPoints; // NL has same number as states but a copy is nice
388  p|sendDone;
389  p|numSfGrps;
390  p|natm_nl;
391  p|natm_nl_grp_max;
392  p|realPP_proxy;
393  p|countNLIFFT;
394  p|zsize;
395  if (p.isUnpacking()) {
396  myForces = (complex *)fftw_malloc(gSpaceNumPoints*sizeof(complex));
397  if(ees_nonlocal==1){
398  dyp_re = (double *)fftw_malloc(gSpaceNumPoints*sizeof(double));
399  dyp_im = (double *)fftw_malloc(gSpaceNumPoints*sizeof(double));
400  projPsiG = (complex *)fftw_malloc(numFullNL*sizeof(complex)); // fft proj
401  }//endif
402  if(ees_nonlocal==0){
403  zmatrixSum = (complex *)fftw_malloc(zsize*sizeof(complex));
404  zmatrixSum_fx = (complex *)fftw_malloc(zsize*sizeof(complex));
405  zmatrixSum_fy = (complex *)fftw_malloc(zsize*sizeof(complex));
406  zmatrixSum_fz = (complex *)fftw_malloc(zsize*sizeof(complex));
407  zmatrix = (complex *)fftw_malloc(zsize*sizeof(complex));
408  zmatrix_fx = (complex *)fftw_malloc(zsize*sizeof(complex));
409  zmatrix_fy = (complex *)fftw_malloc(zsize*sizeof(complex));
410  zmatrix_fz = (complex *)fftw_malloc(zsize*sizeof(complex));
411  count = new int[numSfGrps];
412  haveSFAtmGrp = new int[numSfGrps];
413  }//endif
414  }//endif
415  p((char*)myForces,gSpaceNumPoints*sizeof(complex));
416  if(ees_nonlocal==1){
417  p((char*)projPsiG,numFullNL*sizeof(complex));
418  p(dyp_re,gSpaceNumPoints);
419  p(dyp_im,gSpaceNumPoints);
420  }//endif
421  if(ees_nonlocal==0){
422  p(count,numSfGrps);
423  p(haveSFAtmGrp,numSfGrps);
424  p((char*)zmatrixSum,zsize *sizeof(complex));
425  p((char*)zmatrixSum_fx,zsize *sizeof(complex));
426  p((char*)zmatrixSum_fy,zsize *sizeof(complex));
427  p((char*)zmatrixSum_fz,zsize *sizeof(complex));
428  p((char*)zmatrix,zsize *sizeof(complex));
429  p((char*)zmatrix_fx,zsize *sizeof(complex));
430  p((char*)zmatrix_fy,zsize *sizeof(complex));
431  p((char*)zmatrix_fz,zsize *sizeof(complex));
432  }//endif
433  p|sizeX;
434  p|sizeY;
435  p|sizeZ;
436  p|gSpacePlanesPerChare;
437  p|energy_count;
438  p|totalEnergy;
439  p|enl;
440  p|doneEnl;
441  p|doneForces;
442  p|enl_total;
443  p|reductionPlaneNum;
444  p|enlCookie;
445  p|particlePlaneENLProxy;
446  // "gspace" is not pup'ed since it is always assigned to
447 }
448 //////////////////////////////////////////////////////////////////////////////
449 
450 
451 
452 //////////////////////////////////////////////////////////////////////////////
453 /**
454  * Pushes all relevant computeZ() calls onto the runtime's queue. Computation for each atomindex
455  * is split into a computeZ call to increase the granularity of the computation and prevent it from
456  * blocking chares on the critical path as it would if it were a monolithic block of work.
457  */
458 //////////////////////////////////////////////////////////////////////////////
460 //////////////////////////////////////////////////////////////////////////////
461  #ifdef _CP_DEBUG_STATE_GPP_VERBOSE_
462  CkPrintf("ParticlePlane[%d,%d] launching computeZs()\n",thisIndex.x,thisIndex.y);
463  #endif
464  for(int i=0;i<config.numSfGrps;i++)
465  {
466  if(haveSFAtmGrp[i]>=0)
467  {
468  PPDummyMsg *pmsg = new (8*sizeof(int)) PPDummyMsg;
469  pmsg->atmGrp = i;
470  pmsg->sfindex = haveSFAtmGrp[i];
471  CkSetQueueing(pmsg, CK_QUEUEING_IFIFO);
472  *(int*)CkPriorityPtr(pmsg) = config.sfpriority+i+config.numSfGrps;
473  //lower than sf and sfcache
474  thisProxy(thisIndex.x,thisIndex.y).computeZ(pmsg);
475  }
476  }
477 }//end routine
478 //////////////////////////////////////////////////////////////////////////////
479 
480 
481 //////////////////////////////////////////////////////////////////////////////
482 /* The N^3 routine computeZ is triggered by the arrival of the structure factor
483  for each atom group in the local sfcache. */
484 //////////////////////////////////////////////////////////////////////////////
485 //////////////////////////////////////////////////////////////////////////////
486 //////////////////////////////////////////////////////////////////////////////
488 //////////////////////////////////////////////////////////////////////////////
489 #ifdef _CP_DEBUG_STATE_GPP_VERBOSE_
490  if(thisIndex.x==0)
491  CkPrintf("HI, I am gPP %d %d in computeZ \n",thisIndex.x,thisIndex.y);
492 #endif
493 
494  if(ees_nonlocal==1){
495  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
496  CkPrintf("Homes, ees nonlocal is on. You can't call computeZ\n");
497  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
498  CkExit();
499  }//endif
500 
501  CP_State_GSpacePlane *gsp = UgSpacePlaneProxy[thisInstance.proxyOffset](thisIndex.x, thisIndex.y).ckLocal();
502  GStateSlab *gss = &(gsp->gs);
503  int atmIndex = m->atmGrp;
504  int sfindex = m->sfindex;
505  delete m;
506 
507 //////////////////////////////////////////////////////////////////////////////
508 /// If you got here and you have the latest psi, correct iteration etc.
509 /// This ensures you have reduced psi for dynamics which you really really need!
510 /// OK, so the upshot is, get your part of the Zmatrix and then send to the reduction
511 
512  if(gsp->acceptedPsi && gsp->doneNewIter){
513  // get ks
514  eesCache *eesData = UeesCacheProxy[thisInstance.proxyOffset].ckLocalBranch();
515  int *k_x = eesData->GspData[myChareG]->ka;
516  int *k_y = eesData->GspData[myChareG]->kb;
517  int *k_z = eesData->GspData[myChareG]->kc;
518 
519  // get sf
520  StructFactCache *sfcache = UsfCacheProxy[thisInstance.proxyOffset].ckLocalBranch();
521  complex *structureFactor,*structureFactor_fx,*structureFactor_fy,*structureFactor_fz;
522  sfcache->getStructFact(thisIndex.y, atmIndex, &structureFactor,
523  &structureFactor_fx, &structureFactor_fy, &structureFactor_fz);
524  if(zmatrix==NULL){
525  zmatrix = (complex *)fftw_malloc(zsize*sizeof(complex));
526  zmatrix_fx = (complex *)fftw_malloc(zsize*sizeof(complex));
527  zmatrix_fy = (complex *)fftw_malloc(zsize*sizeof(complex));
528  zmatrix_fz = (complex *)fftw_malloc(zsize*sizeof(complex));
529  memset(zmatrix, 0, sizeof(complex)*zsize);
530  memset(zmatrix_fx, 0, sizeof(complex)*zsize);
531  memset(zmatrix_fy, 0, sizeof(complex)*zsize);
532  memset(zmatrix_fz, 0, sizeof(complex)*zsize);
533  }//endif
534 
535  // get zmat
536  int mydoublePack = config.doublePack;
537  int zoffset = natm_nl_grp_max * atmIndex;
538 
539 #if CMK_TRACE_ENABLED
540  double StartTime=CmiWallTimer();
541 #endif
542 
543  CPNONLOCAL::CP_enl_matrix_calc(gSpaceNumPoints,gss->packedPlaneData, k_x,k_y,k_z,
544  structureFactor,structureFactor_fx,structureFactor_fy,
545  structureFactor_fz,&zmatrix[zoffset],&zmatrix_fx[zoffset],&zmatrix_fy[zoffset],
546  &zmatrix_fz[zoffset],thisIndex.x,mydoublePack,numSfGrps,atmIndex);
547 #if CMK_TRACE_ENABLED
548  traceUserBracketEvent(enlMatrixCalc_, StartTime, CmiWallTimer());
549 #endif
550 
551  // reduce zmatrices over the planes of each state
552  thisProxy(thisIndex.x, reductionPlaneNum).reduceZ(natm_nl_grp_max, atmIndex,
553  &zmatrix[zoffset],&zmatrix_fx[zoffset],&zmatrix_fy[zoffset],&zmatrix_fz[zoffset]);
554 
555  haveSFAtmGrp[atmIndex]=-1; //this one is done
556 
557 //////////////////////////////////////////////////////////////////////////////
558 /// If you to the `else', you don't have the latest psi (impossible now).
559 /// Set the flag so Psi can kick off computeZ
560  }else{
561  haveSFAtmGrp[atmIndex]=sfindex;
562  }//endif
563 
564 //////////////////////////////////////////////////////////////////////////////
565 /// Now, Hang out until reduced zmatrix is sent back.
566 
567 //----------------------------------------------------------------------------
568  }//end routine
569 //////////////////////////////////////////////////////////////////////////////
570 
571 
572 //////////////////////////////////////////////////////////////////////////////
573 /// ReduceZ reduces Zmat over gspace for a particular state for N^3 method
574 //////////////////////////////////////////////////////////////////////////////
575 ///////////////////////////////////////////////////////////////////////////cc
576 //////////////////////////////////////////////////////////////////////////////
577 void CP_State_ParticlePlane::reduceZ(int size, int atmIndex, complex *zmatrix_,
578  complex *zmatrix_fx_,complex *zmatrix_fy_,complex *zmatrix_fz_){
579 //////////////////////////////////////////////////////////////////////////////
580 /// This method is invoked only CP_State_ParticlePlane(*,reductionPlaneNum)
581 /// CkPrintf("PP [%d %d] reduceZ\n",thisIndex.x, thisIndex.y);
582 
583 #ifdef _CP_DEBUG_STATE_GPP_VERBOSE_
584  if(thisIndex.x==0)
585  CkPrintf("HI, I am gPP %d %d in redZ \n",thisIndex.x,thisIndex.y);
586 #endif
587 
588  if(ees_nonlocal==1){
589  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
590  CkPrintf("Homes, ees nonlocal is on. You can't call reduceZ\n");
591  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
592  CkExit();
593  }//endif
594 
595  count[atmIndex]++;
596  int i,j;
597  int zoffset= natm_nl_grp_max * atmIndex;
598  if (zmatrixSum==NULL) {
599  zmatrixSum = (complex *)fftw_malloc(zsize*sizeof(complex));
600  memset(zmatrixSum, 0, zsize * sizeof(complex));
601  }//endif
602 
603  if (zmatrixSum_fx==NULL) {
604  zmatrixSum_fx = (complex *)fftw_malloc(zsize*sizeof(complex));
605  memset(zmatrixSum_fx, 0, zsize * sizeof(complex));
606  }//endif
607 
608  if (zmatrixSum_fy==NULL) {
609  zmatrixSum_fy = (complex *)fftw_malloc(zsize*sizeof(complex));
610  memset(zmatrixSum_fy, 0, zsize * sizeof(complex));
611  }//endif
612 
613  if (zmatrixSum_fz==NULL) {
614  zmatrixSum_fz = (complex *)fftw_malloc(zsize*sizeof(complex));
615  memset(zmatrixSum_fz, 0, zsize * sizeof(complex));
616  }//endif
617 
618  for (i = 0,j=zoffset; i < size; i++,j++){
619  zmatrixSum[j] += zmatrix_[i];
620  zmatrixSum_fx[j] += zmatrix_fx_[i];
621  zmatrixSum_fy[j] += zmatrix_fy_[i];
622  zmatrixSum_fz[j] += zmatrix_fz_[i];
623  }//endif
624 
625 //////////////////////////////////////////////////////////////////////////////
626 /// Now we have contributions from all the other gspace planes in this state.
627 /// Calculate the energies and atoms forces. Normalize ZmatrixSum
628 
629  if( ((count[atmIndex] == (sizeX/gSpacePlanesPerChare)/2 - 1)&&(!config.doublePack)) ||
630  ((count[atmIndex] == nchareG)&&(config.doublePack)) ){
631 
632  //---------------------------------------------------------------------
633  // accounting
634  count[atmIndex] = 0;
635  if(doneEnl==0){enl=0.0;}
636  doneEnl++;
637 
638  //-----------------------------------------------------------------------
639  // energy and atom forces
640  AtomsCache *ag = UatomsCacheProxy[thisInstance.proxyOffset].ckLocalBranch();
641  FastAtoms *fastAtoms = &(ag->fastAtoms);
642  int mydoublePack = config.doublePack;
643  double myenl = 0.0;
644 
645 #if CMK_TRACE_ENABLED
646  double StartTime=CmiWallTimer();
647 #endif
648 
649  CPNONLOCAL::CP_enl_atm_forc_calc(numSfGrps,atmIndex,fastAtoms,&zmatrixSum[zoffset],
650  &zmatrixSum_fx[zoffset],&zmatrixSum_fy[zoffset],&zmatrixSum_fz[zoffset],
651  &myenl,mydoublePack,istate_ind);
652 #if CMK_TRACE_ENABLED
653  traceUserBracketEvent(enlAtmForcCalc_, StartTime, CmiWallTimer());
654 #endif
655 
656  enl+=myenl;
657 
658 #ifdef _CP_DEBUG_NLMAT_
659  for (i = 0; i < zsize; i++){
660  CkPrintf("Non-local matrix[%d][%d]={%g, %g}\n",i,thisIndex.x,
661  zmatrix_[i].re,zmatrix_[i].im );}
662 #endif
663 
664  //---------------------------------------------------------------
665  // send the reduced zmatrix to the particleplanes of this state and
666  // compute the non-local forces on the coefs
667  for (i = 0; i < nchareG; i ++){
668  thisProxy(thisIndex.x, i).getForces(size,atmIndex, &zmatrixSum[zoffset]);
669  }//endfor
670 
671  //---------------------------------------------------------------
672  // If completely done with Z stuff send out our ENL
673  if(thisIndex.y==reductionPlaneNum && doneEnl==numSfGrps){
674  CkMulticastMgr *mcastGrp = CProxy_CkMulticastMgr(mCastGrpId).ckLocalBranch();
675 
676  CkCallback cb=CkCallback(CkIndex_CP_State_ParticlePlane::printEnl(NULL),CkArrayIndex2D(0,0), UparticlePlaneProxy[thisInstance.proxyOffset]);
677  mcastGrp->contribute(sizeof(double),(void*) &enl,
678  CkReduction::sum_double, enlCookie, cb);
679  doneEnl=0;
680  enl=0.0;
681  bzero(zmatrixSum, zsize * sizeof(complex));
682  bzero(zmatrixSum_fx, zsize * sizeof(complex));
683  bzero(zmatrixSum_fy, zsize * sizeof(complex));
684  bzero(zmatrixSum_fz, zsize * sizeof(complex));
685  }//endif
686 
687  }/*endif*/
688 
689 //---------------------------------------------------------------------------
690  }//end routine
691 //////////////////////////////////////////////////////////////////////////////
692 
693 
694 
695 //////////////////////////////////////////////////////////////////////////////
696 /// Compute psi forces for the N^3 method
697 //////////////////////////////////////////////////////////////////////////////
698 //////////////////////////////////////////////////////////////////////////////
699 //////////////////////////////////////////////////////////////////////////////
700 void CP_State_ParticlePlane::getForces(int zmatSize, int atmIndex,
701  complex *zmatrixSum_loc)
702 //////////////////////////////////////////////////////////////////////////////
703  {//begin routine
704 //////////////////////////////////////////////////////////////////////////////
705 /// Warninging and unpacking
706 
707 #ifdef _CP_DEBUG_STATE_GPP_VERBOSE_
708  if(thisIndex.x==0)
709  CkPrintf("HI, I am gPP %d %d in getforces \n",thisIndex.x,thisIndex.y);
710 #endif
711  if(ees_nonlocal==1){
712  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
713  CkPrintf("Homes, ees nonlocal is on. You can't call getForces\n");
714  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
715  CkExit();
716  }//endif
717 
718  doneForces++;
719 
720 //////////////////////////////////////////////////////////////////////////////
721 /// Compute the non-local forces on the coefficients using PINY physics
722 
723  CP_State_GSpacePlane *gsp = UgSpacePlaneProxy[thisInstance.proxyOffset](thisIndex.x, thisIndex.y).ckLocal();
724  GStateSlab *gss = &(gsp->gs);
725  eesCache *eesData = UeesCacheProxy[thisInstance.proxyOffset].ckLocalBranch();
726  StructFactCache *sfcache = UsfCacheProxy[thisInstance.proxyOffset].ckLocalBranch();
727 
728  int mydoublePack = config.doublePack;
729  int state_ind = gss->istate_ind;
730  int *k_x = eesData->GspData[myChareG]->ka;
731  int *k_y = eesData->GspData[myChareG]->kb;
732  int *k_z = eesData->GspData[myChareG]->kc;
733  complex *structureFactor,*structureFactor_fx,*structureFactor_fy,*structureFactor_fz;
734 
735  sfcache->getStructFact(thisIndex.y, atmIndex, &structureFactor, &structureFactor_fx,
736  &structureFactor_fy, &structureFactor_fz);
737 
738  if (gSpaceNumPoints != 0){
739 
740 #if CMK_TRACE_ENABLED
741  double StartTime=CmiWallTimer();
742 #endif
743 
744  CPNONLOCAL::CP_enl_force_calc(zmatrixSum_loc,gSpaceNumPoints, k_x, k_y, k_z,
745  structureFactor,myForces,state_ind,mydoublePack,
746  numSfGrps, atmIndex);
747 #if CMK_TRACE_ENABLED
748  traceUserBracketEvent(enlForcCalc_, StartTime, CmiWallTimer());
749 #endif
750 
751  }//endif
752 
753 //////////////////////////////////////////////////////////////////////////////
754 /// Forces have been computed, let g-space plane know. This ends the N^3 method
755 
756  if(doneForces==numSfGrps){
757  doneForces = 0;
758  /// If the nonlocal force computations are barriered, contribute to the reduction
759  #ifdef BARRIER_CP_PARTICLEPLANE_NONLOCAL
760  int nonLocDone = 1;
761  contribute(sizeof(int),&nonLocDone,CkReduction::sum_int,
762  CkCallback(CkIndex_GSpaceDriver::allDoneNLForces(NULL),UgSpaceDriverProxy[thisInstance.proxyOffset]));
763  /// else, directly notify your driver
764  #else
765  UgSpaceDriverProxy[thisInstance.proxyOffset](thisIndex.x,thisIndex.y).doneNLForces();
766  #endif
767  }//endif : doneforces
768 
769 //----------------------------------------------------------------------------
770  }//end routine
771 //////////////////////////////////////////////////////////////////////////////
772 
773 
774 //////////////////////////////////////////////////////////////////////////////
775 /// Section reduction cookie N^3 method
776 //////////////////////////////////////////////////////////////////////////////
777 //////////////////////////////////////////////////////////////////////////////
778 //////////////////////////////////////////////////////////////////////////////
780 //////////////////////////////////////////////////////////////////////////////
781 /// Do not delete msg. Its a nokeep.
782 //////////////////////////////////////////////////////////////////////////////
783  CkGetSectionInfo(enlCookie,m);
784 
785 #ifdef _CP_DEBUG_STATE_GPP_VERBOSE_
786  CkPrintf("PP [%d %d] get enl cookie\n",thisIndex.x, thisIndex.y);
787 #endif
788 
789 }
790 //////////////////////////////////////////////////////////////////////////////
791 
792 
793 //////////////////////////////////////////////////////////////////////////////
794 /**
795  * Spread the reduction plane numbers around to minimize map collisions for
796  * the N^3 method
797  */
798 //////////////////////////////////////////////////////////////////////////////
799 //////////////////////////////////////////////////////////////////////////////
800 //////////////////////////////////////////////////////////////////////////////
802 //////////////////////////////////////////////////////////////////////////////
803 
804  int nstatemax=nstates-1;
805  int ncharemax=nchareG-1;
806  int planeNum= state %ncharemax;
807  if(planeNum<0){
808  CkPrintf(" PP [%d %d] calc nstatemax %d ncharemax %d state %d planenum %d\n",
809  thisIndex.x, thisIndex.y,nstatemax, ncharemax, state, planeNum);
810  CkExit();
811  }//endif
812  return planeNum;
813 }
814 //////////////////////////////////////////////////////////////////////////////
815 
816 
817 //////////////////////////////////////////////////////////////////////////////
818 /// Resume from Sync which is a general routine
819 //////////////////////////////////////////////////////////////////////////////
820 //////////////////////////////////////////////////////////////////////////////
821 //////////////////////////////////////////////////////////////////////////////
823  if(thisIndex.x==0 && thisIndex.y==reductionPlaneNum){
824  CkMulticastMgr *mcastGrp = CProxy_CkMulticastMgr(mCastGrpId).ckLocalBranch();
825  mcastGrp->resetSection(particlePlaneENLProxy);
826  }//endif
827 }// end routine
828 //////////////////////////////////////////////////////////////////////////////
829 
830 
831 ///////////////////////////////////////////////////////////////////////////////=
832 /// The entry point to the Euler Exponential Spline non-local method.
833 ///////////////////////////////////////////////////////////////////////////////=
834 ///////////////////////////////////////////////////////////////////////////////c
835 ///////////////////////////////////////////////////////////////////////////////=
837  int iteration_in=msg->iteration;
838  delete msg;
839  startNLEes(iteration_in);
840 }
841 ///////////////////////////////////////////////////////////////////////////////=
842 
843 
844 ///////////////////////////////////////////////////////////////////////////////=
845 /// The entry point to the Euler Exponential Spline non-local method.
846 ///////////////////////////////////////////////////////////////////////////////=
847 ///////////////////////////////////////////////////////////////////////////////c
848 ///////////////////////////////////////////////////////////////////////////////=
849 void CP_State_ParticlePlane::startNLEes(int iteration_in){
850 ///////////////////////////////////////////////////////////////////////////////=
851 /// Increment counters, Check for inconsistancies
852 
853  iterNL++; if(iterNL==1){iteration++;}
854 
855  if(natm_nl==0){
856  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
857  CkPrintf("Duuuuude, natm_nl==0 don't start NL Ees\n");
858  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
859  CkExit();
860  }//endif
861 
862  if(ees_nonlocal==0){
863  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
864  CkPrintf("Yo dawg, ees nonlocal is off. You can't call startNLEes\n");
865  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
866  CkExit();
867  }//endif
868 
869  if(iteration!=iteration_in){
870  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
871  CkPrintf("Mismatch in time step between Gstate and Gpart %d %d for chare %d %d\n",
872  iteration_in,iteration,thisIndex.y,thisIndex.x);
873  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
874  CkExit();
875  }//endif
876 
877  if(iterNL>numNLiter){
878  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
879  CkPrintf("Too many non-local iterations encountered %d %d for Gchare %d %d\n",
880  iterNL,numNLiter,thisIndex.y,thisIndex.x);
881  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
882  CkExit();
883  }//endif
884 
885  if(iterNL>1 && registrationFlag==0){
886  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
887  CkPrintf("Why am I not registered! PP chare %d %d\n",thisIndex.x,thisIndex.y);
888  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
889  }//endif
890 
891 ///////////////////////////////////////////////////////////////////////////////=
892 /// Lets boogy : provided we have registered and we are not trying to do some
893 /// bogus iteration
894 
895 #ifdef _CP_DEBUG_STATE_GPP_VERBOSE_
896  if(thisIndex.x==0)
897  CkPrintf("HI, I am gPP %d %d in startNLees : %d\n",thisIndex.x,thisIndex.y,iterNL);
898 #endif
899 
900  if(registrationFlag==1 && iteration<=config.maxIter){createNLEesFFTdata();}
901 
902  // Bogus iteration :
903  // Spin my wheels while atoms integrate, cleanExit and Energy reducts
904  // and ortho does who knows what.
905  if(iteration>config.maxIter){
906  ///TODO: Remove unused var gsp
907  CP_State_GSpacePlane *gsp = UgSpacePlaneProxy[thisInstance.proxyOffset](thisIndex.x,thisIndex.y).ckLocal();
908  iterNL = 0;
909  /// If the nonlocal force computations are barriered, contribute to the reduction
910  #ifdef BARRIER_CP_PARTICLEPLANE_NONLOCAL
911  int nonLocDone = 1;
912  contribute(sizeof(int),&nonLocDone,CkReduction::sum_int,
913  CkCallback(CkIndex_GSpaceDriver::allDoneNLForces(NULL),UgSpaceDriverProxy[thisInstance.proxyOffset]));
914  /// else, directly notify your driver
915  #else
916  UgSpaceDriverProxy[thisInstance.proxyOffset](thisIndex.x,thisIndex.y).doneNLForces();
917  #endif
918 
919  }//endif
920 
921 //-----------------------------------------------------------------------------
922  }//end routine
923 ///////////////////////////////////////////////////////////////////////////////=
924 
925 
926 //////////////////////////////////////////////////////////////////////////////
927 /// In gspace, create the projector, projPsi
928 //////////////////////////////////////////////////////////////////////////////
929 //////////////////////////////////////////////////////////////////////////////
930 //////////////////////////////////////////////////////////////////////////////
932 //////////////////////////////////////////////////////////////////////////////
933 /// Local pointers and warnings
934 
935  if(ees_nonlocal==0){
936  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
937  CkPrintf("Yo dawg, ees nonlocal is off. You can't createNLEesFFTdata\n");
938  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
939  CkExit();
940  }//endif
941 
942  CP_State_GSpacePlane *gsp = UgSpacePlaneProxy[thisInstance.proxyOffset](thisIndex.x, thisIndex.y).ckLocal();
943  GStateSlab *gss = &(gsp->gs);
944  eesCache *eesData = UeesCacheProxy[thisInstance.proxyOffset].ckLocalBranch ();
945  FFTcache *fftcache = UfftCacheProxy[thisInstance.proxyOffset].ckLocalBranch();
946 
947 //////////////////////////////////////////////////////////////////////////////
948 /// Setup the projector and then launch the fft
949 
950  double *d_re = eesData->GppData[myChareG]->b_re; // precomputed stuff
951  double *d_im = eesData->GppData[myChareG]->b_im;
952  int **ind_gspl = eesData->GppData[myChareG]->ind_gspl;
953  double **h_gspl= eesData->GppData[myChareG]->h_gspl;
954  int *k_x = eesData->GspData[myChareG]->ka;
955  int *k_y = eesData->GspData[myChareG]->kb;
956  int *k_z = eesData->GspData[myChareG]->kc;
957 
958  int ncoef = gSpaceNumPoints;
959  complex *psi = gss->packedPlaneData;
960  int ihave_g0 = gss->ihave_g000; // I have the special pt, g=0!
961  int ind_g0 = gss->ind_g000; // This is the index of g=0 !
962 
963 #ifdef _CP_DEBUG_STATE_GPP_VERBOSE_
964  if(thisIndex.x==0)
965  CkPrintf("HI, I am gPP %d %d in createNLEes : %d\n",thisIndex.x,thisIndex.y,iterNL);
966 #endif
967 
968  fftcache->getCacheMem("CP_State_ParticlePlane::createNLEesFFTdata");
969  complex *projPsiGTmp = fftcache->tmpData; // store in tmp
970 
971 #if CMK_TRACE_ENABLED
972  double StartTime=CmiWallTimer();
973 #endif
974 
975  CPNONLOCAL::eesProjGchare(ncoef,psi,k_x,k_y,k_z,ihave_g0,ind_g0,iterNL,
976  d_re,d_im,dyp_re,dyp_im,projPsiGTmp,ind_gspl,h_gspl,
977  thisIndex.x,thisIndex.y,kpoint_ind,config.nfreq_cpnonlocal_eesfwd);
978 #if CMK_TRACE_ENABLED
979  traceUserBracketEvent(eesProjG_, StartTime, CmiWallTimer());
980 #endif
981 
982  FFTNLEesFwd();
983 
984 //---------------------------------------------------------------------------
985  }//end routine
986 //////////////////////////////////////////////////////////////////////////////
987 
988 
989 //////////////////////////////////////////////////////////////////////////////
990 /// FFT projPsi(gx,gy,gz) -> projPsi(gx,gy,z)
991 //////////////////////////////////////////////////////////////////////////////
992 //////////////////////////////////////////////////////////////////////////////
993 //////////////////////////////////////////////////////////////////////////////
995 
996  if(ees_nonlocal==0){
997  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
998  CkPrintf("Yo dawg, ees nonlocal is off. You can't FFTNLEesFwd\n");
999  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
1000  CkExit();
1001  }//endif
1002 
1003  // local pointers
1004  CP_State_GSpacePlane *gsp = UgSpacePlaneProxy[thisInstance.proxyOffset](thisIndex.x, thisIndex.y).ckLocal();
1005  GStateSlab *gss = &(gsp->gs);
1006  FFTcache *fftcache = UfftCacheProxy[thisInstance.proxyOffset].ckLocalBranch();
1007  eesCache *eesData = UeesCacheProxy[thisInstance.proxyOffset].ckLocalBranch ();
1008 
1009 //////////////////////////////////////////////////////////////////////////////
1010 /// Do the FFT and then send it to r-space to complete the fft
1011 
1012 #ifdef _CP_DEBUG_STATE_GPP_VERBOSE_
1013  if(thisIndex.x==0)
1014  CkPrintf("HI, I am gPP %d %d in FFTNLFwd : %d\n",thisIndex.x,thisIndex.y,iterNL);
1015 #endif
1016 
1017  RunDescriptor *runs = eesData->GspData[myChareG]->runs;
1018  complex *projPsiGTmp = fftcache->tmpData;
1019 #if CMK_TRACE_ENABLED
1020  double StartTime=CmiWallTimer();
1021 #endif
1022 
1023  fftcache->doNlFFTGtoR_Gchare(projPsiGTmp,projPsiG,numFullNL,gSpaceNumPoints,numLines,
1024  (gss->numRuns),runs,ngridcNL,myChareG);
1025  fftcache->freeCacheMem("CP_State_ParticlePlane::FFTNLEesFwd");
1026 
1027 #if CMK_TRACE_ENABLED
1028  traceUserBracketEvent(doNlFFTGtoR_, StartTime, CmiWallTimer());
1029 #endif
1030 
1031 #ifdef _CP_GS_DUMP_VKS_
1032  dumpMatrix("projPsiGb4send",(double *)projPsiG, 1, gSpaceNumPoints*2,thisIndex.y,thisIndex.x,thisIndex.x,0,false);
1033 #endif
1034 
1035 #ifdef _CP_GS_DEBUG_COMPARE_VKS_
1036  if(savedprojpsiBfsend==NULL)
1037  { // load it
1038  savedprojpsiBfsend= new complex[gSpaceNumPoints];
1039  loadMatrix("projPsiGb4send",(double *)savedprojpsiBfsend, 1, gSpaceNumPoints*2,thisIndex.y,thisIndex.x,thisIndex.x,0,false);
1040  }
1041  for(int i=0;i<gSpaceNumPoints;i++)
1042  {
1043  if(fabs(projPsiG[i].re-savedprojpsiBfsend[i].re)>0.0001)
1044  {
1045  fprintf(stderr, "PP [%d,%d] %d element projpsi %.10g not %.10g\n",thisIndex.x, thisIndex.y,i, projPsiG[i].re, savedprojpsiBfsend[i].re);
1046  }
1047  CkAssert(fabs(projPsiG[i].re-savedprojpsiBfsend[i].re)<0.0001);
1048  CkAssert(fabs(projPsiG[i].im-savedprojpsiBfsend[i].im)<0.0001);
1049  }
1050 #endif
1051 
1052  sendToEesRPP();
1053 
1054 //---------------------------------------------------------------------------
1055  }//end routine
1056 //////////////////////////////////////////////////////////////////////////////
1057 
1058 
1059 //////////////////////////////////////////////////////////////////////////////
1060 /// send projPsi(gx,gy,z) to Rspace chare where FFT is completed
1061 //////////////////////////////////////////////////////////////////////////////
1062 //////////////////////////////////////////////////////////////////////////////
1063 //////////////////////////////////////////////////////////////////////////////
1065 //////////////////////////////////////////////////////////////////////////////
1066 /// Do a Comlib Dance
1067 
1068  if(ees_nonlocal==0){
1069  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
1070  CkPrintf("Yo dawg, ees nonlocal is off. You can't sendtoEesRPP\n");
1071  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
1072  CkExit();
1073  }//endif
1074 
1075 #ifdef _CP_DEBUG_STATE_GPP_VERBOSE_
1076  if(thisIndex.x==0)
1077  CkPrintf("HI, I am gPP %d %d in sendtoEesRPP : %d\n",thisIndex.x,thisIndex.y,iterNL);
1078 #endif
1079 
1080 #ifdef USE_COMLIB
1081 #ifdef OLD_COMMLIB
1082  if (config.useGssInsRealPP){gssPInstance.beginIteration();}
1083 #else
1084  if (config.useGssInsRealPP){ComlibBegin(realPP_proxy, iterNL);}
1085 #endif
1086 #endif
1087 
1088 //////////////////////////////////////////////////////////////////////////////
1089 /// Send your (x,y,z) to processors z.
1090 
1091  for(int z=0; z < ngridcNL; z++) {
1092 
1093  NLFFTMsg *msg = new (numLines,8*sizeof(int)) NLFFTMsg;
1094  msg->size = numLines;
1095  msg->senderIndex = thisIndex.y; // planenumber
1096  msg->step = iterNL;
1097 
1098  if(config.prioNLFFTMsg){
1099  CkSetQueueing(msg, CK_QUEUEING_IFIFO);
1100  *(int*)CkPriorityPtr(msg) = config.rsNLfftpriority +
1101  thisIndex.x + thisIndex.y;
1102  }//endif
1103 
1104  // beam out all points with same z to chare array index z
1105  complex *data = msg->data;
1106  /// @todo: realPP_proxy is initialized in initKVectors only if nloc_on is true. Should we check the same here too?
1107  for (int i=0,j=z; i<numLines; i++,j+=ngridcNL){data[i] = projPsiG[j];}
1108  realPP_proxy(thisIndex.x, z).recvFromEesGPP(msg); // same state, realspace char[z]
1109  }//endfor
1110 
1111 //////////////////////////////////////////////////////////////////////////////
1112 /// Turn off commlib
1113 
1114 #ifdef USE_COMLIB
1115 #ifdef OLD_COMMLIB
1116  if (config.useGssInsRealPP){gssPInstance.endIteration();}
1117 #else
1118  if (config.useGssInsRealPP){ComlibEnd(realPP_proxy, iterNL);}
1119 #endif
1120 #endif
1121  sendDone = 1;
1122 
1123 //////////////////////////////////////////////////////////////////////////////
1124  }//end routine
1125 //////////////////////////////////////////////////////////////////////////////
1126 
1127 
1128 //////////////////////////////////////////////////////////////////////////////
1129 /// Receive the projector back modified to generate psi forces
1130 /// A message cannot come back until I have sent!!
1131 //////////////////////////////////////////////////////////////////////////////
1132 //////////////////////////////////////////////////////////////////////////////
1133 //////////////////////////////////////////////////////////////////////////////
1135 //////////////////////////////////////////////////////////////////////////////
1136 
1137  if(ees_nonlocal==0){
1138  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
1139  CkPrintf("Yo dawg, ees nonlocal is off. You can't recvFromEesRPP\n");
1140  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
1141  CkExit();
1142  }//endif
1143 
1144 #ifdef _CP_DEBUG_STATE_GPP_VERBOSE_
1145  if(thisIndex.x==0)
1146  CkPrintf("HI, I am gPP %d %d in recvFromEesRPP : %d\n",thisIndex.x,thisIndex.y,iterNL);
1147 #endif
1148 
1149 //////////////////////////////////////////////////////////////////////////////
1150 /// unpack, check sizes, iteration numbers, and if you could be overwriting memory
1151 
1152  int size = msg->size;
1153  int offset = msg->offset;
1154  int iterNLnow = msg->iterNL;
1155  complex *partlyIFFTd = msg->data;
1156 
1157  CkAssert(numLines == size);
1158  if(iterNLnow != iterNL || sendDone==0){
1159  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
1160  CkPrintf("Iteration mismatch in ParticlePlane %d %d %d\n",iterNL,iterNLnow,sendDone);
1161  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
1162  CkExit();
1163  }//endif
1164 
1165 //////////////////////////////////////////////////////////////////////////////
1166 /// Copy out the data and delete the message
1167 
1168  // No need to zero because every value is set.
1169  // z=offset is inner index : collections of z-lines of constant (gx,gy)
1170  for(int i=0,j=offset; i< numLines; i++,j+=ngridcNL){projPsiG[j] = partlyIFFTd[i];}
1171 
1172  delete msg;
1173  // receive 1 message from each z-chare
1174 
1175 //////////////////////////////////////////////////////////////////////////////
1176 /// When everything is here, continue the computation
1177 
1178  countNLIFFT++;
1179  if (countNLIFFT == ngridcNL) {
1180  countNLIFFT = 0;
1181 #ifdef _CP_DEBUG_STATE_GPP_VERBOSE_
1182  if(thisIndex.x==0)
1183  CkPrintf("HI, I am gPP %d %d in recvFromEesRPP : %d\n",thisIndex.x,thisIndex.y,iterNL);
1184 #endif
1185  FFTNLEesBck();
1186  sendDone = 0;
1187  }//endif
1188 
1189 //----------------------------------------------------------------------------
1190  }//end routine
1191 //////////////////////////////////////////////////////////////////////////////
1192 
1193 
1194 //////////////////////////////////////////////////////////////////////////////
1195 /// Complete the FFT : projPsif(gx,gy,z) -> projPsif(gx,gy,gz)
1196 //////////////////////////////////////////////////////////////////////////////
1197 //////////////////////////////////////////////////////////////////////////////
1198 //////////////////////////////////////////////////////////////////////////////
1200 //////////////////////////////////////////////////////////////////////////////
1201 
1202  if(ees_nonlocal==0){
1203  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
1204  CkPrintf("Yo dawg, ees nonlocal is off. You can't FFTEeesBck\n");
1205  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
1206  CkExit();
1207  }//endif
1208 
1209  // local pointers
1210  CP_State_GSpacePlane *gsp = UgSpacePlaneProxy[thisInstance.proxyOffset](thisIndex.x, thisIndex.y).ckLocal();
1211  GStateSlab *gss = &(gsp->gs);
1212  FFTcache *fftcache = UfftCacheProxy[thisInstance.proxyOffset].ckLocalBranch();
1213  eesCache *eesData = UeesCacheProxy[thisInstance.proxyOffset].ckLocalBranch ();
1214 
1215 //////////////////////////////////////////////////////////////////////////////
1216 /// Do the FFT and then compute Psi forces
1217 
1218 #ifdef _CP_DEBUG_STATE_GPP_VERBOSE_
1219  if(thisIndex.x==0)
1220  CkPrintf("HI, I am gPP %d %d in FFTNLeesBck : %d\n",thisIndex.x,thisIndex.y,iterNL);
1221 #endif
1222 
1223  RunDescriptor *runs = eesData->GspData[myChareG]->runs;
1224  fftcache->getCacheMem("CP_State_ParticlePlane::FFTNLEesBck");
1225  complex *projPsiGTmp = fftcache->tmpData;
1226 
1227 #ifdef _CP_GS_DUMP_VKS_
1228  dumpMatrix("projPsiGb4",(double *)projPsiG, 1, numFullNL*2,thisIndex.y,thisIndex.x,thisIndex.x,0,false);
1229 #endif
1230 
1231 #ifdef _CP_GS_DEBUG_COMPARE_VKS_
1232  if(savedprojpsiGBf==NULL)
1233  { // load it
1234  savedprojpsiGBf= new complex[numFullNL];
1235  loadMatrix("projPsiGb4",(double *)savedprojpsiGBf, 1, numFullNL*2,thisIndex.y,thisIndex.x,thisIndex.x,0,false);
1236  }
1237  for(int i=0;i<numFullNL;i++)
1238  {
1239  if(fabs(projPsiG[i].re-savedprojpsiGBf[i].re)>0.0001)
1240  {
1241  fprintf(stderr, "PP [%d,%d] %d element projpsi %.10g not %.10g\n",thisIndex.x, thisIndex.y,i, projPsiG[i].re, savedprojpsiGBf[i].re);
1242  }
1243  CkAssert(fabs(projPsiG[i].re-savedprojpsiGBf[i].re)<0.0001);
1244  CkAssert(fabs(projPsiG[i].im-savedprojpsiGBf[i].im)<0.0001);
1245  }
1246 #endif
1247 
1248 #if CMK_TRACE_ENABLED
1249  double StartTime=CmiWallTimer();
1250 #endif
1251 
1252  fftcache->doNlFFTRtoG_Gchare(projPsiG,projPsiGTmp,numFullNL,gSpaceNumPoints,numLines,
1253  eesData->GspData[myChareG]->numRuns,runs,ngridcNL,myChareG);
1254 #if CMK_TRACE_ENABLED
1255  traceUserBracketEvent(doNlFFTRtoG_, StartTime, CmiWallTimer());
1256 #endif
1257 
1259 
1260 //---------------------------------------------------------------------------
1261  }//end routine
1262 //////////////////////////////////////////////////////////////////////////////
1263 
1264 
1265 //////////////////////////////////////////////////////////////////////////////
1266 /// Compute the Psi forces : If not done, start a new iteration
1267 //////////////////////////////////////////////////////////////////////////////
1268 //////////////////////////////////////////////////////////////////////////////
1269 //////////////////////////////////////////////////////////////////////////////
1271 //////////////////////////////////////////////////////////////////////////////
1272 
1273  CP_State_GSpacePlane *gsp = UgSpacePlaneProxy[thisInstance.proxyOffset](thisIndex.x, thisIndex.y).ckLocal();
1274  GStateSlab *gss = &(gsp->gs);
1275  eesCache *eesData = UeesCacheProxy[thisInstance.proxyOffset].ckLocalBranch ();
1276  FFTcache *fftcache = UfftCacheProxy[thisInstance.proxyOffset].ckLocalBranch();
1277 
1278  int ncoef = gSpaceNumPoints;
1279  int *k_x = eesData->GspData[myChareG]->ka;
1280  int *k_y = eesData->GspData[myChareG]->kb;
1281  int *k_z = eesData->GspData[myChareG]->kc;
1282  int ihave_g0 = gss->ihave_g000;
1283  int ind_g0 = gss->ind_g000;
1284  int nkx0 = gss->nkx0;
1285  complex *fPsiG = myForces; // these are zeroed in gspaceplane at the end of iteration.
1286 
1287 #ifdef _CP_DEBUG_STATE_GPP_VERBOSE_
1288  if(thisIndex.x==0)
1289  CkPrintf("HI, I am gPP %d %d in computeNLeesForc : %d\n",thisIndex.x,thisIndex.y,iterNL);
1290 #endif
1291  complex *projPsiGTmp = fftcache->tmpData;
1292 
1293 #ifdef _CP_GS_DUMP_VKS_
1294  dumpMatrix("projPsiGtmpb4",(double *)projPsiGTmp, 1, ncoef*2,thisIndex.y,thisIndex.x,thisIndex.x,0,false);
1295 #endif
1296 
1297 #ifdef _CP_GS_DEBUG_COMPARE_VKS_
1298  if(savedprojpsiBf==NULL)
1299  { // load it
1300  savedprojpsiBf= new complex[ncoef];
1301  loadMatrix("projPsiGtmpb4",(double *)savedprojpsiBf, 1, ncoef*2,thisIndex.y,thisIndex.x,thisIndex.x,0,false);
1302  }
1303  for(int i=0;i<ncoef;i++)
1304  {
1305  if(fabs(projPsiGTmp[i].re-savedprojpsiBf[i].re)>0.0001)
1306  {
1307  fprintf(stderr, "PP [%d,%d] %d element projpsi %.10g not %.10g\n",thisIndex.x, thisIndex.y,i, projPsiGTmp[i].re, savedprojpsiBf[i].re);
1308  }
1309  CkAssert(fabs(projPsiGTmp[i].re-savedprojpsiBf[i].re)<0.0001);
1310  CkAssert(fabs(projPsiGTmp[i].im-savedprojpsiBf[i].im)<0.0001);
1311  }
1312 #endif
1313 
1314 #if CMK_TRACE_ENABLED
1315  double StartTime=CmiWallTimer();
1316 #endif
1317 
1318  CPNONLOCAL::eesPsiForcGspace(ncoef,ihave_g0,ind_g0,nkx0,projPsiGTmp,fPsiG,dyp_re,dyp_im,
1319  k_x,k_y,k_z,thisIndex.x,thisIndex.y,iterNL,config.nfreq_cpnonlocal_eesbk);
1320  fftcache->freeCacheMem("CP_State_ParticlePlane::computeNLEesForces");
1321 
1322 #if CMK_TRACE_ENABLED
1323  traceUserBracketEvent(eesPsiForcGspace_, StartTime, CmiWallTimer());
1324 #endif
1325 
1326 //////////////////////////////////////////////////////////////////////////////
1327 
1328  if(iterNL==numNLiter){
1329 #ifdef _CP_DEBUG_STATE_GPP_VERBOSE_
1330  if(thisIndex.x==0)
1331  CkPrintf("HI, I am gPP %d %d done! : %d\n",thisIndex.x,thisIndex.y,iterNL);
1332 #endif
1333  iterNL = 0;
1334  /// If the nonlocal force computations are barriered, contribute to the reduction
1335  #ifdef BARRIER_CP_PARTICLEPLANE_NONLOCAL
1336  int nonLocDone = 1;
1337  contribute(sizeof(int),&nonLocDone,CkReduction::sum_int,
1338  CkCallback(CkIndex_GSpaceDriver::allDoneNLForces(NULL),UgSpaceDriverProxy[thisInstance.proxyOffset]));
1339  /// else, directly notify your driver
1340  #else
1341  UgSpaceDriverProxy[thisInstance.proxyOffset](thisIndex.x,thisIndex.y).doneNLForces();
1342  #endif
1343  // Gsp zeros myForces so it is ready next time
1344  }else{
1345  NLDummyMsg *msg = new(8*sizeof(int)) NLDummyMsg;
1346  CkSetQueueing(msg, CK_QUEUEING_IFIFO);
1347  *(int*)CkPriorityPtr(msg) = config.sfpriority;
1348  msg->iteration=iteration;
1349  thisProxy(thisIndex.x, thisIndex.y).lPrioStartNLEes(msg);
1350  }//endif
1351 
1352 //----------------------------------------------------------------------------
1353  }//end routine
1354 //////////////////////////////////////////////////////////////////////////////
1355 
1356 
1357 ///////////////////////////////////////////////////////////////////////////=
1358 /// Make sure everyone is registered on the 1st time step
1359 ///////////////////////////////////////////////////////////////////////////=
1360 ///////////////////////////////////////////////////////////////////////////c
1361 ///////////////////////////////////////////////////////////////////////////=
1362  void CP_State_ParticlePlane::registrationDone(CkReductionMsg *msg) {
1363 ///////////////////////////////////////////////////////////////////////////=
1364 
1365  int sum = ((int *)msg->getData())[0];
1366  delete msg;
1367 
1368 #ifdef _CP_DEBUG_STATE_GPP_VERBOSE_
1369  if(thisIndex.x==0)
1370  CkPrintf("HI, I am gPP %d %d in reg : %d\n",thisIndex.x,thisIndex.y,sum);
1371 #endif
1372 
1373  registrationFlag = 1;
1374  if(iterNL==1){createNLEesFFTdata();}
1375 
1376  if(iterNL>1){
1377  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
1378  CkPrintf("Homes, registeration must occur before the 1st time step in GPP\n");
1379  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
1380  CkExit();
1381  }//endif
1382 
1383 }
1384 ///////////////////////////////////////////////////////////////////////////=
1385 /*@}*/
1386 #include "gParticlePlane.def.h"
1387 
void pup(PUP::er &)
pup for migration /////////////////////////////////////////////////////////////////////////// ///////...
holds the UberIndex and the offset for proxies
Definition: Uber.h:68
void FFTNLEesFwd()
FFT projPsi(gx,gy,gz) -> projPsi(gx,gy,z) ///////////////////////////////////////////////////////////...
int nfreq_cpnonlocal_eesbk
CPNONLOCAL::eesPsiForcGspace.
Definition: configure.h:213
void createNLEesFFTdata()
In gspace, create the projector, projPsi ////////////////////////////////////////////////////////////...
void computeZ(PPDummyMsg *m)
Entry Method.
void initKVectors()
InitKVectors creates local k vectors for this chare and mallocs some memory.
void reduceZ(int, int, complex *, complex *, complex *, complex *)
ReduceZ reduces Zmat over gspace for a particular state for N^3 method //////////////////////////////...
void computeNLEesForces()
Compute the Psi forces : If not done, start a new iteration /////////////////////////////////////////...
CkGroupID mCastGrpId
Multicast manager group that handles many mcast/redns in the code. Grep for info. ...
Definition: cpaimd.C:216
2D chare array [nchareG][nstates] Handles the electronic structure in Fourier space (referred to as G...
Add type declarations for simulationConstants class (readonly vars) and once class for each type of o...
void doNlFFTRtoG_Gchare(complex *, complex *, int, int, int, int, RunDescriptor *, int, int)
non-local : Gchare : data(gx,gy,z) -> data(gx,gy,gz) : backward /////////////////////////////////////...
Definition: fftCache.C:598
void FFTNLEesBck()
Complete the FFT : projPsif(gx,gy,z) -> projPsif(gx,gy,gz) //////////////////////////////////////////...
== Index logic for lines of constant x,y in gspace.
Definition: RunDescriptor.h:22
int nfreq_cpnonlocal_eesfwd
CPNONLOCAL::eesProjGchare, CPNONLOCAL::eesYlmOnD.
Definition: configure.h:212
int calcReductionPlaneNum(int)
Spread the reduction plane numbers around to minimize map collisions for the N^3 method.
void ResumeFromSync()
Resume from Sync which is a general routine /////////////////////////////////////////////////////////...
Some basic data structures and the array map classes are defined here.
void recvFromEesRPP(GSPPIFFTMsg *msg)
Receive the projector back modified to generate psi forces A message cannot come back until I have se...
void registerCacheGPP(int, int, int *, int *, int *)
= GParticlePlane Cache Management tool
Definition: eesCache.C:138
void getForces(int, int, complex *)
Compute psi forces for the N^3 method ///////////////////////////////////////////////////////////////...
void startNLEes(int)
= The entry point to the Euler Exponential Spline non-local method.
Group Container class : Only allowed chare data classes have data.
Definition: eesCache.h:18
int registerPP(int state, int plane, int atmGrp)
local particle planes register themselves with the cache so they can be launched by the arrival of an...
void registrationDone(CkReductionMsg *msg)
= Make sure everyone is registered on the 1st time step
void setEnlCookie(EnlCookieMsg *m)
Section reduction cookie N^3 method /////////////////////////////////////////////////////////////////...
void sendToEesRPP()
send projPsi(gx,gy,z) to Rspace chare where FFT is completed ////////////////////////////////////////...
void doNlFFTGtoR_Gchare(complex *, complex *, int, int, int, int, RunDescriptor *, int, int)
non-local : Gchare : data(gx,gy,gz) -> data(gx,gy,z) : forward //////////////////////////////////////...
Definition: fftCache.C:632
void lPrioStartNLEes(NLDummyMsg *m)
= The entry point to the Euler Exponential Spline non-local method.
void launchComputeZs()
Entry Method. GSpaceDriver calls this to trigger the launch of the computeZ()s to compute the Z matri...
~CP_State_ParticlePlane()
The destructor : Carefully about what it malloced and what is not ///////////////////////////////////...