OpenAtom  Version1.5a
CP_State_GSpacePlane.C
Go to the documentation of this file.
1 ///////////////////////////////////////////////////////=
2 /// Things to do :
3 /// move resetiterstate
4 ///////////////////////////////////////////////////////=
5 //#define _CP_DEBUG_WARN_SUSPEND_
6 //#define _CP_DEBUG_ORTHO_OFF_
7 //#define _CP_DEBUG_PSI_OFF_
8 //#define DEBUG_CP_GSPACE_PSIV
9 //#define BARRIER_CP_GSPACE_PSI
10 //#define BARRIER_CP_GSPACE_PSIV
11 //#define BARRIER_CP_GSPACE_NONLOCAL
12 //#define BARRIER_CP_GSPACE_IFFT
13 
14 
15 //////////////////////////////////////////////////////////////////////////////
16 //////////////////////////////////////////////////////////////////////////////
17 //////////////////////////////////////////////////////////////////////////////
18 /** \file CP_State_GSpacePlane.C
19  * @defgroup GSpaceState GSpaceState
20  *
21  * \brief The electronic structure is loaded in Fourier space (referred
22  * to as GSpace), tranformed to \ref RealSpaceState and updated each step.
23  * Data is arranged into approximately equal size chunks \re nchareg. The
24  * number of chunks is a free parameter selected at runtime.
25  * See \ref GSpaceDriver::driveGSpace for flow of control.
26  */
27 //////////////////////////////////////////////////////////////////////////////
28 
29 
30 //////////////////////////////////////////////////////////////////////////////
31 #include "CP_State_GSpacePlane.h"
32 #include "CP_State_ParticlePlane.h"
33 #include "CP_State_Plane.h"
34 
35 #include "main/startupMessages.h"
36 #include "utility/util.h"
37 #include "main/AtomsCache.h"
38 #include "main/energyGroup.h"
39 #include "main/eesCache.h"
40 #include "main/TimeKeeper.h"
42 #include "structure_factor/StructFactorCache.h"
43 #include "main/CPcharmParaInfoGrp.h"
44 #include "main/cpaimd.h"
45 #include "main/InstanceController.h"
46 #ifdef PC_USE_RDMA
47  #define ENABLE_RDMA_HANDSHAKES
48 #endif
49 #include "paircalc/RDMAMessages.h"
50 
51 #include "charm++.h"
52 
53 #include <iostream>
54 #include <fstream>
55 #include <cmath>
56 
57 //---------------------------------------------------------------------------
58 #define CHARM_ON
59 #include "../../src_piny_physics_v1.0/include/class_defs/CP_OPERATIONS/class_cpnonlocal.h"
60 #include "../../src_piny_physics_v1.0/include/class_defs/CP_OPERATIONS/class_cpintegrate.h"
61 #include "../../src_piny_physics_v1.0/include/class_defs/CP_OPERATIONS/class_cprspaceion.h"
62 #include "../../src_piny_physics_v1.0/include/class_defs/CP_OPERATIONS/class_cplocal.h"
63 #include "../../src_piny_physics_v1.0/include/class_defs/piny_constants.h"
64 #include "../../src_piny_physics_v1.0/include/class_defs/allclass_gen.h"
65 #include "../../src_piny_physics_v1.0/include/class_defs/allclass_cp.h"
66 #include "../../src_piny_physics_v1.0/include/class_defs/PINY_INIT/PhysicsParamTrans.h"
67 //////////////////////////////////////////////////////////////////////////////
68 #if CMK_PERSISTENT_COMM
69 #define USE_PERSISTENT 1
70 #endif
71 
72 //////////////////////////////////////////////////////////////////////////////
73 extern Config config;
74 extern CProxy_main mainProxy;
75 extern CProxy_InstanceController instControllerProxy;
76 extern CProxy_TimeKeeper TimeKeeperProxy;
77 extern CkVec <CProxy_CP_State_RealSpacePlane> UrealSpacePlaneProxy;
78 extern CkVec <CProxy_CP_State_GSpacePlane> UgSpacePlaneProxy;
79 extern CkVec <CProxy_GSpaceDriver> UgSpaceDriverProxy;
80 extern CkVec <CProxy_CP_State_ParticlePlane> UparticlePlaneProxy;
81 extern CkVec <CProxy_AtomsCache> UatomsCacheProxy;
82 extern CkVec <CProxy_StructureFactor> UsfCompProxy;
83 extern CkVec <CProxy_EnergyGroup> UegroupProxy;
84 extern CkVec <CProxy_FFTcache> UfftCacheProxy;
85 extern CkVec <CProxy_StructFactCache> UsfCacheProxy;
86 extern CkVec <CProxy_eesCache> UeesCacheProxy;
87 
88 
89 extern CProxy_ComlibManager mgrProxy;
90 extern ComlibInstanceHandle gssInstance;
91 extern CkGroupID mCastGrpId;
92 
93 extern int nstates;
94 extern int sizeX;
95 extern int nchareG; // number of g-space chares <= sizeX and >=nplane_x
96 
97 
98 void testeke(int ,complex *,int *,int *,int *, int ,int);
99 
100 //#define _CP_DEBUG_STATEG_VERBOSE_
101 //#define _CP_DEBUG_WARN_SUSPEND_
102 
103 
104 /** @addtogroup GSpaceState
105  @{
106 */
107 //////////////////////////////////////////////////////////////////////////////
108 /// Entry method to resume execution after computing reduction over all planes
109 /// and states to form psiCgOvlap (cg only) and magforPsi
110 //////////////////////////////////////////////////////////////////////////////
111 //////////////////////////////////////////////////////////////////////////////
112 //////////////////////////////////////////////////////////////////////////////
113 void CP_State_GSpacePlane::psiCgOvlap(CkReductionMsg *msg){
114 //////////////////////////////////////////////////////////////////////////////
115 /// Unpack
116 /// CkPrintf("{%d} GSP [%d,%d] psiCgOvlap\n",thisInstance.proxyOffset, thisIndex.x,thisIndex.y);
117  CPcharmParaInfo *sim = CPcharmParaInfo::get();
118  AtomsCache *ag = UatomsCacheProxy[thisInstance.proxyOffset].ckLocalBranch(); // find me the local copy
119 
120  int cp_min_opt = sim->cp_min_opt;
121  double tol_cp_min = sim->tol_cp_min;
122  double tol_cp_dyn = sim->tol_cp_dyn;
123  int natm = ag->natm;
124  double rnatm = ((double)natm)/96.0; // relative to 32 waters
125 
126  double d0 = ((double *)msg->getData())[0];
127  double d1 = ((double *)msg->getData())[1];
128  d1 = sqrt(d1); // piny convention
129 
130  delete msg;
131 
132 //////////////////////////////////////////////////////////////////////////////
133 /// Copy old/new : Set new values
134 
135  fovlap_old = fovlap; // CG ovlap (all chares need it)
136  fmagPsi_total_old = fmagPsi_total;
137 
138  fovlap = d0;
139  fmagPsi_total = d1; // mag of psi force (all chares need it)
140 
141 //////////////////////////////////////////////////////////////////////////////
142 /// Output the mag force, send to the energy group, set the exit flag
143 
144  if(thisIndex.x==0 && thisIndex.y==0){
145  int iprintout = iteration-1;
146 
147  fprintf(temperScreenFile, "Iter [%d] MagForPsi = %5.8lf | %5.8lf per entity\n", iprintout,d1,d1/rnatm);
148  fprintf(temperScreenFile,"Iter [%d] Memory = %ld\n",iprintout,CmiMemoryUsage());
149  computeEnergies(ENERGY_FMAG, d1);
150  }//endif
151 
152  if(cp_min_opt==0 && fmagPsi_total>rnatm*tol_cp_dyn){
153  exitFlag=1;
154  if(thisIndex.x==0 && thisIndex.y==0){
155  fprintf(temperScreenFile,"@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
156  fprintf(temperScreenFile, "Mag psi force %.10g > %.10g too large for CP dynamics. Ciao! \n",
157  fmagPsi_total/rnatm,tol_cp_dyn);
158  fprintf(temperScreenFile,"@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
159  }//endif
160  }//endif
161  int numBeads=config.UberImax;
162  int numTempers=config.UberKmax;
163 #ifndef _CP_DEBUG_ORTHO_OFF_
164  if (cp_min_opt==1)
165  {
166  if(numBeads==1 && numTempers==1)
167  {
168  if(fmagPsi_total<=tol_cp_min){
169 #ifndef _CP_DEBUG_SCALC_ONLY_
170  exitFlag=1; outputFlag=1;
171  if(thisIndex.x==0 && thisIndex.y==0){
172  CkPrintf("----------------------------------------------\n");
173  CkPrintf(" CP wavefunction force tolerence reached! \n");
174  CkPrintf("----------------------------------------------\n");
175  }//endif
176 #endif // _CP_DEBUG_SCALC_ONLY_
177  }
178  }
179  else
180  {
181 #ifndef _CP_DEBUG_SCALC_ONLY_
182  if(fmagPsi_total<=tol_cp_min)
183  outputFlag=1;
184  if(exitFlagMin==1) // every bead and temper is minimized
185  {
186  exitFlag=1; outputFlag=1;
187  if(thisIndex.x==0 && thisIndex.y==0){
188  CkPrintf("----------------------------------------------\n");
189  CkPrintf(" CP wavefunction force tolerence reached! \n");
190  CkPrintf("----------------------------------------------\n");
191  }//endif
192  }
193  if(thisIndex.x==0 && thisIndex.y==0){
194  // can't let any bead stop until they all reach tolerance.
195  // but we only need one contributor from each replica.
196  CkMulticastMgr *mcastGrp = CProxy_CkMulticastMgr(mCastGrpId).ckLocalBranch();
197  int result=(fmagPsi_total <= tol_cp_min);
198  // CkPrintf("{%d} [%d,%d] tolcheck contrib %d %.5g %5g %5g\n",thisInstance.proxyOffset, thisIndex.x, thisIndex.y, result, fmagPsi_total, tol_cp_min,fmagPsi_total - tol_cp_min);
199  mcastGrp->contribute(sizeof(int), &result, CkReduction::logical_and,
200  beadCookie);
201  }
202 #endif // _CP_DEBUG_SCALC_ONLY_
203  }
204 
205  }
206 #endif
207 
208 //////////////////////////////////////////////////////////////////////////////
209 /// Do a little cputime management in GS class then resume
210 
211  if(thisIndex.x==0 && thisIndex.y==0){
212 #ifdef _CP_DEBUG_ORTHO_OFF_
213  CkPrintf("/////////////////////////////////////////////=\n");
214  CkPrintf(" Running with PC/Ortho Off \n");
215 #endif
216  double cpuTimeOld = cpuTimeNow;
217  cpuTimeNow = CkWallTimer();
218  if(iteration>1){
219  fprintf(temperScreenFile, "Iter [%d] CpuTime(GSP)= %g\n",iteration-1,cpuTimeNow-cpuTimeOld);
220  if(cp_min_opt==0){
221  int heavyside = 1-(iteration-iterRotation >= 1 ? 1 : 0);
222  fprintf(temperScreenFile, "Iter [%d] Step = %d : Step Last Rot = %d : Interval Rot = %d : Num Rot = %d : %d\n",iteration,
223  iteration,iterRotation,iteration-iterRotation,nrotation,heavyside);
224  }//endif
225  }//endif
226  }//endif
227 //////////////////////////////////////////////////////////////////////////////
228  }// end routine
229 //////////////////////////////////////////////////////////////////////////////
230 void CP_State_GSpacePlane::initBeadCookie(ICCookieMsg *m)
231 {
232  //CkPrintf("{%d} [%d,%d] beadcookie initialized\n",thisInstance.proxyOffset, thisIndex.x, thisIndex.y);
233  CkGetSectionInfo(beadCookie,m);
234  //beadCookie=m->_cookie;
235 }
236 
237 void CP_State_GSpacePlane::minimizeSync(ICCookieMsg *m)
238 {
239  // CkPrintf("{%d} [%d,%d] minimizeSync %d\n",thisInstance.proxyOffset, thisIndex.x, thisIndex.y, m->junk);
240  CkGetSectionInfo(beadCookie,m);
241  if(m->junk==1)
242  thisProxy.setExitFlag();
243 }
244 
245 void CP_State_GSpacePlane::setExitFlag()
246 {
247  exitFlagMin=1;
248 }
249 //////////////////////////////////////////////////////////////////////////////
250 //////////////////////////////////////////////////////////////////////////////
251 //////////////////////////////////////////////////////////////////////////////
252 void printForce(void *param, void *msg){
253 
254  CkReductionMsg *m=(CkReductionMsg *)msg;
255  double d = ((double *)m->getData())[0];
256 
257  CkPrintf("printForces = %5.8f \n", d);
258  delete m;
259 }
260 //////////////////////////////////////////////////////////////////////////////
261 
262 
263 //////////////////////////////////////////////////////////////////////////////
264 /** \brief Constructor for GSpacePlane
265  */
266 //////////////////////////////////////////////////////////////////////////////
267 //////////////////////////////////////////////////////////////////////////////
268 //////////////////////////////////////////////////////////////////////////////
270  /// numplanes in x per state
271  int sizeX,
272  /// numplanes per slab (Gspace: in x dimension)
273  int gSpaceUnits,
274  /// numplanes per slab (Rspace: in y dimension)
275  int realSpaceUnits,
276  /// S matrix grain size
277  int s_grain,
278  /// timekeeper fwd
279  int _gforward,
280  /// timekeeper bwd
281  int _gbackward,
282  /// uber
283  UberCollection _thisInstance
284  ) :
285  forwardTimeKeep(_gforward), backwardTimeKeep(_gbackward),
286  thisInstance(_thisInstance)
287 //////////////////////////////////////////////////////////////////////////////
288 {//begin routine
289 //////////////////////////////////////////////////////////////////////////////
290 
291  /// initialize member vars
292 #ifdef USE_PERSISTENT
293  fftHandler = NULL;
294 #endif
295  CPcharmParaInfo *sim = CPcharmParaInfo::get();
296  int cp_min_opt = sim->cp_min_opt;
297  int gen_wave = sim->gen_wave;
298  wallTimeArr=NULL;
299  if(thisIndex.x==0 && thisIndex.y==0 && config.maxIter<30){
300  wallTimeArr = new double[config.maxIter+2];
301  }else{
302  wallTimeArr = new double[30];
303  }//endif
304  wallTimeArr[0]=0.0;
305  wallTimeArr[1]=0.0;
306 
307  myBeadIndex = thisInstance.idxU.x;
308  myKptIndex = thisInstance.idxU.y;
309  myTemperIndex = thisInstance.idxU.z;
310  mySpinIndex = thisInstance.idxU.s;
311 
312 //////////////////////////////////////////////////////////////////////////////
313 
314  istate_ind = thisIndex.x;
315  iplane_ind = thisIndex.y;
316  ibead_ind = thisInstance.idxU.x;
317  kpoint_ind = thisInstance.idxU.y;
318  itemper_ind = thisInstance.idxU.z;
319  ispin_ind = 0; //needs to be updated
320  initialized = false;
321  iteration = 0;
322  nrotation = 0;
323  iterRotation = 0;
324  gotHandles =0;
325 
326  total_energy = 0.0;
327  ehart_total = 0.0;
328  enl_total = 0.0;
329  eke_total = 0.0;
330  egga_total = 0.0;
331  eexc_total = 0.0;
332  eext_total = 0.0;
333  ewd_total = 0.0;
334  fovlap = 0.0;
335  fovlap_old = 0.0;
336  fmagPsi_total = 0.0;
337  fmagPsi_total0 = 0.0; // only chare(0,0) cares
338  fmagPsi_total_old = 0.0;
339  cpuTimeNow = 0.0;
340  fictEke_total = 0.0;
341 
342  halfStepEvolve = 1;
343  ireset_cg = 1;
344  numReset_cg = 0;
345  exitFlag = 0;
346  exitFlagMin = 0;
347  outputFlag = 0;
348  iRecvRedPsi = 1;
349  iSentRedPsi = 1;
350  iRecvRedPsiV = 0;
351  iSentRedPsiV = 0;
352 
353  finishedCpIntegrate = 0;
354  if(cp_min_opt==0){finishedCpIntegrate = 1;}// alternate entry point
355  if(gen_wave==1){finishedCpIntegrate = 1;}// alternate entry point
356  doneDoingIFFT = false;
357  acceptedPsi = true; // we start out with a psi
358  acceptedVPsi = true; // we start out with a vpsi
359  acceptedLambda = false; // no forces yet
360  doneNewIter = false;
361 
362  countFileOut = 0;
363  countRedPsi = 0;
364  countRedPsiV = 0;
365  countPsi = 0;
366  countLambda = 0;
367  countVPsi = 0;
368  countIFFT = 0;
369  ecount = 0; //No energies have been received.
370  cleanExitCalled = 0;
371  /// Setup Symm PC accounting to expected manage message counts
372  countPsiO = new int[config.numChunksSym];
373 
374  for(int i=0;i<config.numChunksSym;i++)
375  countPsiO[i]=0;
376 
377  countVPsiO= new int[config.numChunksSym];
378  for(int i=0;i<config.numChunksSym;i++)
379  countVPsiO[i]=0;
380  /* figure out how to record the off diag extra lambdas */
381  /* some trickery like psi uses */
382 
383 
384  countLambdaO= new int[config.numChunksAsym];
385  for(int i=0;i<config.numChunksAsym;i++)
386  countLambdaO[i]=0;
387 
388  numRecvRedPsi = sim->RCommPkg[thisIndex.y].num_recv_tot;
389 
390  //---------------------------------------------
391 
392  int remainder=nstates%s_grain;
393  int sizeoflastgrain=s_grain+remainder;
394  int lastgrain=nstates-sizeoflastgrain;
395  int ourgrain = thisIndex.x/s_grain*s_grain;
396  if(nstates == s_grain){
397  AllPsiExpected=1;
398  }else{
399  // if(ourgrain<lastgrain){ // corner has no extras
400  if(ourgrain<(nstates-s_grain)){ // corner has no extras
401  AllPsiExpected=2;
402  }else{
403  AllPsiExpected=1;
404  }//endif
405  }//endif
406  AllPsiExpected*=config.numChunksSym;
407 
408  int numGrains=nstates/s_grain;
409  if(config.gSpaceSum){ // no reductions its all coming direct
410  AllPsiExpected=numGrains*config.numChunksSym;
411  }//endif
412 
413  //---------------------------------------------
414  /// Asymm PC msg count accounting
415  if(cp_min_opt==0){ //expect non diagonal column results
416  if(nstates == s_grain){
417  AllLambdaExpected=1;
418  }else {
419  AllLambdaExpected=2;
420  }//endif
421  }else{ //no column reduction results in minimization
422  AllLambdaExpected=1;
423  }//endif
424 
425  if(config.gSpaceSum){ // no reductions its all coming direct
426  if(cp_min_opt==0 && numGrains>1)
427  { AllLambdaExpected=(2*numGrains-1)*config.numChunksAsym;}
428  else
429  { AllLambdaExpected=numGrains*config.numChunksAsym*AllLambdaExpected;}
430  }//endif
431  else
432  {
433  AllLambdaExpected*=config.numChunksAsym;
434 
435  }
436 
437  /// Compute the number of RDMA links that I'll have with the symm/asymm PC chares
438 #ifdef PC_USE_RDMA
439  numRDMAlinksSymm = numGrains * config.numChunksSym;
440  numRDMAlinksAsymm = numGrains * config.numChunksAsym * 2;
441 #else
442  numRDMAlinksSymm = 0;
443  numRDMAlinksAsymm = 0;
444 #endif
445 
446 //////////////////////////////////////////////////////////////////////////////
447 /// Just zero everything for now
448 
449  gSpaceNumPoints = 0;
450  tpsi = NULL;
451  tvpsi = NULL;
452  tk_x = NULL;
453  tk_y = NULL;
454  tk_z = NULL;
455  int len_nhc_cp;
456  int num_nhc_cp;
457  int nck_nhc_cp;
458  CPINTEGRATE::fetchNHCsize(&len_nhc_cp,&num_nhc_cp,&nck_nhc_cp);
459  initGStateSlab(&gs,sizeX,sim->sizeY,sim->sizeZ,gSpaceUnits,realSpaceUnits,s_grain,
460  thisIndex.y,thisIndex.x,len_nhc_cp,num_nhc_cp,nck_nhc_cp);
461 
462 
463 //////////////////////////////////////////////////////////////////////////////
464 /// Load Balancing etc
465 
466  usesAtSync = true;
467  if(config.lbgspace){
468  setMigratable(true);
469  }else{
470  setMigratable(false);
471  }//endif
472 
473 #ifdef _CP_GS_DEBUG_COMPARE_VKS_
474  savedvksBf=NULL;
475  savedforceBf=NULL;
476 #endif
477 #ifdef _CP_GS_DEBUG_COMPARE_PSI_
478  savedpsiBf=NULL;
479  savedpsiBfp=NULL;
480  savedpsiAf=NULL;
481  savedlambdaBf=NULL;
482  savedlambdaAf=NULL;
483 #endif
484 
485 //////////////////////////////////////////////////////////////////////////////
486 /// Pick a reduction plane
487 
488  redPlane = 0;
489  if(nchareG>1){
490  redPlane = 1;
491 #ifdef _FANCY_PLANES_
492  if(config.numPes>2*nstates){
493  CkVec <int> usedVec;
494  CkVec <int> peUsedByNLZ;
495  CkVec <int> planeUsedByNLZ;
496  for(int state=0; state<thisIndex.x;state++){
497  redPlane=nchareG-1;
498  while(redPlane>=0){
499  bool used=false;
500  int thisstateplaneproc=GSImaptable.get(state,redPlane)%CkNumPes();
501  for(int i=0;i<usedVec.size();i++){
502  if(usedVec[i]==thisstateplaneproc){used=true;}
503  }//endfor
504  if(!used || redPlane==0){
505  peUsedByNLZ.push_back(thisstateplaneproc);
506  planeUsedByNLZ.push_back(redPlane);
507  usedVec.push_back(thisstateplaneproc);
508  redPlane=-1;
509  }//endif
510  redPlane--;
511  }//endwhile
512  }//endfor
513  }else{
514  redPlane = (thisIndex.x % (nchareG-1));
515  }//endif
516 #else
517  redPlane = (thisIndex.x % (nchareG-1));
518 #endif
519  redPlane = (redPlane < 0 ? redPlane+nchareG : redPlane);
520  redPlane = (redPlane > nchareG-1 ? redPlane-nchareG : redPlane);
521  }//endif
522 
523 
524 //---------------------------------------------------------------------------
525  }//end routine
526 //////////////////////////////////////////////////////////////////////////////
527 
528 
529 //////////////////////////////////////////////////////////////////////////////
530 //////////////////////////////////////////////////////////////////////////////
531 //////////////////////////////////////////////////////////////////////////////
532 CP_State_GSpacePlane::CP_State_GSpacePlane(CkMigrateMessage *m) {}
533 //////////////////////////////////////////////////////////////////////////////
534 
535 
536 //////////////////////////////////////////////////////////////////////////////
537 //////////////////////////////////////////////////////////////////////////////
538 //////////////////////////////////////////////////////////////////////////////
539 CP_State_GSpacePlane::~CP_State_GSpacePlane(){
540  if(initialized) {
541  delete [] lambdaproxyother;
542  delete [] lambdaproxy;
543  delete [] psiproxyother;
544  delete [] psiproxy;
545  }//
546 }
547 //////////////////////////////////////////////////////////////////////////////
548 
549 
550 //////////////////////////////////////////////////////////////////////////////
551 //////////////////////////////////////////////////////////////////////////////
552 //////////////////////////////////////////////////////////////////////////////
553 void CP_State_GSpacePlane::pup(PUP::er &p) {
554 //////////////////////////////////////////////////////////////////////////////
555  ArrayElement2D::pup(p);
556  UgSpaceDriverProxy[thisInstance.proxyOffset](thisIndex.x,thisIndex.y).pup(p);
557  ///ontrol flags and functions reference by thread are public
558  p|istate_ind;
559  p|iplane_ind;
560  p|ibead_ind; p|kpoint_ind; p|itemper_ind; p|ispin_ind;
561  p|registrationFlag;
562  p|initialized;
563  p|istart_typ_cp;
564  p|iteration;
565  p|nrotation;
566  p|exitFlag;
567  p|exitFlagMin;
568  p|outputFlag;
569  p|cleanExitCalled;
570  p|finishedCpIntegrate;
571  p|iRecvRedPsi;
572  p|iSentRedPsi;
573  p|iRecvRedPsiV;
574  p|iSentRedPsiV;
575  p|ireset_cg;
576  p|numReset_cg;
577  p|countPsi;
578  p|countVPsi;
579  p|countLambda;
580  p|countIFFT;
581  p|countFileOut;
582  p|ecount;
583  p|countRedPsi;
584  p|numRecvRedPsi;
585  p|AllPsiExpected;
586  p|AllLambdaExpected;
588  p|numRDMAlinksAsymm;
589  p|doneDoingIFFT;
590  p|doneNewIter;
591  p|acceptedPsi;
592  p|acceptedVPsi;
593  p|acceptedLambda;
594  p|itemp; // 2 temporary variables for debugging in scope
595  p|jtemp;
596  p|myBeadIndex;
597  p|myKptIndex;
598  p|myTemperIndex;
599  p|mySpinIndex;
600 
601  p|ehart_total;
602  p|enl_total;
603  p|eke_total;
604  p|fictEke_total;
605  p|fmagPsi_total0;
606  p|fmagPsi_total;
607  p|fmagPsi_total_old;
608  p|fovlap;
609  p|fovlap_old;
610  p|egga_total;
611  p|eexc_total;
612  p|eext_total;
613  p|ewd_total;
614  p|total_energy;
615  p|cpuTimeNow;
616 
617  p|real_proxy;
618  gs.pup(p);
619  p|gSpaceNumPoints;
620  if (p.isUnpacking()) {
621  lambdaproxy=new CProxySection_PairCalculator[config.numChunksAsym];
622  lambdaproxyother=new CProxySection_PairCalculator[config.numChunksAsym];
623  psiproxy=new CProxySection_PairCalculator[config.numChunksSym];
624  psiproxyother=new CProxySection_PairCalculator[config.numChunksSym];
625  countPsiO= new int[config.numChunksSym];
626  countVPsiO= new int[config.numChunksSym];
627  countLambdaO= new int[config.numChunksAsym];
628  }//endif
629  PUParray(p,lambdaproxy,config.numChunksAsym);
630  PUParray(p,lambdaproxyother,config.numChunksAsym);
631  PUParray(p,psiproxy,config.numChunksSym);
632  PUParray(p,psiproxyother,config.numChunksSym);
633  PUParray(p,countPsiO,config.numChunksSym);
634  PUParray(p,countVPsiO,config.numChunksSym);
635  PUParray(p,countLambdaO,config.numChunksAsym);
636 //-------------------------------------------------------
637  }// end routine : pup
638 //////////////////////////////////////////////////////////////////////////////
639 
640 
641 
642 
644 {
645  symmPCmgr = PCCommManager(thisIndex, msg->symmCfg, msg->symmIDs);
646  asymmPCmgr = PCCommManager(thisIndex, msg->asymmCfg, msg->asymmIDs);
647  myOrtho = CProxy_Ortho(msg->orthoAID);
648 
649 //////////////////////////////////////////////////////////////////////////////
650 /// Contribute to the reduction telling main we are done
651 
652  int constructed=1;
653  contribute(sizeof(int), &constructed, CkReduction::sum_int,
654  CkCallback(CkIndex_InstanceController::doneInit(NULL),CkArrayIndex1D(thisInstance.proxyOffset),instControllerProxy), thisInstance.proxyOffset);
655 }
656 
657 
658 
659 
660 //////////////////////////////////////////////////////////////////////////////
661 //////////////////////////////////////////////////////////////////////////////
662 //////////////////////////////////////////////////////////////////////////////
663 /// In this function data is read from files, and sent to the corresponding
664 /// G-space planes. Data reading will be done in chunk 0 of each state
665 //////////////////////////////////////////////////////////////////////////////
667 //////////////////////////////////////////////////////////////////////////////
668 /// Local pointers
669 
670  CP *cp = CP::get();
671 #include "../class_defs/allclass_strip_cp.h"
672  CPcharmParaInfo *sim = CPcharmParaInfo::get();
673  int numData = config.numData;
674  int ibinary_opt = sim->ibinary_opt;
675  int istart_typ_cp = sim->istart_typ_cp;
676  int gen_wave = sim->gen_wave;
677  int ncoef = sim->ncoef;
678  CkVec <RunDescriptor> *sortedRunDescriptors = sim->sortedRunDescriptors;
679  int *npts_lgrp = sim->npts_per_chareG;
680  int *nline_lgrp = sim->nlines_per_chareG;
681  int *istrt_lgrp = NULL;
682  int *iend_lgrp = NULL;
683 
684  int nx = sizeX;
685  int ny = sim->sizeY;
686  int nz = sim->sizeZ;
687  int ngridaNL = sim->ngrid_nloc_a;
688  int ngridbNL = sim->ngrid_nloc_b;
689  int ngridcNL = sim->ngrid_nloc_c;
690  int nkpoint = sim->nkpoint;
691  int cp_force_complex_psi = sim->cp_force_complex_psi;
692 
693 //////////////////////////////////////////////////////////////////////////////
694 /// Set the file name using the config path and state number
695 
696  char fname[1024];
697  int ind_state=thisIndex.x;
698  //------------------------------------------------------------------
699  // Get the complex data, Psi(g) and the run descriptor (z-lines in g-space)
700 
701  complex *complexPoints = (complex *)fftw_malloc(numData*sizeof(complex));
702  complex *vcomplexPoints = NULL;
703  if(istart_typ_cp>=3){vcomplexPoints = (complex *)fftw_malloc(numData*sizeof(complex));}
704 
705  int *kx= (int *)fftw_malloc(numData*sizeof(int));
706  int *ky= (int *)fftw_malloc(numData*sizeof(int));
707  int *kz= (int *)fftw_malloc(numData*sizeof(int));
708  int nlines_tot,nplane;
709 
710  if(istart_typ_cp>=3){
711  sprintf(fname, "%s/Spin.%d_Kpt.%d_Bead.%d_Temper.%d/vState%d.out",
712  config.dataPath,mySpinIndex,myKptIndex,myBeadIndex,myTemperIndex,ind_state+1);
713  readState(numData,vcomplexPoints,fname,ibinary_opt,&nlines_tot,&nplane,
714  kx,ky,kz,&nx,&ny,&nz,istrt_lgrp,iend_lgrp,npts_lgrp,nline_lgrp,0,1);
715  }//endif
716 
717  if(gen_wave==0){
718  sprintf(fname, "%s/Spin.%d_Kpt.%d_Bead.%d_Temper.%d/state%d.out",
719  config.dataPath,mySpinIndex,myKptIndex,myBeadIndex,myTemperIndex,ind_state+1);
720  readState(numData,complexPoints,fname,ibinary_opt,&nlines_tot,&nplane,
721  kx,ky,kz,&nx,&ny,&nz,istrt_lgrp,iend_lgrp,npts_lgrp,nline_lgrp,0,0);
722  if(cp_force_complex_psi==1){
723  if(ind_state==0){
724  CkPrintf("\n$$$$$$$$$$$$$$$$$$$$_warning_$$$$$$$$$$$$$$$$$$$$\n");
725  CkPrintf("Adding a phase to the states to debug kpt code!!\n");
726  CkPrintf("in routine CP_State_GspacePlane.C\n");
727  CkPrintf("$$$$$$$$$$$$$$$$$$$$_warning_$$$$$$$$$$$$$$$$$$$$\n\n");
728  }//endif
729  double phase = M_PI*((double)(ind_state+1))/((double)(nstates+1));
730  for(int i=0;i<numData;i++){
731  double re = cos(phase); double im = sin(phase);
732  double ore = re*complexPoints[i].re - im*complexPoints[i].im;
733  double oim = re*complexPoints[i].im + im*complexPoints[i].re;
734  complexPoints[i].re = ore;
735  complexPoints[i].im = oim;
736  }//endfor
737  }//endif
738  }else{
739  kx -= 1; ky -= 1; kz -=1;
740  PhysicsParamTransfer::fetch_state_kvecs(kx,ky,kz,ncoef,config.doublePack);
741  kx += 1; ky += 1; kz +=1;
742  processState(numData,ncoef,complexPoints,fname,ibinary_opt,&nlines_tot,
743  &nplane,kx,ky,kz,istrt_lgrp,iend_lgrp,npts_lgrp,nline_lgrp,
744  0,0,0,ny);
745  double *xfull =UatomsCacheProxy[thisInstance.proxyOffset].ckLocalBranch()->fastAtoms.x-1;
746  double *yfull =UatomsCacheProxy[thisInstance.proxyOffset].ckLocalBranch()->fastAtoms.y-1;
747  double *zfull =UatomsCacheProxy[thisInstance.proxyOffset].ckLocalBranch()->fastAtoms.z-1;
748  cpgen_wave->create_coefs(kx,ky,kz,numData,ind_state,complexPoints,
749  xfull,yfull,zfull,kpoint_ind);
750  }//*endif
751 
752  if(config.nGplane_x != nplane && config.doublePack){
753  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
754  CkPrintf("Mismatch in planesize\n");
755  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
756  CkExit();
757  }//endif
758 
759 /// Test the input g-vectors and psi(g) using kinetic energy
760 /// testeke(numData,complexPoints,kx,ky,kz,1,ind_state);
761 
762 //////////////////////////////////////////////////////////////////////////////
763 /// Blast off the data to chares
764 
765  int ioff = 0;
766  for(int x = 0; x < nchareG; x ++){
767 
768  int numPoints = 0;
769  for (int j = 0; j < sortedRunDescriptors[x].size(); j++){
770  numPoints += sortedRunDescriptors[x][j].length;
771  }//endfor
772 
773  complex *dataToBeSent = (complex *)fftw_malloc(numPoints*sizeof(complex));
774  complex *temp = complexPoints+ioff;
775  CmiMemcpy(dataToBeSent,temp,(sizeof(complex) * numPoints));
776 
777  int numPointsV;
778  complex *vdataToBeSent;
779  if(istart_typ_cp>=3){
780  numPointsV = numPoints;
781  vdataToBeSent = (complex *)fftw_malloc(numPointsV*sizeof(complex));
782  complex *vtemp = vcomplexPoints+ioff;
783  CmiMemcpy(vdataToBeSent,vtemp,(sizeof(complex) * numPoints));
784  }else{
785  numPointsV = 1;
786  vdataToBeSent = (complex *)fftw_malloc(numPointsV*sizeof(complex));
787  vdataToBeSent[0].re = 0.0;
788  vdataToBeSent[0].im = 0.0;
789  }//endif
790 
791  if(ioff>numData){
792  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
793  CkPrintf("Error reading\n");
794  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
795  CkExit();
796  }//endif
797 
798  UgSpacePlaneProxy[thisInstance.proxyOffset](ind_state, x).initGSpace(
799  numPoints,dataToBeSent,numPointsV,vdataToBeSent,
800  nx,ny,nz,ngridaNL,ngridbNL,ngridcNL,istart_typ_cp);
801  fftw_free(dataToBeSent);
802  fftw_free(vdataToBeSent);
803 
804  ioff += numPoints;
805  CmiNetworkProgress();
806  }//endfor : loop over all possible chares in g-space (pencils)
807 
808  CkAssert(numData==ioff);
809 
810 //////////////////////////////////////////////////////////////////////////////
811 /// Clean up
812 
813  fftw_free(complexPoints);
814  if(istart_typ_cp>=3){fftw_free(vcomplexPoints);}
815  fftw_free(kx);
816  fftw_free(ky);
817  fftw_free(kz);
818 
819 //---------------------------------------------------------------------------
820  }//read the file
821 //////////////////////////////////////////////////////////////////////////////
822 
823 
824 //////////////////////////////////////////////////////////////////////////////
825 /**
826  * This method is used to accept the state data from some initializing
827  * routine. Since a CP_State_GSpacePlane class can have more than one plane,
828  * the method used for initialization is as follows:
829  *
830  * This call initializes the entire set of planes
831  * runDescSize: number of run-descriptors
832  * size: the total number of non-zero points
833  * points: pointer to the total data.
834 n *
835  * The data is copied into the planes.
836  */
837 //////////////////////////////////////////////////////////////////////////////
838 //////////////////////////////////////////////////////////////////////////////
839 //////////////////////////////////////////////////////////////////////////////
841  complex* points,
842  int vsize,
843  complex* vpoints,
844  int nx, int ny, int nz,
845  int nxNL, int nyNL, int nzNL,
846  int istart_cp)
847 //////////////////////////////////////////////////////////////////////////////
848  { //begin routine
849 //////////////////////////////////////////////////////////////////////////////
850 #ifdef _CP_DEBUG_STATEG_VERBOSE_
851  CkPrintf("GSpace[%d,%d] initGSpace %d\n",thisIndex.x,thisIndex.y,size);
852 #endif
853 
854  temperScreenFile = UatomsCacheProxy[thisInstance.proxyOffset].ckLocalBranch()->temperScreenFile;
855 
856  CPcharmParaInfo *sim = CPcharmParaInfo::get();
857  registrationFlag = 1;
858  eesCache *eesData = UeesCacheProxy[thisInstance.proxyOffset].ckLocalBranch ();
859  eesData->registerCacheGSP(thisIndex.x,thisIndex.y);
860 
861 //////////////////////////////////////////////////////////////////////////////
862 /// Section Reductions and SF proxy creation
863 
864  real_proxy = UrealSpacePlaneProxy[thisInstance.proxyOffset];
865 
866 #ifdef USE_COMLIB
867  if (config.useGssInsRealP){
868  ComlibAssociateProxy(gssInstance,real_proxy);
869  }//endif
870 #endif
871 
872 //////////////////////////////////////////////////////////////////////////////
873 /// Register with the cache : Eric's multiple reduction schemes ensure its done
874 /// before we need it.
875 
876  int cp_min_opt = sim->cp_min_opt;
877  int cp_min_update = sim->cp_min_update;
878 
879  if (true == initialized) {
880  ckerr << "Attempt to initialize a plane twice" << endl;
881  return;
882  }//endif
883  initialized = true;
884 
885 //////////////////////////////////////////////////////////////////////////////
886 /// Setup gs
887 
888  istart_typ_cp = istart_cp;
889 
890  gs.eke_ret = 0.0;
891  gs.fictEke_ret = 0.0;
892  gs.ekeNhc_ret = 0.0;
893  gs.cp_min_opt = cp_min_opt;
894 
895  gs.numRuns = eesData->GspData[iplane_ind]->numRuns;
896  gs.numLines = gs.numRuns/2;
897  gs.numFull = (gs.numLines)*nz;
898  gs.numFullNL = (gs.numLines)*nzNL;
899  gs.istate_ind = thisIndex.x;
900  gs.iplane_ind = thisIndex.y;
901  gs.mysizeX = sizeX;
902  gs.fftReqd = true;
903 
904  gs.xdim = 1;
905  gs.ydim = ny;
906  gs.zdim = nz;
907 
908  gs.ngridaNL = nxNL;
909  gs.ngridbNL = nyNL;
910  gs.ngridcNL = nzNL;
911 
912  gs.numPoints = eesData->GspData[iplane_ind]->ncoef;;
913  CkAssert(gs.numPoints == size);
914 
915  gs.ees_nonlocal = sim->ees_nloc_on;
916 
917  gs.packedPlaneData = (complex *)fftw_malloc(gs.numPoints*sizeof(complex));
918  gs.packedForceData = (complex *)fftw_malloc(gs.numFull*sizeof(complex));
919  gs.packedVelData = (complex *)fftw_malloc(gs.numPoints*sizeof(complex));
920  CmiMemcpy(gs.packedPlaneData, points, sizeof(complex)*gs.numPoints);
921  bzero(gs.packedForceData,sizeof(complex)*gs.numFull);
922 
923  if(cp_min_opt==0){
924  gs.packedPlaneDataScr = (complex *)fftw_malloc(gs.numPoints*sizeof(complex));
925  gs.packedPlaneDataTemp = (complex *)fftw_malloc(gs.numPoints*sizeof(complex));
926  memset(gs.packedPlaneDataScr, 0, sizeof(complex)*gs.numPoints);
927  CmiMemcpy(gs.packedPlaneDataTemp, points, sizeof(complex)*gs.numPoints);
928  }//endif
929 
930  if(cp_min_opt==1 && cp_min_update==0){
931  gs.packedPlaneDataTemp = (complex *)fftw_malloc(gs.numPoints*sizeof(complex));
932  CmiMemcpy(gs.packedPlaneDataTemp, points, sizeof(complex)*gs.numPoints);
933  }//endif
934 
935 #ifdef _CP_DEBUG_SCALC_ONLY_
936  // if(cp_min_opt==0){
937  gs.packedPlaneDataTemp2 = (complex *)fftw_malloc(gs.numPoints*sizeof(complex));
938  bzero(gs.packedPlaneDataTemp2,sizeof(complex)*gs.numPoints);
939  // }//endif
940 #endif
941 
942  // Under cp_min veldata is the conjugate gradient : always need it.
943  if(istart_typ_cp>=3 && cp_min_opt==0){
944  CkAssert(vsize == size);
945  CmiMemcpy(gs.packedVelData, vpoints, sizeof(complex)*gs.numPoints);
946  }else{
947  memset(gs.packedVelData, 0, sizeof(complex)*gs.numPoints);
948  }//endif
949 
950 //////////////////////////////////////////////////////////////////////////////
951 /// Setup gpspaceplane and particle plane
952 
953  CkAssert(UgSpacePlaneProxy[thisInstance.proxyOffset](thisIndex.x, thisIndex.y).ckLocal());
954  CP_State_ParticlePlane *localParticlePlaneChare = UparticlePlaneProxy[thisInstance.proxyOffset](thisIndex.x, thisIndex.y).ckLocal();
955  CkAssert(localParticlePlaneChare);
956  localParticlePlaneChare->initKVectors();
957 
958 //////////////////////////////////////////////////////////////////////////////
959 /// Setup k-vector ranges, masses and zero the force overlap
960 
961  int *k_x = eesData->GspData[iplane_ind]->ka;
962  int *k_y = eesData->GspData[iplane_ind]->kb;
963  int *k_z = eesData->GspData[iplane_ind]->kc;
964  double *coef_mass = eesData->GspData[iplane_ind]->coef_mass;
965  int mycoef = eesData->GspData[iplane_ind]->ncoef;
966  gSpaceNumPoints = gs.numPoints;
967 
968  if(eesData->allowedGspChares[iplane_ind]==0 || mycoef != gSpaceNumPoints){
969  CkPrintf("Plane %d of state %d toasy %d %d\n",iplane_ind,thisIndex.x,
970  mycoef,gSpaceNumPoints);
971  CkExit();
972  }//endif
973 
974  gs.setKRange(gSpaceNumPoints,k_x,k_y,k_z);
975  // if I have redundant coefs, I must receive them from someone (e.g. myself included)
976  // numRecRedPsi is the number of chares that send master to coefs to me to overwrite my redundant guys
977  if(numRecvRedPsi==0 && gs.nkx0_red>0){
978  CkPrintf("Error in GSchare(%d %d) on proc %d : numRecvRedPsi=%d gs.nkx0_red=%d\n",thisIndex.x,thisIndex.y,
979  CkMyPe(),numRecvRedPsi,gs.nkx0_red);
980  CkExit();
981  }//endif
982 
983  fovlap = 0.0;
984 
985 //////////////////////////////////////////////////////////////////////////////
986 /// Init NHC, Sample velocities
987 
988  int ncoef_use = gs.numPoints-gs.nkx0_red;
989  int maxLenNHC = gs.len_nhc_cp; // double check
990  int maxNumNHC = gs.num_nhc_cp; // double check
991  int maxNckNHC = gs.nck_nhc_cp; // double check
992 
993  CPINTEGRATE::initCPNHC(ncoef_use,gs.nkx0_zero,maxLenNHC,maxNumNHC,maxNckNHC,
994  &gs.kTCP,&gs.tauNHCCP,&gs.degfree,&gs.degfreeNHC,
995  gs.mNHC,gs.v0NHC,gs.a2NHC,gs.a4NHC,
996  gs.degFreeSplt,gs.istrNHC,gs.iendNHC);
997 
998  if(cp_min_opt==0){
999  CPINTEGRATE::CPSmplVel(gs.numPoints,coef_mass,gs.packedVelData,
1000  gs.len_nhc_cp,gs.num_nhc_cp,gs.nck_nhc_cp,
1001  gs.mNHC,gs.vNHC,gs.xNHC,gs.xNHCP,gs.a2NHC,
1002  gs.kTCP,istart_typ_cp,gs.nkx0_red,gs.nkx0_uni,
1003  gs.nkx0_zero,gs.degfree,gs.degfreeNHC);
1004  }//endif
1005 
1006 #ifdef _CP_DEBUG_PSI_OFF_
1007  memset(gs.packedPlaneData, 0, sizeof(complex)*gs.numPoints);
1008 #endif
1009 
1010 #define _CP_DEBUG_DYNAMICS_ZVEL_OFF_
1011 #ifdef _CP_DEBUG_DYNAMICS_ZVEL_
1012  bzero(gs.packedVelData,sizeof(complex)*gs.numPoints);
1013 #endif
1014 
1015 //////////////////////////////////////////////////////////////////////////////
1016 /// Send the k's to the structure factor
1017 
1018  if(sim->ees_nloc_on == 0){
1019  for(int atm=0;atm<config.numSfGrps; atm++){ //each atm
1020  for(int dup=0;dup<config.numSfDups;dup++){ //each dup
1021  if(dup==thisIndex.x){
1022 #ifdef _CP_DEBUG_SF_CACHE_
1023  CkPrintf("GSP [%d,%d] on PE %d sending KVectors to SF[%d,%d,%d]\n",
1024  thisIndex.x, thisIndex.y, CkMyPe(), atm, thisIndex.y, dup);
1025 #endif
1026  UsfCompProxy[thisInstance.proxyOffset](atm,thisIndex.y,dup).acceptKVectors(gSpaceNumPoints, k_x, k_y, k_z);
1027  }//endif
1028  }//endfor
1029  }//endfor
1030  }//endif
1031 
1032 //////////////////////////////////////////////////////////////////////////////
1033 ///Some PC initialization that needs to happen here to avoid
1034 ////onstructor race conditions
1035 
1036  makePCproxies();
1037 #ifdef PC_USE_RDMA
1038  /** Now that we have paircalcids, proxies, and allocated data, setup RDMA with a handshake to each PC.
1039  * Give each PC a token as part of the handshake that identifies what we will be sending it.
1040  * The PC will fill the token with its ID information and return it to us.
1041  * This, along with the RDMA handle will allow us to complete the setup by pointing out the appropriate
1042  * data to be sent to that PC.
1043  */
1044 
1045  /// Get the entry point index of the method that should be called to complete the RDMA handshake
1046  int rdmaConfirmEP = CkIndex_CP_State_GSpacePlane::completeRDMAhandshake(0);
1047  /// Create a callback object to this entry method
1048  CkCallback rdmaConfirmCB( rdmaConfirmEP, CkArrayIndex2D(thisIndex.x,thisIndex.y), thisProxy.ckGetArrayID() );
1049  /// Create a handshake token and fill it with my (sender) ID
1050  RDMApair_GSP_PC handshakeToken;
1051  handshakeToken.gspIndex = thisIndex;
1052 
1053  /// Send rdma requests to all asymmetric loop PCs who will get data from me
1054  handshakeToken.symmetric = false;
1055  handshakeToken.shouldSendLeft = true;
1056  sendLeftRDMARequest (handshakeToken,gs.numPoints,rdmaConfirmCB);
1057  handshakeToken.shouldSendLeft = false;
1058  sendRightRDMARequest(handshakeToken,gs.numPoints,rdmaConfirmCB);
1059 
1060  /// Send rdma requests to all symmetric loop PCs who will get data from me
1061  handshakeToken.symmetric = true;
1062  handshakeToken.shouldSendLeft = true;
1063  sendLeftRDMARequest (handshakeToken,gs.numPoints,rdmaConfirmCB);
1064  handshakeToken.shouldSendLeft = false;
1065  sendRightRDMARequest(handshakeToken,gs.numPoints,rdmaConfirmCB);
1066 
1067 #else
1068 //////////////////////////////////////////////////////////////////////////////
1069 /// This reduction is done to signal the end of initialization to main
1070 
1071  int i=1;
1072  contribute(sizeof(int), &i, CkReduction::sum_int,
1073  CkCallback(CkIndex_InstanceController::doneInit(NULL),CkArrayIndex1D(thisInstance.proxyOffset),instControllerProxy), thisInstance.proxyOffset);
1074 
1075 #endif
1076 #ifdef _CP_SUBSTEP_TIMING_
1077 #if USE_HPM
1078  (TimeKeeperProxy.ckLocalBranch())->initHPM();
1079 #endif // HPM
1080 #endif // _CP_SUBSTEP_TIMING_
1081 
1082 //---------------------------------------------------------------------------
1083 
1084 }// end routine
1085 //////////////////////////////////////////////////////////////////////////////
1086 
1087 void CP_State_GSpacePlane::acceptNewTemperature(double temp)
1088 {
1089  // Hey GLENN do something with your new temperature here
1090 
1091 
1092  // when you're done
1093  int i=1;
1094  contribute(sizeof(int), &i, CkReduction::sum_int,
1095  CkCallback(CkIndex_InstanceController::gspDoneNewTemp(NULL),CkArrayIndex1D(thisInstance.proxyOffset),instControllerProxy), thisInstance.proxyOffset);
1096 }
1097 
1098 
1099 
1100 //////////////////////////////////////////////////////////////////////////////
1101 //////////////////////////////////////////////////////////////////////////////
1102 //////////////////////////////////////////////////////////////////////////////
1103 void CP_State_GSpacePlane::makePCproxies(){
1104 
1105  lambdaproxy=new CProxySection_PairCalculator[config.numChunksAsym];
1106  lambdaproxyother=new CProxySection_PairCalculator[config.numChunksAsym];
1107  psiproxy=new CProxySection_PairCalculator[config.numChunksSym];
1108  psiproxyother=new CProxySection_PairCalculator[config.numChunksSym];
1109  //need one proxy per chunk
1110  if(!config.gSpaceSum){
1111  for(int chunk=0;chunk<config.numChunksAsym;chunk++){
1112  lambdaproxy[chunk]=asymmPCmgr.makeOneResultSection_asym(chunk);
1113  if(AllLambdaExpected/config.numChunksAsym == 2)//additional col. red. in dynamics
1114  lambdaproxyother[chunk]=asymmPCmgr.makeOneResultSection_asym_column(chunk);
1115  }//endfor chunk
1116  for(int chunk=0; chunk < config.numChunksSym ;chunk++){
1117  psiproxy[chunk]=symmPCmgr.makeOneResultSection_sym1(chunk);
1118  if(AllPsiExpected / config.numChunksSym > 1)
1119  psiproxyother[chunk]=symmPCmgr.makeOneResultSection_sym2(chunk);
1120  }//endfor chunk
1121  }//endif not gspacesum
1122 
1123 //////////////////////////////////////////////////////////////////////////////
1124  }//end routine
1125 //////////////////////////////////////////////////////////////////////////////
1126 
1127 
1128 //////////////////////////////////////////////////////////////////////////////
1129 //////////////////////////////////////////////////////////////////////////////
1130 //////////////////////////////////////////////////////////////////////////////
1132 //////////////////////////////////////////////////////////////////////////////
1133 
1134 /// Check for flow of control errors :
1135 
1136 #ifdef _CP_DEBUG_SF_CACHE_
1137  CkPrintf("GSP [%d,%d] StartNewIter\n",thisIndex.x, thisIndex.y);
1138 #endif
1139 #if CMK_TRACE_ENABLED
1140  traceUserSuppliedData(iteration);
1141 #endif
1142  if(iteration>0){
1143  if(UegroupProxy[thisInstance.proxyOffset].ckLocalBranch()->iteration_gsp != iteration ||
1144  UatomsCacheProxy[thisInstance.proxyOffset].ckLocalBranch()->iteration != iteration){
1145  CkPrintf("{%d} @@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n",thisInstance.proxyOffset);
1146  CkPrintf("{%d} Flow of Control Error : Starting new iter before\n",thisInstance.proxyOffset);
1147  CkPrintf("{%d} finishing atom integrate or iteration mismatch.\n",thisInstance.proxyOffset);
1148  CkPrintf("{%d} iter_gsp %d iter_energy %d iter_atm %d and %d and %d\n",
1149  thisInstance.proxyOffset,
1150  iteration,UegroupProxy[thisInstance.proxyOffset].ckLocalBranch()->iteration_gsp,
1151  UatomsCacheProxy[thisInstance.proxyOffset].ckLocalBranch()->iteration,
1152  config.maxIter,cleanExitCalled);
1153  CkPrintf("{%d} chare %d %d\n",thisInstance.proxyOffset,thisIndex.x,thisIndex.y);
1154  CkPrintf("{%d} @@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n",thisInstance.proxyOffset);
1155  CkExit();
1156  }//endif
1157  } //endif
1158 
1159  if(!acceptedVPsi){
1160  CkPrintf("GSpace[%d,%d] Error: Flow of Control. Starting new iter (%d) before finishing Vpsi.\n",thisIndex.x,thisIndex.y,iteration+1);
1161  CkAbort("Error: GSpace cannot startNewIter() before finishing the PsiV loop\n");
1162  }//endif
1163 
1164  doneNewIter = true;
1165  CPcharmParaInfo *sim = CPcharmParaInfo::get();
1166  if(thisIndex.x==0 && thisIndex.y==0 ){
1167  if(!(sim->cp_min_opt==1)){
1168  fprintf(temperScreenFile,"-------------------------------------------------------------------------------\n");
1169  int iii = iteration;
1170  if(!sim->gen_wave){iii+=1;}
1171  fprintf(temperScreenFile,"Iteration %d done\n",iii);
1172  fprintf(temperScreenFile,"/////////////////////////////////////////////////////////////////////////////==\n");
1173  fprintf(temperScreenFile,"/////////////////////////////////////////////////////////////////////////////==\n");
1174  }else{
1175  if(iteration>0){
1176  fprintf(temperScreenFile,"/////////////////////////////////////////////////////////////////////////////==\n");
1177  fprintf(temperScreenFile,"/////////////////////////////////////////////////////////////////////////////==\n");
1178  }//endif
1179  if(iteration<config.maxIter){
1180  fprintf(temperScreenFile,"Beginning Iteration %d \n", iteration);
1181  }else{
1182  fprintf(temperScreenFile,"Completing Iteration %d \n", iteration-1);
1183  }//endif
1184  fprintf(temperScreenFile,"-------------------------------------------------------------------------------\n");
1185  }//endif
1186  }//endif
1187 //////////////////////////////////////////////////////////////////////////////
1188 /// Reset all the counters that need to be reset (not more not less)
1189 /// otherwise race conditions can leak in. Rely on the constructor
1190 /// for initialization. Reset set your flags as soon as you are done
1191 /// with the tests that require them.
1192 
1193  int cp_min_opt = sim->cp_min_opt;
1194 
1195  // Finished integrate and red psi are safe.
1196  // You can't get to these until you get passed through this routine
1197  finishedCpIntegrate = 0;
1198  iRecvRedPsi = 1; if(numRecvRedPsi>0){iRecvRedPsi = 0;}
1199  iRecvRedPsiV = 1; if(cp_min_opt==0 && numRecvRedPsi>0){iRecvRedPsiV = 0;}
1200  iSentRedPsi = 0;
1201  iSentRedPsiV = 1; if(cp_min_opt==0){iSentRedPsiV = 0;}
1202 
1203  iteration++; // my iteration # : not exactly in sync with other chares
1204  // but should agree when chares meet.
1205 #ifdef _CP_DEBUG_STATE_GPP_VERBOSE_
1206  CkPrintf("GSP [%d,%d] StartNewIter : %d\n",thisIndex.x, thisIndex.y,iteration);
1207 #endif
1208 
1209 //////////////////////////////////////////////////////////////////////////////
1210 
1211 /// Check Load Balancing, Increment counter, set done flags equal to false.
1212 /* if(iteration=3)
1213  {
1214  CmiMemoryMark();
1215  }
1216  if(iteration=4)
1217  {
1218  CmiMemorySweep("GSP");
1219  }
1220 */
1221 #if CMK_TRACE_ENABLED
1222  if(iteration==TRACE_ON_STEP ){(TimeKeeperProxy.ckLocalBranch())->startTrace();}
1223  if(iteration==TRACE_OFF_STEP){(TimeKeeperProxy.ckLocalBranch())->stopTrace();}
1224 #endif
1225 
1226  if(config.lbgspace || config.lbpaircalc ||config.lbdensity){
1227  if((iteration % (FIRST_BALANCE_STEP - PRE_BALANCE_STEP) == 0) ||
1228  (iteration % (LOAD_BALANCE_STEP - PRE_BALANCE_STEP) == 0)){
1229  LBTurnInstrumentOn();
1230  }//endif
1231  }//endif
1232 
1233 //////////////////////////////////////////////////////////////////////////////
1234 /// Output psi at start of minimization for debugging
1235 
1236  if(iteration==1 && cp_min_opt==1){screenOutputPsi(0);}
1237 #ifdef _CP_SUBSTEP_TIMING_
1238  if(forwardTimeKeep>0){
1239  double gstart=CmiWallTimer();
1240  CkCallback cb(CkIndex_TimeKeeper::collectStart(NULL),0,TimeKeeperProxy);
1241  contribute(sizeof(double),&gstart,CkReduction::min_double, cb , forwardTimeKeep);
1242  }//endif
1243 #if USE_HPM
1244  if(iteration==HPM_ON_STEP ){(TimeKeeperProxy.ckLocalBranch())->startHPM("OneStep");}
1245  if(iteration==HPM_OFF_STEP ){(TimeKeeperProxy.ckLocalBranch())->stopHPM("OneStep");}
1246 #endif // HPM
1247 #endif // TIMING
1248 
1249 //---------------------------------------------------------------------------
1250  }//end routine
1251 
1252 void CP_State_GSpacePlane::screenPrintWallTimes()
1253 {
1254  CPcharmParaInfo *sim = CPcharmParaInfo::get();
1255  // do new iteration output once globally from the 0th instance
1256  if (thisIndex.x==0 && thisIndex.y==0 &&
1257  thisInstance.idxU.x==0 && thisInstance.idxU.y==0 &&
1258  thisInstance.idxU.z==0 && thisInstance.idxU.s==0 ) {
1259  int iprintout = config.maxIter;
1260  if(!(sim->cp_min_opt==1) && !sim->gen_wave){iprintout-=1;}
1261  int itime = iteration;
1262  if(config.maxIter>=30){itime=1; wallTimeArr[0]=wallTimeArr[1];}
1263  wallTimeArr[itime] = CkWallTimer();
1264  if (iteration == iprintout && config.maxIter<30) {
1265  CkPrintf("-------------------------------------------------------------------------------\n");
1266  CkPrintf("Wall Times from within GSP\n\n");
1267  for (int t = 1; t < iprintout; t++) {
1268  CkPrintf("%g\n",wallTimeArr[t] - wallTimeArr[t-1]);
1269  }//endfor
1270  if(itime>0) {
1271  CkPrintf("%g\n", wallTimeArr[itime] - wallTimeArr[itime-1]);
1272  CkPrintf("-------------------------------------------------------------------------------\n");
1273  }
1274  } else if(iteration>0) {
1275  CkPrintf("Iteration time (GSP) : %g\n",
1276  wallTimeArr[itime] - wallTimeArr[itime-1]);
1277  }//endif
1278  }//endif
1279 }
1280 
1281 //////////////////////////////////////////////////////////////////////////////
1282 /**
1283  * This method is used to start the forward ffts in the CP_State_GSpacePlanes.
1284  * The work done here could have been done in the acceptData method, but for
1285  * the fact that we need to synchronize all the CP_State_GSpacePlanes before
1286  * doing the forward-ffts. This is a feature, not a bug!
1287  */
1288 //////////////////////////////////////////////////////////////////////////////
1289 //////////////////////////////////////////////////////////////////////////////
1290 //////////////////////////////////////////////////////////////////////////////
1292 
1293 #ifdef _CP_DEBUG_STATEG_VERBOSE_
1294  CkPrintf("dofft %d.%d \n",thisIndex.x,thisIndex.y);
1295 #endif
1296 
1297  // If there is no data to send, return immediately
1298  if (gs.numNonZeroPlanes == 0 || gs.fftReqd == false){
1299  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
1300  CkPrintf("Dude, no data to send : Why launch the stategpsaceplane %d %d %d\n",
1301  thisIndex.x,thisIndex.y, gs.numNonZeroPlanes);
1302  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
1303  CkExit();
1304  }//endif
1305 
1306 //////////////////////////////////////////////////////////////////////////////
1307 
1308 #if CMK_TRACE_ENABLED
1309  double StartTime=CmiWallTimer();
1310 #endif
1311 
1312 /// Do fft in forward direction, 1-D, in z direction
1313 /// A local function not a message : get pointer to memory for fft group
1314 
1315  eesCache *eesData = UeesCacheProxy[thisInstance.proxyOffset].ckLocalBranch ();
1316  RunDescriptor *runs = eesData->GspData[iplane_ind]->runs;
1317 
1318  UfftCacheProxy[thisInstance.proxyOffset].ckLocalBranch()->doStpFFTGtoR_Gchare(gs.packedPlaneData,gs.packedForceData,
1319  gs.numFull,gs.numPoints,gs.numLines,gs.numRuns,runs,gs.zdim,iplane_ind);
1320 
1321 #if CMK_TRACE_ENABLED
1322  traceUserBracketEvent(GspaceFwFFT_, StartTime, CmiWallTimer());
1323 #endif
1324 }
1325 //////////////////////////////////////////////////////////////////////////////
1326 
1327 
1328 //////////////////////////////////////////////////////////////////////////////
1329 /// Send result to realSpacePlane : perform the transpose
1330 /// Force data cannot be overwritten due to all to all nature of comm.
1331 /// Until everyone sends, no one gets anything back
1332 //////////////////////////////////////////////////////////////////////////////
1333 //////////////////////////////////////////////////////////////////////////////
1334 //////////////////////////////////////////////////////////////////////////////
1335 
1336 #if USE_PERSISTENT
1337 void CP_State_GSpacePlane::setupFFTPersistent() {
1338 
1339  int numLines = gs.numLines; // same amount of data to each realspace chare puppy
1340  int sizeZ = gs.planeSize[1];
1341  CkArray *real_proxy_local = real_proxy.ckLocalBranch();
1342  fftHandler = (PersistentHandle*) malloc( sizeof(PersistentHandle) * sizeZ);
1343  int size;
1344  for(int z=0; z < sizeZ; z++) {
1345  int peer = real_proxy_local->homePe(CkArrayIndex2D(thisIndex.x, z));
1346  int compress_start = sizeof(envelope) + sizeof(RSFFTMsg);
1347  int compress_size = numLines*sizeof(complex);
1348  size = compress_start + compress_size + sizeof(int);
1349  fftHandler[z] = CmiCreatePersistent(peer, size, compress_start);
1350  //fftHandler[z] = CmiCreateCompressPersistent(peer, size, compress_start, compress_size);
1351  }
1352  CkPrintf("persistent handler message count %d, size %d\n", sizeZ, size);
1353 }
1354 #endif
1356 
1357 #if USE_PERSISTENT
1358  if(fftHandler == NULL) setupFFTPersistent();
1359 #endif
1360 
1361 #ifdef _CP_DEBUG_STATEG_VERBOSE_
1362  CkPrintf("sendfft %d.%d \n",thisIndex.x,thisIndex.y);
1363 #endif
1364 
1365  complex *data_out = gs.packedForceData;
1366  int numLines = gs.numLines; // same amount of data to each realspace chare puppy
1367  int sizeZ = gs.planeSize[1];
1368 
1369 //////////////////////////////////////////////////////////////////////////////
1370 /// Do a Comlib Dance
1371 #ifdef USE_COMLIB
1372 #ifdef OLD_COMMLIB
1373  if (config.useGssInsRealP){gssInstance.beginIteration();}
1374 #else
1375  // if (config.useGssInsRealP){ComlibBegin(real_proxy, 0);}
1376 #endif
1377 #endif
1378 
1379 //////////////////////////////////////////////////////////////////////////////
1380 /// Send your (x,y,z) to processors z.
1381 
1382  /**********************************************
1383  char junk[1000];
1384  sprintf(junk,"gstate%d.%d.out.%d",thisIndex.x,thisIndex.y, iteration);
1385  FILE *fp = fopen(junk,"w");
1386  ***********************************************/
1387 
1388  for(int z=0; z < sizeZ; z++) {
1389 
1390  // Malloc and prio the message
1391  RSFFTMsg *msg = new (numLines,8*sizeof(int)) RSFFTMsg;
1392  msg->size = numLines;
1393  msg->senderIndex = thisIndex.y; // planenumber
1394  msg->senderJndex = thisIndex.x; // statenumber
1395  msg->senderKndex = z; // planenumber of rstate
1396  msg->numPlanes = gs.numNonZeroPlanes; // unity baby
1397 
1398  if(config.prioFFTMsg){
1399  CkSetQueueing(msg, CK_QUEUEING_IFIFO);
1400  *(int*)CkPriorityPtr(msg) = config.rsfftpriority +
1401  thisIndex.x*gs.planeSize[0]+thisIndex.y;
1402  }//endif
1403 
1404  // beam out all points with same z to chare array index z
1405  complex *data = msg->data;
1406  //fprintf(fp,"%d\n%d ",numLines*2, iteration);
1407  for (int i=0,j=z; i<numLines; i++,j+=sizeZ){data[i] = data_out[j]; }
1408  //for (int i=0,j=z; i<numLines; i++,j+=sizeZ){data[i] = data_out[j]; fprintf(fp,"%e %e ", data[i].re, data[i].im);}
1409 #if USE_PERSISTENT
1410  CmiUsePersistentHandle(&fftHandler[z], 1);
1411 #endif
1412  real_proxy(thisIndex.x, z).acceptFFT(msg); // same state,realspace index [z]
1413 #if USE_PERSISTENT
1414  CmiUsePersistentHandle(NULL, 0);
1415 #endif
1416  // progress engine baby
1417  CmiNetworkProgress();
1418  }//endfor
1419  //fclose(fp);
1420 
1421 //////////////////////////////////////////////////////////////////////////////
1422 /// Finish up
1423 #ifdef USE_COMLIB
1424 #ifdef OLD_COMMLIB
1425  if (config.useGssInsRealP){gssInstance.endIteration();}
1426 #else
1427  //if (config.useGssInsRealP){ComlibEnd(real_proxy, 0);}
1428 #endif
1429 #endif
1430 
1431 #ifdef _CP_SUBSTEP_TIMING_
1432  if(forwardTimeKeep>0)
1433  {
1434  double gend=CmiWallTimer();
1435  CkCallback cb(CkIndex_TimeKeeper::collectEnd(NULL),0,TimeKeeperProxy);
1436  contribute(sizeof(double),&gend,CkReduction::max_double, cb , forwardTimeKeep);
1437  }
1438 #endif
1439 
1440 //----------------------------------------------------------------------
1441  }//end routine
1442 //////////////////////////////////////////////////////////////////////////////
1443 
1444 
1445 /**
1446  * Force cannot be overwitten because we can't receive until all chares send. The beauty of all to all comm
1447  * Forces are initialized HERE. No need to zero them etc. elsewhere.
1448  */
1450 {
1451 #ifdef _CP_SUBSTEP_TIMING_
1452  if(backwardTimeKeep>0)
1453  {
1454  double gstart=CmiWallTimer();
1455  CkCallback cb(CkIndex_TimeKeeper::collectStart(NULL),0,TimeKeeperProxy);
1456  contribute(sizeof(double),&gstart,CkReduction::min_double, cb , backwardTimeKeep);
1457  }
1458 #endif
1459 
1460 #ifdef _CP_DEBUG_STATEG_VERBOSE_
1461  CkPrintf("GSpace[%d,%d] acceptIFFT\n",thisIndex.x,thisIndex.y);
1462 #endif
1463 #ifdef _NAN_CHECK_
1464  for(int i=0;i<msg->size ;i++)
1465  {
1466  CkAssert(finite(msg->data[i].re));
1467  CkAssert(finite(msg->data[i].im));
1468  }
1469 #endif
1470 
1471  int size = msg->size;
1472  int offset = msg->offset;
1473  complex *partlyIFFTd = msg->data;
1474 
1475  complex *data_in = gs.packedForceData;
1476  int numLines = gs.numLines;
1477  int sizeZ = gs.planeSize[1];
1478  int expandedDataSize = numLines*sizeZ;
1479 
1480  CkAssert(numLines == size);
1481 
1482 //////////////////////////////////////////////////////////////////////////////
1483 /// Recv the message
1484 
1485  countIFFT++;
1486 
1487  // This is not a reduction. Don't zero me please. Every elements is set.
1488  // z=offset is inner index : collections of z-lines of constant (gx,gy)
1489  for(int i=0,j=offset; i< numLines; i++,j+=sizeZ){data_in[j] = partlyIFFTd[i];}
1490 
1491  delete msg;
1492  }//end routine
1493 //////////////////////////////////////////////////////////////////////////////
1494 
1495 
1496 //////////////////////////////////////////////////////////////////////////////
1497 //////////////////////////////////////////////////////////////////////////////
1498 //////////////////////////////////////////////////////////////////////////////
1500 {
1501  // Now do the IFFT in place
1502  #if CMK_TRACE_ENABLED
1503  double StartTime=CmiWallTimer();
1504  #endif
1505  eesCache *eesData = UeesCacheProxy[thisInstance.proxyOffset].ckLocalBranch ();
1506  RunDescriptor *runs = eesData->GspData[iplane_ind]->runs;
1507  FFTcache *fftcache = UfftCacheProxy[thisInstance.proxyOffset].ckLocalBranch();
1508 
1509  fftcache->getCacheMem("CP_State_GSpacePlane::doIFFT");
1510  complex *forcTmp = fftcache->tmpData;
1511  fftcache->doStpFFTRtoG_Gchare(gs.packedForceData,forcTmp,
1512  gs.numFull,gs.numPoints,gs.numLines,gs.numRuns,runs,gs.zdim,iplane_ind);
1513 
1514  #if CMK_TRACE_ENABLED
1515  traceUserBracketEvent(GspaceBwFFT_, StartTime, CmiWallTimer());
1516  #endif
1517 
1518  // Finish up by multiplying the the FFT scaling factor
1519  CPcharmParaInfo *sim = CPcharmParaInfo::get();
1520  double scaleFactor = -2.0/double(sim->sizeX *
1521  sim->sizeY *
1522  sim->sizeZ);
1523  complex *forces = gs.packedForceData;
1524  for(int index=0; index<gs.numPoints; index++)
1525  forces[index] = forcTmp[index]*scaleFactor;
1526  fftcache->freeCacheMem("CP_State_GSpacePlane::doIFFT");
1527  CmiNetworkProgress();
1528 
1529  // Report that you are done
1530  #ifdef _CP_DEBUG_STATEG_VERBOSE_
1531  if(thisIndex.x==0)
1532  CkPrintf("Done doing Ifft gsp : %d %d\n",thisIndex.x,thisIndex.y);
1533  #endif
1534 
1535  /// If there is a barrier after the IFFTs, contribute to the reduction barrier that will sync all GSpace chares
1536  #ifdef BARRIER_CP_GSPACE_IFFT
1537  //put contribute here to reduction with a broadcast client
1538  int wehaveours=1;
1539  contribute(sizeof(int),&wehaveours,CkReduction::sum_int,
1540  CkCallback(CkIndex_CP_State_GSpacePlane::allDoneIFFT(NULL),thisProxy));
1541  #endif
1542 }//end routine
1543 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////
1544 
1545 
1546 
1547 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////
1548 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////
1549 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////
1551 
1552 #ifdef _CP_DEBUG_VKS_OFF_
1553  if(thisIndex.x==0 && thisIndex.y==0){
1554  CkPrintf("EHART = OFF FOR DEBUGGING\n");
1555  CkPrintf("EExt = OFF FOR DEBUGGING\n");
1556  CkPrintf("EWALD_recip = OFF FOR DEBUGGING\n");
1557  CkPrintf("EEXC = OFF FOR DEBUGGING\n");
1558  CkPrintf("EGGA = OFF FOR DEBUGGING\n");
1559  CkPrintf("EEXC+EGGA = OFF FOR DEBUGGING\n");
1560  }//endif
1561 #endif
1562 
1563 //////////////////////////////////////////////////////////////////////////////////
1564 
1565  // Check stuff from the gsplane data cache and particle plane
1566 
1567  eesCache *eesData = UeesCacheProxy[thisInstance.proxyOffset].ckLocalBranch ();
1568  int ncoef = gs.numPoints;
1569  int *k_x = eesData->GspData[iplane_ind]->ka;
1570  int *k_y = eesData->GspData[iplane_ind]->kb;
1571  int *k_z = eesData->GspData[iplane_ind]->kc;
1572  double **g2 = eesData->GspData[iplane_ind]->g2;
1573 
1574 //////////////////////////////////////////////////////////////////////////////////
1575 /// Add forces from particle plane to forces from IFFT then zero them
1576 
1577 #ifdef _CP_DEBUG_VKS_FORCES_
1578  if(thisIndex.x==0 && thisIndex.y==0){
1579  FILE *fp = fopen("vks_forces_s0_p0.out","w");
1580  int ncoef = gs.numPoints;
1581  complex *forces = gs.packedForceData;
1582  for(int i=0;i<ncoef;i++)
1583  fprintf(fp,"%d %d %d : %g %g\n",k_x[i],k_y[i],k_z[i],forces[i].re,forces[i].im);
1584  fclose(fp);
1585  }//endif
1586 #endif
1587 
1588  complex *forces = gs.packedForceData;
1589  complex *ppForces = UparticlePlaneProxy[thisInstance.proxyOffset](thisIndex.x,thisIndex.y).ckLocal()->myForces;
1590 
1591 #ifdef _CP_DEBUG_SFNL_OFF_
1592  bzero(ppForces,ncoef*sizeof(complex));
1593 #endif
1594 #ifdef _CP_DEBUG_VKS_OFF_
1595  bzero(forces,ncoef*sizeof(complex));
1596  bzero(ppForces,ncoef*sizeof(complex));
1597 #endif
1598 
1599 
1600 #ifdef _CP_GS_DUMP_VKS_
1601  dumpMatrix("vksBf",(double *)ppForces, 1,
1602  gs.numPoints*2,thisIndex.y,thisIndex.x,thisIndex.x,0,false);
1603  dumpMatrix("forceBf",(double *)forces, 1,
1604  gs.numPoints*2,thisIndex.y,thisIndex.x,thisIndex.x,0,false);
1605 #endif
1606 
1607 #ifdef _CP_GS_DEBUG_COMPARE_VKS_
1608  if(savedvksBf==NULL){ // load it
1609  savedvksBf= new complex[gs.numPoints];
1610  loadMatrix("vksBf",(double *)savedvksBf, 1,
1611  gs.numPoints*2,thisIndex.y,thisIndex.x,thisIndex.x,0,false);
1612  }//endif
1613  if(savedforceBf==NULL){ // load it
1614  savedforceBf= new complex[gs.numPoints];
1615  loadMatrix("forceBf",(double *)savedforceBf, 1,
1616  gs.numPoints*2,thisIndex.y,thisIndex.x,thisIndex.x,0,false);
1617  }//endif
1618 
1619  for(int i=0;i<gs.numPoints;i++){
1620  if(fabs(ppForces[i].re-savedvksBf[i].re)>0.0001){
1621  fprintf(stderr, "GSP [%d,%d] %d element vks %.10g not %.10g\n",
1622  thisIndex.x, thisIndex.y,i, ppForces[i].re, savedvksBf[i].re);
1623  }//endif
1624  CkAssert(fabs(ppForces[i].re-savedvksBf[i].re)<0.0001);
1625  CkAssert(fabs(ppForces[i].im-savedvksBf[i].im)<0.0001);
1626  }//endfor
1627  for(int i=0;i<gs.numPoints;i++){
1628  if(fabs(forces[i].re-savedforceBf[i].re)>0.0001){
1629  fprintf(stderr, "GSP [%d,%d] %d element force %.10g not %.10g\n",
1630  thisIndex.x, thisIndex.y,i, forces[i].re, savedforceBf[i].re);
1631  }//endif
1632  CkAssert(fabs(forces[i].re-savedforceBf[i].re)<0.0001);
1633  CkAssert(fabs(forces[i].im-savedforceBf[i].im)<0.0001);
1634  }//endfor
1635 #endif
1636 
1637  gs.addForces(ppForces,k_x);
1638  bzero(ppForces,gs.numPoints*sizeof(complex));
1639  CmiNetworkProgress();
1640 
1641 //////////////////////////////////////////////////////////////////////////////////
1642 /// Compute force due to quantum kinetic energy and add it in.
1643 /// Reduce quantum kinetic energy or eke
1644 
1645  int istate = gs.istate_ind;
1646  int nkx0 = gs.nkx0;
1647  complex *psi_g = gs.packedPlaneData;
1648  double *eke_ret = &(gs.eke_ret);
1649 
1650  CPNONLOCAL::CP_eke_calc(ncoef,istate,forces,psi_g,k_x,k_y,k_z,g2,eke_ret,config.doublePack,nkx0,
1651  kpoint_ind,config.nfreq_cpnonlocal_eke);
1652  contribute(sizeof(double), &gs.eke_ret, CkReduction::sum_double,
1653  CkCallback(CkIndex_InstanceController::printEnergyEke(NULL),CkArrayIndex1D(thisInstance.proxyOffset),instControllerProxy));
1654  //isEnergyReductionDone = false; ///@note: This doesnt seem necessary here and commenting out has not affected simple tests. This flag is reset at the start of the iter itself.
1655 
1656 #ifdef _CP_DEBUG_SCALC_ONLY_
1657  bzero(forces,ncoef*sizeof(complex));
1658 #endif
1659 
1660 //////////////////////////////////////////////////////////////////////////
1661 /// Debug output
1662 
1663 #ifdef _CP_DEBUG_OLDFORCE_
1664  if(ncoef >0){
1665  FILE *fp = fopen("force_old.out", "a+");
1666  for(int i = 0; i < ncoef; i++){
1667  if(k_x[i]==0 && k_y[i]==1 && k_z[i]==4){
1668  fprintf(fp,
1669  "old force H+Ext+Exc+Eke+Enl : is=%d %d %d %d : %g %g\n",
1670  istate,k_x[i],k_y[i],k_z[i],forces[i].re,forces[i].im);
1671  }
1672  }
1673  fclose(fp);
1674  }//endif
1675 #endif
1676 
1677 //-----------------------------------------------------------------------------
1678  }//end routine
1679 ///////////////////////////////////////////////////////////////////////////////=
1680 
1681 
1682 ///////////////////////////////////////////////////////////////////////////////=
1683 /** \brief Atoms are launched by allDoneCpForces when all after ALL planes and states
1684  * have reported. */
1685 ///////////////////////////////////////////////////////////////////////////////=
1686 ///////////////////////////////////////////////////////////////////////////
1687 ///////////////////////////////////////////////////////////////////////////////=
1689  // CkPrintf("{%d} GSP [%d,%d] launchAtoms\n",thisInstance.proxyOffset, thisIndex.x,thisIndex.y);
1690 
1691 ///////////////////////////////////////////////////////////////////////////////=
1692 
1693 /// The usual stuff
1694 #ifdef _CP_DEBUG_PSI_OFF_
1695 /// iteration++;
1696  if(iteration==config.maxIter+1){
1697  int i=0;
1698 #ifdef _CP_SUBSTEP_TIMING_
1699 #if USE_HPM
1700  (TimeKeeperProxy.ckLocalBranch())->printHPM();
1701 #endif
1702 #endif
1703  cleanExitCalled = 1;
1704 
1705  contribute(sizeof(int),&cleanExitCalled,CkReduction::sum_int, CkCallback(CkIndex_InstanceController::cleanExit(NULL),CkArrayIndex1D(thisInstance.proxyOffset),instControllerProxy));
1706  }else{
1707 #endif
1708  int i=0;
1709  contribute(sizeof(int),&i,CkReduction::sum_int, CkCallback(CkIndex_InstanceController::allDoneCPForces(NULL),CkArrayIndex1D(thisInstance.proxyOffset),instControllerProxy));
1710 #ifdef _CP_DEBUG_PSI_OFF_
1711  }//endif
1712 #endif
1713 }//end routine
1714 
1715 
1716 
1717 ///////////////////////////////////////////////////////////////////////////////=
1718 /** \brief After my CP forces have arrived : sendLambda */
1719 ///////////////////////////////////////////////////////////////////////////////=
1720 ///////////////////////////////////////////////////////////////////////////////c
1721 ///////////////////////////////////////////////////////////////////////////////=
1723 ///////////////////////////////////////////////////////////////////////////////=
1724 
1725 /// Reset lambda (not done) and force counters (not done for NEXT step):
1726 
1727  acceptedLambda = false;
1728  doneDoingIFFT = false;
1729  doneNewIter = false;
1730 
1731 ///////////////////////////////////////////////////////////////////////////////=
1732 
1733 /// Get nice local variables and if dynamics make a copy
1734 
1735  CPcharmParaInfo *sim = CPcharmParaInfo::get();
1736  complex *psi = gs.packedPlaneData;
1737  complex *force = gs.packedForceData;
1738  int cp_min_opt = sim->cp_min_opt;
1739 
1740  if(cp_min_opt==0){ // dynamics in on : minimization is off
1741  int ncoef = gs.numPoints;
1742  complex *psi_g = gs.packedPlaneData;
1743  complex *psi_g_tmp = gs.packedPlaneDataTemp;
1744  CmiMemcpy(psi_g_tmp,psi_g,sizeof(complex)*ncoef);
1745  }//endif
1746 
1747 ///////////////////////////////////////////////////////////////////////////////=
1748 
1749 /// Debug output schmoo : contrib to diagonal element of lambda from this chare
1750 /// : Most effective for 1 gspace chare when you
1751 /// : get the diangonal elemens of lambda (see below)
1752 
1753 
1754 #define _CP_GSPACE_DUMP_LMAT_DIAGONAL_VALS_OFF_
1755 
1756 #ifdef _CP_GSPACE_DUMP_LMAT_DIAGONAL_VALS_
1757  /** The lambda matrix is the reduced result of the forward path GEMMs in the
1758  * asymmetric paircalcs that arrives at Ortho::aceptSectionLambda. There is
1759  * an LMAT dump macro over there. Here, we manually compute the product of
1760  * psi and the forces to obtain the contribution of this chare to the
1761  * diagonal element of the L matrix. To verify that the forward path is
1762  * producing correct results, we can compare these hand-computed diagonal
1763  * elements with the lambda matrix dumped in Ortho.
1764  *
1765  * Running the code with just one plane (reduce gExpandFact to get just one
1766  * plane) should ensure that there is just one GSpace chare for each state
1767  * and that the diagonal elements are computed in this chare itself.
1768  * Otherwise, the diagonal elements have to be computed by summing the
1769  * corresponding values dumped by all the GSpace chares in a state (ie across
1770  * all planes).
1771  */
1772 
1773  complex mylambda_diag = 0;
1774  for(int i=0; i<gs.numPoints; i++){
1775  mylambda_diag += force[i] * psi[i].conj();
1776  }/*endfor*/
1777  if(config.doublePack==0){
1778  CkPrintf("lambda[%d, %d] = %.12g %.12g at plane %d\n",
1779  thisIndex.x, thisIndex.x, mylambda_diag.re, mylambda_diag.im, thisIndex.y);
1780  }else{
1781  CkPrintf("lambda[%d, %d] = %.12g %.12g at plane %d\n",
1782  thisIndex.x, thisIndex.x, mylambda_diag.re,0.0, thisIndex.y);
1783  }/*endif*/
1784 #endif
1785 
1786 ///////////////////////////////////////////////////////////////////////////////=
1787 
1788 /// Debug output schmoo : output forces before lambda-ization!
1789 /// :
1790 
1791 
1792 #define _CP_GSPACE_PSI_FORCE_OUTPUT_BEFORE_LAMBDA_OFF_
1793 #ifdef _CP_GSPACE_PSI_FORCE_OUTPUT_BEFORE_LAMBDA_
1794  eesCache *eesData = UeesCacheProxy[thisInstance.proxyOffset].ckLocalBranch ();
1795  int *ka = eesData->GspData[iplane_ind]->ka;
1796  int *kb = eesData->GspData[iplane_ind]->kb;
1797  int *kc = eesData->GspData[iplane_ind]->kc;
1798 
1799  char fname[1024];
1800  sprintf(fname,"psi_forces_before_lambda_state%d_plane%d.out",thisIndex.x,thisIndex.y);
1801  FILE *fp = cfopen(fname,"w");
1802  for(int i=0; i<gs.numPoints; i++){
1803  int igo = 0; double wght = 0.5;
1804  if (ka[i] > 0) igo=1;
1805  if (ka[i] == 0 && kb[i]>0){ igo=1; wght=1.0; }
1806  if (ka[i] == 0 && kb[i]==0 && kc[i]>=0){ igo=1; wght=1.0; }
1807  if(config.doublePack==0) wght=1.0;
1808  if(igo==1)
1809  fprintf(fp,"%d %d %d : %.10g %.10g\n",ka[i],kb[i],kc[i],force[i].re*wght,force[i].im*wght);
1810  }/*endfor*/
1811  fclose(fp);
1812 #endif
1813 
1814 ///////////////////////////////////////////////////////////////////////////////=
1815 
1816 /** Scale the variables for dynamics and double packing : Single pack
1817  * will require modification of factors of 2 in PC or scaling all the
1818  * variables which stinks
1819 */
1820 
1821 #ifndef PAIRCALC_TEST_DUMP
1822 #ifndef _CP_DEBUG_ORTHO_OFF_
1823  if(cp_min_opt==0){
1824  double rad2i = 1.0/sqrt(2.0);
1825  double rad2 = sqrt(2.0);
1826  if(gs.ihave_kx0==1 && config.doublePack==1){
1827  for(int i=gs.kx0_strt; i<gs.kx0_end; i++){psi[i] *= rad2i;}
1828  for(int i=gs.kx0_strt; i<gs.kx0_end; i++){force[i] *= rad2;}
1829  }else{
1830  for(int i=0; i<gs.numPoints; i++){
1831  psi[i] *= rad2i;
1832  force[i] *= rad2;
1833  }//endfor
1834  }//endif
1835  }//endif
1836 #endif
1837 #endif
1838 
1839 ///////////////////////////////////////////////////////////////////////////////=
1840 
1841 /// Enormous debug schmoo
1842 
1843 #ifdef _PAIRCALC_DEBUG_PARANOID_FW_
1844  for(int i=0;i<config.numChunksAsym;i++)
1845  CkAssert(countLambdaO[i]==0);
1846 #endif
1847 #ifdef _CP_GS_DUMP_LAMBDA_
1848  char name1[100];
1849  char name2[100];
1850  sprintf(name1, "lambdaBf.iter.%d", iteration);
1851  sprintf(name2, "psiBf.iter.%d", iteration);
1852  dumpMatrix(name1, (double *)force, 1,
1853  gs.numPoints*2,thisIndex.y,thisIndex.x,thisIndex.x,0,false);
1854  dumpMatrix(name2,(double *)psi, 1,
1855  gs.numPoints*2,thisIndex.y,thisIndex.x,thisIndex.x,0,false);
1856 #endif
1857 
1858 #ifdef _CP_GS_DEBUG_COMPARE_PSI_
1859  if(savedlambdaBf==NULL){ // load it
1860  savedlambdaBf= new complex[gs.numPoints];
1861  loadMatrix("lambdaBf",(double *)savedlambdaBf, 1,
1862  gs.numPoints*2,thisIndex.y,thisIndex.x,thisIndex.x,0,false);
1863  }//endif
1864  if(savedpsiBf==NULL){ // load it
1865  savedpsiBf= new complex[gs.numPoints];
1866  loadMatrix("psiBf",(double *)savedpsiBf, 1,
1867  gs.numPoints*2,thisIndex.y,thisIndex.x,thisIndex.x,0,false);
1868  }//endif
1869  double testvalue=0.00000001;
1870  for(int i=0;i<gs.numPoints;i++){
1871  if(fabs(force[i].re-savedlambdaBf[i].re)>testvalue){
1872  fprintf(stderr, "GSP [%d,%d] %d element lambda %.10g not %.10g\n",
1873  thisIndex.x, thisIndex.y,i, force[i].re, savedlambdaBf[i].re);
1874  }//endif
1875  CkAssert(fabs(force[i].re-savedlambdaBf[i].re)<testvalue);
1876  CkAssert(fabs(force[i].im-savedlambdaBf[i].im)<testvalue);
1877  }//endfor
1878  for(int i=0;i<gs.numPoints;i++){
1879  CkAssert(fabs(psi[i].re-savedpsiBf[i].re)<testvalue);
1880  CkAssert(fabs(psi[i].im-savedpsiBf[i].im)<testvalue);
1881  }//endfor
1882 #endif
1883 
1884 ///////////////////////////////////////////////////////////////////////////////=
1885 
1886 /// Send to lambda : Finally
1887 
1888  int numPoints = gs.numPoints;
1889 #ifndef _CP_DEBUG_ORTHO_OFF_
1890  asymmPCmgr.sendLeftData(numPoints,psi,false);
1891  CmiNetworkProgress();
1892  asymmPCmgr.sendRightData(numPoints,force,false);
1893 #else
1894  acceptedLambda=true;
1895  bzero(force,sizeof(complex)*numPoints);
1896 #endif
1897 #ifdef _CP_DEBUG_STATEG_VERBOSE_
1898  if(thisIndex.x==0){CkPrintf("Sent Lambda %d %d\n",thisIndex.y,cleanExitCalled);}
1899 #endif
1900 
1901 ///////////////////////////////////////////////////////////////////////////////=
1902 
1903 /// sub-phase timing
1904 
1905 #ifdef _CP_SUBSTEP_TIMING_
1906  if(backwardTimeKeep>0){
1907  double gend=CmiWallTimer();
1908  CkCallback cb(CkIndex_TimeKeeper::collectEnd(NULL),0,TimeKeeperProxy);
1909  contribute(sizeof(double),&gend,CkReduction::max_double, cb , backwardTimeKeep);
1910  }//endif
1911 #endif
1912 
1913 //-----------------------------------------------------------------------------
1914  }// end routine
1915 ///////////////////////////////////////////////////////////////////////////////=
1916 
1917 
1918 ///////////////////////////////////////////////////////////////////////////////=
1919 ///////////////////////////////////////////////////////////////////////////////c
1920 ///////////////////////////////////////////////////////////////////////////////=
1921 void CP_State_GSpacePlane::unpackLambda(CkReductionMsg *msg) {
1922 ///////////////////////////////////////////////////////////////////////////////=
1923  CPcharmParaInfo *sim = CPcharmParaInfo::get();
1924  int cp_min_opt = sim->cp_min_opt;
1925  int cp_lsda = sim->nspin - 1; // 1 for lsda and 0 for lda
1926  eesCache *eesData = UeesCacheProxy[thisInstance.proxyOffset].ckLocalBranch ();
1927  int *k_x = eesData->GspData[iplane_ind]->ka;
1928 
1929  int N = msg->getSize()/sizeof(complex);
1930  complex *data = (complex *)msg->getData();
1931  int offset = msg->getUserFlag(); if(offset<0){offset=0;}
1932  // CkPrintf("[%d %d] accepts lambda %d \n", thisIndex.x, thisIndex.y,offset);
1933  complex *force = gs.packedForceData;
1934  int chunksize = gs.numPoints/config.numChunksAsym;
1935  int chunkoffset=offset*chunksize; // how far into the points this contribution lies
1936 
1937 #ifdef _NAN_CHECK_
1938  for(int i=0;i<msg->getSize()/sizeof(double) ;i++){
1939  CkAssert(finite(((double*) msg->getData())[i]));
1940  }//endfor
1941 #endif
1942 
1943 ///////////////////////////////////////////////////////////////////////////////=
1944 
1945 /// Count then debug
1946 
1947  countLambda++;//lambda arrives in as many as 2 * numChunks reductions
1948 
1949 ///////////////////////////////////////////////////////////////////////////////=
1950 
1951 /// Add the forces
1952 
1953  //---------------------------------------------------
1954  /// A) Double Pack
1955  if(config.doublePack==1){
1956  if(cp_min_opt==1){
1957  double overlap = (cp_lsda==0 ? 2.0 : 1.0);
1958  double ws = 1.0/overlap; double wd = 2.0/overlap;
1959 #ifndef PAIRCALC_TEST_DUMP
1960  for(int i=0,idest=chunkoffset; i<N; i++,idest++){
1961  double wght = (k_x[idest]==0 ? ws : wd);
1962  force[idest].re -= wght*data[i].re;
1963  force[idest].im -= wght*data[i].im;
1964  }//endfor
1965 #endif
1966  }else{
1967  if(countLambdaO[offset]<1){
1968  for(int i=0,idest=chunkoffset; i<N; i++,idest++){force[idest] = data[i]*(-1.0);}
1969  }else{
1970  for(int i=0,idest=chunkoffset; i<N; i++,idest++){force[idest] += data[i]*(-1.0);}
1971  }// coutlambda
1972  }//endif : cpmin
1973 
1974  }//endif : double pack
1975 
1976  //---------------------------------------------------
1977  /// B) Double Pack is off
1978  if(config.doublePack==0){
1979  if(cp_min_opt==1){
1980  double overlap = (cp_lsda==0 ? 2.0 : 1.0);
1981  double ws = 1.0/overlap;
1982  for(int i=0,idest=chunkoffset; i<N; i++,idest++){
1983  force[idest].re -= ws*data[i].re;
1984  force[idest].im -= ws*data[i].im;
1985  }//endfor
1986  }else{
1987  if(countLambdaO[offset]<1){
1988  for(int i=0,idest=chunkoffset; i<N; i++,idest++){force[idest] = data[i]*(-1.0);}
1989  }else{
1990  for(int i=0,idest=chunkoffset; i<N; i++,idest++){force[idest] += data[i]*(-1.0);}
1991  }// endif : 1st guy
1992  }//endif : minimization
1993  }//endif : singlePack = kpts
1994 
1995  //---------------------------------------------------
1996  /// C) Cleanup
1997  delete msg;
1998 
1999 ///////////////////////////////////////////////////////////////////////////////=
2000 
2001 /// Do we have everything?
2002 
2003  countLambdaO[offset]++;
2004  if(countLambda==AllLambdaExpected){
2005 #ifdef _CP_DEBUG_STATEG_VERBOSE_
2006  if(thisIndex.x==0)
2007  CkPrintf("doLambda %d %d\n",thisIndex.y,cleanExitCalled);
2008 #endif
2009  }//endif
2010 
2011 ///////////////////////////////////////////////////////////////////////////////=
2012  }//end routine
2013 ///////////////////////////////////////////////////////////////////////////////=
2014 
2015 ///////////////////////////////////////////////////////////////////////////////=
2016 ///////////////////////////////////////////////////////////////////////////////c
2017 ///////////////////////////////////////////////////////////////////////////////=
2019 ///////////////////////////////////////////////////////////////////////////////=
2020 /// 0) unpack the message : pop out variables from groups
2021 
2022  complex *data = msg->result;
2023  int N = msg->N;
2024  int offset = msg->myoffset;
2025  if(offset<0){offset=0;}
2026 
2027 #ifdef _NAN_CHECK_
2028  for(int i=0; i < N; i++)
2029  {
2030  CkAssert(finite(data[i].re));
2031  CkAssert(finite(data[i].im));
2032  }
2033 #endif
2034 
2035  CPcharmParaInfo *sim = CPcharmParaInfo::get();
2036  int cp_min_opt = sim->cp_min_opt;
2037  eesCache *eesData = UeesCacheProxy[thisInstance.proxyOffset].ckLocalBranch ();
2038  int *k_x = eesData->GspData[iplane_ind]->ka;
2039 
2040  complex *force = gs.packedForceData;
2041  int chunksize = gs.numPoints/config.numChunksAsym;
2042  int chunkoffset = offset*chunksize; // how far into the points this contribution lies
2043 
2044 ///////////////////////////////////////////////////////////////////////////////=
2045 /// I) Increment the counter and do some checking
2046 
2047  countLambda++; //lambda arrives in as many as 2 * numChunks reductions
2048 
2049  // CkPrintf("GSP [%d,%d] acceptLambda N %d offset %d countLambda %d expecting %d\n",thisIndex.x,thisIndex.y,N,offset,countLambda,AllLambdaExpected);
2050 
2051 ///////////////////////////////////////////////////////////////////////////////
2052 /// (II) Add it in to our forces : Careful about offsets, doublepack and cpmin/cp
2053 
2054  //----------------------------------------------------------
2055  /// A) Double Pack
2056 
2057  if(config.doublePack==1){
2058  if(cp_min_opt==1){
2059 #ifndef PAIRCALC_TEST_DUMP
2060  for(int i=0,idest=chunkoffset; i<N; i++,idest++){
2061  double wght = (k_x[idest]==0 ? 0.5 : 1);
2062  force[idest].re -= wght*data[i].re;
2063  force[idest].im -= wght*data[i].im;
2064  }//endfor
2065 #endif
2066  }else{
2067  if(countLambdaO[offset]<1){
2068  for(int i=0,idest=chunkoffset; i<N; i++,idest++){force[idest] = data[i]*(-1.0);}
2069  }else{
2070  for(int i=0,idest=chunkoffset; i<N; i++,idest++){force[idest] += data[i]*(-1.0);}
2071  }//endif : off set thingy
2072  }//endif : cp_min_on
2073  }//endif : double pack
2074 
2075  //----------------------------------------------------------
2076  /// B) Single pack
2077 
2078  if(config.doublePack==0){
2079  for(int i=0,idest=chunkoffset; i<N; i++,idest++){
2080  force[idest].re -= 0.5*data[i].re;
2081  force[idest].im -= 0.5*data[i].im;
2082  }//endfor
2083 
2084  }//endif : single pack
2085 
2086  //----------------------------------------------------------
2087  /// C) Clean up
2088 
2089  delete msg;
2090 
2091 ///////////////////////////////////////////////////////////////////////////////
2092 /// are we done?
2093 
2094  countLambdaO[offset]++;
2095  if(countLambda==AllLambdaExpected){
2096 #ifdef _CP_DEBUG_STATEG_VERBOSE_
2097  if(thisIndex.x==0)
2098  CkPrintf("doLambda %d %d\n",thisIndex.y,cleanExitCalled);
2099 #endif
2100  }//endif
2101 
2102 ///////////////////////////////////////////////////////////////////////////////=
2103  }//end routine
2104 ///////////////////////////////////////////////////////////////////////////////=
2105 
2106 ///////////////////////////////////////////////////////////////////////////////=
2107 ///////////////////////////////////////////////////////////////////////////////c
2108 ///////////////////////////////////////////////////////////////////////////////=
2109 /** \brief When all the inputs have arrived, complete the lambda computation
2110  */
2112 ///////////////////////////////////////////////////////////////////////////////=
2113 
2114 /// (I) If you have got it all : Rescale it and resume
2115 
2116  // CkPrintf("{%d} GSP [%d,%d] doLambda\n",thisInstance.proxyOffset, thisIndex.x,thisIndex.y);
2117  CkAssert(countLambda==AllLambdaExpected);
2118  CPcharmParaInfo *sim = CPcharmParaInfo::get();
2119  int cp_min_opt = sim->cp_min_opt;
2120  complex *force = gs.packedForceData;
2121 #ifdef _NAN_CHECK_
2122  for(int i=0;i<gs.numPoints ;i++){
2123  CkAssert(finite(force[i].re));
2124  CkAssert(finite(force[i].im));
2125  }
2126 #endif
2127 
2128  if(cp_min_opt==0){
2129  // dynamics scale it out
2130  if(gs.ihave_kx0==1){
2131  double rad2i = 1.0/sqrt(2.0);
2132  for(int i=gs.kx0_strt; i<gs.kx0_end; i++){force[i] *= rad2i;}
2133  }//endif
2134  }//endif
2135 
2136 ///////////////////////////////////////////////////////////////////////////////=
2137 
2138 /// Retrieve Non-orthog psi
2139 
2140  if(cp_min_opt==0){
2141  int ncoef = gs.numPoints;
2142  complex *psi_g_scr = gs.packedPlaneDataScr;
2143  complex *psi_g = gs.packedPlaneData;
2144  CmiMemcpy(psi_g,psi_g_scr,sizeof(complex)*ncoef); // overwrite ortho with non-ortho
2145  }//endif
2146 
2147 ///////////////////////////////////////////////////////////////////////////////=
2148 
2149  acceptedLambda=true;
2150  countLambda=0;
2151  bzero(countLambdaO,config.numChunksAsym*sizeof(int));
2152 
2153 ///////////////////////////////////////////////////////////////////////////////=
2154 
2155 /// Debug Schmoo
2156 
2157 #ifdef _CP_GS_DUMP_LAMBDA_
2158  dumpMatrix("lambdaAf",(double *)force, 1, gs.numPoints*2,
2159  thisIndex.y,thisIndex.x,thisIndex.x,0,false);
2160 #endif
2161 
2162 #ifdef _CP_GS_DEBUG_COMPARE_PSI_
2163  double testvalue=0.00000001;
2164  if(savedlambdaAf==NULL){ // load it
2165  savedlambdaAf= new complex[gs.numPoints];
2166  loadMatrix("lambdaAf",(double *)savedlambdaAf, 1,
2167  gs.numPoints*2,thisIndex.y,thisIndex.x,thisIndex.x,0,false);
2168  }//endif
2169  for(int i=0;i<gs.numPoints;i++){
2170  CkAssert(fabs(force[i].re-savedlambdaAf[i].re)<testvalue);
2171  CkAssert(fabs(force[i].im-savedlambdaAf[i].im)<testvalue);
2172  }//endfor
2173 #endif
2174 
2175 #ifdef _CP_DEBUG_STATEG_VERBOSE_
2176  if(thisIndex.x==0)
2177  CkPrintf("doLambda %d %d\n",thisIndex.y,cleanExitCalled);
2178 #endif
2179 
2180 ///////////////////////////////////////////////////////////////////////////////=
2181 
2182 /// Debug : Write out forces after Lambda-ization
2183 
2184 
2185 #define _CP_GSPACE_PSI_FORCE_OUTPUT_AFTER_LAMBDA_OFF_
2186 #ifdef _CP_GSPACE_PSI_FORCE_OUTPUT_AFTER_LAMBDA_
2187  eesCache *eesData = UeesCacheProxy[thisInstance.proxyOffset].ckLocalBranch ();
2188  int *ka = eesData->GspData[iplane_ind]->ka;
2189  int *kb = eesData->GspData[iplane_ind]->kb;
2190  int *kc = eesData->GspData[iplane_ind]->kc;
2191 
2192  char fname[1024];
2193  sprintf(fname,"psi_forces_after_lambda_state%d_plane%d.out",thisIndex.x,thisIndex.y);
2194  FILE *fp = cfopen(fname,"w");
2195  for(int i=0; i<gs.numPoints; i++){
2196  int igo = 0; double wght = 0.5;
2197  if (ka[i] > 0) igo=1;
2198  if (ka[i] == 0 && kb[i]>0){ igo=1; wght=1.0; }
2199  if (ka[i] == 0 && kb[i]==0 && kc[i]>=0){ igo=1; wght=1.0; }
2200  if(config.doublePack==0) wght=1.0;
2201  if(igo==1)
2202  fprintf(fp,"%d %d %d : %.10g %.10g\n",ka[i],kb[i],kc[i],force[i].re*wght,force[i].im*wght);
2203  }/*endfor*/
2204  fclose(fp);
2205 #endif
2206 
2207 ///////////////////////////////////////////////////////////////////////////////=
2208  }//end routine
2209 ///////////////////////////////////////////////////////////////////////////////=
2210 
2211 ///////////////////////////////////////////////////////////////////////////
2212 ///////////////////////////////////////////////////////////////////////////
2213 ///////////////////////////////////////////////////////////////////////////
2214 /// If minimization :
2215 /// compute lambda matrix (Lagrange multipliers)
2216 /// modify forces using the lambda matrix
2217 /// compute the |force|^2 = fovlap
2218 ///////////////////////////////////////////////////////////////////////////
2220 ///////////////////////////////////////////////////////////////////////////
2221 /// Flow of control check and local variables
2222 /// CkPrintf("{%d} GSP [%d,%d] computeCgOverlap\n",thisInstance.proxyOffset, thisIndex.x,thisIndex.y);
2223  if(!acceptedLambda){
2224  CkPrintf("GSpace[%d,%d] Flow of Control Error : Attempting to Cg ovlap without lambda correction\n",thisIndex.x,thisIndex.y);
2225  CkAbort("Error: Attempting to compute cg overlap without lambda correction\n");
2226  }//endif
2227 
2228  CPcharmParaInfo *sim = CPcharmParaInfo::get();
2229  int istate = gs.istate_ind;
2230  int ncoef = gs.numPoints;
2231  complex *forces = gs.packedForceData;
2232  int cp_min_opt = sim->cp_min_opt;
2233  int cp_min_cg = sim->cp_min_cg;
2234 
2235 ///////////////////////////////////////////////////////////////////////////
2236 
2237  double fovlap_loc = 0.0;
2238  if(cp_min_opt==1 && cp_min_cg==1){
2239  CPINTEGRATE::CP_fovlap_calc(ncoef,istate,forces,&fovlap_loc);
2240  }//endif
2241  double force_sq_sum_loc=0.0;
2242  for(int i=0; i<gs.numPoints; i++){force_sq_sum_loc+= forces[i].getMagSqr();}
2243 
2244  double redforc[2];
2245  redforc[0] = fovlap_loc;
2246  redforc[1] = force_sq_sum_loc;
2247  contribute(2*sizeof(double),redforc,CkReduction::sum_double,
2248  CkCallback(CkIndex_CP_State_GSpacePlane::acceptCgOverlap(NULL),thisProxy));
2249 
2250 #ifdef _CP_DEBUG_STATEG_VERBOSE_
2251  if(thisIndex.x==0)
2252  CkPrintf("ovlpa %d %d\n",thisIndex.y,cleanExitCalled);
2253 #endif
2254 
2255 //----------------------------------------------------------------------------
2256  }// end routine : computeCgOverlap
2257 ///////////////////////////////////////////////////////////////////////////////=
2258 
2259 
2260 //////////////////////////////////////////////////////////////////////////////
2261 //////////////////////////////////////////////////////////////////////////////
2262 //////////////////////////////////////////////////////////////////////////////
2263 /// In this function data is written to files the simpliest way possible
2264 //////////////////////////////////////////////////////////////////////////////
2266 {
2267  // Local pointers, variables and error checking
2268  if(!acceptedPsi || !acceptedLambda || !acceptedVPsi)
2269  CkAbort("{%d} Flow of Control Error : Attempting to write states without completing psi, vpsi and Lambda\n");
2270 
2271 
2272  CPcharmParaInfo *sim = CPcharmParaInfo::get();
2273  int cp_min_opt = (sim->cp_min_opt);
2274 
2275  int ind_state = (thisIndex.x+1);
2276  int ind_chare = (thisIndex.y+1);
2277  int ncoef = gSpaceNumPoints;
2278 
2279  eesCache *eesData = UeesCacheProxy[thisInstance.proxyOffset].ckLocalBranch ();
2280  int *k_x = eesData->GspData[iplane_ind]->ka;
2281  int *k_y = eesData->GspData[iplane_ind]->kb;
2282  int *k_z = eesData->GspData[iplane_ind]->kc;
2283  double *coef_mass = eesData->GspData[iplane_ind]->coef_mass;
2284 
2285  complex *psi = gs.packedPlaneData;
2286  complex *vpsi = gs.packedVelData; // to evolve psi to time, t.
2287  complex *forces = gs.packedForceData;
2288  double ***xNHC = gs.xNHC;
2289  double ***xNHCP = gs.xNHCP;
2290  double ***vNHC = gs.vNHC;
2291  double ***fNHC = gs.fNHC;
2292  double *mNHC = gs.mNHC;
2293  int len_nhc_cp = gs.len_nhc_cp;
2294  int num_nhc_cp = gs.num_nhc_cp;
2295  int nck_nhc_cp = gs.nck_nhc_cp;
2296  int nkx0_red = gs.nkx0_red;
2297  int nkx0_uni = gs.nkx0_uni;
2298  int nkx0_zero = gs.nkx0_zero;
2299  double kTCP = gs.kTCP;
2300 
2301  int myiteration = iteration;
2302  if(cp_min_opt==0){myiteration=iteration-1;}
2303 
2304  //////////////////////////////////////////////////////////////////////////////
2305  // Set the file names and write the files
2306  if(ind_state==1 && ind_chare==1){
2307  CkPrintf("-----------------------------------\n");
2308  CkPrintf("Writing states to disk at time %d\n",myiteration);
2309  CkPrintf("-----------------------------------\n");
2310  }//endif
2311  //------------------------------------------------------------------
2312  // Update the velocities into scratch as we are between steps
2313  if(cp_min_opt==0 && halfStepEvolve ==1){
2314  halfStepEvolve = 0;
2315  CPINTEGRATE::cp_evolve_vel(ncoef,forces,vpsi,coef_mass,
2316  len_nhc_cp,num_nhc_cp,nck_nhc_cp,fNHC,vNHC,xNHC,xNHCP,mNHC,
2317  gs.v0NHC,gs.a2NHC,gs.a4NHC,kTCP,nkx0_red,nkx0_uni,nkx0_zero,
2318  2,iteration,gs.degfree,gs.degfreeNHC,gs.degFreeSplt,
2319  gs.istrNHC,gs.iendNHC,1);
2320  }//endif
2321  //------------------------------------------------------------------
2322  // Pack the message and send it to your reduction plane
2323  GStateOutMsg *msg = new (ncoef,ncoef,ncoef,ncoef,ncoef,
2324  8*sizeof(int)) GStateOutMsg;
2325  if(config.prioFFTMsg){
2326  CkSetQueueing(msg, CK_QUEUEING_IFIFO);
2327  *(int*)CkPriorityPtr(msg) = config.rsfftpriority +
2328  thisIndex.x*gs.planeSize[0]+thisIndex.y;
2329  }//endif
2330  msg->size = ncoef;
2331  msg->senderIndex = thisIndex.y; // planenumber
2332  complex *data = msg->data;
2333  complex *vdata = msg->vdata;
2334  int *mk_x = msg->k_x;
2335  int *mk_y = msg->k_y;
2336  int *mk_z = msg->k_z;
2337  if(cp_min_opt==0){
2338  for(int i=0;i<ncoef;i++){vdata[i] = vpsi[i];}
2339  }else{
2340  for(int i=0;i<ncoef;i++){vdata[i] = 0.0;}
2341  }//endif
2342  for (int i=0;i<ncoef; i++){
2343  data[i] = psi[i];
2344  mk_x[i] = k_x[i]; mk_y[i] = k_y[i]; mk_z[i] = k_z[i];
2345  }//endfor
2346  UgSpacePlaneProxy[thisInstance.proxyOffset](thisIndex.x,redPlane).acceptFileOutput(msg);
2347 }
2348 
2349 //////////////////////////////////////////////////////////////////////////////
2350 //////////////////////////////////////////////////////////////////////////////
2351 //////////////////////////////////////////////////////////////////////////////
2353 //////////////////////////////////////////////////////////////////////////////
2354 
2355  CPcharmParaInfo *sim = CPcharmParaInfo::get();
2356  int npts_tot = (sim->npts_tot);
2357  int *ipacked_off = (sim->index_output_off);
2358 
2359 //////////////////////////////////////////////////////////////////////////////
2360 /// Receive the message
2361 
2362  if(countFileOut==0){
2363  tpsi = (complex *)fftw_malloc(npts_tot*sizeof(complex));
2364  tvpsi = (complex *)fftw_malloc(npts_tot*sizeof(complex));
2365  tk_x = (int *)fftw_malloc(npts_tot*sizeof(int));
2366  tk_y = (int *)fftw_malloc(npts_tot*sizeof(int));
2367  tk_z = (int *)fftw_malloc(npts_tot*sizeof(int));
2368  }//endif
2369  countFileOut++;
2370 
2371  int ncoef = msg->size;
2372  int myplane = msg->senderIndex;
2373  complex *data = msg->data;
2374  complex *vdata = msg->vdata;
2375  int *mk_x = msg->k_x;
2376  int *mk_y = msg->k_y;
2377  int *mk_z = msg->k_z;
2378  int ioff = ipacked_off[myplane];
2379  for(int i=0,j=ioff;i<ncoef;i++,j++){
2380  tpsi[j] = data[i];
2381  tvpsi[j] = vdata[i];
2382  tk_x[j] = mk_x[i];
2383  tk_y[j] = mk_y[i];
2384  tk_z[j] = mk_z[i];
2385  }//endfor
2386 //////////////////////////////////////////////////////////////////////////////
2387  }//end routine
2388 //////////////////////////////////////////////////////////////////////////////
2389 
2390 //////////////////////////////////////////////////////////////////////////////
2391 //////////////////////////////////////////////////////////////////////////////
2392 //////////////////////////////////////////////////////////////////////////////
2393 void CP_State_GSpacePlane::writeOutputFile() {
2394 //////////////////////////////////////////////////////////////////////////////
2395  CPcharmParaInfo *sim = CPcharmParaInfo::get();
2396  int sizeX = (sim->sizeX);
2397  int sizeY = (sim->sizeY);
2398  int sizeZ = (sim->sizeZ);
2399  int npts_tot = (sim->npts_tot);
2400  int cp_min_opt = (sim->cp_min_opt);
2401  int ibinary_write_opt = sim->ibinary_write_opt;
2402 
2403  int myiteration = iteration;
2404  if(cp_min_opt==0){myiteration=iteration-1;}
2405 
2406  countFileOut = 0;
2407  int ind_state = thisIndex.x+1;
2408  int ibead = myBeadIndex;
2409  int ikpt = myKptIndex;
2410  int itemper = myTemperIndex;
2411  int ispin = mySpinIndex;
2412 
2413  char psiName[400]; char vpsiName[400];
2414 
2415  sprintf(psiName, "%s/Spin.%d_Kpt.%d_Bead.%d_Temper.%d/state%d.out",
2416  config.dataPathOut,ispin,ikpt,ibead,itemper,ind_state);
2417  sprintf(vpsiName, "%s/Spin.%d_Kpt.%d_Bead.%d_Temper.%d/vState%d.out",
2418  config.dataPathOut,ispin,ikpt,ibead,itemper,ind_state);
2419  writeStateFile(npts_tot,tpsi,tvpsi,tk_x,tk_y,tk_z,cp_min_opt,
2420  sizeX,sizeY,sizeZ,psiName,vpsiName,ibinary_write_opt,
2421  myiteration,ind_state,ispin,ikpt,ibead,itemper);
2422  fftw_free(tpsi); tpsi = NULL;
2423  fftw_free(tvpsi);tvpsi = NULL;
2424  fftw_free(tk_x); tk_x = NULL;
2425  fftw_free(tk_y); tk_y = NULL;
2426  fftw_free(tk_z); tk_z = NULL;
2427 //////////////////////////////////////////////////////////////////////////////
2428  }//end routine
2429 //////////////////////////////////////////////////////////////////////////////
2430 
2431 
2432 ///////////////////////////////////////////////////////////////////////////////=
2433 ///////////////////////////////////////////////////////////////////////////////c
2434 ///////////////////////////////////////////////////////////////////////////////=
2435 /** \brief Integrate the forces */
2437 ///////////////////////////////////////////////////////////////////////////////=
2438 
2439 /// I. Error Conditions : you haven't accepted present steps lambda
2440 /// you haven't accepted previous steps psi or vpsi
2441 
2442  if(!acceptedLambda || !acceptedVPsi || !acceptedPsi){
2443  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
2444  CkPrintf("Flow of Control Error : Attempting to cp integrate\n");
2445  CkPrintf("without lambda correction to forces or psi or vpsi\n");
2446  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
2447  CkExit();
2448  }//endif
2449 
2450 #ifdef _CP_DEBUG_STATEG_VERBOSE_
2451  if(thisIndex.x==0)
2452  CkPrintf("integrate %d %d\n",thisIndex.y,cleanExitCalled);
2453 #endif
2454 
2455 ///////////////////////////////////////////////////////////////////////////////=
2456 
2457 /// II. Local pointers
2458 
2459  eesCache *eesData = UeesCacheProxy[thisInstance.proxyOffset].ckLocalBranch ();
2460  int *k_x = eesData->GspData[iplane_ind]->ka;
2461  int *k_y = eesData->GspData[iplane_ind]->kb;
2462  int *k_z = eesData->GspData[iplane_ind]->kc;
2463  double *coef_mass = eesData->GspData[iplane_ind]->coef_mass;
2464 
2465  CPcharmParaInfo *sim = CPcharmParaInfo::get();
2466  int cp_min_opt = sim->cp_min_opt;
2467  int cp_min_cg = sim->cp_min_cg;
2468 
2469  int istate = gs.istate_ind;
2470  int ncoef = gs.numPoints;
2471  int len_nhc = gs.len_nhc_cp;
2472  int num_nhc = gs.num_nhc_cp;
2473  int nck_nhc = gs.nck_nhc_cp;
2474  complex *psi_g = gs.packedPlaneData;
2475  complex *forces = gs.packedForceData;
2476  complex *forcesold = gs.packedVelData; // for minimization not cp
2477  complex *vpsi_g = gs.packedVelData; // for cp not minimization
2478  double ***fNHC = gs.fNHC;
2479  double ***vNHC = gs.vNHC;
2480  double ***xNHC = gs.xNHC;
2481  double ***xNHCP = gs.xNHCP;
2482  double *mNHC = gs.mNHC;
2483  double *v0NHC = gs.v0NHC;
2484  double *a2NHC = gs.a2NHC;
2485  double *a4NHC = gs.a4NHC;
2486  double kTCP = gs.kTCP;
2487  int nkx0_red = gs.nkx0_red;
2488  int nkx0_uni = gs.nkx0_uni;
2489  int nkx0_zero = gs.nkx0_zero;
2490  double degfree = gs.degfree;
2491  double degfreeNHC = gs.degfreeNHC;
2492  double *degFreeSplt= gs.degFreeSplt;
2493  int *istrNHC = gs.istrNHC;
2494  int *iendNHC = gs.iendNHC;
2495 
2496  double fictEke = 0.0;
2497  double ekeNhc = 0.0;
2498  double potNHC = 0.0;
2499 
2500 ///////////////////////////////////////////////////////////////////////////=
2501 
2502 /// III. Set conjugate gradient parameter : Don't reset too often
2503 
2504  ireset_cg = 0;
2505  if(iteration==1){ireset_cg=1;numReset_cg=0;}
2506  if(iteration>1 && cp_min_opt==1 && cp_min_cg==1){
2507  if( (fmagPsi_total>1.05*fmagPsi_total_old && numReset_cg>=5) ||
2508  (fmagPsi_total>2.0*fmagPsi_total_old)){
2509  ireset_cg = 1;
2510  numReset_cg = 0;
2511  if(thisIndex.x==0 && thisIndex.y==0){
2512  CkPrintf("----------------------------------------------\n");
2513  CkPrintf(" Resetting Conjugate Gradient! \n");
2514  CkPrintf("----------------------------------------------\n");
2515  }//endif
2516  }//endif
2517  }//endif
2518 
2519  double gamma_conj_grad = 0.0; // start CG up again
2520  if( (cp_min_opt==1) && (cp_min_cg == 1) && (ireset_cg==0)){
2521  gamma_conj_grad = fovlap/fovlap_old; // continue evolving CG
2522  }//endif
2523  ireset_cg = 0;
2524  numReset_cg++;
2525 
2526 ///////////////////////////////////////////////////////////////////////////=
2527 
2528 /// IV. Evolve the states using the forces/conjugate direction
2529 
2530 //---------------------------------------------------------------
2531 
2532 /// (A) Debug output before integration
2533 
2534 #define _CP_DEBUG_DYNAMICS_OFF
2535 #ifdef _CP_DEBUG_DYNAMICS_
2536  if(cp_min_opt!=1){
2537  if(istate<3){
2538  char forcefile[80];
2539  snprintf(forcefile,80,"Bpsi_force_%d_%d_%d.out",thisIndex.x,thisIndex.y,iteration);
2540  FILE *fp=fopen(forcefile,"w");
2541  for(int i=0;i <ncoef;i++){
2542  fprintf(fp,"%d %d %d : %.10g %.10g : %g %g : %g %g : %g\n",
2543  k_x[i], k_y[i], k_z[i],psi_g[i].re,psi_g[i].im,
2544  vpsi_g[i].re,vpsi_g[i].im,
2545  forces[i].re,forces[i].im, coef_mass[i]);
2546  }//endfor
2547  fclose(fp);
2548  }//endif
2549  }//endif
2550 #endif
2551 
2552 //---------------------------------------------------------------
2553 
2554 /// (B) Numerical integration
2555 
2556 #if CMK_TRACE_ENABLED
2557  double StartTime=CmiWallTimer();
2558 #endif
2559 
2560 #define _GLENN_CHECK_DYNAMICS_OFF_
2561 #ifdef _GLENN_CHECK_DYNAMICS_
2562  if(iteration==1){bzero(vpsi_g,ncoef*sizeof(complex));}
2563  if(thisIndex.x==0&&thisIndex.y==0){CkPrintf("Before Integrate : iteration %d\n",iteration);}
2564 #endif
2565 
2566 #ifdef _GLENN_CHECK_INTEGRATE_
2567  for(int i=0;i<ncoef;i++){
2568  if( (k_x[i]==0 &&k_y[i]==1 && k_z[i]==4) ||
2569  (k_x[i]==2 &&k_y[i]==1 && k_z[i]==3)){
2570  if(thisIndex.x==0 || thisIndex.x==127){
2571  CkPrintf(" Psi[is=%d ka=%d kb=%d kc=%d] : %.15g %.15g %.15g %.15g\n",
2572  gs.istate_ind+1,k_x[i],k_y[i],k_z[i],
2573  psi_g[i].re,psi_g[i].im,
2574  forces[i].re,forces[i].im);
2575  }//endif
2576  }//endif
2577  }//endfor
2578 #endif
2579 
2580 #ifdef _CP_DEBUG_SCALC_ONLY_
2581  bzero(forces,ncoef*sizeof(complex));
2582  if(cp_min_opt==0){
2583  psi_g = gs.packedPlaneDataTemp2;
2584  }
2585 #endif
2586 
2587 #ifdef _NAN_CHECK_
2588  for(int i=0; i < ncoef; i++)
2589  {
2590  CkAssert(finite(vpsi_g[i].re));
2591  CkAssert(finite(vpsi_g[i].im));
2592  CkAssert(finite(psi_g[i].re));
2593  CkAssert(finite(psi_g[i].im));
2594  CkAssert(finite(forces[i].re));
2595  CkAssert(finite(forces[i].im));
2596  }
2597 #endif
2598 
2599  fictEke = 0.0; ekeNhc=0.0; potNHC=0.0;
2600  CPINTEGRATE::CP_integrate(ncoef,istate,iteration,forces,forcesold,psi_g,
2601  coef_mass,k_x,k_y,k_z,len_nhc,num_nhc,nck_nhc,fNHC,vNHC,xNHC,xNHCP,
2602  mNHC,v0NHC,a2NHC,a4NHC,kTCP,gamma_conj_grad,&fictEke,
2603  nkx0_red,nkx0_uni,nkx0_zero,&ekeNhc,&potNHC,degfree,degfreeNHC,
2604  degFreeSplt,istrNHC,iendNHC,halfStepEvolve,config.nfreq_cpintegrate);
2605  halfStepEvolve = 1;
2606 
2607 
2608 #ifdef _GLENN_CHECK_INTEGRATE_
2609  for(int i=0;i<ncoef;i++){
2610  if( (k_x[i]==0 &&k_y[i]==1 && k_z[i]==4) ||
2611  (k_x[i]==2 &&k_y[i]==1 && k_z[i]==3)){
2612  if(thisIndex.x==0 || thisIndex.x==127){
2613  CkPrintf(" Psi[is=%d ka=%d kb=%d kc=%d] : %.15g %.15g %.15g %.15g\n",
2614  gs.istate_ind+1,k_x[i],k_y[i],k_z[i],
2615  psi_g[i].re,psi_g[i].im,
2616  forces[i].re,forces[i].im);
2617  }//endif
2618  }//endif
2619  }//endfor
2620 #endif
2621 
2622 #if CMK_TRACE_ENABLED
2623  traceUserBracketEvent(IntegrateModForces_, StartTime, CmiWallTimer());
2624 #endif
2625 
2626 //---------------------------------------------------------------
2627 
2628 /// (C) Debug output after integration
2629 
2630 #ifdef _NAN_CHECK_
2631  for(int i=0; i < ncoef; i++)
2632  {
2633  CkAssert(finite(vpsi_g[i].re));
2634  CkAssert(finite(vpsi_g[i].im));
2635  CkAssert(finite(psi_g[i].re));
2636  CkAssert(finite(psi_g[i].im));
2637  CkAssert(finite(forces[i].re));
2638  CkAssert(finite(forces[i].im));
2639  }
2640 #endif
2641 
2642 #ifdef _CP_DEBUG_DYNAMICS_
2643  if(cp_min_opt!=1){
2644  if(thisIndex.x<3){
2645  char forcefile[80];
2646  snprintf(forcefile,80,"Apsi_force_%d_%d_%d.out",thisIndex.x,thisIndex.y,iteration);
2647  FILE *fp=fopen(forcefile,"w");
2648  for(int i=0;i <ncoef;i++){
2649  fprintf(fp,"%d %d %d : %.10g %.10g : %g %g : %g %g : %g\n",
2650  k_x[i], k_y[i], k_z[i],psi_g[i].re,psi_g[i].im,
2651  vpsi_g[i].re,vpsi_g[i].im,
2652  forces[i].re,forces[i].im, coef_mass[i]);
2653  }//endfor
2654  fclose(fp);
2655  }//endif
2656  }//endif
2657  if(iteration==2){CkPrintf("later debugging dyn\n");CkExit();}
2658 #endif
2659 
2660 
2661 ///////////////////////////////////////////////////////////////////////////=
2662 
2663 /// V. Contribute FictKe : Output and store in energy group
2664 
2665  double redSize = ((double) (nchareG*nstates));
2666  double sendme[5];
2667  sendme[0] = 2.0*BOLTZ*(fictEke/degfree)/redSize;
2668  sendme[1] = 2.0*BOLTZ*(ekeNhc/degfreeNHC)/redSize;
2669  sendme[2] = fictEke;
2670  sendme[3] = ekeNhc;
2671  sendme[4] = potNHC;
2672  contribute(5*sizeof(double),sendme,CkReduction::sum_double,
2673  CkCallback(CkIndex_InstanceController::printFictEke(NULL),CkArrayIndex1D(thisInstance.proxyOffset),instControllerProxy));
2674 
2675 ///////////////////////////////////////////////////////////////////////////=
2676 
2677 /// VI. Pack up and set the flag that indicating you've finished integrating.
2678 
2679  gs.fictEke_ret = fictEke;
2680  gs.ekeNhc_ret = ekeNhc;
2681  gs.potNHC_ret = potNHC;
2682  finishedCpIntegrate = 1;
2683 
2684 //------------------------------------------------------------------------------
2685  }// end CP_State_GSpacePlane::integrateModForce
2686 ///////////////////////////////////////////////////////////////////////////////=
2687 
2688 ///////////////////////////////////////////////////////////////////////////////=
2689 ///////////////////////////////////////////////////////////////////////////////c
2690 ///////////////////////////////////////////////////////////////////////////////=
2691 /** \brief send red psi */
2693 ///////////////////////////////////////////////////////////////////////////////=
2694 
2695 
2696  CPcharmParaInfo *sim = CPcharmParaInfo::get();
2697  RedundantCommPkg *RCommPkg = sim->RCommPkg;
2698 
2699  complex *sendData = gs.packedPlaneData;
2700  int isend = thisIndex.y; // my g-space chare index
2701  int *num_send = RCommPkg[isend].num_send;
2702  int **lst_send = RCommPkg[isend].lst_send;
2703  int num_send_tot = RCommPkg[isend].num_send_tot;
2704 
2705  if(finishedCpIntegrate==0){
2706  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
2707  CkPrintf("Flow of Control Error : Attempting to sendredpsi\n");
2708  CkPrintf("without integration\n");
2709  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
2710  CkExit();
2711  }//endif
2712 
2713 ///////////////////////////////////////////////////////////////////////////////=
2714 
2715  int iii=0; int jjj=0;
2716  if(num_send_tot>0){
2717  for(int irecv = 0; irecv < nchareG; irecv ++){
2718 
2719  int ncoef = num_send[irecv];
2720  jjj += ncoef;
2721  if(ncoef>0){
2722  GSRedPsiMsg *msg = new (ncoef,8*sizeof(int)) GSRedPsiMsg;
2723  if(config.prioFFTMsg){
2724  CkSetQueueing(msg, CK_QUEUEING_IFIFO);
2725  *(int*)CkPriorityPtr(msg) = config.rsfftpriority +
2726  thisIndex.x*gs.planeSize[0]+thisIndex.y;
2727  }//endif
2728  if(ncoef>0){iii++;}
2729  msg->size = ncoef;
2730  msg->senderIndex = isend; // my gspace chare index
2731  complex *msgData = msg->data;
2732  for (int i=0;i<ncoef; i++){msgData[i] = sendData[lst_send[irecv][i]];}
2733  UgSpacePlaneProxy[thisInstance.proxyOffset](thisIndex.x,irecv).acceptRedPsi(msg);
2734  }//endif
2735 
2736  }//endfor
2737 
2738  }//endif : no one to which I have to send
2739 
2740 ///////////////////////////////////////////////////////////////////////////////=
2741 
2742 /// Check for errors
2743 
2744  if(iii!=num_send_tot){
2745  CkPrintf("Error in GSchare %d %d : %d %d : sendredPsi.1\n",thisIndex.x,thisIndex.y,
2746  num_send_tot,iii);
2747  CkExit();
2748  }//endif
2749  if(jjj != gs.nkx0_uni-gs.nkx0_zero){
2750  CkPrintf("Error in GSchare %d %d : %d %d\n sendredPsi.3",thisIndex.x,thisIndex.y,
2751  jjj,gs.nkx0_uni);
2752  CkExit();
2753  }//endif
2754 
2755  iSentRedPsi = 1;
2756 
2757 //-----------------------------------------------------------------------------
2758  }//end routine
2759 ///////////////////////////////////////////////////////////////////////////////=
2760 ///
2761 
2762 ///////////////////////////////////////////////////////////////////////////////=
2763 ///////////////////////////////////////////////////////////////////////////////c
2764 ///////////////////////////////////////////////////////////////////////////////=
2766 ///////////////////////////////////////////////////////////////////////////////=
2767 
2768  CPcharmParaInfo *sim = CPcharmParaInfo::get();
2769  RedundantCommPkg *RCommPkg = sim->RCommPkg;
2770 
2771  int ncoef = msg->size;
2772  int isend = msg->senderIndex;
2773  complex *msgData = msg->data;
2774  complex *recvData = gs.packedRedPsi;
2775  int irecv = thisIndex.y; // my g-space chare index
2776  int *num_recv = RCommPkg[irecv].num_recv;
2777  int **lst_recv = RCommPkg[irecv].lst_recv;
2778 
2779 ///////////////////////////////////////////////////////////////////////////////=
2780 
2781 /// unpack
2782 
2783  if(num_recv[isend]!=ncoef){
2784  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
2785  CkPrintf("Number sent not equal to number reciever expected \n");
2786  CkPrintf("Sender %d size %d Reciever %d size %d \n",isend,ncoef,irecv,num_recv[isend]);
2787  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
2788  CkExit();
2789  }//endif
2790 
2791  if(countRedPsi==0){jtemp=0;}
2792  jtemp+= ncoef;
2793 
2794  for(int i=0;i<ncoef;i++){
2795  recvData[lst_recv[isend][i]].re= msgData[i].re;
2796  recvData[lst_recv[isend][i]].im=-msgData[i].im;
2797  }//endfor
2798 
2799  delete msg;
2800 
2801 ///////////////////////////////////////////////////////////////////////////////=
2802 
2803 /// Check if all have arrived
2804  countRedPsi++;
2805  if(countRedPsi==numRecvRedPsi){
2806  countRedPsi=0;
2807  iRecvRedPsi=1;
2808  if(jtemp!=gs.nkx0_red){
2809  CkPrintf("Error in GSchare recv cnt %d %d : %d %d\n",thisIndex.x,thisIndex.y,
2810  gs.nkx0_red,jtemp);
2811  CkExit();
2812  }//endif
2813  }//endif
2814 
2815 //-----------------------------------------------------------------------------
2816  }//end routine
2817 ///////////////////////////////////////////////////////////////////////////////=
2818 
2819 ///////////////////////////////////////////////////////////////////////////////=
2820 ///////////////////////////////////////////////////////////////////////////////c
2821 ///////////////////////////////////////////////////////////////////////////////=
2822 /** \brief When all red psi have arrived, complete their integration */
2824 
2825  int ncoef = gs.nkx0_red;
2826  if(ncoef>0){
2827  complex *psi = gs.packedPlaneData;
2828  complex *psi_red = gs.packedRedPsi;
2829  for(int i=0;i<ncoef;i++){psi[i]=psi_red[i];}
2830  }//endif
2831 
2832 }//end routine
2833 ///////////////////////////////////////////////////////////////////////////////=
2834 
2835 
2836 ///////////////////////////////////////////////////////////////////////////////=
2837 ///////////////////////////////////////////////////////////////////////////////c
2838 ///////////////////////////////////////////////////////////////////////////////=
2839 /** \brief Send the psi out to symm PC */
2841 ///////////////////////////////////////////////////////////////////////////////=
2842 /// Error checking
2843 
2844 #ifdef _CP_DEBUG_STATEG_VERBOSE_
2845  if(thisIndex.x==0)
2846  CkPrintf("sendpsi %d %d\n",thisIndex.y,cleanExitCalled);
2847 #endif
2848 
2849  CPcharmParaInfo *sim = CPcharmParaInfo::get();
2850  int cp_min_opt = sim->cp_min_opt;
2851 
2852  if(finishedCpIntegrate==0){
2853  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
2854  CkPrintf("Dude, you can't sendPsi without completing integrate\n");
2855  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
2856  CkExit();
2857  }//endif
2858 
2859  if(iteration>0){
2860  if(iRecvRedPsi!=1 || iSentRedPsi!=1){
2861  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
2862  CkPrintf("Dude, you can't sendPsi without receiving Redpsi\n");
2863  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
2864  CkExit();
2865  }//endif
2866  }//endif
2867 
2868 ///////////////////////////////////////////////////////////////////////////////=
2869 /// Prepare the data : If cp dynamics is going, save the non-orthogonal puppies.
2870 
2871  acceptedPsi = false;
2872 
2873  complex *psi = gs.packedPlaneData;
2874  int numPoints = gs.numPoints;
2875 
2876  if(cp_min_opt==0){
2877  int ncoef = gs.numPoints;
2878  complex *scr = gs.packedPlaneDataScr; //save non-orthog psi
2879  CmiMemcpy(scr,psi,sizeof(complex)*ncoef);
2880  }//endif
2881 
2882 #ifndef PAIRCALC_TEST_DUMP
2883 #ifndef _CP_DEBUG_ORTHO_OFF_
2884  if(gs.ihave_kx0==1){
2885  double rad2i = 1.0/sqrt(2.0);
2886  for(int i=gs.kx0_strt; i<gs.kx0_end; i++){psi[i] *= rad2i;}
2887  }//endif
2888 #endif
2889 #endif
2890 
2891 ///////////////////////////////////////////////////////////////////////////////=
2892 
2893 /// Debugging
2894 
2895 #ifdef _PAIRCALC_DEBUG_PARANOID_FW_
2896  for(int i=0;i<config.numChunksSym;i++){CkAssert(countPsiO[i]==0);}
2897 #endif
2898 
2899 #ifdef _CP_GS_DUMP_PSI_
2900  dumpMatrix("psiBfp",(double *)psi, 1, gs.numPoints*2,
2901  thisIndex.y,thisIndex.x,thisIndex.x,0,false);
2902 #endif
2903 
2904 #ifdef _CP_GS_DEBUG_COMPARE_PSI_
2905  double testvalue=0.00000001;
2906  if(savedpsiBfp==NULL){ // load it
2907  savedpsiBfp= new complex[gs.numPoints];
2908  loadMatrix("psiBfp",(double *)savedpsiBfp, 1, gs.numPoints*2,
2909  thisIndex.y,thisIndex.x,thisIndex.x,0,false);
2910  }//endif
2911  for(int i=0;i<gs.numPoints;i++){
2912  if(fabs(psi[i].re-savedpsiBfp[i].re)>testvalue){
2913  fprintf(stderr, "GSP [%d,%d] %d element psi %.10g not %.10g\n",
2914  thisIndex.x, thisIndex.y,i, psi[i].re, savedpsiBfp[i].re);
2915  }//endif
2916  CkAssert(fabs(psi[i].re-savedpsiBfp[i].re)<testvalue);
2917  CkAssert(fabs(psi[i].im-savedpsiBfp[i].im)<testvalue);
2918  }//endfor
2919 #endif
2920 
2921 ///////////////////////////////////////////////////////////////////////////////=
2922 
2923 /// Start the calculator
2924 
2925 #ifndef _CP_DEBUG_ORTHO_OFF_
2926  symmPCmgr.sendLeftData(numPoints, psi, false);
2927  /// Symm loop PC chares in the top left [*,0,0,*] will not receive any right matrix data. Hence, if you're in such a PC's block, dont send right
2928  if(thisIndex.x >= symmPCmgr.pcCfg.grainSize)
2929  symmPCmgr.sendRightData(numPoints, psi, false);
2930 #else
2931  acceptedPsi=true;
2932 #endif
2933 
2934 //----------------------------------------------------------------------------
2935  }// end routine
2936 ///////////////////////////////////////////////////////////////////////////////=
2937 
2938 
2939 
2940 ///////////////////////////////////////////////////////////////////////////////=
2941 ///////////////////////////////////////////////////////////////////////////////c
2942 ///////////////////////////////////////////////////////////////////////////////=
2943 void CP_State_GSpacePlane::unpackNewPsi(CkReductionMsg *msg){
2944 ///////////////////////////////////////////////////////////////////////////////
2945 /// (0) Fuss with the redundant psis
2946 
2947  CPcharmParaInfo *sim = CPcharmParaInfo::get();
2948  int cp_min_opt = sim->cp_min_opt;
2949  if(iteration>0){
2950  if(iRecvRedPsi!=1 || iSentRedPsi!=1){
2951  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
2952  CkPrintf("Dude, you can't acceptPsi without receiving Redpsi\n");
2953  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
2954  CkExit();
2955  }//endif
2956  }//endif
2957 
2958 ///////////////////////////////////////////////////////////////////////////////
2959 
2960 /// (0) Nan Check and output
2961 
2962  int N = msg->getSize()/sizeof(complex);
2963  complex *data = (complex *)msg->getData();
2964  int offset = msg->getUserFlag(); if(offset<0){offset=0;}
2965  complex *psi = gs.packedPlaneData;
2966  int chunksize = gs.numPoints/config.numChunksSym;
2967  int chunkoffset = offset*chunksize; // how far into the points this contribution lies
2968 
2969 #ifdef _NAN_CHECK_
2970  for(int i=0;i<N ;i++){
2971  CkAssert(finite(((complex *) msg->getData())[i].re));
2972  CkAssert(finite(((complex *) msg->getData())[i].im));
2973  }//endfor
2974 #endif
2975 
2976 ///////////////////////////////////////////////////////////////////////////////
2977 
2978 /// (I) Unpack the contribution to newpsi (orthonormal psi)
2979 
2980  int idest=chunkoffset;
2981 
2982  if(countPsiO[offset]<1){
2983  CmiMemcpy(&(psi[idest]), &(data[0]), N*sizeof(complex)); //slower?
2984  //for(int i=0; i<N; i++,idest++){psi[idest] = data[i];}
2985  }else{
2986  // for(int i=0; i<N; i++,idest++){psi[idest] += data[i];}
2987  fastAdd((double *) &psi[idest],(double *)data,N*2);
2988  }//endif
2989 
2990  delete msg;
2991 
2992 ///////////////////////////////////////////////////////////////////////////////
2993 
2994 /// (II) If you have got it all : Rescale it, produce some output
2995 
2996  countPsi++;//psi arrives in as many as 2 *numblock reductions
2997  countPsiO[offset]++;//psi arrives in as many as 2
2998  if(countPsi==AllPsiExpected){
2999 #ifdef _CP_DEBUG_STATEG_VERBOSE_
3000  if(thisIndex.x==0)
3001  CkPrintf("acceptpsi %d %d\n",thisIndex.y,cleanExitCalled);
3002 #endif
3003  }//endif
3004 
3005 //----------------------------------------------------------------------------
3006  }//end routine
3007 ///////////////////////////////////////////////////////////////////////////////=
3008 
3009 
3010 ///////////////////////////////////////////////////////////////////////////////=
3011 ///////////////////////////////////////////////////////////////////////////////c
3012 ///////////////////////////////////////////////////////////////////////////////=
3013 /**
3014  * \brief Accept partial results when symm PC s sending to us directly
3015  */
3016 ///////////////////////////////////////////////////////////////////////////////=
3018 ///////////////////////////////////////////////////////////////////////////////
3019 
3020 /// (0) Fuss with the redundant psis
3021 
3022  CPcharmParaInfo *sim = CPcharmParaInfo::get();
3023  int cp_min_opt = sim->cp_min_opt;
3024  if(iteration>0){
3025  if(iRecvRedPsi!=1 || iSentRedPsi!=1){
3026  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
3027  CkPrintf("Dude, you can't acceptPsi without receiving Redpsi\n");
3028  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
3029  CkExit();
3030  }//endif
3031  }//endif
3032 
3033 ///////////////////////////////////////////////////////////////////////////////
3034 
3035 /// (0) Check for Nans
3036 
3037 
3038 
3039  int N = msg->N;
3040  complex *data = msg->result;
3041  int offset = msg->myoffset; if(offset<0){offset=0;}
3042  complex *psi = gs.packedPlaneData;
3043  int chunksize = gs.numPoints/config.numChunksSym;
3044  int chunkoffset = offset*chunksize; // how far into the points this contribution lies
3045 
3046 #ifdef _NAN_CHECK_
3047  for(int i=0;i<N ;i++){
3048  if((!finite(data[i].re)) || (!finite(data[i].im))){
3049  CkPrintf("GSP [%d,%d] acceptNewPsi offset %d %d of %d is nan\n",
3050  thisIndex.x,thisIndex.y, msg->myoffset, i,N);
3051  }
3052  CkAssert(finite(data[i].re));
3053  CkAssert(finite(data[i].im));
3054  }//endif
3055 #endif
3056 
3057 ///////////////////////////////////////////////////////////////////////////////
3058 /// (I) Unpack the contribution to newpsi (orthonormal psi)
3059 
3060  int idest = chunkoffset;
3061  if(countPsiO[offset]<1){
3062  CmiMemcpy(&(psi[idest]), &(data[0]), N*sizeof(complex)); //slower?
3063  // for(int i=0; i<N; i++,idest++){psi[idest] = data[i];}
3064  }else{
3065  //for(int i=0; i<N; i++,idest++){psi[idest] += data[i];}
3066  fastAdd((double *) &psi[idest],(double *)data,N*2);
3067  }//endif
3068 
3069  delete msg;
3070 
3071 ///////////////////////////////////////////////////////////////////////////////=
3072 
3073 /// When all have arrived, continue in doNewPsi
3074 
3075  countPsi++; //psi arrives in as many as 2 *numblock * numgrain reductions
3076  countPsiO[offset]++;//psi arrives in as many as 2 * numgrain
3077  //
3078  if(countPsi==AllPsiExpected){
3079 #ifdef _CP_DEBUG_STATEG_VERBOSE_
3080  if(thisIndex.x==0){CkPrintf("aceeptpsi %d %d\n",thisIndex.y,cleanExitCalled);}
3081 #endif
3082  }//endif
3083 
3084 //----------------------------------------------------------------------------
3085  }//end routine
3086 ///////////////////////////////////////////////////////////////////////////////=
3087 
3088 
3089 ///////////////////////////////////////////////////////////////////////////////=
3090 /**
3091  * \brief All Psi have arrived, finish the new Psi process
3092  */
3093 ///////////////////////////////////////////////////////////////////////////////=
3094 ///////////////////////////////////////////////////////////////////////////////c
3095 ///////////////////////////////////////////////////////////////////////////////=
3097 ///////////////////////////////////////////////////////////////////////////////
3098 /// (0) Fuss with the redundant psis
3099 
3100  CPcharmParaInfo *sim = CPcharmParaInfo::get();
3101  int cp_min_opt = sim->cp_min_opt;
3102  int cp_min_update = sim->cp_min_update;
3103 
3104  if(iteration>0){
3105  if(iRecvRedPsi!=1 || iSentRedPsi!=1){
3106  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
3107  CkPrintf("Dude, you can't acceptPsi without receiving Redpsi\n");
3108  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
3109  CkExit();
3110  }//endif
3111  }//endif
3112 
3113 ///////////////////////////////////////////////////////////////////////////////
3114 /// (I) If you have got it all : Rescale it, produce some output
3115 
3116 #ifdef _CP_DEBUG_STATEG_VERBOSE_
3117  if(thisIndex.x==0)
3118  CkPrintf("donewpsi %d %d\n",thisIndex.y,cleanExitCalled);
3119 #endif
3120 
3121  CkAssert(countPsi==AllPsiExpected);
3122 
3123  complex *psi = gs.packedPlaneData;
3124 #ifdef _NAN_CHECK_
3125  for(int i=0;i<gs.numPoints ;i++){
3126  CkAssert(finite(psi[i].re));
3127  CkAssert(finite(psi[i].im));
3128  }//endfor
3129 #endif
3130 
3131 ///////////////////////////////////////////////////////////////////////////////
3132 /// (A) Reset counters and rescale the kx=0 stuff
3133 
3134  acceptedPsi = true;
3135  countPsi = 0;
3136  bzero(countPsiO,config.numChunksSym*sizeof(int));
3137 
3138 #ifndef PAIRCALC_TEST_DUMP
3139  if(gs.ihave_kx0==1){
3140  double rad2 = sqrt(2.0);
3141  for(int i=gs.kx0_strt; i<gs.kx0_end; i++){psi[i] *= rad2;}
3142  }//endif
3143 #endif
3144 
3145 ///////////////////////////////////////////////////////////////////////////////
3146 /// (B) Generate some screen output of orthogonal psi
3147 
3148  if(iteration>0){screenOutputPsi(iteration);}
3149 
3150 ///////////////////////////////////////////////////////////////////////////////
3151 /// (E) Debug psi
3152 
3153 #ifdef _CP_GS_DUMP_PSI_
3154  dumpMatrix("psiAf",(double *)psi, 1, gs.numPoints*2,thisIndex.y,thisIndex.x,
3155  thisIndex.x,0,false);
3156 #endif
3157 
3158 #ifdef _CP_GS_DEBUG_COMPARE_PSI_
3159  double testvalue=0.00000001;
3160  if(savedpsiAf==NULL){
3161  savedpsiAf= new complex[gs.numPoints];
3162  loadMatrix("psiAf",(double *)savedpsiAf, 1, gs.numPoints*2,
3163  thisIndex.y,thisIndex.x,thisIndex.x,0,false);
3164  }//endif
3165  for(int i=0;i<gs.numPoints;i++){
3166  if(fabs(psi[i].re-savedpsiAf[i].re)>testvalue){
3167  fprintf(stderr, "GSP [%d,%d] %d element psi %.10g not %.10g\n",
3168  thisIndex.x, thisIndex.y,i, psi[i].re, savedpsiAf[i].re);
3169  }//endif
3170  CkAssert(fabs(psi[i].re-savedpsiAf[i].re)<testvalue);
3171  CkAssert(fabs(psi[i].im-savedpsiAf[i].im)<testvalue);
3172  }//endfor
3173 #endif
3174 
3175 ///////////////////////////////////////////////////////////////////////////////
3176 /// (E) Reset psi
3177 
3178  if(cp_min_opt==1 && cp_min_update==0 && iteration>0){
3179  CmiMemcpy(gs.packedPlaneData,gs.packedPlaneDataTemp,
3180  sizeof(complex)*gs.numPoints);
3181  memset(gs.packedVelData,0,sizeof(complex)*gs.numPoints);
3182  }//endif
3183 
3184  if(cp_min_opt==1 && cp_min_update==0 && iteration==0){
3185  CmiMemcpy(gs.packedPlaneDataTemp,gs.packedPlaneData,
3186  sizeof(complex)*gs.numPoints);
3187  memset(gs.packedVelData,0,sizeof(complex)*gs.numPoints);
3188  }//endif
3189 
3190 ///////////////////////////////////////////////////////////////////////////////=
3191 /// Back to the threaded loop.
3192 #ifndef _CP_DEBUG_ORTHO_OFF_
3193 #ifdef BARRIER_CP_GSPACE_PSI
3194  int wehaveours=1;
3195  contribute(sizeof(int),&wehaveours,CkReduction::sum_int,
3196  CkCallback(CkIndex_CP_State_GSpacePlane::allDonePsi(NULL),thisProxy));
3197 #endif
3198 #endif
3199 
3200 //----------------------------------------------------------------------------
3201  }//end routine
3202 ///////////////////////////////////////////////////////////////////////////////=
3203 
3204 
3205 
3206 ///////////////////////////////////////////////////////////////////////////////=
3207 ///////////////////////////////////////////////////////////////////////////////c
3208 ///////////////////////////////////////////////////////////////////////////////=
3209 //! dynamics triggers send of orthoT to asymm calc when psi is done
3211 ///////////////////////////////////////////////////////////////////////////////
3212  CkPrintf("[%d,%d] launchOrthoT \n",thisIndex.x, thisIndex.y);
3213  if(thisIndex.x==0 && thisIndex.y==0)
3214  myOrtho.sendOrthoTtoAsymm();
3215 //----------------------------------------------------------------------------
3216  }//end routine
3217 ///////////////////////////////////////////////////////////////////////////////=
3218 
3219 
3220 ///////////////////////////////////////////////////////////////////////////////=
3221 ///////////////////////////////////////////////////////////////////////////////c
3222 ///////////////////////////////////////////////////////////////////////////////=
3224 
3225 #ifdef DEBUG_CP_GSPACE_PSIV
3226  CkPrintf("GSpace[%d,%d] sendRedPsiV: Going to send redundant PsiV data\n",thisIndex.x,thisIndex.y);
3227 #endif
3228 ///////////////////////////////////////////////////////////////////////////////=
3229 
3230 /// I) Local Pointer setup
3231 
3232  eesCache *eesData = UeesCacheProxy[thisInstance.proxyOffset].ckLocalBranch ();
3233  double *coef_mass = eesData->GspData[iplane_ind]->coef_mass;
3234 
3235  int ncoef = gSpaceNumPoints;
3236  complex *psi = gs.packedPlaneData;
3237  complex *vpsi = gs.packedVelData; // to evolve psi to time, t.
3238  complex *forces = gs.packedForceData;
3239  double ***xNHC = gs.xNHC;
3240  double ***xNHCP = gs.xNHCP;
3241  double ***vNHC = gs.vNHC;
3242  double ***fNHC = gs.fNHC;
3243  double *mNHC = gs.mNHC;
3244  int len_nhc_cp = gs.len_nhc_cp;
3245  int num_nhc_cp = gs.num_nhc_cp;
3246  int nck_nhc_cp = gs.nck_nhc_cp;
3247  int nkx0_red = gs.nkx0_red;
3248  int nkx0_uni = gs.nkx0_uni;
3249  int nkx0_zero = gs.nkx0_zero;
3250  double kTCP = gs.kTCP;
3251 
3252 ///////////////////////////////////////////////////////////////////////////////
3253 
3254 /// II) Sync yourself with psi by integrating to time t if output has not done it
3255 /// you have the wrong force but thats OK until you put in a better rotation
3256 
3257  halfStepEvolve = 1;
3258 
3259 #ifdef JUNK
3260  halfStepEvolve = 0; // do the 1/2 step update now
3261  CPINTEGRATE::cp_evolve_vel(ncoef,forces,vpsi,coef_mass,
3262  len_nhc_cp,num_nhc_cp,nck_nhc_cp,fNHC,vNHC,xNHC,xNHCP,mNHC,
3263  gs.v0NHC,gs.a2NHC,gs.a4NHC,kTCP,nkx0_red,nkx0_uni,nkx0_zero,
3264  2,iteration,gs.degfree,gs.degfreeNHC,gs.degFreeSplt,
3265  gs.istrNHC,gs.iendNHC,1);
3266 #endif
3267 
3268 ///////////////////////////////////////////////////////////////////////////////
3269 
3270 /// III) We still have these funky g=0 plane guys that may be on other procs and
3271 /// we have sync those guys, also, or PC won't work correctly
3272 
3273  CPcharmParaInfo *sim = CPcharmParaInfo::get();
3274  RedundantCommPkg *RCommPkg = sim->RCommPkg;
3275 
3276  complex *sendData = gs.packedVelData;
3277  int isend = thisIndex.y; // my g-space chare index
3278  int *num_send = RCommPkg[isend].num_send;
3279  int **lst_send = RCommPkg[isend].lst_send;
3280  int num_send_tot = RCommPkg[isend].num_send_tot;
3281 
3282  int iii=0; int jjj=0;
3283  if(num_send_tot>0){
3284  for(int irecv = 0; irecv < nchareG; irecv ++){
3285 
3286  int ncoef = num_send[irecv];
3287  jjj += ncoef;
3288  if(ncoef>0){
3289  GSRedPsiMsg *msg = new (ncoef,8*sizeof(int)) GSRedPsiMsg;
3290  if(config.prioFFTMsg){
3291  CkSetQueueing(msg, CK_QUEUEING_IFIFO);
3292  *(int*)CkPriorityPtr(msg) = config.rsfftpriority +
3293  thisIndex.x*gs.planeSize[0]+thisIndex.y;
3294  }//endif
3295  if(ncoef>0){iii++;}
3296  msg->size = ncoef;
3297  msg->senderIndex = isend; // my gspace chare index
3298  complex *msgData = msg->data;
3299  for (int i=0;i<ncoef; i++){msgData[i] = sendData[lst_send[irecv][i]];}
3300  UgSpacePlaneProxy[thisInstance.proxyOffset](thisIndex.x,irecv).acceptRedPsiV(msg);
3301  }//endif
3302 
3303  }//endfor
3304 
3305  }//endif : no one to which I have to send
3306 
3307 ///////////////////////////////////////////////////////////////////////////////=
3308 
3309 /// Check for errors
3310 
3311  if(iii!=num_send_tot){
3312  CkPrintf("Error in GSchare %d %d : %d %d : sendRedPsiV.1\n",thisIndex.x,thisIndex.y,
3313  num_send_tot,iii);
3314  CkExit();
3315  }//endif
3316  if(numRecvRedPsi==0 && gs.nkx0_red>0){
3317  CkPrintf("Error in GSchare %d %d : %d %d : sendRedPsiV.2\n",thisIndex.x,thisIndex.y,
3318  numRecvRedPsi,gs.nkx0_red);
3319  CkExit();
3320  }//endif
3321  if(jjj != gs.nkx0_uni-gs.nkx0_zero){
3322  CkPrintf("Error in GSchare %d %d : %d %d : sendRedPsiV.3\n",thisIndex.x,thisIndex.y,
3323  jjj,gs.nkx0_uni);
3324  CkExit();
3325  }//endif
3326 
3327 ///////////////////////////////////////////////////////////////////////////////=
3328 
3329 /// I send the stuff and I need a new velocity
3330 
3331  iSentRedPsiV = 1;
3332  acceptedVPsi = false;
3333 
3334 ///////////////////////////////////////////////////////////////////////////////=
3335  }//end routine
3336 ///////////////////////////////////////////////////////////////////////////////=
3337 
3338 ///////////////////////////////////////////////////////////////////////////////=
3339 ///////////////////////////////////////////////////////////////////////////////c
3340 ///////////////////////////////////////////////////////////////////////////////=
3342 ///////////////////////////////////////////////////////////////////////////////=
3343 
3344  CPcharmParaInfo *sim = CPcharmParaInfo::get();
3345  RedundantCommPkg *RCommPkg = sim->RCommPkg;
3346 
3347  int ncoef = msg->size;
3348  int isend = msg->senderIndex;
3349  complex *msgData = msg->data;
3350  complex *recvData = gs.packedRedPsiV;
3351 
3352  int irecv = thisIndex.y; // my g-space chare index
3353  int *num_recv = RCommPkg[irecv].num_recv;
3354  int **lst_recv = RCommPkg[irecv].lst_recv;
3355 
3356 #ifdef DEBUG_CP_GSPACE_PSIV
3357  CkPrintf("GSpace[%d,%d] acceptRedPsiV Received redundant PsiV values from sender %d\n",thisIndex.x,thisIndex.y,isend);
3358 #endif
3359 ///////////////////////////////////////////////////////////////////////////////=
3360 
3361 /// unpack
3362 
3363  if(num_recv[isend]!=ncoef){
3364  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
3365  CkPrintf("Number sent not equal to number reciever expected \n");
3366  CkPrintf("Sender %d size %d Reciever %d size %d \n",isend,ncoef,irecv,num_recv[isend]);
3367  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
3368  CkExit();
3369  }//endif
3370 
3371  if(countRedPsiV==0){jtemp=0;}
3372  jtemp+= ncoef;
3373 
3374  for(int i=0;i<ncoef;i++){
3375  recvData[lst_recv[isend][i]].re= msgData[i].re;
3376  recvData[lst_recv[isend][i]].im=-msgData[i].im;
3377  }//endfor
3378 
3379  delete msg;
3380 
3381 ///////////////////////////////////////////////////////////////////////////////=
3382 
3383 /// if all have arrived resumecontrol
3384 
3385  countRedPsiV++;
3386  if(countRedPsiV==numRecvRedPsi){
3387 #ifdef DEBUG_CP_GSPACE_PSIV
3388  CkPrintf("GSpace[%d,%d] acceptRedPsiV received all %d GSRedPsi messages carrying redundant PsiV data\n",thisIndex.x,thisIndex.y,countRedPsiV);
3389 #endif
3390  countRedPsiV = 0;
3391  iRecvRedPsiV = 1;
3392  if(jtemp!=gs.nkx0_red){
3393  CkPrintf("Error in GSchare recv cnt %d %d : %d %d\n",thisIndex.x,thisIndex.y,
3394  gs.nkx0_red,jtemp);
3395  CkExit();
3396  }//endif
3397  }//endif
3398 
3399 //-----------------------------------------------------------------------------
3400  }//end routine
3401 ///////////////////////////////////////////////////////////////////////////////=
3402 
3403 
3404 ///////////////////////////////////////////////////////////////////////////////=
3405 ///////////////////////////////////////////////////////////////////////////////c
3406 ///////////////////////////////////////////////////////////////////////////////=
3407 /** \brief when all red psi velocities are in, integrate */
3409 
3410  eesCache *eesData = UeesCacheProxy[thisInstance.proxyOffset].ckLocalBranch ();
3411  double *coef_mass = eesData->GspData[iplane_ind]->coef_mass;
3412  int ncoef = gs.numPoints;
3413  int ncoef_red = gs.nkx0_red;
3414  complex *vpsi = gs.packedVelData;
3415  int istrt0 = gs.nkx0_red;
3416  int istrt = gs.nkx0_red+gs.nkx0_zero;
3417  int iend = gs.nkx0_red+gs.nkx0_uni;
3418 
3419  if(ncoef_red>0){
3420  complex *vpsi_red = gs.packedRedPsiV;
3421  for(int i=0;i<ncoef_red;i++){vpsi[i]=vpsi_red[i];}
3422  }//endif
3423 
3424  ake_old = 0.0;
3425  for(int i=istrt0;i<istrt;i++){ake_old += vpsi[i].getMagSqr()*coef_mass[i];} // g=0
3426  for(int i=istrt;i<iend;i++) {ake_old += vpsi[i].getMagSqr()*(2.0*coef_mass[i]);} // gx=0
3427  for(int i=iend;i<ncoef;i++) {ake_old += vpsi[i].getMagSqr()*coef_mass[i];} // gx!=0
3428 
3429 }//end routine
3430 ///////////////////////////////////////////////////////////////////////////////=
3431 
3432 ///////////////////////////////////////////////////////////////////////////////=
3433 ///////////////////////////////////////////////////////////////////////////////c
3434 ///////////////////////////////////////////////////////////////////////////////=
3435 /** \brief send psi velocities */
3437 ///////////////////////////////////////////////////////////////////////////////=
3438 
3439 /// Error Check
3440 
3441  CPcharmParaInfo *sim = CPcharmParaInfo::get();
3442  RedundantCommPkg *RCommPkg = sim->RCommPkg;
3443 
3444  if(iRecvRedPsiV!=1 || iSentRedPsiV!=1){
3445  CkPrintf("GSpace[%d,%d] Error: You can't sendPsiV() without sending/receiving the redundant psiV values around: finished %d %d : %d %d\n",thisIndex.x,thisIndex.y,iRecvRedPsiV,iSentRedPsiV,numRecvRedPsi,gs.nkx0_red);
3446  CkAbort("Error: GSpace cannot sendPsiV() without sending/receiving the redundant psi values around\n");
3447  }//endif
3448 
3449 #ifdef DEBUG_CP_GSPACE_PSIV
3450  CkPrintf("GSpace[%d,%d] sendPsiV\n",thisIndex.x,thisIndex.y);
3451 #endif
3452 
3453  acceptedVPsi = false;
3454 
3455 ///////////////////////////////////////////////////////////////////////////////=
3456 
3457  nrotation++;
3458  iterRotation = iteration+1;
3459 
3460  int ncoef = gs.numPoints;
3461  complex *data = gs.packedVelData;
3462  complex *scr = gs.packedPlaneDataScr; // replace no-ortho psi
3463  complex *psi = gs.packedPlaneData; // by orthonormal psi when norb rotating
3464  CmiMemcpy(scr,psi,sizeof(complex)*ncoef);
3465 
3466 #ifndef PAIRCALC_TEST_DUMP
3467  if(gs.ihave_kx0==1){
3468  double rad2i = 1.0/sqrt(2.0);
3469  for(int i=gs.kx0_strt; i<gs.kx0_end; i++){data[i] *= rad2i;}
3470  }//endif
3471 #endif
3472 
3473  int numPoints = gs.numPoints;
3474  symmPCmgr.sendLeftData(numPoints,data,true);
3475  /// Symm loop PC chares in the top left [*,0,0,*] will not receive any right matrix data. Hence, if you're in such a PC's block, dont send right
3476  if(thisIndex.x >= symmPCmgr.pcCfg.grainSize)
3477  symmPCmgr.sendRightData(numPoints,data,true);
3478 
3479 //----------------------------------------------------------------------------
3480 }// end routine
3481 ///////////////////////////////////////////////////////////////////////////////=
3482 
3483 
3484 ///////////////////////////////////////////////////////////////////////////////=
3485 ///////////////////////////////////////////////////////////////////////////////c
3486 ///////////////////////////////////////////////////////////////////////////////=
3487 void CP_State_GSpacePlane::unpackNewPsiV(CkReductionMsg *msg){
3488 ///////////////////////////////////////////////////////////////////////////////
3489 
3490 /// (I) Local pointer setup
3491 
3492  int N = msg->getSize()/sizeof(complex);
3493  complex *data = (complex *)msg->getData();
3494  int offset = msg->getUserFlag(); if(offset<0){offset=0;}
3495 
3496  complex *vpsi = gs.packedVelData;
3497  int chunksize = gs.numPoints/config.numChunksSym;
3498  int chunkoffset = offset*chunksize;; // how far into the points this contribution lies
3499 
3500 #ifdef DEBUG_CP_GSPACE_PSIV
3501  CkPrintf("GSpace[%d,%d] acceptNewPsiV(reductionMsg) PCs have sent new PsiV data\n",thisIndex.x,thisIndex.y);
3502 #endif
3503  if(iRecvRedPsiV!=1 || iSentRedPsiV!=1){
3504  CkPrintf("GSpace[%d,%d] Error: You can't acceptNewPsiV() without sending/receiving the redundant PsiV values around: finished %d %d : %d %d\n",thisIndex.x,thisIndex.y,iRecvRedPsiV,iSentRedPsiV,numRecvRedPsi,gs.nkx0_red);
3505  CkAbort("Error: GSpace cannot acceptNewPsiV() without sending/receiving the redundant PsiV values around\n");
3506  }//endif
3507 
3508 ///////////////////////////////////////////////////////////////////////////////
3509 
3510 /// (II) Unpack the contribution to newpsi (orthonormal psi)
3511 
3512  int idest=chunkoffset;
3513  if(countVPsiO[offset]<1){
3514  CmiMemcpy(&(vpsi[idest]), &(data[0]), N*sizeof(complex));//slower?
3515  //for(int i=0; i<N; i++,idest++){vpsi[idest] = data[i];}
3516  }else{
3517  // for(int i=0; i<N; i++,idest++){vpsi[idest] += data[i];}
3518  fastAdd((double *) &vpsi[idest],(double *)data,N*2);
3519  }//endif
3520 
3521  delete msg;
3522 
3523 ///////////////////////////////////////////////////////////////////////////////
3524 
3525 /// (III) When all has arrive, onward to victory
3526 
3527  countVPsi++; //psi arrives in as many as 2 reductions
3528  countVPsiO[offset]++;//psi arrives in as many as 2 reductions
3529 
3530  if(countVPsi==AllPsiExpected){
3531 #ifdef DEBUG_CP_GSPACE_PSIV
3532  CkPrintf("GSpace[%d,%d] Received all PsiV data from PCs (%d reductions).\n",thisIndex.x,thisIndex.y,AllPsiExpected);
3533 #endif
3534  }//endif
3535 
3536 //----------------------------------------------------------------------------
3537  }//end routine
3538 ///////////////////////////////////////////////////////////////////////////////=
3539 
3540 ///////////////////////////////////////////////////////////////////////////////=
3541 ///////////////////////////////////////////////////////////////////////////////c
3542 ///////////////////////////////////////////////////////////////////////////////=
3544 ///////////////////////////////////////////////////////////////////////////////
3545 
3546 /// (I) Local pointer setup
3547 
3548  int N = msg->N;
3549  complex *data = msg->result;
3550  int offset = msg->myoffset; if(offset<0){offset=0;}
3551 
3552 #ifdef _NAN_CHECK_
3553  for(int i=0; i < N; i++)
3554  {
3555  CkAssert(finite(data[i].re));
3556  CkAssert(finite(data[i].im));
3557  }
3558 #endif
3559 
3560  complex *vpsi = gs.packedVelData;
3561  int chunksize = gs.numPoints/config.numChunksSym;
3562  int chunkoffset = offset*chunksize;; // how far into the points this contribution lies
3563 
3564 #ifdef DEBUG_CP_GSPACE_PSIV
3565  CkPrintf("GSpace[%d,%d] acceptNewPsiV(partialResultMsg) Received new PsiV data (msg %d of %d) from PC [%d,%d,%d,%d] (offset %d)\n",thisIndex.x,thisIndex.y,countVPsi+1,AllPsiExpected,
3566  msg->sndr.w,msg->sndr.x,msg->sndr.y,msg->sndr.z,offset);
3567 #endif
3568  if(iRecvRedPsiV!=1 || iSentRedPsiV!=1){
3569  CkPrintf("GSpace[%d,%d] Error: You can't acceptNewPsiV() without sending/receiving the redundant PsiV values around: finished %d %d : %d %d\n",thisIndex.x,thisIndex.y,iRecvRedPsiV,iSentRedPsiV,numRecvRedPsi,gs.nkx0_red);
3570  CkAbort("Error: GSpace cannot acceptNewPsiV() without sending/receiving the redundant PsiV values around\n");
3571  }//endif
3572 
3573 ///////////////////////////////////////////////////////////////////////////////
3574 
3575 /// (I) Unpack the contribution to newpsi (orthonormal psi)
3576 
3577  int idest=chunkoffset;
3578  if(countVPsiO[offset]<1){
3579  CmiMemcpy(&(vpsi[idest]), &(data[0]), N*sizeof(complex));//slower?
3580  //for(int i=0; i<N; i++,idest++){vpsi[idest] = data[i];}
3581  }else{
3582  // for(int i=0; i<N; i++,idest++){vpsi[idest] += data[i];}
3583  fastAdd((double *) &vpsi[idest],(double *)data,N*2);
3584  }//endif
3585 
3586  delete msg;
3587 
3588 ///////////////////////////////////////////////////////////////////////////////
3589 
3590 /// (II) Continue
3591 
3592  countVPsi++;//psi arrives in as many as 2 reductions
3593  countVPsiO[offset]++;//psi arrives in as many as 2 reductions
3594 
3595  if(countVPsi==AllPsiExpected){
3596 #ifdef DEBUG_CP_GSPACE_PSIV
3597  CkPrintf("GSpace[%d,%d] Received all PsiV data from PCs (%d messages).\n",thisIndex.x,thisIndex.y,AllPsiExpected);
3598 #endif
3599  }//endif
3600 
3601 //----------------------------------------------------------------------------
3602  }//end routine
3603 ///////////////////////////////////////////////////////////////////////////////=
3604 
3605 ///////////////////////////////////////////////////////////////////////////////=
3606 ///////////////////////////////////////////////////////////////////////////////c
3607 ///////////////////////////////////////////////////////////////////////////////=
3608 /** process all the PsiV data */
3610 ///////////////////////////////////////////////////////////////////////////////
3611 
3612 /// (0) Error check
3613 
3614 #ifdef DEBUG_CP_GSPACE_PSIV
3615  CkPrintf("GSpace[%d,%d] doNewPsiV\n",thisIndex.x,thisIndex.y);
3616 #endif
3617 
3618  CkAssert(countVPsi==AllPsiExpected);
3619 
3620  if(iRecvRedPsiV!=1 || iSentRedPsiV!=1){
3621  CkPrintf("GSpace[%d,%d] Error: You can't doNewPsiV() without sending/receiving the redundant PsiV values around: finished %d %d : %d %d\n",thisIndex.x,thisIndex.y,iRecvRedPsiV,iSentRedPsiV,numRecvRedPsi,gs.nkx0_red);
3622  CkAbort("Error: GSpace cannot doNewPsiV() without sending the redundant/receiving PsiV values around\n");
3623  }//endif
3624 
3625 ///////////////////////////////////////////////////////////////////////////////
3626 
3627 /// (I) Reset counters and rescale the kx=0 stuff
3628 
3629  acceptedVPsi = true;
3630  countVPsi = 0;
3631 
3632  complex *vpsi = gs.packedVelData;
3633 
3634 #ifndef PAIRCALC_TEST_DUMP
3635  for(int i=0;i<config.numChunksSym;i++){countVPsiO[i]=0;}
3636  if(gs.ihave_kx0==1){
3637  double rad2 = sqrt(2.0);
3638  for(int i=gs.kx0_strt; i<gs.kx0_end; i++){vpsi[i] *= rad2;}
3639  }//endif
3640 #endif
3641 
3642 ///////////////////////////////////////////////////////////////////////////////
3643 
3644 /// III) Replace by finite difference until update is better
3645 
3646  CPcharmParaInfo *sim = CPcharmParaInfo::get();
3647  eesCache *eesData = UeesCacheProxy[thisInstance.proxyOffset].ckLocalBranch ();
3648 
3649  double *coef_mass = eesData->GspData[iplane_ind]->coef_mass;
3650  double dt = sim->dt;
3651  double dt2 = 2.0*dt;
3652  int ncoef = gs.numPoints;
3653  complex *psi_g = gs.packedPlaneData;
3654  complex *psi_g_tmp = gs.packedPlaneDataTemp;
3655  int istrt0 = gs.nkx0_red;
3656  int istrt = gs.nkx0_red+gs.nkx0_zero;
3657  int iend = gs.nkx0_red+gs.nkx0_uni;
3658 
3659  for(int i=0;i<ncoef;i++){
3660  double vre = (psi_g[i].re-psi_g_tmp[i].re)/dt;
3661  double vim = (psi_g[i].im-psi_g_tmp[i].im)/dt;
3662  vpsi[i].re = vre;
3663  vpsi[i].im = vim;
3664  }//endif
3665 
3666  double ake_new = 0.0;
3667  for(int i=istrt0;i<istrt;i++){ake_new += vpsi[i].getMagSqr()*coef_mass[i];} // g=0
3668  for(int i=istrt;i<iend;i++) {ake_new += vpsi[i].getMagSqr()*(2.0*coef_mass[i]);} // gx=0
3669  for(int i=iend;i<ncoef;i++) {ake_new += vpsi[i].getMagSqr()*coef_mass[i];} // gx!=0
3670 
3671  if(sim->cp_norb_rot_kescal==1){
3672  double scale = sqrt(ake_old/ake_new);
3673  for(int i=0;i<ncoef;i++){
3674  vpsi[i].re *= scale;
3675  vpsi[i].im *= scale;
3676  }//endfor
3677  }//endif
3678 
3679 ///////////////////////////////////////////////////////////////////////////////
3680 
3681 //// II) A Barrier for debugging
3682 
3683 #ifdef BARRIER_CP_GSPACE_PSIV
3684  int wehaveours=1;
3685  contribute(sizeof(int),&wehaveours,CkReduction::sum_int,
3686  CkCallback(CkIndex_CP_State_GSpacePlane::allDonePsiV(NULL),thisProxy));
3687 #endif
3688 
3689 //------------------------------------------------------------------------------
3690  }//end routine
3691 ///////////////////////////////////////////////////////////////////////////////=
3692 
3693 ///////////////////////////////////////////////////////////////////////////////=
3694 ///////////////////////////////////////////////////////////////////////////////c
3695 ///////////////////////////////////////////////////////////////////////////////=
3696 /** \brief output a few select psi values for post processing validation */
3698 ///////////////////////////////////////////////////////////////////////////////=
3699 #ifdef _CP_DEBUG_STATEG_VERBOSE_
3700  if(thisIndex.x==0){CkPrintf("output %d %d\n",thisIndex.y,cleanExitCalled);}
3701 #endif
3702 
3703  eesCache *eesData = UeesCacheProxy[thisInstance.proxyOffset].ckLocalBranch ();
3704  int *k_x = eesData->GspData[iplane_ind]->ka;
3705  int *k_y = eesData->GspData[iplane_ind]->kb;
3706  int *k_z = eesData->GspData[iplane_ind]->kc;
3707 
3708  CPcharmParaInfo *sim = CPcharmParaInfo::get();
3709  int cp_min_opt = sim->cp_min_opt;
3710  int nstates = sim->nstates;
3711 
3712  complex *vpsi = gs.packedVelData;
3713  complex *psi = gs.packedPlaneData; //orthogonal psi
3714  if(cp_min_opt==0){psi=gs.packedPlaneDataScr;} //non-orthogonal psi
3715 
3716  int ntime = config.maxIter;
3717  if(cp_min_opt==0){ntime-=1;}
3718 
3719 ///////////////////////////////////////////////////////////////////////////////=
3720 
3721 /// Screen Output
3722 
3723 #ifdef _CP_DEBUG_COEF_SCREEN_
3724  if(iteration<=ntime){
3725  if(gs.istate_ind==0 || gs.istate_ind==nstates-1){
3726  for(int i = 0; i < gs.numPoints; i++){
3727  if(k_x[i]==0 && k_y[i]==1 && k_z[i]==4 ){
3728  fprintf(temperScreenFile,"-------------------------------------------------------------------------------\n");
3729  fprintf(temperScreenFile,"Iter [%d] Psi[is=%d ka=%d kb=%d kc=%d] : %.15g %.15g\n",
3730  iprintout,
3731  gs.istate_ind+1,k_x[i],k_y[i],k_z[i],psi[i].re,psi[i].im);
3732  if(cp_min_opt==0){
3733  double vre=vpsi[i].re;
3734  double vim=vpsi[i].im;
3735  fprintf(temperScreenFile,"Iter [%d] VPsi[is=%d ka=%d kb=%d kc=%d] : %.15g %.15g\n",
3736  iprintout,
3737  gs.istate_ind+1,k_x[i],k_y[i],k_z[i],vre,vim);
3738  }//endif
3739  fprintf(temperScreenFile,"-------------------------------------------------------------------------------\n");
3740  }//endif
3741  if(k_x[i]==2 && k_y[i]==1 && k_z[i]==3){
3742  double vre=vpsi[i].re;
3743  double vim=vpsi[i].im;
3744  fprintf(temperScreenFile,"-------------------------------------------------------------------------------\n");
3745  fprintf(temperScreenFile,"Iter [%d] Psi[is=%d ka=%d kb=%d kc=%d] : %.15g %.15g\n",
3746  iprintout,
3747  gs.istate_ind+1,k_x[i],k_y[i],k_z[i],psi[i].re,psi[i].im);
3748  if(cp_min_opt==0){
3749  fprintf(temperScreenFile,"Iter [%d] VPsi[is=%d ka=%d kb=%d kc=%d] : %.15g %.15g\n",
3750  iprintout,
3751  gs.istate_ind+1,k_x[i],k_y[i],k_z[i],vre,vim);
3752  }//endif
3753  fprintf(temperScreenFile,"-------------------------------------------------------------------------------\n");
3754  }//endif
3755  }//endfor
3756  }//endif
3757  }//endif
3758 #endif
3759 
3760 ///////////////////////////////////////////////////////////////////////////////=
3761 
3762 /// II) Tell the world you are done with the output
3763 
3764 #ifdef _CP_DEBUG_STATE_GPP_VERBOSE_
3765  CkPrintf("GSpace[%d,%d] screenwrite: %d\n",thisIndex.x, thisIndex.y,iteration);
3766 #endif
3767 
3768 //----------------------------------------------------------------------------
3769  }//end routine
3770 
3771 
3772 
3773 
3774 ///////////////////////////////////////////////////////////////////////////////=
3775 /** \brief Once All chares have completed energy computation and reduction, this
3776  * storage routine is invoked. */
3777 /// Since eke is the last energy, its reduction client
3778 /// invokes this guy on chare (0,0). The routine then bcasts its results to the
3779 /// energy group (one per processor) thereby making information available on all
3780 /// procs for tolerence testing via a cklocal type deal.
3781 ///////////////////////////////////////////////////////////////////////////////=
3782 ///////////////////////////////////////////////////////////////////////////////c
3783 ///////////////////////////////////////////////////////////////////////////////=
3784 void CP_State_GSpacePlane::computeEnergies(int param, double d){
3785 ///////////////////////////////////////////////////////////////////////////////=
3786 
3787  switch(param){
3788 
3789  case ENERGY_EHART :
3790 #ifdef _CP_DEBUG_STATE_GPP_VERBOSE_
3791  CkPrintf("Received Hart\n");
3792 #endif
3793  ehart_total = d;
3794  total_energy += d;
3795  ecount++;
3796  break;
3797 
3798  case ENERGY_ENL :
3799 #ifdef _CP_DEBUG_STATE_GPP_VERBOSE_
3800  CkPrintf("Received Enl\n");
3801 #endif
3802  enl_total = d;
3803  total_energy += d;
3804  ecount++;
3805  break;
3806 
3807  case ENERGY_EKE :
3808 #ifdef _CP_DEBUG_STATE_GPP_VERBOSE_
3809  CkPrintf("Received Eke\n");
3810 #endif
3811  eke_total = d;
3812  total_energy += d;
3813  ecount++;
3814  break;
3815 
3816  case ENERGY_EGGA :
3817 #ifdef _CP_DEBUG_STATE_GPP_VERBOSE_
3818  CkPrintf("Received GGA\n");
3819 #endif
3820  egga_total = d;
3821  total_energy += d;
3822  ecount++;
3823  break;
3824 
3825  case ENERGY_EEXC :
3826 #ifdef _CP_DEBUG_STATE_GPP_VERBOSE_
3827  CkPrintf("Received EEXC\n");
3828 #endif
3829  eexc_total = d;
3830  total_energy += d;
3831  ecount++;
3832  break;
3833 
3834  case ENERGY_EEXT :
3835 #ifdef _CP_DEBUG_STATE_GPP_VERBOSE_
3836  CkPrintf("Received EEXT\n");
3837 #endif
3838  eext_total = d;
3839  total_energy += d;
3840  ecount++;
3841  break;
3842 
3843  case ENERGY_EWD :
3844 #ifdef _CP_DEBUG_STATE_GPP_VERBOSE_
3845  CkPrintf("Received EWD\n");
3846 #endif
3847  ewd_total = d;
3848  total_energy += d;
3849  ecount++;
3850  break;
3851 
3852  case ENERGY_FICTEKE :
3853 #ifdef _CP_DEBUG_STATE_GPP_VERBOSE_
3854  CkPrintf("Received FICTEKE\n");
3855 #endif
3856  fictEke_total = d;
3857  ecount++;
3858  break;
3859 
3860  case ENERGY_FMAG :
3861 #ifdef _CP_DEBUG_STATE_GPP_VERBOSE_
3862  CkPrintf("Received FMAG\n");
3863 #endif
3864  fmagPsi_total0 = d;
3865  ecount++;
3866  break;
3867 
3868  default :
3869  CkAbort("unknown energy");
3870  break;
3871  }//end switch
3872 
3873 ///////////////////////////////////////////////////////////////////////////////=
3874 /// if your debugging or natm_nl==0 you get fewer energies
3875 
3876  CPcharmParaInfo *sim = CPcharmParaInfo::get();
3877  int isub =0;
3878  int natm_nl = sim->natm_nl;
3879  if(natm_nl==0){isub++;}
3880 #ifdef _CP_DEBUG_SFNL_OFF_
3881  if(natm_nl!=0){isub++;}
3882 #endif
3883 #ifdef _CP_DEBUG_RHO_OFF_
3884  isub+=5;
3885 #endif
3886 #ifdef _CP_DEBUG_HARTEEXT_OFF_
3887 #ifndef _CP_DEBUG_RHO_OFF_
3888  isub+=3;
3889 #endif
3890 #endif
3891 
3892  if(thisInstance.idxU.y>0)
3893  { // you get no rho
3894  isub+=5;
3895  }
3896 
3897  int myid = CkMyPe();
3898 #ifdef _CP_DEBUG_STATE_GPP_VERBOSE_
3899  CkPrintf("ecount %d %d %d\n",ecount,NUM_ENERGIES-isub,myid);
3900 #endif
3901  if(ecount == NUM_ENERGIES-isub){
3902  EnergyStruct estruct;
3903  estruct.enl = enl_total;
3904  estruct.eke = eke_total;
3905  estruct.eext = eext_total;
3906  estruct.ehart = ehart_total;
3907  estruct.eewald_recip = ewd_total;
3908  estruct.egga = egga_total;
3909  estruct.eexc = eexc_total;
3910  estruct.fictEke = fictEke_total;
3911  estruct.totalElecEnergy = total_energy;
3912  estruct.fmagPsi = fmagPsi_total0;
3913  estruct.iteration_gsp = iteration;
3914 #ifdef _CP_DEBUG_STATE_GPP_VERBOSE_
3915  CkPrintf("Bcasting to energygrp %d\n",myid);
3916 #endif
3917  UegroupProxy[thisInstance.proxyOffset].updateEnergiesFromGS(estruct,thisInstance); // broadcast the electronic energies
3918  // so that all procs have them
3919  total_energy = 0.0;
3920  ecount = 0;
3921 
3922  }// got all the energies
3923 
3924 //-----------------------------------------------------------------------------
3925  }// end routine : computenergies
3926 ///////////////////////////////////////////////////////////////////////////////=
3927 
3928 
3929 
3930 
3931 ///////////////////////////////////////////////////////////////////////////////=
3932 /** \brief Test does ReadFile() parse psi(g) and g-vectors properly */
3933 ///////////////////////////////////////////////////////////////////////////////=
3934 ///////////////////////////////////////////////////////////////////////////////c
3935 ///////////////////////////////////////////////////////////////////////////////=
3936 void testeke(int ncoef,complex *psi_g,int *k_x,int *k_y,int *k_z, int iflag,int index)
3937 ///////////////////////////////////////////////////////////////////////////////=
3938  {//begin routine
3939 ///////////////////////////////////////////////////////////////////////////////=
3940 
3941 /// Local pointers
3942 
3943  GENERAL_DATA *general_data = GENERAL_DATA::get();
3944  CP *cp = CP::get();
3945 
3946 #include "../class_defs/allclass_strip_gen.h"
3947 #include "../class_defs/allclass_strip_cp.h"
3948 
3949  double gx, gy, gz, g2;
3950  double *hmati = gencell->hmati;
3951  double ecut = cpcoeffs_info->ecut_psi; // KS-state cutoff in Ryd
3952  double tpi = 2.0*M_PI;
3953  double wght = 2.0;
3954  double norm = 0.0;
3955  double norm2 = 0.0;
3956  double eke = 0.0;
3957  double eke2 = 0.0;
3958 
3959 ///////////////////////////////////////////////////////////////////////////////=
3960 
3961 /// Compute some eke
3962 
3963  double eke_i[1000];
3964  int kxmax, kxmin;
3965  for(int i=0; i<1000; i++){eke_i[i]=0.0;}
3966 
3967  kxmax=-10000;
3968  kxmin=100000;
3969  for(int i = 0; i < ncoef; i++){
3970  kxmax=(kxmax < k_x[i] ? k_x[i] : kxmax);
3971  kxmin=(kxmin > k_x[i] ? k_x[i] : kxmin);
3972  }//endfor
3973 
3974  for(int i = 0; i < ncoef; i++){
3975 
3976  gx = tpi*(k_x[i]*hmati[1] + k_y[i]*hmati[2] + k_z[i]*hmati[3]);
3977  gy = tpi*(k_x[i]*hmati[4] + k_y[i]*hmati[5] + k_z[i]*hmati[6]);
3978  gz = tpi*(k_x[i]*hmati[7] + k_y[i]*hmati[8] + k_z[i]*hmati[9]);
3979  g2 = gx*gx + gy*gy + gz*gz;
3980 
3981  if(g2<=ecut){
3982  double wght_now = 1.0;
3983  if(config.doublePack){
3984  wght_now=2.0;
3985  if(k_x[i]==0 && k_y[i]<0){wght_now=0.0;}
3986  if(k_x[i]==0 && k_y[i]==0 && k_z[i]<0){wght_now=0.0;}
3987  if(k_x[i]==0 && k_y[i]==0 && k_z[i]==0){wght_now=1.0;}
3988  }//endif
3989  eke_i[k_x[i]-kxmin]+=(wght_now*g2)*psi_g[i].getMagSqr();
3990  eke += (wght_now*g2)*psi_g[i].getMagSqr();
3991  norm += (wght_now)*psi_g[i].getMagSqr();
3992  wght_now = (k_x[i]==0 ? 1.0 : wght);
3993  eke2 += (wght_now*g2)*psi_g[i].getMagSqr();
3994  norm2 += (wght_now)*psi_g[i].getMagSqr();
3995  }else{
3996  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
3997  CkPrintf("Why the cutoff\n");
3998  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
3999  CkExit();
4000  }//endif
4001 
4002  }/* endfor */
4003 
4004  eke/=2.0;
4005  eke2/=2.0;
4006 
4007  // if(index==0){
4008  // CkPrintf("hmati :");
4009  // for(int i=1;i<=9;i++){CkPrintf(" %g",hmati[i]);}
4010  // CkPrintf("\n");
4011  //}/*endif*/
4012 
4013  CkPrintf("%.12g %.12g %.12g %.12g: %d : eke\n",eke,eke2,norm,norm2,index);
4014 
4015  /****************************************
4016  FILE* fp;
4017  char junk[1000];
4018  sprintf(junk,"eke.%d.out",index);
4019  fp=fopen(junk,"w");
4020  fprintf(fp,"%.12g %.12g: %d : eke\n",eke,norm,index);
4021  double sum=0;
4022  for(int i=0; i<kxmax-kxmin+1; i++){
4023  fprintf(fp,"%d %.12g\n",i+kxmin,eke_i[i]);
4024  sum += eke_i[i];
4025  }//endfor
4026  fprintf(fp,"%.12g %.12g: %d : eke\n",eke,sum/2,index);
4027  fclose(fp);
4028  ***********************************/
4029 
4030 //-----------------------------------------------------------------------------
4031  }// end routine : testeke
4032 ///////////////////////////////////////////////////////////////////////////////=
4033 
4034 
4035 
4036 ///////////////////////////////////////////////////////////////////////////////=
4037 
4038 /// RDMA routines for ibverbs CmiDirect optimization
4039 
4040 ///////////////////////////////////////////////////////////////////////////////=
4041 ///////////////////////////////////////////////////////////////////////////////c
4042 ///////////////////////////////////////////////////////////////////////////////=
4043 #include <sstream>
4044 
4046 {
4047  #ifndef PC_USE_RDMA
4048  CkPrintf("GSpace[%d,%d] aborting because someone called an RDMA method when it has been turned off\n",thisIndex.x,thisIndex.y);
4049  CkAbort("GSpace aborting RDMA setup completion");
4050  #else
4051 
4052  /// Retrieve the handshake token and the rdma handle from the message
4053  RDMApair_GSP_PC token = msg->token();
4054  rdmaHandleType ourHandle = msg->handle();
4055 #ifdef DEBUG_CP_PAIRCALC_RDMA
4056  std::stringstream dbgStr;
4057  dbgStr<<token;
4058  CkPrintf("%s : Received RDMA setup confirmation from paircalc. Now have %d handles of %d (%d symm + %d asymm)\n", dbgStr.str().c_str(),
4059  thisIndex.x,thisIndex.y, gotHandles+1, numRDMAlinksSymm+numRDMAlinksAsymm, numRDMAlinksSymm, numRDMAlinksAsymm );
4060 #endif
4061  /// Determine which loop (symm/Asymm) this PC that has sent setup confirmation, belongs to
4063  if (token.symmetric)
4064  pcMgr = &symmPCmgr;
4065  else
4066  pcMgr = &asymmPCmgr;
4067 
4068  /// Compute the location and amount of data to be sent
4069  int chunkSize= gs.numPoints / pcMgr->pcCfg.numChunks;
4070  int offset = token.pcIndex.z * chunkSize;
4071  int dataSize = chunkSize;
4072  /// The last chunk of data should get whatever is remaining
4073  if( (pcMgr->pcCfg.numChunks > 1) && (token.pcIndex.z == pcMgr->pcCfg.numChunks-1) )
4074  dataSize += gs.numPoints % pcMgr->pcCfg.numChunks;
4075 
4076  /// If the PC should be sent left matrix data ...
4077  if (token.shouldSendLeft)
4078  {
4079  /// Stuff the location of the data to be sent into the handle
4080  CmiDirect_assocLocalBuffer(&ourHandle,&(gs.packedPlaneData[offset]),dataSize*sizeof(complex));
4081  /// Store the rdma handle in the appropriate array
4082  pcMgr->leftDestinationHandles.push_back(ourHandle);
4083  }
4084  /// ... else if the PC should be sent right matrix data
4085  else
4086  {
4087  /// Stuff the location of the data to be sent into the handle
4088  if (token.symmetric)
4089  CmiDirect_assocLocalBuffer(&ourHandle,&(gs.packedPlaneData[offset]),dataSize*sizeof(complex));
4090  else
4091  CmiDirect_assocLocalBuffer(&ourHandle,&(gs.packedForceData[offset]),dataSize*sizeof(complex));
4092  /// Store the rdma handle in the appropriate array
4093  pcMgr->rightDestinationHandles.push_back(ourHandle);
4094  }
4095 
4096 #ifdef DEBUG_CP_PAIRCALC_RDMA
4097  CkPrintf("%s : Will RDMA-put %d units of data at an offset of %d units from %p on proc %d to %p on proc %d\n",
4098  dbgStr.str().c_str(), dataSize, offset,
4099  (!token.shouldSendLeft && !token.symmetric)? &(gs.packedForceData) : &(gs.packedPlaneData), ourHandle.senderNode,
4100  ourHandle.recverBuf,ourHandle.recverNode);
4101 #endif
4102 
4103  /// Call a reduction that signals the end of the initialization phase to main
4104  if(++gotHandles == numRDMAlinksSymm + numRDMAlinksAsymm)
4105  {
4106 #ifdef DEBUG_CP_PAIRCALC_RDMA
4107  CkPrintf("GSpace[%d,%d] received RDMA setup confirmation from all %d PCs (%d symm + %d asymm) I was expecting. Triggering reduction to indicate end of init phase.\n",
4108  thisIndex.x,thisIndex.y,gotHandles,numRDMAlinksSymm,numRDMAlinksAsymm);
4109 #endif
4110  int i=1;
4111  CkCallback cbDoneInit = CkCallback(CkIndex_InstanceController::doneInit(NULL),CkArrayIndex1D(thisInstance.proxyOffset),instControllerProxy);
4112  contribute(sizeof(int), &i, CkReduction::sum_int, cbDoneInit, thisInstance.proxyOffset);
4113  }
4114  delete msg;
4115  #endif // PC_USE_RDMA
4116 }
4117 /*@}*/
4118 #include "gStatePlane.def.h"
4119 
void contributeFileOutput()
In this function data is written to files the simpliest way possible.
void unpackFileOutput(GStateOutMsg *msg)
void acceptPairCalcAIDs(pcSetupMsg *msg)
void pup(PUP::er &)
= data
Definition: stateSlab.C:115
holds the UberIndex and the offset for proxies
Definition: Uber.h:68
void sendRedPsi()
send red psi
void unpackNewPsiV(CkReductionMsg *msg)
void doStpFFTRtoG_Gchare(complex *, complex *, int, int, int, int, RunDescriptor *, int, int)
StatePlane : Gchare : data(gx,gy,z) -> data(gx,gy,gz) : backward ////////////////////////////////////...
Definition: fftCache.C:1135
void initGStateSlab(GStateSlab *gs, int sizeX, int sizeY, int sizeZ, int gSpaceUnits, int realSpaceUnits, int s_grain, int iplane_ind, int istate_ind, int len_nhc_cp, int num_nhc_cp, int nck_nhc_cp)
= slab initialization helpers
Definition: stateSlab.C:35
PCCommManager asymmPCmgr
Manages communication with the asymmetric paircalc array.
void readFile()
In this function data is read from files, and sent to the corresponding G-space planes.
void launchOrthoT()
= dynamics triggers send of orthoT to asymm calc when psi is done
void doneRedPsiIntegrate()
When all red psi have arrived, complete their integration.
void setKRange(int, int *, int *, int *)
Definition: stateSlab.C:287
void completeRDMAhandshake(RDMASetupConfirmationMsg< RDMApair_GSP_PC > *msg)
Gets called from the PairCalc data receivers to confirm the setup of an RDMA link.
void computeCgOverlap()
If minimization : compute lambda matrix (Lagrange multipliers) modify forces using the lambda matrix ...
void unpackRedPsiV(GSRedPsiMsg *msg)
CProxySection_PairCalculator makeOneResultSection_asym(int chunk)
Initialize an array section that is used to reduce the results from the PCs back to the GSP chares...
int istate_ind
My state index.
CProxy_Ortho myOrtho
A proxy for the my ortho chare array so I can interact with it.
CkGroupID mCastGrpId
Multicast manager group that handles many mcast/redns in the code. Grep for info. ...
Definition: cpaimd.C:216
void doNewPsi()
All Psi have arrived, finish the new Psi process.
void psiCgOvlap(CkReductionMsg *msg)
Entry method to resume execution after computing reduction over all planes and states to form psiCgOv...
void screenOutputPsi(int)
output a few select psi values for post processing validation
A place to collect substep times.
Add type declarations for simulationConstants class (readonly vars) and once class for each type of o...
void sendLambda()
After my CP forces have arrived : sendLambda.
int nfreq_cpintegrate
CPINTEGRATE::CP_integrate_min_STD, CPINTEGRATE::CP_integrate_min_CG.
Definition: configure.h:207
paircalc::CreationManager returns relevant chare array handles via this msg
CProxySection_PairCalculator makeOneResultSection_sym1(int chunk)
initialize plane and row wise section reduction for psi->gspace
#define IntegrateModForces_
200-300 reserved for paircalculator
Definition: cpaimd.h:676
int grainSize
The grain size along the states dimensions (plural) (number of states per PC chare) ...
Definition: pcConfig.h:45
void testeke(int, complex *, int *, int *, int *, int, int)
Test does ReadFile() parse psi(g) and g-vectors properly.
== Index logic for lines of constant x,y in gspace.
Definition: RunDescriptor.h:22
void doLambda()
When all the inputs have arrived, complete the lambda computation.
void unpackLambda(CkReductionMsg *msg)
void doneRedPsiVIntegrate()
when all red psi velocities are in, integrate
void unpackRedPsi(GSRedPsiMsg *msg)
=
cp::paircalc::pcConfig pcCfg
Input configurations for the paircalcs.
Definition: pcCommManager.h:77
Some basic data structures and the array map classes are defined here.
int iplane_ind
My plane index.
void sendPsiV()
send psi velocities
CProxySection_PairCalculator makeOneResultSection_sym2(int chunk)
initialize plane and column wise section reduction for psi->gspace
int nfreq_cpnonlocal_eke
CPNONLOCAL::CP_eke_calc.
Definition: configure.h:211
void readState(int nPacked, complex *arrCP, const char *fromFile, int ibinary_opt, int *nline_tot_ret, int *nplane_ret, int *kx, int *ky, int *kz, int *nx_ret, int *ny_ret, int *nz_ret, int *istrt_lgrp, int *iend_lgrp, int *npts_lgrp, int *nline_lgrp, int iget_decomp, int iget_vstate)
Definition: util.C:1108
Based on whether RDMA is enabled, the handle type is either the actual handle or just an empty struct...
Definition: RDMAMessages.h:16
void unpackNewPsi(CkReductionMsg *msg)
void sendLeftData(int numPoints, complex *ptr, bool psiV)
Starts the forward path work (Psi, Lambda and PsiV cases) by multicasting an entry method call to the...
CProxySection_PairCalculator makeOneResultSection_asym_column(int chunk)
initialize plane and column wise section reduction for lambda->gspace
void launchAtoms()
Atoms are launched by allDoneCpForces when all after ALL planes and states have reported.
void sendFFTData()
Send result to realSpacePlane : perform the transpose Force data cannot be overwritten due to all to ...
Manages communication with a single paircalc array.
Definition: pcCommManager.h:31
A (hopefully) tiny token that is unique to every data sender-receiver pair, and is shared by them dur...
Definition: RDMAMessages.h:94
Group Container class : Only allowed chare data classes have data.
Definition: eesCache.h:18
void computeEnergies(int p, double d)
Once All chares have completed energy computation and reduction, this storage routine is invoked...
void doNewPsiV()
process all the PsiV data
void sendPsi()
Send the psi out to symm PC.
PCCommManager symmPCmgr
Manages communication with the symmetric paircalc array.
void integrateModForce()
Integrate the forces.
Reply from data receiver to the data sender indicating completion of setup on the receiver side...
int numRDMAlinksSymm
The number of symmetric and asymmetric PCs that communicate with me.
void initGSpace(int, complex *, int, complex *, int, int, int, int, int, int, int)
This method is used to accept the state data from some initializing routine.
void doFFT()
This method is used to start the forward ffts in the CP_State_GSpacePlanes.
void unpackIFFT(GSIFFTMsg *)
Entry Method. This is used to receive data from all the corresponding RealSpacePlanes, upon which the inverse FFTs are triggered.
void processState(int nPacked, int nktot, complex *arrCP, const char *fromFile, int ibinary_opt, int *nline_tot_ret, int *nplane_ret, int *kx, int *ky, int *kz, int *istrt_lgrp, int *iend_lgrp, int *npts_lgrp, int *nline_lgrp, int iget_decomp, int iget_vstate, int iopt, int ny)
Definition: util.C:1332
void sendRightData(int numPoints, complex *ptr, bool psiV)
Starts the forward path work (along with startPairCalcLeft()) in the asymmetric (Lambda) case...
CP_State_GSpacePlane(int, int, int, int, int, int, UberCollection)
Constructor for GSpacePlane.