OpenAtom  Version1.5a
CP_Rho_RealSpacePlane.C
Go to the documentation of this file.
1 //////////////////////////////////////////////////////////////////////////////
2 //////////////////////////////////////////////////////////////////////////////
3 //////////////////////////////////////////////////////////////////////////////
4 /** \file CP_Rho_RealSpacePlane.C
5  *
6  * @defgroup Density Density
7  *
8  * \brief Computes electron density in real space, (exchange correlation energy) for transforming to
9  * CP_Rho_GSpacePlane which will utilize CP_Rho_GHartExt and
10  * CP_Rho_RHartExt (Hartree and external energies). Each plane
11  * may be further subdivided into subplanes at runtime for additional
12  * parallelism.
13  *
14  * This is the description of the "life" of a CP_Rho_RealSpacePlane object.
15  *
16  * At the start of the program, the constructor CP_Rho_RealSpacePlane() is called.
17  *
18  * The CP_State_RealSpacePlanes send the squared magnitudes of the psi_r values
19  * using the acceptData() method. The squared magnitudes are summed across states.
20  * A copy of this data is made, inverse fft is done in the z and x directions
21  * and sent to rhoGDensity. The other copy is processed using CP_exc_calc.
22  * Then the CP_Rho_RealSpacePlane object waits for a reply from the RhoGDensity
23  * object.
24  *
25  * The reply from RhoGDensity comes in the form of the method
26  * acceptDensityForSumming(). The data obtained from this reply is taken and
27  * forward fft in z and x directions is performed. The resulting data is
28  * summed with the result of CP_exc_calc. The sum is sent to the
29  * CP_State_RealSpacePlane objects.
30  */
31 //////////////////////////////////////////////////////////////////////////////
32 
33 #include "charm++.h"
34 #include <iostream>
35 #include <fstream>
36 #include <cmath>
37 
38 #include "debug_flags.h"
39 #include "utility/util.h"
40 #include "main/cpaimd.h"
43 #include "src_piny_physics_v1.0/include/class_defs/CP_OPERATIONS/class_cpxcfnctls.h"
44 
45 //#ifndef USE_COMLIB
46 //#define USE_COMLIB 1
47 //#endif
48 
49 //////////////////////////////////////////////////////////////////////////////
50 extern CProxy_TimeKeeper TimeKeeperProxy;
51 extern CkVec <CProxy_CP_State_RealSpacePlane> UrealSpacePlaneProxy;
52 extern CkVec <CProxy_CP_State_RealParticlePlane> UrealParticlePlaneProxy;
53 extern CkVec <CProxy_CP_Rho_RealSpacePlane> UrhoRealProxy;
54 extern CkVec <CProxy_CP_Rho_GSpacePlane> UrhoGProxy;
55 extern CkVec <CProxy_FFTcache> UfftCacheProxy;
56 extern CkVec <CProxy_CP_Rho_RHartExt> UrhoRHartExtProxy;
57 extern CkVec <CProxy_GSpaceDriver> UgSpaceDriverProxy;
58 
59 extern ComlibInstanceHandle commRealInstance;
60 extern ComlibInstanceHandle commRealIGXInstance;
61 extern ComlibInstanceHandle commRealIGYInstance;
62 extern ComlibInstanceHandle commRealIGZInstance;
63 extern CkVec <ComlibInstanceHandle> mcastInstance;
64 extern CkGroupID mCastGrpId;
65 
66 extern Config config;
67 extern int nstates;
68 
69 bool is_pow2(int );
70 
71 
72 //#define _CP_RHO_RSP_VERBOSE_OFF
73 
74 /** @addtogroup Density
75  @{
76 */
77 //////////////////////////////////////////////////////////////////////////////
78 //////////////////////////////////////////////////////////////////////////////
79 //////////////////////////////////////////////////////////////////////////////
80 ///
81 /// This class (array) accepts the real space densities from all the states
82 /// Performs lots of operations to get exc, eext energies and vks
83 ///
84 //////////////////////////////////////////////////////////////////////////////
85 CP_Rho_RealSpacePlane::CP_Rho_RealSpacePlane(int xdim, bool _useCommlib,
86  int _ees_eext_on,int _ngridcEext,
87  int _rhokeeperid,
88  UberCollection _instance) :
89  thisInstance(_instance)
90 //////////////////////////////////////////////////////////////////////////////
91  {//begin routine
92 //////////////////////////////////////////////////////////////////////////////
93 
94 #ifdef _CP_DEBUG_RHOR_VERBOSE_
95  CkPrintf("[%d %d] RhoR constructs \n",thisIndex.x, thisIndex.y);
96 #endif
97 
98 //////////////////////////////////////////////////////////////////////////////
99 /// Get parameters from the globals/groups
100 
101  CPcharmParaInfo *sim = CPcharmParaInfo::get();
102 
103  ngridcEext = _ngridcEext; // FFT sizes for rharteext
104  ngrida = sim->sizeX; // FFT sizes for rho
105  ngridb = sim->sizeY;
106  ngridc = sim->sizeZ;
107  nplane_rho_x = sim->nplane_rho_x; // gx_max
108  listSubFlag = sim->listSubFlag;
109 
110  iplane_ind = thisIndex.y*ngridc + thisIndex.x;
111 
112  rhoRsubplanes = config.rhoRsubplanes;
113  CkAssert(nplane_rho_x >= rhoRsubplanes); // safety : should already be checked.
114 
115  cp_grad_corr_on = sim->cp_grad_corr_on;
116  ees_eext_on = _ees_eext_on;
117  rhoKeeperId = _rhokeeperid;
118 
119  double vol = sim->vol;
120  int numFFT = config.numFFTPoints;
121  FFTscale = 1.0/((double)numFFT); // these are based on the full size
122  volumeFactor = vol*FFTscale;
123  probScale = 1.0/vol;
124  redCount =0;
125  RedMsg=NULL;
126 //////////////////////////////////////////////////////////////////////////////
127 /// Compute number of messages to be received
128 
129  int nchareRhoG=sim->nchareRhoG;
130  if(rhoRsubplanes>1){
131  recvCountFromGRho = 0;
132  for(int i=0;i<nchareRhoG;i++){
133  if(sim->nline_send_rho_y[i][thisIndex.y]>0){recvCountFromGRho++;}
134  }//endfor
135  }else{
136  recvCountFromGRho=nchareRhoG;
137  }//endif
138 
139  int nchareRhoGEext=sim->nchareRhoGEext;
140  if(rhoRsubplanes>1){
141  recvCountFromGHartExt = 0;
142  for(int i=0;i<nchareRhoGEext;i++){
143  if(sim->nline_send_eext_y[i][thisIndex.y]>0)recvCountFromGHartExt++;
144  }//endfor
145  }else{
146  recvCountFromGHartExt=nchareRhoGEext;
147  }//endif
148 
149 //////////////////////////////////////////////////////////////////////////////
150 /// Parallelization
151 
152  // Before transpose : rho(x,y,z) : parallelize y and z
153  int div = (ngridb/rhoRsubplanes);
154  int rem = (ngridb % rhoRsubplanes);
155  int max = (thisIndex.y < rem ? thisIndex.y : rem);
156  myNgridb = (thisIndex.y<rem ? div+1 : div); // number of y values/lines of x
157  myBoff = div*thisIndex.y + max; // offset into y
158  nptsB = ngrida*myNgridb; // size of plane without extra room
159  nptsExpndB = (ngrida+2)*myNgridb; // extra memory for RealToComplex FFT
160 
161  // After transpose : rho(gx,y,z) : parallelize gx and z
162  if(rhoRsubplanes>1){
163  myNplane_rho = sim->numSubGx[thisIndex.y];
164  }else{
165  myNplane_rho = nplane_rho_x;
166  }//endif
167  nptsA = 2*myNplane_rho*ngridb; // memory size fft in doubles
168  nptsExpndA = 2*myNplane_rho*ngridb; // memory size fft in doubles
169 
170 //////////////////////////////////////////////////////////////////////////////
171 /// Set up the data class : mallocs rho,gradrho, etc.
172 
173  initRhoRealSlab(&rho_rs,ngrida,myNgridb,ngridc,myNplane_rho,ngridb,
174  thisIndex.x,thisIndex.y,rhoRsubplanes);
175 
176 //////////////////////////////////////////////////////////////////////////////
177 /// Initialize counters, set booleans.myTime
178 
179  myTime = 0;
180  countDebug = 0; // does nothing in the working code.
181  countWhiteByrd = 0;
182  doneGradRhoVks = 0;
183  countRHart = 0;
184  countFFTRyToGy = 0;
185 
186  countRHartValue = 1; if(thisIndex.x<(ngridcEext-rho_rs.sizeZ)){countRHartValue=2;}
187  countRHartValue*=(config.nchareHartAtmT);
188 
189  doneHartVks = false;
190  doneRHart = false;
191  doneWhiteByrd = false;
192  for(int i=0;i<5;i++){countGradVks[i]=0;}///trls bkc-transpose rho(gx,gy,z):gxy->gx/z
193  for(int i=0;i<5;i++){countIntGtoR[i]=0;}///trls bkc-int-transpose rho(gx,y,z):gx/z->yz
194  for(int i=0;i<5;i++){countIntRtoG[i]=0;}///trls fwd-int-transpose rho(gx,y,z):yz->gx/z
195 
196 
197 //////////////////////////////////////////////////////////////////////////////
198 /// Migration
199 
200  usesAtSync = true;
201  // if(config.lbdensity){
202  // setMigratable(true);
203  // }else{
204  setMigratable(false);
205  // }//endif
206 
207 //////////////////////////////////////////////////////////////////////////////
208  }//end routine
209 //////////////////////////////////////////////////////////////////////////////
210 
211 /** post constructor initialization */
212 //////////////////////////////////////////////////////////////////////////////
213 //////////////////////////////////////////////////////////////////////////////
214 //////////////////////////////////////////////////////////////////////////////
216 
217 /// make sections in the realSpacePlane array. These will be used when
218 /// computing real-space densities and multicasting v_ks values
219 
220  CkMulticastMgr *mcastGrp = CProxy_CkMulticastMgr(mCastGrpId).ckLocalBranch();
221  CkArrayIndexMax *elems = new CkArrayIndexMax[nstates];
222 
223  int j;
224  // section i has al the portions with all
225  CkArrayIndex2D idx(0, thisIndex.x);
226  if(is_pow2(nstates)){
227  for (j = 0; j < nstates; j++) {
228  idx.index[0] = j^((thisIndex.x+thisIndex.y)%nstates);
229  elems[j] = idx;
230  }//endfor
231  }else{
232  for (j = 0; j < nstates; j++) {
233  idx.index[0] = (j+thisIndex.x+thisIndex.y)%nstates;
234  elems[j] = idx;
235  }//endfor
236  }//endif
237 
238  // we need one RS proxy for each K-point until the cross proxies with
239  // reductions work correctly
240  UberCollection RhoReductionSource=thisInstance;
241  realSpaceSectionProxyA= new CProxySection_CP_State_RealSpacePlane[config.UberJmax];
242  realSpaceSectionCProxyA= new CProxySection_CP_State_RealSpacePlane[config.UberJmax];
243  for(int kp=0;kp<config.UberJmax;kp++)
244  {
245 
246  RhoReductionSource.idxU.y=kp; // not at the gamma point
247  RhoReductionSource.setPO();
248  realSpaceSectionProxyA[kp] = CProxySection_CP_State_RealSpacePlane::
249  ckNew(UrealSpacePlaneProxy[RhoReductionSource.proxyOffset].ckGetArrayID(), elems, nstates);
250 
251  realSpaceSectionCProxyA[kp] = CProxySection_CP_State_RealSpacePlane::
252  ckNew(UrealSpacePlaneProxy[RhoReductionSource.proxyOffset].ckGetArrayID(), elems, nstates);
253 
254  realSpaceSectionProxyA[kp].ckDelegate
255  (CProxy_CkMulticastMgr(mCastGrpId).ckLocalBranch());
256  mcastGrp->setSection(realSpaceSectionProxyA[kp]);
257 
258 #ifdef USE_COMLIB
259  ComlibAssociateProxy(mcastInstance[kp],realSpaceSectionCProxyA[kp]);
260 #endif
261  ProductMsg *dummyProductMessage = new (0) ProductMsg;
262  // inform realspace element of this section proxy.
263  dummyProductMessage->subplane=thisIndex.y;
264  realSpaceSectionProxyA[kp].init(dummyProductMessage);
265  }
266  delete [] elems;
267  rhoGProxy_com = UrhoGProxy[thisInstance.proxyOffset];
268  rhoGProxyIGX_com = UrhoGProxy[thisInstance.proxyOffset];
269  rhoGProxyIGY_com = UrhoGProxy[thisInstance.proxyOffset];
270  rhoGProxyIGZ_com = UrhoGProxy[thisInstance.proxyOffset];
271 
272 #ifdef USE_COMLIB
273  if (config.useRInsRhoGP)
274  ComlibAssociateProxy(commRealInstance,rhoGProxy_com);
275  if (config.useRInsIGXRhoGP)
276  ComlibAssociateProxy(commRealIGXInstance,rhoGProxyIGX_com);
277  if (config.useRInsIGYRhoGP)
278  ComlibAssociateProxy(commRealIGYInstance,rhoGProxyIGY_com);
279  if (config.useRInsIGZRhoGP)
280  ComlibAssociateProxy(commRealIGZInstance,rhoGProxyIGZ_com);
281 #endif
282 
283 }
284 
285 //////////////////////////////////////////////////////////////////////////////
286 //////////////////////////////////////////////////////////////////////////////
287 //////////////////////////////////////////////////////////////////////////////
288 CP_Rho_RealSpacePlane::~CP_Rho_RealSpacePlane(){
289 }
290 //////////////////////////////////////////////////////////////////////////////
291 
292 
293 //////////////////////////////////////////////////////////////////////////////
294 //////////////////////////////////////////////////////////////////////////////
295 //////////////////////////////////////////////////////////////////////////////
296 /// Pup my variables for migration
297 //////////////////////////////////////////////////////////////////////////////
298 void CP_Rho_RealSpacePlane::pup(PUP::er &p){
299  ArrayElement2D::pup(p);
300  p|listSubFlag;
301  p|myTime;
302  p|nplane_rho_x;
303  p|ngrida;
304  p|myNgridb;
305  p|myNplane_rho;
306  p|ngridb;
307  p|ngridc;
308  p|iplane_ind;
309  p|nptsExpndA;
310  p|nptsExpndB;
311  p|nptsA;
312  p|nptsB;
313  p|ees_eext_on;
314  p|ngridcEext;
315  p|cp_grad_corr_on;
316  p|FFTscale;
317  p|volumeFactor;
318  p|probScale;
319  p|rhoGHelpers;
320  p|doneGradRhoVks;
321  p|countWhiteByrd;
322  p|doneWhiteByrd;
323  p|doneHartVks;
324  p|countDebug;
325  p|rhoRsubplanes;
326 
327  PUParray(p,countGradVks,5);
328  PUParray(p,countIntGtoR,5);
329  PUParray(p,countIntGtoR,5);
330 
331  rho_rs.pup(p); // pup your data class
332 
333  // Pupping Proxies???
334  PUParray(p,realSpaceSectionProxyA, config.UberJmax);
335  PUParray(p,realSpaceSectionCProxyA, config.UberJmax);
336  if(p.isUnpacking()){
337  rhoGProxy_com = UrhoGProxy[thisInstance.proxyOffset];
338  rhoGProxyIGX_com = UrhoGProxy[thisInstance.proxyOffset];
339  rhoGProxyIGY_com = UrhoGProxy[thisInstance.proxyOffset];
340  rhoGProxyIGZ_com = UrhoGProxy[thisInstance.proxyOffset];
341 
342 #ifdef USE_COMLIB
343  if (config.useRInsRhoGP)
344  ComlibAssociateProxy(commRealInstance,rhoGProxy_com);
345  if (config.useRInsIGXRhoGP)
346  ComlibAssociateProxy(commRealIGXInstance,rhoGProxyIGX_com);
347  if (config.useRInsIGYRhoGP)
348  ComlibAssociateProxy(commRealIGYInstance,rhoGProxyIGY_com);
349  if (config.useRInsIGZRhoGP)
350  ComlibAssociateProxy(commRealIGZInstance,rhoGProxyIGZ_com);
351 #endif
352 
353  }//endif
354 
355 //---------------------------------------------------------------------------
356  }//end routine
357 //////////////////////////////////////////////////////////////////////////////
358 
359 
360 //////////////////////////////////////////////////////////////////////////////
361 //////////////////////////////////////////////////////////////////////////////
362 //////////////////////////////////////////////////////////////////////////////
363 ///
364 /// Data comes from StateRspacePlane once an algorithm step.
365 /**
366  * Here the density from all the states is added up. The data from all the
367  * states is received via an array section reduction. Nothing happens in this
368  * chare until the density arrives.
369  *
370  * If we're not at the gamma point, there will be UberJmax of these.
371  * Otherwise there will be only 1.
372  */
373 //////////////////////////////////////////////////////////////////////////////
374 void CP_Rho_RealSpacePlane::acceptDensity(CkReductionMsg *msg) {
375 //////////////////////////////////////////////////////////////////////////////
376 
377 #ifdef _CP_DEBUG_RHOR_VERBOSE_
378  CkPrintf("RhoReal accepting Density %d %d %d\n",
379  thisIndex.x,thisIndex.y,CkMyPe());
380 #endif
381 
382 #ifdef _NAN_CHECK_
383  for(int i=0;i<msg->getSize()/sizeof(double) ;i++){
384  CkAssert(isnan(((double*) msg->getData())[i])==0);
385  }//endif
386 #endif
387  redCount++;
388  if(redCount==1){
389  RedMsg=msg; // This guy is deleted in the next routine
390  }else{
391  if(redCount<=config.UberJmax){
392  // there is one per k-point we'll sum them into the first
393  // message we receive
394  double *indata=(double*) msg->getData();
395  double *outdata=(double*) RedMsg->getData();
396  int size=msg->getSize()/sizeof(double);
397  for(int i=0;i<size ;i++)
398  outdata[i]+=indata[i];
399  delete(msg);
400  }//endif
401  }//endif
402 
403  if(redCount==config.UberJmax){
404  redCount=0;
406  }//endif
407 
408 //--------------------------------------------------------------------------
409  }//end routine
410 //////////////////////////////////////////////////////////////////////////////
411 
412 
413 //////////////////////////////////////////////////////////////////////////////
414 /// Handle the memory cleanup and setting of flags when density has
415 /// all arrived
416 //////////////////////////////////////////////////////////////////////////////
418 //////////////////////////////////////////////////////////////////////////////
419 
420 #ifdef _CP_SUBSTEP_TIMING_
421  if(rhoKeeperId>0){
422  double rhostart=CmiWallTimer();
423  CkCallback cb(CkIndex_TimeKeeper::collectStart(NULL),0,TimeKeeperProxy);
424  contribute(sizeof(double),&rhostart,CkReduction::min_double, cb ,rhoKeeperId);
425  }//endif
426 #endif
427 
428 //////////////////////////////////////////////////////////////////////////////
429 /// Set the flags : you are not done unless certain conditions apply.
430 
431  myTime++;
432  doneHartVks = false;
433  doneWhiteByrd = false;
434  doneRHart = false;
435  if(cp_grad_corr_on==0){doneWhiteByrd = true;}
436  if(ees_eext_on==0) {doneRHart = true;}
437 
438 #ifdef _CP_DEBUG_HARTEEXT_OFF_
439  doneHartVks = true;
440  doneRHart = true;
441 #endif
442 
443 //////////////////////////////////////////////////////////////////////////////
444 /// Unpack into spread out form and delete the message
445 
446  double *realValues = (double *) RedMsg->getData();
447  double *density = rho_rs.density;
448  CkAssert(RedMsg->getSize() == nptsB * sizeof(double));
449 
450  rho_rs.uPackScaleGrow(density,realValues,probScale);
451 
452  delete RedMsg;
453 
454 //////////////////////////////////////////////////////////////////////////////
455 /// If debugging, generate output!
456 
457 #ifdef _CP_DEBUG_RHOR_RHO_
458  char myFileName[MAX_CHAR_ARRAY_LENGTH];
459  sprintf(myFileName, "Rho_Real_%d_%d.out", thisIndex.x,thisIndex.y);
460  FILE *fp = fopen(myFileName,"w");
461  for (int i = 0; i <nptsB; i++){
462  fprintf(fp,"%g\n",realValues[i]*probScale);
463  }//endfor
464  fclose(fp);
465 #endif
466 
467 //////////////////////////////////////////////////////////////////////////////
468 /// Compute the exchange correlation energy (density no-grad part)
469 
471 
472 //////////////////////////////////////////////////////////////////////////////
473 /// 2nd Launch real-space external-hartree and the G-space non-local
474 /// The energy comp through RhoG is the more expensive critical path.
475  launchEextRNlG();
476 
477 
478 //////////////////////////////////////////////////////////////////////////////
479  }//end routine
480 //////////////////////////////////////////////////////////////////////////////
481 
482 
483 //////////////////////////////////////////////////////////////////////////////
484 //////////////////////////////////////////////////////////////////////////////
485 //////////////////////////////////////////////////////////////////////////////
486 ///
487 /// The density is here : Launch ees NL and ees Ext routines
488 ///
489 /// Do this once an algorithm step
490 ///
491 //////////////////////////////////////////////////////////////////////////////
493 //////////////////////////////////////////////////////////////////////////////
494 /// Launch the external energy computation in r-space :
495 /// rhart has the same rhoRsubplanes for simplicity.
496 
497 #ifndef _CP_DEBUG_HARTEEXT_OFF_
498  if(ees_eext_on==1){
499  int div = (ngridcEext / ngridc);
500  int rem = (ngridcEext % ngridc);
501  int ind = thisIndex.x+ngridc;
502  if(div>1){
503  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
504  CkPrintf("Eext Grid size too large for launch Scheme\n");
505  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
506  CkExit();
507  }//endif
508 #ifdef _CP_RHO_RSP_VERBOSE_
509  CkPrintf("HI, I am r-rho chare %d lauchning %d : ngrids %d %d : %d\n",
510  thisIndex.x,thisIndex.x,ngridcEext,ngridc,rem);
511 #endif
512  for(int j=0;j<config.nchareHartAtmT;j++){
513  UrhoRHartExtProxy[thisInstance.proxyOffset](thisIndex.x,thisIndex.y,j).startEextIter();
514  if(thisIndex.x<rem){
515 #ifdef _CP_RHO_RSP_VERBOSE_
516  CkPrintf("HI, I am r-rho chare %d also lauchning %d\n",thisIndex.x,ind);
517 #endif
518  UrhoRHartExtProxy[thisInstance.proxyOffset](ind,thisIndex.y,j).startEextIter();
519  }//endif : the launch
520  }//endfor : atmTyp parallism
521  }//endif : Launch is needed
522 #endif
523 
524 //////////////////////////////////////////////////////////////////////////////
525 /// Launch nonlocal g space if it wasn't done in RS
526 /// Spread the launch over all the rhoRchares you can.
527 
528  CPcharmParaInfo *sim = CPcharmParaInfo::get();
529 
530  if(sim->ees_nloc_on==1 && config.launchNLeesFromRho==1 && sim->natm_nl>0){
531 
532  CkAssert(rho_rs.sizeZ>=config.nchareG);
533  if(thisIndex.x<config.nchareG){
534  int nstates = config.nstates;
535  int div = (nstates/rhoRsubplanes);
536  int rem = (nstates % rhoRsubplanes);
537  int add = (thisIndex.y < rem ? 1 : 0);
538  int max = (thisIndex.y < rem ? thisIndex.y : rem);
539  int ist = div*thisIndex.y + max;
540  int iend = ist + div + add;
541  for(int kpoint=0;kpoint < config.UberJmax;kpoint++)
542  {
543  UberCollection destKpointInstance=thisInstance;
544  destKpointInstance.idxU.y=kpoint;
545  int proxyOffset=destKpointInstance.setPO();
546  //TODO Change this to a section multicast (see Init() in CP_Rho_GSpacePlane)
547  for(int ns=ist;ns<iend;ns++){
548 
549  // CkPrintf("RhoRP[%d,%d] triggering NL %d %d \n",
550  // thisIndex.x, thisIndex.y, ns, thisIndex.x);
551  CkAssert(ns<config.nstates);
552  // CkAssert(thisIndex.x<32);
553  UgSpaceDriverProxy[proxyOffset](ns,thisIndex.x).startNonLocalEes(myTime);
554  }//endfor
555  }//endfor
556  }//endif
557 
558  }//endif : launch the non-local ees
559 
560 //----------------------------------------------------------------------------
561  }//end routine
562 //////////////////////////////////////////////////////////////////////////////
563 
564 
565 //////////////////////////////////////////////////////////////////////////////
566 //////////////////////////////////////////////////////////////////////////////
567 //////////////////////////////////////////////////////////////////////////////
568 /**
569  * Compute one part of the EXC energy using PINY CP_exc_calc.
570  * This is done once an algorithm step
571  */
572 //////////////////////////////////////////////////////////////////////////////
574 //////////////////////////////////////////////////////////////////////////////
575 
576 #ifdef _CP_DEBUG_RHOR_VERBOSE_
577  CkPrintf("In RhoRealSpacePlane[%d] energyComp %d %d\n",thisIndex.x,
578  CkMyPe(),CmiMemoryUsage());
579 #endif
580 
581 //////////////////////////////////////////////////////////////////////////////
582 
583  double *density = rho_rs.density;
584  double *Vks = rho_rs.Vks;
585  int nf1 = ngrida;
586  int nf2 = myNgridb;
587  int nf3 = ngridc;
588  int npts = config.numFFTPoints; // total number of points
589  double *exc_ret = &(rho_rs.exc_ret);
590  double *muxc_ret = &(rho_rs.muxc_ret);
591 
592 //////////////////////////////////////////////////////////////////////////////
593 /// Perform exchange correlation computation (no grad corr here).
594 
595  CPXCFNCTS::CP_exc_calc(npts,nf1,nf2,nf3,density,Vks,exc_ret,muxc_ret,config.nfreq_xcfnctl);
596 
597  if(cp_grad_corr_on==0){
598  double exc[2];
599  exc[0]=rho_rs.exc_ret;
600  exc[1]=0.0;
601  contribute(2*sizeof(double),exc,CkReduction::sum_double);
602  }//endif
603 
604 //////////////////////////////////////////////////////////////////////////////
605 /// Invoke FFT to take rho(r) to rho(g) : do not over-write the density!!
606 
607  fftRhoRtoRhoG();
608 
609 //////////////////////////////////////////////////////////////////////////////
610  }//end routine
611 //////////////////////////////////////////////////////////////////////////////
612 
613 
614 //////////////////////////////////////////////////////////////////////////////
615 //////////////////////////////////////////////////////////////////////////////
616 //////////////////////////////////////////////////////////////////////////////
617 ///
618 /// 1) Perform FFT of density: Single Transpose method rho(x,y,z) ---> rho(gx,gy,z)
619 /// Double Transpose method rho(x,y,z) ---> rho(gx,y,z)
620 ///
621 /// 2) launch the real space part of the non-local
622 ///
623 /// Routine invoked once an algorithm step.
624 ///
625 //////////////////////////////////////////////////////////////////////////////
627 //////////////////////////////////////////////////////////////////////////////
628 
629 #ifdef _CP_DEBUG_RHOR_VERBOSE_
630  CkPrintf("In RhoRealSpacePlane[%d %d] FFT_RSpacetoGSpace %d %d\n",thisIndex.x,
631  thisIndex.y, CkMyPe(),CmiMemoryUsage());
632 #endif
633 
634 //////////////////////////////////////////////////////////////////////////////
635 /// FFT myself back to G-space part way
636 
637  FFTcache *fftcache = UfftCacheProxy[thisInstance.proxyOffset].ckLocalBranch();
638  double *density = rho_rs.density; // we need to save the density and vks.
639  double *dataR = rho_rs.rhoIRX; // rhoirx is around doing nothing now
640  complex *dataC = rho_rs.rhoIRXC; // so we can use it to store the FFT
641 
642 #if CMK_TRACE_ENABLED
643  double StartTime=CmiWallTimer();
644 #endif
645 
646  rho_rs.uPackScale(dataR,density,volumeFactor); // can't avoid the CmiMemcpy-scaling
647  if(rhoRsubplanes>1){
648  fftcache->doRhoFFTRxToGx_Rchare(dataC,dataR,nplane_rho_x,ngrida,myNgridb,iplane_ind);
649  }else{
650  fftcache->doRhoFFTRtoG_Rchare(dataC,dataR,nplane_rho_x,ngrida,ngridb,iplane_ind);
651  }//endif
652 
653 #if CMK_TRACE_ENABLED
654  traceUserBracketEvent(RhoRtoGFFT_, StartTime, CmiWallTimer());
655 #endif
656 
657 //////////////////////////////////////////////////////////////////////////////
658 /// Launch the transpose :go directly to rhog if the 1 transpose method is on.
659 /// : do an internal transpose if the 2 transpose method is on.
660 
661 //#define DEBUG_INT_TRANS_FWD
662  int iopt = 0;
663  if(rhoRsubplanes>1){
664  sendPartlyFFTRyToGy(iopt); // double transpose method (yz ---> gx,z)
665  }else{
666 #ifndef DEBUG_INT_TRANS_FWD
667  sendPartlyFFTtoRhoG(iopt); // single transpose method (z ---> gx,gy)
668 #else
669  char name[100];
670  sprintf(name,"partFFTGxGyZ%d.out.%d.%d",rhoRsubplanes,thisIndex.x,thisIndex.y);
671  FILE *fp = fopen(name,"w");
672  for(int ix =0;ix<nplane_rho_x;ix++){
673  for(int iy =0;iy<ngridb;iy++){
674  int i = iy*(ngrida/2+1) + ix;
675  fprintf(fp,"%d %d : %g %g\n",iy,ix,dataC[i].re,dataC[i].im);
676  }//endfor
677  }//endof
678  fclose(fp);
679  UrhoRealProxy[thisInstance.proxyOffset](0,0).exitForDebugging();
680 #endif
681  }//endif
682 
683 //////////////////////////////////////////////////////////////////////////////
684 /// Launch non-local real space FFT : allow NL to advance after density works a bit
685 /// Now that we have previously done our bit for the critical path through rhog
686 
687  launchNLRealFFT();
688 
689 //////////////////////////////////////////////////////////////////////////////
690  }//end routine
691 //////////////////////////////////////////////////////////////////////////////
692 
693 
694 //////////////////////////////////////////////////////////////////////////////
695 //////////////////////////////////////////////////////////////////////////////
696 //////////////////////////////////////////////////////////////////////////////
697 /**
698  * Launch ees-nonlocal real here. This routine can be called from any
699  * other routine except gradCorr(). GradCorr comp is optional (PINY option)
700  * e.g. it is not always computed.
701  */
702 //////////////////////////////////////////////////////////////////////////////
704 //////////////////////////////////////////////////////////////////////////////
705 /// Tell NLeesR its ok to compute its FFT. Otherwise we get no overlap
706 /// Spread the launch over all the RhoR chares
707 
708  CPcharmParaInfo *sim = CPcharmParaInfo::get();
709  if(sim->ees_nloc_on==1){
710  CkAssert(rho_rs.sizeZ>=sim->ngrid_nloc_c);;
711  if(thisIndex.x < sim->ngrid_nloc_c){
712  int nstates = config.nstates;
713  int div = (nstates/rhoRsubplanes);
714  int rem = (nstates % rhoRsubplanes);
715  int add = (thisIndex.y < rem ? 1 : 0);
716  int max = (thisIndex.y < rem ? thisIndex.y : rem);
717  int ist = div*thisIndex.y + max;
718  int iend = ist + div + add;
719  for(int kpoint=0;kpoint < config.UberJmax;kpoint++)
720  {
721  UberCollection destKpointInstance=thisInstance;
722  destKpointInstance.idxU.y=kpoint;
723  int proxyOffset=destKpointInstance.setPO();
724  for(int ns=ist;ns<iend;ns++){
725  CkAssert(ns<config.nstates);
726  UrealParticlePlaneProxy[proxyOffset](ns,thisIndex.x).launchFFTControl(myTime);
727  if(ns%4)
728  CmiNetworkProgress();
729  }//endfor
730  }//endfor
731  }//endif
732  }//endif
733 
734 //----------------------------------------------------------------------------
735  }//end routine
736 //////////////////////////////////////////////////////////////////////////////
737 
738 
739 
740 //////////////////////////////////////////////////////////////////////////////
741 //////////////////////////////////////////////////////////////////////////////
742 //////////////////////////////////////////////////////////////////////////////
743 ///
744 /// Double Transpose Fwd Send : A(gx,y,z) on the way to A(gx,gy,z)
745 /// Send so that (y,z) parallelism is
746 /// switched to (gx,z)
747 ///
748 /// Invoked 4 times per algorithm step : case 0 density(gx,y,z)
749 /// : cast 1-3 gradients(gx,y,z)
750 ///
751 //////////////////////////////////////////////////////////////////////////////
753 //////////////////////////////////////////////////////////////////////////////
754 
755  CPcharmParaInfo *sim = CPcharmParaInfo::get();
756  int **listSubGx = sim->listSubGx;
757  int *numSubGx = sim->numSubGx;
758 
759  CkAssert(rhoRsubplanes>1);
760  complex *FFTresult;
761  switch(iopt){
762  case 0 : FFTresult = rho_rs.rhoIRXC; break; //Scr variable for rho send
763  case 1 : FFTresult = rho_rs.rhoIRXC; break;
764  case 2 : FFTresult = rho_rs.rhoIRYC; break;
765  case 3 : FFTresult = rho_rs.rhoIRZC; break;
766  default: CkAbort("impossible iopt"); break;
767  }//end switch
768 
769 //////////////////////////////////////////////////////////////////////////////
770 /// Launch the communication
771 
772  //-----------------------------------------------------------------------------
773  // Commlib launch :
774 
775 #ifdef USE_COMLIB
776 #ifdef _ERIC_SETS_UP_COMMLIB_
777  switch(iopt){
778  case 0 : if(config.useRInsRhoRP) commRealInstanceRx.beginIteration(); break;
779  case 1 : if(config.useRInsIGXRhoRP) commRealIGXInstanceRx.beginIteration(); break;
780  case 2 : if(config.useRInsIGYRhoRP) commRealIGYInstanceRx.beginIteration(); break;
781  case 3 : if(config.useRInsIGZRhoRP) commRealIGZInstanceRx.beginIteration(); break;
782  default: CkAbort("impossible iopt");break;
783  }//end switch
784 #endif
785 #endif
786 
787  //-----------------------------------------------------------------------------
788  // Send the data : I have myNgridB values of y (gx,y) y=1...myNgridB and all gx
789  // Send all the `y' I have for the gx range desired after transpose
790 
791  int stride = ngrida/2+1;
792  int ix = thisIndex.x;
793  for(int ic = 0; ic < rhoRsubplanes; ic ++) { // chare arrays to which we will send
794  int num = numSubGx[ic]; // number of gx values chare ic wants
795  int size = num*myNgridb; // num*(all the y's i have)
796 
797  int sendFFTDataSize = size;
798  RhoGSFFTMsg *msg = new (sendFFTDataSize, 8 * sizeof(int)) RhoGSFFTMsg;
799  msg->size = size;
800  msg->iopt = iopt;
801  msg->offset = myBoff; // where the myNgridB y-lines start.
802  msg->num = myNgridb; // number of y-lines I have.
803  complex *data = msg->data; // data
804 
805  if(config.prioFFTMsg){
806  CkSetQueueing(msg, CK_QUEUEING_IFIFO);
807  *(int*)CkPriorityPtr(msg) = config.rhogpriority+thisIndex.y;
808  }//endif
809 
810  if(listSubFlag==1){
811  for(int i=0,koff=0;i<num;i++,koff+=myNgridb){
812  for(int k=koff,ii=listSubGx[ic][i];k<myNgridb+koff;k++,ii+=stride){
813  data[k] = FFTresult[ii]; // all y's of this gx
814  }//endfor
815  }//endfor
816  }else{
817  int nst=listSubGx[ic][0];
818  for(int i=0,ist=nst,koff=0;i<num;i++,koff+=myNgridb,ist++){
819  for(int k=koff,ii=ist;k<myNgridb+koff;k++,ii+=stride){
820  data[k] = FFTresult[ii]; // all y's of this gx
821  }//endfor
822  }//endfor
823  }//endif
824 
825  switch(iopt){
826  case 0 : UrhoRealProxy[thisInstance.proxyOffset](ix,ic).acceptRhoGradVksRyToGy(msg); break;
827  case 1 : UrhoRealProxy[thisInstance.proxyOffset](ix,ic).acceptRhoGradVksRyToGy(msg); break;
828  case 2 : UrhoRealProxy[thisInstance.proxyOffset](ix,ic).acceptRhoGradVksRyToGy(msg); break;
829  case 3 : UrhoRealProxy[thisInstance.proxyOffset](ix,ic).acceptRhoGradVksRyToGy(msg); break;
830  default: CkAbort("impossible iopt");break;
831  }//end switch
832 
833 #ifdef _ERIC_SETS_UP_COMMLIB_
834  switch(iopt){
835  case 0 : UrhoRealProxy[thisInstance.proxyOffset](ix,ic).acceptRhoGradVksRyToGy(msg); break;
836  default: CkAbort("impossible iopt");break;
837  }//end switch
838 #endif
839 
840  }//end for : chare sending
841 
842  //-----------------------------------------------------------------------------
843  // Commlib stop
844 
845 #ifdef _ERIC_SETS_UP_COMMLIB_
846  switch(iopt){
847  case 0 : if(config.useRInsRhoRP) commRealInstanceRx.endIteration(); break;
848  case 1 : if(config.useRInsIGXRhoRP) commRealIGXInstanceRx.endIteration(); break;
849  case 2 : if(config.useRInsIGYRhoRP) commRealIGYInstanceRx.endIteration(); break;
850  case 3 : if(config.useRInsIGZRhoRP) commRealIGZInstanceRx.endIteration(); break;
851  default: CkAbort("impossible iopt");break;
852  }//end switch
853 #endif
854 
855 //---------------------------------------------------------------------------
856  }//end routine
857 //////////////////////////////////////////////////////////////////////////////
858 
859 
860 
861 //////////////////////////////////////////////////////////////////////////////
862 //////////////////////////////////////////////////////////////////////////////
863 //////////////////////////////////////////////////////////////////////////////
864 ///
865 /// Double Transpose Fwd Recv : A(gx,y,z) on the way to A(gx,gy,z)
866 /// Recv so that (y,z) parallel switched to (gx,z)
867 ///
868 /// Invoked 4 times per algorithm step : case 0 rho(gx,y,z)
869 /// : cast 1-3 gradRho(gx,y,z)
870 ///
871 //////////////////////////////////////////////////////////////////////////////
873 //////////////////////////////////////////////////////////////////////////////
874 
875  int size = msg->size; // msg size
876  int iopt = msg->iopt; //
877  int num = msg->num;
878  int offset = msg->offset;
879  complex *msgData = msg->data;
880 
881  CkAssert(size==myNplane_rho*num);
882  CkAssert(rhoRsubplanes>1);
883 
884  complex *dataC;
885  switch(iopt){
886  case 0: dataC = rho_rs.rhoIRXCint; break;
887  case 1: dataC = rho_rs.rhoIRXCint; break;
888  case 2: dataC = rho_rs.rhoIRYCint; break;
889  case 3: dataC = rho_rs.rhoIRZCint; break;
890  default: CkAbort("Impossible option\n"); break;
891  }//endif
892 
893  for(int js=0,j=offset;js<size;js+=num,j+=ngridb){
894  for(int is=js,i=j;is<num+js;is++,i++){
895  dataC[i] = msgData[is];
896  }//endfor
897  }//endfor
898 
899  delete msg;
900 
901  countIntRtoG[iopt]++;
902  if(countIntRtoG[iopt]==rhoRsubplanes){
903  countIntRtoG[iopt]=0;
904  fftRhoRyToGy(iopt);
905  }//endfor
906 
907 //////////////////////////////////////////////////////////////////////////////
908  }//end routine
909 //////////////////////////////////////////////////////////////////////////////
910 
911 
912 
913 //////////////////////////////////////////////////////////////////////////////
914 //////////////////////////////////////////////////////////////////////////////
915 //////////////////////////////////////////////////////////////////////////////
916 /// Double Transpose Fwd FFT : A(gx,y,z) -> A(gx,gy,z)
917 ///
918 /// Invoked 4 times per algorithm step :
919 /// case 0 rho(gx,y,z) -> rho(gx,gy,z)
920 /// cast 1-3 gradRho(gx,y,z)-> gradRho(gx,gy,z)
921 ///
922 //////////////////////////////////////////////////////////////////////////////
924 //////////////////////////////////////////////////////////////////////////////
925 /// FFT myself back to G-space part way : e.g. along gy here
926 
927  CkAssert(rhoRsubplanes>1);
928  complex *dataC;
929  double *dataR;
930  switch(iopt){
931  case 0: dataC = rho_rs.rhoIRXCint; dataR = rho_rs.rhoIRXint; break;
932  case 1: dataC = rho_rs.rhoIRXCint; dataR = rho_rs.rhoIRXint; break;
933  case 2: dataC = rho_rs.rhoIRYCint; dataR = rho_rs.rhoIRYint; break;
934  case 3: dataC = rho_rs.rhoIRZCint; dataR = rho_rs.rhoIRZint; break;
935  default: CkAbort("Impossible option\n"); break;
936  }//endif
937 
938  FFTcache *fftcache = UfftCacheProxy[thisInstance.proxyOffset].ckLocalBranch();
939 #if CMK_TRACE_ENABLED
940  double StartTime=CmiWallTimer();
941 #endif
942  fftcache->doRhoFFTRyToGy_Rchare(dataC,dataR,myNplane_rho,ngrida,ngridb,iplane_ind);
943 #if CMK_TRACE_ENABLED
944  traceUserBracketEvent(doRhoFFTRytoGy_, StartTime, CmiWallTimer());
945 #endif
946 
947 //////////////////////////////////////////////////////////////////////////////
948 /// Send chunk to RhoGDensity
949 
950  int igo=0;
951  if(iopt>=1 && iopt <= 3){countFFTRyToGy++; igo=1;}
952 
953 #ifndef DEBUG_INT_TRANS_FWD
954  if(config.rhoGToRhoRMsgComb==0 || iopt==0){sendPartlyFFTtoRhoG(iopt);}
955  if(config.rhoGToRhoRMsgComb==1 && countFFTRyToGy==3 && igo==1){
956  countFFTRyToGy=0;
958  }//endif
959 #else
960  CPcharmParaInfo *sim = CPcharmParaInfo::get();
961  int **listSubGx = sim->listSubGx;
962  int ic = thisIndex.y;
963  CkPrintf("%d %d : %d\n",thisIndex.x,thisIndex.y,myNplane_rho);
964  char name[100];
965  sprintf(name,"partFFTGxGyZ%d.out.%d.%d",rhoRsubplanes,thisIndex.x,thisIndex.y);
966  FILE *fp = fopen(name,"w");
967  for(int ix =0;ix<myNplane_rho;ix++){
968  for(int iy =0;iy<ngridb;iy++){
969  int i = ix*ngridb + iy;
970  fprintf(fp,"%d %d : %g %g\n",iy,listSubGx[ic][ix],dataC[i].re,dataC[i].im);
971  }//endfor
972  }//endof
973  fclose(fp);
974  UrhoRealProxy[thisInstance.proxyOffset](0,0).exitForDebugging();
975 #endif
976 
977 //////////////////////////////////////////////////////////////////////////////
978  }//end routine
979 //////////////////////////////////////////////////////////////////////////////
980 
981 
982 
983 //////////////////////////////////////////////////////////////////////////////
984 //////////////////////////////////////////////////////////////////////////////
985 //////////////////////////////////////////////////////////////////////////////
986 ///
987 /// The Tranpose to G-space : A(gx,gy,z) on the way to A(gx,gy,gz)
988 /// Change parallel by gx,z to parallel by {gx,gy}
989 /// We switch chare arrays here from RhoR to RhoG
990 ///
991 /// Invoked 4 times per algorithm step :
992 /// case 0 send rho(gx,gy,z) -> rho(gx,gy,z) in Rhog
993 /// cast 1-3 send gradRho(gx,gy,z) -> gradRho(gx,gy,z) in Rhog
994 ///
995 /// Send the partly FFTed array A(gx,gy,z), to rhoGSpacePlane :
996 /// You then wait while RHOG works, sends back RhoIRalpha which you whitebyrdize
997 /// You send back the whitebyrdized RhoIRalpha puppies to RhoGspacePlane.
998 /// Whilst all this is going on, HartG is churning and will send you another
999 /// part of Vks. All this requires keeping density, vks, div_rho_alpha and
1000 /// vkshart memory available at all times to receive messages.
1001 ///
1002 //////////////////////////////////////////////////////////////////////////////
1004 //////////////////////////////////////////////////////////////////////////////
1005 /// Local pointers and variables
1006 
1007  CPcharmParaInfo *sim = CPcharmParaInfo::get();
1008  int nchareRhoG = sim->nchareRhoG;
1009  int **tranpack_rho = sim->index_tran_upack_rho;
1010  int *nlines_per_chareRhoG = sim->nlines_per_chareRhoG;
1011  int ***tranupack_rhoY = sim->index_tran_upack_rho_y;
1012  int **nlines_per_chareRhoGY = sim->nline_send_rho_y;
1013 
1014  complex *FFTresult;
1015  if(rhoRsubplanes==1){
1016  switch(iopt){
1017  case 0 : FFTresult = rho_rs.rhoIRXC; break; //Scr variable for rho send
1018  case 1 : FFTresult = rho_rs.rhoIRXC; break;
1019  case 2 : FFTresult = rho_rs.rhoIRYC; break;
1020  case 3 : FFTresult = rho_rs.rhoIRZC; break;
1021  default: CkAbort("impossible iopt"); break;
1022  }//end switch
1023  }else{
1024  switch(iopt){
1025  case 0 : FFTresult = rho_rs.rhoIRXCint; break; //Scr variable for rho send
1026  case 1 : FFTresult = rho_rs.rhoIRXCint; break;
1027  case 2 : FFTresult = rho_rs.rhoIRYCint; break;
1028  case 3 : FFTresult = rho_rs.rhoIRZCint; break;
1029  default: CkAbort("impossible iopt"); break;
1030  }//end switch
1031  }//endif
1032 
1033 //////////////////////////////////////////////////////////////////////////////
1034 /// Commlib launch
1035 
1036 #ifdef USE_COMLIB
1037  if(rhoRsubplanes==1){
1038  switch(iopt){
1039 #ifdef OLD_COMMLIB
1040  case 0 : if(config.useRInsRhoGP) commRealInstance.beginIteration(); break;
1041  case 1 : if(config.useRInsIGXRhoGP) commRealIGXInstance.beginIteration(); break;
1042  case 2 : if(config.useRInsIGYRhoGP) commRealIGYInstance.beginIteration(); break;
1043  case 3 : if(config.useRInsIGZRhoGP) commRealIGZInstance.beginIteration(); break;
1044 #else
1045  case 0 : if(config.useRInsRhoGP) ComlibBegin(rhoGProxy_com,0); break;
1046  case 1 : if(config.useRInsIGXRhoGP) ComlibBegin(rhoGProxyIGX_com,0); break;
1047  case 2 : if(config.useRInsIGYRhoGP) ComlibBegin(rhoGProxyIGY_com,0); break;
1048  case 3 : if(config.useRInsIGZRhoGP) ComlibBegin(rhoGProxyIGZ_com,0); break;
1049 #endif
1050  default: CkAbort("impossible iopt");break;
1051  }//end switch
1052  }//endif
1053 #endif
1054 
1055 //////////////////////////////////////////////////////////////////////////////
1056 /// Send the data
1057 
1058  int iy = thisIndex.y;
1059  for(int ic = 0; ic < nchareRhoG; ic ++) { // chare arrays to which we will send
1060 
1061  //---------------------------
1062  //malloc the message
1063  int sendFFTDataSize = nlines_per_chareRhoG[ic];
1064  if(rhoRsubplanes!=1){sendFFTDataSize = nlines_per_chareRhoGY[ic][iy];}
1065  if(sendFFTDataSize>0)
1066  {
1067  RhoGSFFTMsg *msg = new (sendFFTDataSize, 8 * sizeof(int)) RhoGSFFTMsg;
1068 
1069  //---------------------------
1070  //Pack the message
1071  msg->size = sendFFTDataSize;
1072  msg->iopt = iopt;
1073  msg->offset = thisIndex.x; // z-index
1074  msg->offsetGx = thisIndex.y; // gx parallelization index
1075  complex *data = msg->data;
1076  if(config.prioFFTMsg){
1077  CkSetQueueing(msg, CK_QUEUEING_IFIFO);
1078  *(int*)CkPriorityPtr(msg) = config.rhogpriority+thisIndex.x;
1079  }//endif
1080 
1081  if(rhoRsubplanes==1){
1082  for(int i=0;i<sendFFTDataSize;i++){
1083  data[i] = FFTresult[tranpack_rho[ic][i]];
1084  }//endfor
1085  }else{
1086  for(int i=0;i<sendFFTDataSize;i++){
1087  data[i] = FFTresult[tranupack_rhoY[ic][iy][i]];
1088  }//endfor
1089  }//endif
1090 
1091  //---------------------------
1092  // Send the message
1093  if(rhoRsubplanes==1){ // if subplanes is 1 we can use comlib
1094  switch(iopt){
1095  case 0 : rhoGProxy_com(ic,0).acceptRhoData(msg); break;
1096  case 1 : rhoGProxyIGX_com(ic,0).acceptWhiteByrd(msg); break;
1097  case 2 : rhoGProxyIGY_com(ic,0).acceptWhiteByrd(msg); break;
1098  case 3 : rhoGProxyIGZ_com(ic,0).acceptWhiteByrd(msg); break;
1099  default: CkAbort("impossible iopt");break;
1100  }//end switch
1101  }else{
1102  switch(iopt){
1103  case 0 : UrhoGProxy[thisInstance.proxyOffset](ic,0).acceptRhoData(msg); break;
1104  case 1 : UrhoGProxy[thisInstance.proxyOffset](ic,0).acceptWhiteByrd(msg); break;
1105  case 2 : UrhoGProxy[thisInstance.proxyOffset](ic,0).acceptWhiteByrd(msg); break;
1106  case 3 : UrhoGProxy[thisInstance.proxyOffset](ic,0).acceptWhiteByrd(msg); break;
1107  default: CkAbort("impossible iopt");break;
1108  }//end switch
1109  }//endif
1110  }//end if nonzero
1111  }//end for : chare sending
1112 
1113 //////////////////////////////////////////////////////////////////////////////
1114 /// Commlib stop
1115 
1116 #ifdef USE_COMLIB
1117  if(rhoRsubplanes==1){
1118  switch(iopt){
1119 #ifdef OLD_COMMLIB
1120  case 0 : if(config.useRInsRhoGP) commRealInstance.endIteration(); break;
1121  case 1 : if(config.useRInsIGXRhoGP) commRealIGXInstance.endIteration(); break;
1122  case 2 : if(config.useRInsIGYRhoGP) commRealIGYInstance.endIteration(); break;
1123  case 3 : if(config.useRInsIGZRhoGP) commRealIGZInstance.endIteration(); break;
1124 #else
1125  case 0 : if(config.useRInsRhoGP) ComlibEnd(rhoGProxy_com,0); break;
1126  case 1 : if(config.useRInsIGXRhoGP) ComlibEnd(rhoGProxyIGX_com,0); break;
1127  case 2 : if(config.useRInsIGYRhoGP) ComlibEnd(rhoGProxyIGY_com,0); break;
1128  case 3 : if(config.useRInsIGZRhoGP) ComlibEnd(rhoGProxyIGZ_com,0); break;
1129 #endif
1130  default: CkAbort("impossible iopt");break;
1131  }//end switch
1132  }//endif
1133 #endif
1134 
1135 //---------------------------------------------------------------------------
1136  }//end routine
1137 //////////////////////////////////////////////////////////////////////////////
1138 
1139 
1140 //////////////////////////////////////////////////////////////////////////////
1141 //////////////////////////////////////////////////////////////////////////////
1142 //////////////////////////////////////////////////////////////////////////////
1144 //////////////////////////////////////////////////////////////////////////////
1145 /// Local pointers and variables
1146 
1147  CPcharmParaInfo *sim = CPcharmParaInfo::get();
1148  int nchareRhoG = sim->nchareRhoG;
1149  int **tranpack_rho = sim->index_tran_upack_rho;
1150  int *nlines_per_chareRhoG = sim->nlines_per_chareRhoG;
1151  int ***tranupack_rhoY = sim->index_tran_upack_rho_y;
1152  int **nlines_per_chareRhoGY = sim->nline_send_rho_y;
1153 
1154  complex *FFTresultX;
1155  complex *FFTresultY;
1156  complex *FFTresultZ;
1157  if(rhoRsubplanes==1){
1158  FFTresultX = rho_rs.rhoIRXC;
1159  FFTresultY = rho_rs.rhoIRYC;
1160  FFTresultZ = rho_rs.rhoIRZC;
1161  }else{
1162  FFTresultX = rho_rs.rhoIRXCint;
1163  FFTresultY = rho_rs.rhoIRYCint;
1164  FFTresultZ = rho_rs.rhoIRZCint;
1165  }//endif
1166 
1167 //////////////////////////////////////////////////////////////////////////////
1168 /// Send the data
1169 
1170  int iy = thisIndex.y;
1171  for(int ic = 0; ic < nchareRhoG; ic ++) { // chare arrays to which we will send
1172 
1173  //---------------------------
1174  //malloc the message
1175  int sendFFTDataSize = nlines_per_chareRhoG[ic];
1176  if(rhoRsubplanes!=1){sendFFTDataSize = nlines_per_chareRhoGY[ic][iy];}
1177 
1178  if(sendFFTDataSize>0){
1179  //---------------------------
1180  //Pack the message
1181  RhoGSFFTMsg *msg = new (3*sendFFTDataSize, 8 * sizeof(int)) RhoGSFFTMsg;
1182  msg->size = sendFFTDataSize;
1183  msg->iopt = 1;
1184  msg->offset = thisIndex.x; // z-index
1185  msg->offsetGx = thisIndex.y; // gx parallelization index
1186  complex *data = msg->data;
1187  if(config.prioFFTMsg){
1188  CkSetQueueing(msg, CK_QUEUEING_IFIFO);
1189  *(int*)CkPriorityPtr(msg) = config.rhogpriority+thisIndex.x;
1190  }//endif
1191 
1192  if(rhoRsubplanes==1){
1193  for(int i=0,ii=0;ii<sendFFTDataSize;i+=3,ii++){
1194  int j = tranpack_rho[ic][ii];
1195  data[i] = FFTresultX[j];
1196  data[i+1] = FFTresultY[j];
1197  data[i+2] = FFTresultZ[j];
1198  }//endfor
1199  }else{
1200  for(int i=0,ii=0;ii<sendFFTDataSize;i+=3,ii++){
1201  int j = tranupack_rhoY[ic][iy][ii];
1202  data[i] = FFTresultX[j];
1203  data[i+1] = FFTresultY[j];
1204  data[i+2] = FFTresultZ[j];
1205  }//endfor
1206  }//endif
1207  //---------------------------
1208  // Send the message
1209  UrhoGProxy[thisInstance.proxyOffset](ic,0).acceptWhiteByrdAll(msg);
1210  }//end if : nonzero msg
1211  }//end for : send to rhog(ic)
1212 
1213 //---------------------------------------------------------------------------
1214  }//end routine
1215 //////////////////////////////////////////////////////////////////////////////
1216 
1217 
1218 //////////////////////////////////////////////////////////////////////////////
1219 //////////////////////////////////////////////////////////////////////////////
1220 //////////////////////////////////////////////////////////////////////////////
1221 /**
1222  *
1223  * Accept transpose data from RhoG : receive grad_rho(gy,gx,z)
1224  *
1225  * Invoked 3 times per algorithm step : once for each grad_rho
1226  *
1227  * Memory required is : rho_igx,rho_igy,rho_igz so stuff can come in any order
1228  * : density and vks are needed later so no reusing for you.
1229  * : VksHart can also arrive at any time and cannot be used
1230  * here.
1231  */
1232 //////////////////////////////////////////////////////////////////////////////
1234 //////////////////////////////////////////////////////////////////////////////
1235 /// Unpack the message
1236 
1237  CPcharmParaInfo *sim = CPcharmParaInfo::get();
1238  int nchareG = sim->nchareRhoG;
1239  int **tranUnpack = sim->index_tran_upack_rho;
1240  int *nlines_per_chareG = sim->nlines_per_chareRhoG;
1241  int ***tranupack_rhoY = sim->index_tran_upack_rho_y;
1242  int **nlines_per_chareRhoGY = sim->nline_send_rho_y;
1243  int iy = thisIndex.y;
1244 
1245  int size = msg->size;
1246  int Index = msg->senderIndex;
1247  int iopt = msg->iopt;
1248  complex *partiallyFFTd = msg->data;
1249 
1250  int mySize;
1251  int nptsExpnd;
1252  if(rhoRsubplanes==1){
1253  mySize = nlines_per_chareG[Index];
1254  nptsExpnd = nptsExpndB;
1255  }else{
1256  mySize = nlines_per_chareRhoGY[Index][iy];
1257  nptsExpnd = nptsExpndA;
1258  }//endif
1259 
1260 #ifdef _NAN_CHECK_
1261  for(int i=0;i<msg->size ;i++){
1262  CkAssert(isnan(msg->data[i].re)==0);
1263  CkAssert(isnan(msg->data[i].im)==0);
1264  }
1265 #endif
1266 #ifdef _CP_DEBUG_RHOR_VERBOSE_
1267  CkPrintf("Data from RhoG arriving at RhoR : %d %d %d %d\n",
1268  thisIndex.x,thisIndex.y,iopt,countGradVks[iopt]);
1269 #endif
1270 
1271 //////////////////////////////////////////////////////////////////////////////
1272 /// Perform some error checking
1273 
1274  countGradVks[iopt]++;
1275  if (countGradVks[iopt] > recvCountFromGRho) {
1276  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
1277  CkPrintf("Mismatch in allowed rho_gspace chare arrays : %d %d %d %d\n",
1278  countGradVks[iopt],nchareG,thisIndex.x,thisIndex.y);
1279  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
1280  CkExit();
1281  }//endif
1282 
1283  if(size!=mySize){
1284  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
1285  CkPrintf("Dude.1, %d != %d for rho chare %d %d %d\n",size,mySize,
1286  thisIndex.y,Index,iopt);
1287  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
1288  CkExit();
1289  }//endif
1290 
1291  if(1> iopt || iopt >3){
1292  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
1293  CkPrintf("Wrong option in rhoR \n",iopt);
1294  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
1295  CkExit();
1296  }//endif
1297 
1298 //////////////////////////////////////////////////////////////////////////////
1299 /// unpack the data and delete the message
1300 
1301  complex *dataC;
1302  double *dataR;
1303  if(rhoRsubplanes==1){
1304  switch(iopt){
1305  case 1 : dataC = rho_rs.rhoIRXC; dataR = rho_rs.rhoIRX; break;
1306  case 2 : dataC = rho_rs.rhoIRYC; dataR = rho_rs.rhoIRY; break;
1307  case 3 : dataC = rho_rs.rhoIRZC; dataR = rho_rs.rhoIRZ; break;
1308  default: CkAbort("impossible iopt");break;
1309  }//end switch
1310  }else{
1311  switch(iopt){
1312  case 1 : dataC = rho_rs.rhoIRXCint; dataR = rho_rs.rhoIRXint; break;
1313  case 2 : dataC = rho_rs.rhoIRYCint; dataR = rho_rs.rhoIRYint; break;
1314  case 3 : dataC = rho_rs.rhoIRZCint; dataR = rho_rs.rhoIRZint; break;
1315  default: CkAbort("impossible iopt");break;
1316  }//end switch
1317  }//endif
1318 
1319  // you must zero because messages don't fill the plane
1320  if(countGradVks[iopt]==1){bzero(dataR,sizeof(double)*nptsExpnd);}
1321 
1322  if(rhoRsubplanes==1){
1323  for(int i=0;i<size;i++){
1324  dataC[tranUnpack[Index][i]] = partiallyFFTd[i]*probScale;
1325  }//endfor
1326  }else{
1327  for(int i=0;i<size;i++){
1328  dataC[tranupack_rhoY[Index][iy][i]] = partiallyFFTd[i]*probScale;
1329  }//endfor
1330  }//endif
1331 
1332  delete msg;
1333 
1334 //////////////////////////////////////////////////////////////////////////////
1335 /// When you have all the data : finish the FFT back to real space
1336 
1337  if (countGradVks[iopt] == recvCountFromGRho){
1338 
1339  countGradVks[iopt]=0;
1340  if(rhoRsubplanes==1){doneGradRhoVks++;}
1341 
1342 #if CMK_TRACE_ENABLED
1343  double StartTime=CmiWallTimer();
1344 #endif
1345 
1346  FFTcache *fftcache = UfftCacheProxy[thisInstance.proxyOffset].ckLocalBranch();
1347  if(rhoRsubplanes==1){
1348  fftcache->doRhoFFTGtoR_Rchare(dataC,dataR,nplane_rho_x,ngrida,ngridb,iplane_ind);
1349  }else{
1350  fftcache->doRhoFFTGyToRy_Rchare(dataC,dataR,myNplane_rho,ngrida,ngridb,iplane_ind);
1351  sendPartlyFFTGxToRx(iopt);
1352  }//endif
1353 
1354 #if CMK_TRACE_ENABLED
1355  traceUserBracketEvent(fwFFTGtoRnot0_, StartTime, CmiWallTimer());
1356 #endif
1357 
1358  }//endif : I captured a divRho
1359 
1360 //////////////////////////////////////////////////////////////////////////////
1361 /// When you have rhoiRX,rhoiRY,rhoiRZ and Vks invoke gradient correction
1362 
1363  if(doneGradRhoVks==3 && rhoRsubplanes==1){
1364  doneGradRhoVks = 0;
1365  GradCorr(); // if rhosubplanes>1 you have a transpose
1366  // and another fft to do before you can GradCorr);
1367  }//endif
1368 
1369 //----------------------------------------------------------------------------
1370  }//end routine
1371 //////////////////////////////////////////////////////////////////////////////
1372 
1373 
1374 //////////////////////////////////////////////////////////////////////////////
1375 //////////////////////////////////////////////////////////////////////////////
1376 //////////////////////////////////////////////////////////////////////////////
1377 /**
1378  *
1379  * Accept transpose data from RhoG : receive grad_rho(gy,gx,z)
1380  *
1381  * Invoked 1 time per algorithm step : all grad_rho packed up
1382  *
1383  * Memory required is : rho_igx,rho_igy,rho_igz so stuff can come in any order
1384  * : density and vks are needed later so no reusing for you.
1385  * : VksHart can also arrive at any time and cannot be used
1386  * here.
1387  */
1388 //////////////////////////////////////////////////////////////////////////////
1390 //////////////////////////////////////////////////////////////////////////////
1391 /// Unpack the message
1392 
1393  CPcharmParaInfo *sim = CPcharmParaInfo::get();
1394  int nchareG = sim->nchareRhoG;
1395  int **tranUnpack = sim->index_tran_upack_rho;
1396  int *nlines_per_chareG = sim->nlines_per_chareRhoG;
1397  int ***tranupack_rhoY = sim->index_tran_upack_rho_y;
1398  int **nlines_per_chareRhoGY = sim->nline_send_rho_y;
1399  int iy = thisIndex.y;
1400 
1401  int size = msg->size;
1402  int Index = msg->senderIndex;
1403  int iopt = msg->iopt;
1404  complex *partiallyFFTd = msg->data;
1405 
1406  int mySize;
1407  int nptsExpnd;
1408  if(rhoRsubplanes==1){
1409  mySize = nlines_per_chareG[Index];
1410  nptsExpnd = nptsExpndB;
1411  }else{
1412  mySize = nlines_per_chareRhoGY[Index][iy];
1413  nptsExpnd = nptsExpndA;
1414  }//endif
1415 
1416 #ifdef _NAN_CHECK_
1417  for(int i=0;i<msg->size ;i++){
1418  CkAssert(isnan(msg->data[i].re)==0);
1419  CkAssert(isnan(msg->data[i].im)==0);
1420  }
1421 #endif
1422 
1423 #ifdef _CP_DEBUG_RHOR_VERBOSE_
1424  CkPrintf("Data from RhoG arriving at RhoR : %d %d %d %d\n",
1425  thisIndex.x,thisIndex.y,iopt,countGradVks[iopt]);
1426 #endif
1427 
1428 //////////////////////////////////////////////////////////////////////////////
1429 /// Perform some error checking
1430 
1431  countGradVks[iopt]++;
1432 
1433  if (countGradVks[iopt] > recvCountFromGRho) {
1434  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
1435  CkPrintf("Mismatch in allowed rho_gspace chare arrays : %d %d %d %d\n",
1436  countGradVks[iopt],nchareG,thisIndex.x,thisIndex.y);
1437  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
1438  CkExit();
1439  }//endif
1440 
1441  if(size!=mySize){
1442  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
1443  CkPrintf("Dude.1, %d != %d for rho chare %d %d %d\n",size,mySize,
1444  thisIndex.y,Index,iopt);
1445  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
1446  CkExit();
1447  }//endif
1448 
1449  if(iopt!=1){
1450  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
1451  CkPrintf("Wrong option in rhoR \n",iopt);
1452  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
1453  CkExit();
1454  }//endif
1455 
1456 //////////////////////////////////////////////////////////////////////////////
1457 /// unpack the data and delete the message
1458 
1459  complex *dataCX,*dataCY,*dataCZ;
1460  double *dataRX,*dataRY,*dataRZ;
1461  if(rhoRsubplanes==1){
1462  dataCX = rho_rs.rhoIRXC; dataRX = rho_rs.rhoIRX;
1463  dataCY = rho_rs.rhoIRYC; dataRY = rho_rs.rhoIRY;
1464  dataCZ = rho_rs.rhoIRZC; dataRZ = rho_rs.rhoIRZ;
1465  }else{
1466  dataCX = rho_rs.rhoIRXCint; dataRX = rho_rs.rhoIRXint;
1467  dataCY = rho_rs.rhoIRYCint; dataRY = rho_rs.rhoIRYint;
1468  dataCZ = rho_rs.rhoIRZCint; dataRZ = rho_rs.rhoIRZint;
1469  }//endif
1470 
1471  // you must zero because messages don't fill the plane
1472  if(countGradVks[iopt]==1){
1473  bzero(dataRX,sizeof(double)*nptsExpnd);
1474  bzero(dataRY,sizeof(double)*nptsExpnd);
1475  bzero(dataRZ,sizeof(double)*nptsExpnd);
1476  }//endif
1477 
1478  if(rhoRsubplanes==1){
1479  for(int i=0,ii=0;ii<size;i+=3,ii++){
1480  int j = tranUnpack[Index][ii];
1481  dataCX[j] = partiallyFFTd[i]*probScale;
1482  dataCY[j] = partiallyFFTd[i+1]*probScale;
1483  dataCZ[j] = partiallyFFTd[i+2]*probScale;
1484  }//endfor
1485  }else{
1486  for(int i=0,ii=0;ii<size;i+=3,ii++){
1487  int j = tranupack_rhoY[Index][iy][ii];
1488  dataCX[j] = partiallyFFTd[i]*probScale;
1489  dataCY[j] = partiallyFFTd[i+1]*probScale;
1490  dataCZ[j] = partiallyFFTd[i+2]*probScale;
1491  }//endfor
1492  }//endif
1493 
1494  delete msg;
1495 
1496 //////////////////////////////////////////////////////////////////////////////
1497 /// When you have all the data : finish the FFT back to real space
1498 
1499  if (countGradVks[iopt] == recvCountFromGRho){
1500 
1501  countGradVks[iopt]=0;
1502  if(rhoRsubplanes==1){doneGradRhoVks+=3;}
1503 
1504 #if CMK_TRACE_ENABLED
1505  double StartTime=CmiWallTimer();
1506 #endif
1507 
1508  FFTcache *fftcache = UfftCacheProxy[thisInstance.proxyOffset].ckLocalBranch();
1509  if(rhoRsubplanes==1){
1510  fftcache->doRhoFFTGtoR_Rchare(dataCX,dataRX,nplane_rho_x,ngrida,ngridb,iplane_ind);
1511  fftcache->doRhoFFTGtoR_Rchare(dataCY,dataRY,nplane_rho_x,ngrida,ngridb,iplane_ind);
1512  fftcache->doRhoFFTGtoR_Rchare(dataCZ,dataRZ,nplane_rho_x,ngrida,ngridb,iplane_ind);
1513  }else{
1514  fftcache->doRhoFFTGyToRy_Rchare(dataCX,dataRX,myNplane_rho,ngrida,ngridb,iplane_ind);
1515  fftcache->doRhoFFTGyToRy_Rchare(dataCY,dataRY,myNplane_rho,ngrida,ngridb,iplane_ind);
1516  fftcache->doRhoFFTGyToRy_Rchare(dataCZ,dataRZ,myNplane_rho,ngrida,ngridb,iplane_ind);
1520  }//endif
1521 
1522 #if CMK_TRACE_ENABLED
1523  traceUserBracketEvent(fwFFTGtoRnot0_, StartTime, CmiWallTimer());
1524 #endif
1525 
1526  }//endif : I captured a divRho
1527 
1528 //////////////////////////////////////////////////////////////////////////////
1529 /// When you have rhoiRX,rhoiRY,rhoiRZ and Vks invoke gradient correction
1530 
1531  if(doneGradRhoVks==3 && rhoRsubplanes==1){
1532  doneGradRhoVks = 0;
1533  GradCorr(); // if rhosubplanes>1 you have a transpose
1534  // and another fft to do before you can GradCorr);
1535  }//endif
1536 
1537 //----------------------------------------------------------------------------
1538  }//end routine
1539 //////////////////////////////////////////////////////////////////////////////
1540 
1541 
1542 //////////////////////////////////////////////////////////////////////////////
1543 //////////////////////////////////////////////////////////////////////////////
1544 //////////////////////////////////////////////////////////////////////////////
1545 ///
1546 /// Double Transpose Bck Send : A(gx,y,z) on the way to A(x,y,z)
1547 /// Send so (gx,z) parallel -> (y,z) parallel
1548 ///
1549 /// Invoked 5 times an algorithm step:
1550 /// case 1-3: Gradients
1551 /// case 0 : VksWhiteByrd
1552 /// case 4 : VksHarteext
1553 ///
1554 //////////////////////////////////////////////////////////////////////////////
1556 //////////////////////////////////////////////////////////////////////////////
1557  CkAssert(rhoRsubplanes>1);
1558 
1559  complex *FFTresult;
1560  switch(iopt){
1561  case 0 : FFTresult = rho_rs.rhoIRXCint; break;
1562  case 1 : FFTresult = rho_rs.rhoIRXCint; break;
1563  case 2 : FFTresult = rho_rs.rhoIRYCint; break;
1564  case 3 : FFTresult = rho_rs.rhoIRZCint; break;
1565  case 4 : FFTresult = rho_rs.VksHartCint; break;
1566  default: CkAbort("impossible iopt"); break;
1567  }//end switch
1568 
1569 //////////////////////////////////////////////////////////////////////////////
1570 /// Launch the communication
1571 
1572  //-----------------------------------------------------------------------------
1573  // Commlib launch :
1574 
1575 #ifdef _ERIC_SETS_UP_COMMLIB_
1576  switch(iopt){
1577  case 0 : if(config.useRInsRhoRP) commRealInstanceRx.beginIteration(); break;
1578  case 1 : if(config.useRInsIGXRhoRP) commRealIGXInstanceRx.beginIteration(); break;
1579  case 2 : if(config.useRInsIGYRhoRP) commRealIGYInstanceRx.beginIteration(); break;
1580  case 3 : if(config.useRInsIGZRhoRP) commRealIGZInstanceRx.beginIteration(); break;
1581  default: CkAbort("impossible iopt");break;
1582  }//end switch
1583 #endif
1584 
1585  //-----------------------------------------------------------------------------
1586  // Send the data : I have myNgridB values of y (gx,y) y=1...myNgridB and all gx
1587  // Send all the `y' I have for the gx range desired after transpose
1588 
1589  int ix = thisIndex.x;
1590  for(int ic = 0; ic < rhoRsubplanes; ic ++) { // chare arrays to which we will send
1591  int div = (ngridb/rhoRsubplanes); //parallelize y
1592  int rem = (ngridb % rhoRsubplanes);
1593  int add = (ic < rem ? 1 : 0);
1594  int max = (ic < rem ? ic : rem);
1595  int ist = div*ic + max; // start of y desired by chare ic
1596  int iend = ist + div + add; // end of y desired by chare ic
1597  int size = (iend-ist)*myNplane_rho; // data size : send all the gx I have
1598 
1599  int sendFFTDataSize = size;
1600  RhoGSFFTMsg *msg = new (sendFFTDataSize, 8 * sizeof(int)) RhoGSFFTMsg;
1601  msg->size = size;
1602  msg->iopt = iopt;
1603  msg->offset = thisIndex.y; // my chare index
1604  msg->num = myNplane_rho; // number of gx-lines I have.
1605  complex *data = msg->data; // data
1606 
1607  if(config.prioFFTMsg){
1608  CkSetQueueing(msg, CK_QUEUEING_IFIFO);
1609  *(int*)CkPriorityPtr(msg) = config.rhogpriority+thisIndex.y;
1610  }//endif
1611 
1612  for(int i=ist,koff=0;i<iend;i++,koff+=myNplane_rho){
1613  for(int k=koff,ii=i;k<myNplane_rho+koff;k++,ii+=ngridb){
1614  data[k] = FFTresult[ii];
1615  }//endfor
1616  }//endfor
1617 
1618  switch(iopt){
1619  case 0 : UrhoRealProxy[thisInstance.proxyOffset](ix,ic).acceptRhoGradVksGxToRx(msg);break;
1620  case 1 : UrhoRealProxy[thisInstance.proxyOffset](ix,ic).acceptRhoGradVksGxToRx(msg); break;
1621  case 2 : UrhoRealProxy[thisInstance.proxyOffset](ix,ic).acceptRhoGradVksGxToRx(msg); break;
1622  case 3 : UrhoRealProxy[thisInstance.proxyOffset](ix,ic).acceptRhoGradVksGxToRx(msg); break;
1623  case 4 : UrhoRealProxy[thisInstance.proxyOffset](ix,ic).acceptRhoGradVksGxToRx(msg); break;
1624  default: CkAbort("impossible iopt");break;
1625  }//end switch
1626 
1627 #ifdef _ERIC_SETS_UP_COMMLIB_
1628  switch(iopt){
1629  case 0 : UrhoGProxy[thisInstance.proxyOffset]_com(ic,0).acceptRhoGradVksGxToRx(msg);break;
1630  case 1 : UrhoGProxy[thisInstance.proxyOffset]IGX_com(ic,0).acceptRhoGradVksGxToRx(msg); break;
1631  case 2 : UrhoGProxy[thisInstance.proxyOffset]IGY_com(ic,0).acceptRhoGradVksGxToRx(msg); break;
1632  case 3 : UrhoGProxy[thisInstance.proxyOffset]IGZ_com(ic,0).acceptRhoGradVksGxToRx(msg); break;
1633  default: CkAbort("impossible iopt");break;
1634  }//end switch
1635 #endif
1636 
1637  }//end for : chare sending
1638 
1639  //-----------------------------------------------------------------------------
1640  // Commlib stop
1641 
1642 #ifdef _ERIC_SETS_UP_COMMLIB_
1643  switch(iopt){
1644  case 0 : if(config.useRInsRhoRP) commRealInstanceRx.endIteration(); break;
1645  case 1 : if(config.useRInsIGXRhoRP) commRealIGXInstanceRx.endIteration(); break;
1646  case 2 : if(config.useRInsIGYRhoRP) commRealIGYInstanceRx.endIteration(); break;
1647  case 3 : if(config.useRInsIGZRhoRP) commRealIGZInstanceRx.endIteration(); break;
1648  default: CkAbort("impossible iopt");break;
1649  }//end switch
1650 #endif
1651 
1652 //---------------------------------------------------------------------------
1653  }//end routine
1654 //////////////////////////////////////////////////////////////////////////////
1655 
1656 
1657 
1658 //////////////////////////////////////////////////////////////////////////////
1659 //////////////////////////////////////////////////////////////////////////////
1660 //////////////////////////////////////////////////////////////////////////////
1661 ///
1662 /// Double Transpose Bck Recv : A(gx,y,z) on the way to A(x,y,z)
1663 /// Recv (gx,z) parallel -> (y,z) parallel
1664 ///
1665 /// Invoked 5 times an algorithm step:
1666 /// case 1-3: Gradients
1667 /// case 0 : VksWhiteByrd
1668 /// case 4 : VksHarteext
1669 ///
1670 //////////////////////////////////////////////////////////////////////////////
1672 //////////////////////////////////////////////////////////////////////////////
1673 
1674  CPcharmParaInfo *sim = CPcharmParaInfo::get();
1675  int **listSubGx = sim->listSubGx;
1676  int *numSubGx = sim->numSubGx;
1677 
1678  int size = msg->size; // msg size
1679  int iopt = msg->iopt;
1680  int num = msg->num; // number of lines along `a' sent
1681  int offset = msg->offset; // chare array that sent the data
1682  complex *msgData = msg->data;
1683 
1684  CkAssert(size==myNgridb*num);
1685  CkAssert(rhoRsubplanes>1);
1686 
1687  complex *dataC;
1688  double *dataR;
1689  switch(iopt){
1690  case 0: dataC = rho_rs.densityC; dataR = rho_rs.density; break;
1691  case 1: dataC = rho_rs.rhoIRXC; dataR = rho_rs.rhoIRX; break;
1692  case 2: dataC = rho_rs.rhoIRYC; dataR = rho_rs.rhoIRY; break;
1693  case 3: dataC = rho_rs.rhoIRZC; dataR = rho_rs.rhoIRZ; break;
1694  case 4: dataC = rho_rs.VksHartC; dataR = rho_rs.VksHart; break;
1695  default: CkAbort("Impossible option\n"); break;
1696  }//endif
1697 
1698 //////////////////////////////////////////////////////////////////////////////
1699 /// Unpack the message
1700 
1701  countIntGtoR[iopt]++;
1702  if(countIntGtoR[iopt]==1){bzero(dataR,sizeof(double)*nptsExpndB);}
1703 
1704  int stride = ngrida/2+1;
1705  if(listSubFlag==1){
1706  for(int js=0,j=0;js<size;js+=num,j++){
1707  int jj = j*stride;
1708  for(int is=js,i=0;is<(num+js);is++,i++){
1709  dataC[(listSubGx[offset][i]+jj)] = msgData[is];
1710  }//endfor
1711  }//endfor
1712  }else{
1713  int nst = listSubGx[offset][0];
1714  for(int js=0,j=0;js<size;js+=num,j++){
1715  int jj = j*stride+nst;
1716  for(int is=js,i=jj;is<(num+js);is++,i++){
1717  dataC[i] = msgData[is];
1718  }//endfor
1719  }//endfor
1720  }//endif
1721 
1722  delete msg;
1723 
1724 //////////////////////////////////////////////////////////////////////////////
1725 /// Do the FFT when you have all the parts
1726 
1727  int done = 0;
1728  if(countIntGtoR[iopt]==rhoRsubplanes){
1729  done = 1;
1730  countIntGtoR[iopt]=0;
1731  FFTcache *fftcache = UfftCacheProxy[thisInstance.proxyOffset].ckLocalBranch();
1732 #if CMK_TRACE_ENABLED
1733  double StartTime=CmiWallTimer();
1734 #endif
1735  fftcache->doRhoFFTGxToRx_Rchare(dataC,dataR,nplane_rho_x,ngrida,myNgridb,iplane_ind);
1736 #if CMK_TRACE_ENABLED
1737  traceUserBracketEvent(doRhoFFTGxtoRx_, StartTime, CmiWallTimer());
1738 #endif
1739 
1740  }//endif
1741 
1742 //////////////////////////////////////////////////////////////////////////////
1743 /// When you have completed an FFT, you have some choices of what to do next
1744 
1745  if(done == 1){
1746 
1747  //---------------------------------------------------
1748  // If you have finished a gradient, increment counter.
1749  // If you have completed all gradients : Gradcorr.
1750 
1751  if(1 <= iopt && iopt <=3){
1752  doneGradRhoVks++;
1753  if(doneGradRhoVks==3){
1754  doneGradRhoVks=0;
1755  GradCorr();
1756  }//endif
1757  }//endif
1758 
1759  //---------------------------------------------------
1760  // if you have finished the whiteByrd : add it to vks
1761 
1762  if(iopt==0){addWhiteByrdVks();}
1763 
1764  //---------------------------------------------------
1765  // if you have finished the HartEext : add it to vks
1766 
1767  if(iopt==4){addHartEextVks();}
1768 
1769  }//endif : you have completed an fft
1770 
1771 //////////////////////////////////////////////////////////////////////////////
1772  }//end routine
1773 //////////////////////////////////////////////////////////////////////////////
1774 
1775 
1776 
1777 
1778 //////////////////////////////////////////////////////////////////////////////
1779 //////////////////////////////////////////////////////////////////////////////
1780 //////////////////////////////////////////////////////////////////////////////
1781 ///
1782 /// The gradient of the density is now completed. You can compute the
1783 /// GGA-DFT functional now. Density is now available to be used as scratch.
1784 ///
1785 /// Invoked once per algorithm step
1786 ///
1787 //////////////////////////////////////////////////////////////////////////////
1789 //////////////////////////////////////////////////////////////////////////////
1790 
1791  if(cp_grad_corr_on==0){
1792  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
1793  CkPrintf("Don't come in the grad corr routines when\n");
1794  CkPrintf("gradient corrections are off\n");
1795  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
1796  CkExit();
1797  }//endif
1798 
1799  double *density = rho_rs.density;
1800  double *Vks = rho_rs.Vks;
1801  double *rhoIRX = rho_rs.rhoIRX;
1802  double *rhoIRY = rho_rs.rhoIRY;
1803  double *rhoIRZ = rho_rs.rhoIRZ;
1804 
1805  int nf1 = ngrida;
1806  int nf2 = myNgridb;
1807  int nf3 = ngridc;
1808  int npts = config.numFFTPoints; // total number of points
1809  double *exc_gga_ret = &(rho_rs.exc_gga_ret);
1810 
1811 //////////////////////////////////////////////////////////////////////////////
1812 
1813 #ifdef _CP_DEBUG_RHOR_VKSC_
1814  char myFileName[MAX_CHAR_ARRAY_LENGTH];
1815  sprintf(myFileName, "BGradRho_Real_%d_%d.out", thisIndex.x,thisIndex.y);
1816  FILE *fp = fopen(myFileName,"w");
1817  for (int i = 0; i <ngridb*ngrida; i++){
1818  fprintf(fp,"%g %g %g %g\n",rho_rs.rhoIRX[i],rho_rs.rhoIRY[i],rho_rs.rhoIRZ[i],
1819  rho_rs.Vks[i]);
1820  }//endfor
1821  fclose(fp);
1822 #endif
1823 
1824 //////////////////////////////////////////////////////////////////////////////
1825 /// Compute the gradient corrected functional : Density is toast after this.
1826 
1827  rho_rs.exc_gga_ret = 0.0;
1828 #define GGA_ON
1829 #ifdef GGA_ON
1830 
1831 #if CMK_TRACE_ENABLED
1832  double StartTime=CmiWallTimer();
1833 #endif
1834  CPXCFNCTS::CP_getGGAFunctional(npts,nf1,nf2,nf3,density,rhoIRX,rhoIRY,rhoIRZ,
1835  Vks,thisIndex.x,exc_gga_ret,config.nfreq_xcfnctl);
1836 #if CMK_TRACE_ENABLED
1837  traceUserBracketEvent(GradCorrGGA_, StartTime, CmiWallTimer());
1838 #endif
1839 
1840 #endif // GGA ON
1841 
1842 //////////////////////////////////////////////////////////////////////////////
1843 /// Reduce the exchange correlation energy
1844 
1845  double exc[2];
1846  exc[0]=rho_rs.exc_ret;
1847  exc[1]=rho_rs.exc_gga_ret;
1848  contribute(2*sizeof(double),exc,CkReduction::sum_double);
1849 
1850 //////////////////////////////////////////////////////////////////////////////
1851 /// output
1852 
1853 #ifdef _CP_DEBUG_RHOR_VKSD_
1854  myFileName[MAX_CHAR_ARRAY_LENGTH];
1855  sprintf(myFileName, "AGradRho_Real_%d_%d.out", thisIndex.x,thisIndex.y);
1856  fp = fopen(myFileName,"w");
1857  for (int i = 0; i <ngridb*ngrida; i++){
1858  fprintf(fp,"%g %g %g %g\n",rho_rs.rhoIRX[i],rho_rs.rhoIRY[i],rho_rs.rhoIRZ[i],
1859  rho_rs.Vks[i]);
1860  }//endfor
1861  fclose(fp);
1862 #endif
1863 
1864 //////////////////////////////////////////////////////////////////////////////
1865 /// Start the white bird puppy : back fft of rhoirx, rhoiry, rhoirz
1866 
1867  whiteByrdFFT();
1868 
1869 //---------------------------------------------------------------------------
1870  }//end routine
1871 //////////////////////////////////////////////////////////////////////////////
1872 
1873 
1874 //////////////////////////////////////////////////////////////////////////////
1875 ///
1876 /// The white-bird term : First fwfft redefined delrho(r) to delrho(g)
1877 /// then send to RhoGspacePlane. RhoGspacePlane sends you back back another term.
1878 /// After this routine, rhoIRX, rhoIRY and rhoIRZ are `free'.
1879 ///
1880 /// Invoked once per algorithm step
1881 ///
1882 ///
1883 //////////////////////////////////////////////////////////////////////////////
1884 //////////////////////////////////////////////////////////////////////////////
1885 //////////////////////////////////////////////////////////////////////////////
1887 //////////////////////////////////////////////////////////////////////////////
1888 /// Constants and pointers
1889 
1890  FFTcache *fftcache = UfftCacheProxy[thisInstance.proxyOffset].ckLocalBranch();
1891  double *rhoIRX = rho_rs.rhoIRX;
1892  double *rhoIRY = rho_rs.rhoIRY;
1893  double *rhoIRZ = rho_rs.rhoIRZ;
1894  complex *rhoIRXC = rho_rs.rhoIRXC;
1895  complex *rhoIRYC = rho_rs.rhoIRYC;
1896  complex *rhoIRZC = rho_rs.rhoIRZC;
1897 
1898  int ioptx = 1;
1899  int iopty = 2;
1900  int ioptz = 3;
1901 
1902 //////////////////////////////////////////////////////////////////////////////
1903 /// I) rhoIRX : Scale, Real to complex FFT, perform FFT, transpose
1904 
1905 #if CMK_TRACE_ENABLED
1906  double StartTime=CmiWallTimer();
1907 #endif
1908 
1909  rho_rs.scale(rhoIRX,FFTscale);
1910 
1911  if(rhoRsubplanes==1){
1912  fftcache->doRhoFFTRtoG_Rchare(rhoIRXC,rhoIRX,nplane_rho_x,ngrida,ngridb,iplane_ind);
1913  if(config.rhoGToRhoRMsgComb==0){sendPartlyFFTtoRhoG(ioptx);}
1914  }else{
1915  fftcache->doRhoFFTRxToGx_Rchare(rhoIRXC,rhoIRX,nplane_rho_x,ngrida,myNgridb,iplane_ind);
1916  sendPartlyFFTRyToGy(ioptx);// transpose and do the y-gy fft
1917  }//endif
1918 
1919 #if CMK_TRACE_ENABLED
1920  traceUserBracketEvent(WhiteByrdFFTX_, StartTime, CmiWallTimer());
1921 #endif
1922 
1923 //////////////////////////////////////////////////////////////////////////////
1924 /// II) rhoIRY : Scale, real to complex FFT, perform FFT, transpose
1925 
1926 #if CMK_TRACE_ENABLED
1927  StartTime=CmiWallTimer();
1928 #endif
1929 
1930  rho_rs.scale(rhoIRY,FFTscale);
1931 
1932  if(rhoRsubplanes==1){
1933  fftcache->doRhoFFTRtoG_Rchare(rhoIRYC,rhoIRY,nplane_rho_x,ngrida,ngridb,iplane_ind);
1934  if(config.rhoGToRhoRMsgComb==0){sendPartlyFFTtoRhoG(iopty);}
1935  }else{
1936  fftcache->doRhoFFTRxToGx_Rchare(rhoIRYC,rhoIRY,nplane_rho_x,ngrida,myNgridb,iplane_ind);
1937  sendPartlyFFTRyToGy(iopty); // transpose and do the y-gy fft
1938  }//endif
1939 
1940 #if CMK_TRACE_ENABLED
1941  traceUserBracketEvent(WhiteByrdFFTY_, StartTime, CmiWallTimer());
1942 #endif
1943 
1944 //////////////////////////////////////////////////////////////////////////////
1945 /// III) rhoIRZ : Scale, real to complex FFT, perform FFT, transpose
1946 
1947 #if CMK_TRACE_ENABLED
1948  StartTime=CmiWallTimer();
1949 #endif
1950 
1951  rho_rs.scale(rhoIRZ,FFTscale);
1952  if(rhoRsubplanes==1){
1953  fftcache->doRhoFFTRtoG_Rchare(rhoIRZC,rhoIRZ,nplane_rho_x,ngrida,ngridb,iplane_ind);
1954  if(config.rhoGToRhoRMsgComb==0){sendPartlyFFTtoRhoG(ioptz);}
1955  }else{
1956  fftcache->doRhoFFTRxToGx_Rchare(rhoIRZC,rhoIRZ,nplane_rho_x,ngrida,myNgridb,iplane_ind);
1957  sendPartlyFFTRyToGy(ioptz);// transpose and do the y-gy fft
1958  }//endif
1959 
1960 #if CMK_TRACE_ENABLED
1961  traceUserBracketEvent(WhiteByrdFFTZ_, StartTime, CmiWallTimer());
1962 #endif
1963 
1964 //////////////////////////////////////////////////////////////////////////////
1965 /// Send all 3 components at once
1966 
1967  if(rhoRsubplanes==1 && config.rhoGToRhoRMsgComb==1){
1969  }//endif
1970 
1971 //////////////////////////////////////////////////////////////////////////////
1972  }//end routine
1973 //////////////////////////////////////////////////////////////////////////////
1974 
1975 
1976 
1977 //////////////////////////////////////////////////////////////////////////////
1978 //////////////////////////////////////////////////////////////////////////////
1979 //////////////////////////////////////////////////////////////////////////////
1980 ///
1981 /// The white bird vks correction is returned from RhoG : VksW(gx,gy,z)
1982 /// This routine recvs the transpose {gx,gy} to (gx,z)
1983 ///
1984 /// Invoked once per algorithm step
1985 ///
1986 /// After this routine, VksW(gx,gy,z) -> VksW(x,y,gz) and is added vksTot();
1987 ///
1988 //////////////////////////////////////////////////////////////////////////////
1990 //////////////////////////////////////////////////////////////////////////////
1991 
1992 #ifdef _CP_DEBUG_RHOR_VERBOSE_
1993  CkPrintf("WhiteByrd Data from RhoG arriving at RhoR : %d %d\n",
1994  thisIndex.x,thisIndex.y);
1995 #endif
1996 
1997 //////////////////////////////////////////////////////////////////////////////
1998 /// Local Pointers
1999 
2000  CPcharmParaInfo *sim = CPcharmParaInfo::get();
2001  int nchareG = sim->nchareRhoG;
2002  int **tranUnpack = sim->index_tran_upack_rho;
2003  int *nlines_per_chareG = sim->nlines_per_chareRhoG;
2004  int ***tranupack_rhoY = sim->index_tran_upack_rho_y;
2005  int **nlines_per_chareRhoGY = sim->nline_send_rho_y;
2006  int iy = thisIndex.y;
2007 
2008  int size = msg->size;
2009  int Index = msg->senderIndex;
2010  int iopt = msg->iopt;
2011  complex *partiallyFFTd = msg->data;
2012 
2013  int mySize;
2014  int nptsExpnd;
2015  if(rhoRsubplanes==1){
2016  mySize = nlines_per_chareG[Index];
2017  nptsExpnd = nptsExpndB;
2018  }else{
2019  mySize = nlines_per_chareRhoGY[Index][iy];
2020  nptsExpnd = nptsExpndA;
2021  }//endif
2022 
2023 //////////////////////////////////////////////////////////////////////////////
2024 /// Perform some error checking
2025 
2026  countWhiteByrd++;
2027  if (countWhiteByrd > recvCountFromGRho) {
2028  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
2029  CkPrintf("Mismatch in allowed rho_gspace chare arrays : %d %d %d %d\n",
2030  countWhiteByrd,nchareG,thisIndex.x,thisIndex.y);
2031  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
2032  CkExit();
2033  }//endif
2034 
2035  if(size!=mySize){
2036  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
2037  CkPrintf("Dude.2, %d != %d for rho chare %d %d\n",size,mySize,
2038  thisIndex.y,Index);
2039  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
2040  CkExit();
2041  }//endif
2042 
2043 //////////////////////////////////////////////////////////////////////////////
2044 /// unpack the data and delete the message : The density is free for use as scr
2045 
2046  double *dataR;
2047  complex *dataC;
2048  if(rhoRsubplanes==1){
2049  dataR = rho_rs.density;
2050  dataC = rho_rs.densityC;
2051  }else{
2052  dataR = rho_rs.rhoIRXint;
2053  dataC = rho_rs.rhoIRXCint;
2054  }//endif
2055 
2056  // zero because input data does not fill the plane
2057  if(countWhiteByrd==1){bzero(dataR,sizeof(double)*nptsExpnd);}
2058 
2059  if(rhoRsubplanes==1){
2060  for(int i=0;i<size;i++){
2061  dataC[tranUnpack[Index][i]] = partiallyFFTd[i];
2062  }//endfor
2063  }else{
2064  for(int i=0;i<size;i++){
2065  dataC[tranupack_rhoY[Index][iy][i]] = partiallyFFTd[i];
2066  }//endfor
2067  }//endif
2068 
2069  delete msg;
2070 
2071 //////////////////////////////////////////////////////////////////////////////
2072 /// When you have all the messages, do the last fft, and add in the correction
2073 
2074  if(countWhiteByrd == recvCountFromGRho){
2075  countWhiteByrd=0;
2076 
2077 #if CMK_TRACE_ENABLED
2078  double StartTime=CmiWallTimer();
2079 #endif
2080  FFTcache *fftcache = UfftCacheProxy[thisInstance.proxyOffset].ckLocalBranch();
2081  if(rhoRsubplanes==1){
2082  fftcache->doRhoFFTGtoR_Rchare(dataC,dataR,nplane_rho_x,ngrida,ngridb,iplane_ind);
2083  addWhiteByrdVks();
2084  }else{
2085  fftcache->doRhoFFTGyToRy_Rchare(dataC,dataR,myNplane_rho,ngrida,ngridb,iplane_ind);
2087  }
2088 #if CMK_TRACE_ENABLED
2089  traceUserBracketEvent(PostByrdfwFFTGtoR_, StartTime, CmiWallTimer());
2090 #endif
2091 
2092  }//endif : communication from rhog has all arrived safely
2093 
2094 //////////////////////////////////////////////////////////////////////////////
2095  }//end routine
2096 //////////////////////////////////////////////////////////////////////////////
2097 
2098 
2099 
2100 //////////////////////////////////////////////////////////////////////////////
2101 //////////////////////////////////////////////////////////////////////////////
2102 //////////////////////////////////////////////////////////////////////////////
2103 ///
2104 /// Add the VksWhiteByrd to VksTot : Set the done flag.
2105 ///
2106 //////////////////////////////////////////////////////////////////////////////
2108 //////////////////////////////////////////////////////////////////////////////
2109 /// Add the whitebyrd contrib to vks
2110 
2111  int nptsExpnd = nptsExpndB;
2112  double *dataR = rho_rs.density; // whitebyrd correction stored in density
2113  double *Vks = rho_rs.Vks;
2114  for(int i=0;i<nptsExpnd;i++){Vks[i] -= dataR[i];}
2115 
2116 //////////////////////////////////////////////////////////////////////////////
2117 /// Our we done yet?
2118 
2119  doneWhiteByrd = true;
2120  doMulticastCheck();
2121 
2122 //////////////////////////////////////////////////////////////////////////////
2123  }//end routine
2124 //////////////////////////////////////////////////////////////////////////////
2125 
2126 
2127 
2128 //////////////////////////////////////////////////////////////////////////////
2129 //////////////////////////////////////////////////////////////////////////////
2130 //////////////////////////////////////////////////////////////////////////////
2131 /**
2132  *
2133  * Accept hartExt transpose data : receive VksHartEext(gx,gy,z) gx,z is parallel.
2134  * Since the message can come at any time memory, VksHart has to be ready to
2135  * receive it.
2136  *
2137  * Invoked once per algorithm step.
2138  *
2139  * After the routine, VksHartEext(gx,gy,z)--> VksHarteext(x,y,z) and is added
2140  * to Vkstot(x,y,z)
2141  *
2142  */
2143 //////////////////////////////////////////////////////////////////////////////
2145 //////////////////////////////////////////////////////////////////////////////
2146 /// Local pointers
2147 
2148  CPcharmParaInfo *sim = CPcharmParaInfo::get();
2149  int nchareG = sim->nchareRhoG;
2150  int **tranUnpack = sim->index_tran_upack_rho;
2151  int *nlines_per_chareG = sim->nlines_per_chareRhoG;
2152  int ***tranupack_rhoY = sim->index_tran_upack_eext_ys;
2153  int **nlines_per_chareRhoGY = sim->nline_send_eext_y;
2154  int iy = thisIndex.y;
2155 
2156  int size = msg->size;
2157  int IndexS = msg->index;
2158  int Index = msg->senderBigIndex;
2159  int istrt_lines = msg->senderStrtLine;
2160  int iopt = msg->iopt;
2161  complex *partiallyFFTd = msg->data;
2162 
2163  double *dataR;
2164  complex *dataC;
2165  int nptsExpnd;
2166  int mySize;
2167  if(rhoRsubplanes==1){
2168  dataR = rho_rs.VksHart;
2169  dataC = rho_rs.VksHartC;
2170  nptsExpnd = nptsExpndB;
2171  mySize = size;
2172  }else{
2173  dataR = rho_rs.VksHartint;
2174  dataC = rho_rs.VksHartCint;
2175  nptsExpnd = nptsExpndA;
2176  mySize = nlines_per_chareRhoGY[IndexS][iy];
2177  }//endif
2178 
2179  CkAssert(size==mySize);
2180 
2181 //////////////////////////////////////////////////////////////////////////////
2182 
2183 #ifdef _CP_DEBUG_RHOR_VERBOSE_
2184  CkPrintf("Data from RhoG arriving at RhoR : %d %d %d %d\n",
2185  thisIndex.x,thisIndex.y,iopt,countGradVks[iopt]);
2186 #endif
2187 
2188  CkAssert(iopt==0);
2189  if(size!=mySize){
2190  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
2191  CkPrintf("Dude.2, %d != %d for rho chare %d %d : %d\n",size,mySize,
2192  thisIndex.y,IndexS,Index);
2193  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
2194  CkExit();
2195  }//endif
2196 
2197 //////////////////////////////////////////////////////////////////////////////
2198 /// Copy out the hart-eext contrib to vks : iopt==0
2199 
2200  countGradVks[iopt]++;
2201  if(countGradVks[iopt]==1){bzero(dataR,sizeof(double)*nptsExpnd);}
2202 
2203  if(rhoRsubplanes==1){
2204  for(int i=0,j=istrt_lines;i<size;i++,j++){
2205  dataC[tranUnpack[Index][j]] = partiallyFFTd[i];
2206  }//endfor
2207  }else{
2208  for(int i=0;i<size;i++){
2209  dataC[tranupack_rhoY[IndexS][iy][i]] = partiallyFFTd[i];
2210  }//endfor
2211  }//endif
2212 
2213  delete msg;
2214 
2215 //////////////////////////////////////////////////////////////////////////////
2216 /// fft the puppy if you've got it all : only atmtyp index=1 of ghart sends rho_real
2217 
2218  if (countGradVks[iopt] == recvCountFromGHartExt){
2219  countGradVks[iopt]=0;
2220 
2221  FFTcache *fftcache = UfftCacheProxy[thisInstance.proxyOffset].ckLocalBranch();
2222 #if CMK_TRACE_ENABLED
2223  double StartTime=CmiWallTimer();
2224 #endif
2225  if(rhoRsubplanes==1){
2226  fftcache->doRhoFFTGtoR_Rchare(dataC,dataR,nplane_rho_x,ngrida,ngridb,iplane_ind);
2227  addHartEextVks();
2228  }else{
2229  fftcache->doRhoFFTGyToRy_Rchare(dataC,dataR,myNplane_rho,ngrida,ngridb,iplane_ind);
2231  }//endif
2232 
2233 #if CMK_TRACE_ENABLED
2234  traceUserBracketEvent(fwFFTGtoR0_, StartTime, CmiWallTimer());
2235 #endif
2236  }//endif : communication from rhog
2237 
2238 
2239 //////////////////////////////////////////////////////////////////////////////
2240  }//end routine
2241 //////////////////////////////////////////////////////////////////////////////
2242 
2243 
2244 //////////////////////////////////////////////////////////////////////////////
2245 //////////////////////////////////////////////////////////////////////////////
2246 //////////////////////////////////////////////////////////////////////////////
2247 ///
2248 /// Add the VksHartEext to VksTot : Set the done flag.
2249 ///
2250 //////////////////////////////////////////////////////////////////////////////
2252 //////////////////////////////////////////////////////////////////////////////
2253 /// Add the whitebyrd contrib to vks
2254 
2255  int nptsExpnd = nptsExpndB;
2256  double *dataR = rho_rs.VksHart;
2257  double *Vks = rho_rs.Vks;
2258  for(int i=0;i<nptsExpnd;i++){Vks[i]+=dataR[i];}
2259 
2260 //////////////////////////////////////////////////////////////////////////////
2261 /// Our we done yet?
2262 
2263  doneHartVks = true;
2264  doMulticastCheck();
2265 
2266 //////////////////////////////////////////////////////////////////////////////
2267  }//end routine
2268 //////////////////////////////////////////////////////////////////////////////
2269 
2270 
2271 //////////////////////////////////////////////////////////////////////////////
2272 //////////////////////////////////////////////////////////////////////////////
2273 //////////////////////////////////////////////////////////////////////////////
2274 ///
2275 /// Under ees-eext Rhart chare reports its completion : Set the done flag.
2276 ///
2277 //////////////////////////////////////////////////////////////////////////////
2279  countRHart++;
2280  if(countRHart==countRHartValue){
2281  doneRHart=true;
2282  doMulticastCheck();
2283  countRHart=0;
2284  }//endif
2285 }
2286 //////////////////////////////////////////////////////////////////////////////
2287 
2288 
2289 //////////////////////////////////////////////////////////////////////////////
2290 //////////////////////////////////////////////////////////////////////////////
2291 //////////////////////////////////////////////////////////////////////////////
2292 ///
2293 /// If all the parts of exc-eext-hart are done, invoke blast of vks to states
2294 ///
2295 //////////////////////////////////////////////////////////////////////////////
2297 //////////////////////////////////////////////////////////////////////////////
2298 
2299  if(doneWhiteByrd && doneRHart && doneHartVks){doMulticast();}
2300 
2301 //////////////////////////////////////////////////////////////////////////////
2302  }//end routine
2303 //////////////////////////////////////////////////////////////////////////////
2304 
2305 
2306 
2307 //////////////////////////////////////////////////////////////////////////////
2308 /// Send vks back to the states
2309 //////////////////////////////////////////////////////////////////////////////
2310 //////////////////////////////////////////////////////////////////////////////
2311 //////////////////////////////////////////////////////////////////////////////
2313 //////////////////////////////////////////////////////////////////////////////
2314 
2315  if(!doneWhiteByrd || !doneHartVks || !doneRHart){
2316  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
2317  CkPrintf("Flow of Control Error : Attempting to rho multicast\n");
2318  CkPrintf("without harteext or gradcorr (whitebyrd) \n");
2319  CkPrintf("without rhodensity or rhart \n");
2320  CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
2321  CkExit();
2322  }//endif
2323 
2324  // overkill on the resetting
2325  countRHart=0;
2326  doneWhiteByrd = false;
2327  doneHartVks = false;
2328  doneRHart = false;
2329 
2330 //////////////////////////////////////////////////////////////////////////////
2331 /// Send vks back to the states in real space
2332 
2333  int dataSize = ngrida*myNgridb;
2334  double *Vks = rho_rs.Vks;
2335 
2336  if ((config.useGMulticast+config.useCommlibMulticast)!=1) {
2337  CkAbort("No multicast strategy\n");
2338  }//endif
2339 
2340  if (config.useGMulticast || config.useCommlibMulticast) {
2341 
2342  ProductMsg *msg = new (dataSize, 0) ProductMsg;
2343 
2344  // ADD new index here for Y
2345  msg->idx = thisIndex.x;
2346  msg->datalen = dataSize;
2347  msg->hops = 0;
2348  msg->subplane = thisIndex.y;
2349  double *dataR = msg->data;
2350 
2351  rho_rs.uPackShrink(dataR,Vks); // down pack Vks for the send
2352 //#define _CP_DEBUG_RHOR_VKSE_
2353 #ifdef _CP_DEBUG_RHOR_VKSE_
2354  char myFileName[MAX_CHAR_ARRAY_LENGTH];
2355  sprintf(myFileName, "vks_rho_Real_%d_%d.out", thisIndex.x,thisIndex.y);
2356  FILE *fp = fopen(myFileName,"w");
2357  for (int j = 0, iii=0; j <myNgridb; j++){
2358  for (int i = 0; i <ngrida; i++,iii++){
2359  fprintf(fp,"%d %d %g\n",i,j+myBoff,dataR[iii]);
2360  }}//endfor
2361  fclose(fp);
2362  UrhoRealProxy[thisInstance.proxyOffset](0,0).exitForDebugging();
2363 #else
2364  /*
2365 #ifdef OLD_COMMLIB
2366  if(config.useCommlibMulticast){mcastInstance.beginIteration();}
2367 #else
2368  if(config.useCommlibMulticast){ComlibBegin(realSpaceSectionCProxy);}
2369 #endif
2370  */
2371  for(int kp=0;kp<config.UberJmax;kp++)
2372  {
2373  ProductMsg *loopm;
2374  if(kp+1<config.UberJmax)
2375  {
2376 
2377  // copying a message is perilous
2378  loopm = new (dataSize, 0) ProductMsg;
2379  loopm->idx = thisIndex.x;
2380  loopm->datalen = dataSize;
2381  loopm->hops = 0;
2382  loopm->subplane = thisIndex.y;
2383  memcpy(loopm->data, msg->data, msg->datalen* sizeof(double));
2384  }
2385  else
2386  loopm=msg;
2387  if(config.useCommlibMulticast){
2388  realSpaceSectionCProxyA[kp].acceptProduct(loopm);
2389  }else{
2390  realSpaceSectionProxyA[kp].acceptProduct(loopm);
2391 
2392  }//enddif
2393  } //endfor kp
2394  /*
2395 #ifdef OLD_COMMLIB
2396  if(config.useCommlibMulticast){mcastInstance.endIteration();}
2397 #else
2398  if(config.useCommlibMulticast){ComlibEnd(realSpaceSectionCProxy);}
2399 #endif
2400  */
2401 #ifdef _CP_SUBSTEP_TIMING_
2402  if(rhoKeeperId>0)
2403  {
2404  double rhoend=CmiWallTimer();
2405  contribute(sizeof(double), &rhoend, CkReduction::max_double, CkCallback(CkIndex_TimeKeeper::collectEnd(NULL),0,TimeKeeperProxy),rhoKeeperId);
2406  }
2407 #endif
2408 
2409 #endif
2410  }//endif
2411 
2412 //////////////////////////////////////////////////////////////////////////////
2413  }//end routine
2414 //////////////////////////////////////////////////////////////////////////////
2415 
2416 
2417 //////////////////////////////////////////////////////////////////////////////
2418 ///////////////////////////////////////////////////////////////////////////cc
2419 //////////////////////////////////////////////////////////////////////////////
2420 /// Glenn's RhoReal exit
2421 //////////////////////////////////////////////////////////////////////////////
2423  countDebug++;
2424  if(countDebug==(rhoRsubplanes*ngridc)){
2425  countDebug=0;
2426  CkPrintf("I am in the exitfordebuging rhoreal puppy. Bye-bye\n");
2427  CkExit();
2428  }//endif
2429 }
2430 //////////////////////////////////////////////////////////////////////////////
2431 
2432 
2433 
2434 //////////////////////////////////////////////////////////////////////////////
2435 //////////////////////////////////////////////////////////////////////////////
2436 //////////////////////////////////////////////////////////////////////////////
2437 void CP_Rho_RealSpacePlane::ResumeFromSync(){
2438  /*
2439  if(config.useCommlibMulticast)
2440  ComlibResetSectionProxy(&realSpaceSectionCProxy);
2441  if(config.useRInsRhoGP)
2442  ComlibResetProxy(&UrhoGProxy[thisInstance.proxyOffset]_com);
2443  */
2444 }
2445 //////////////////////////////////////////////////////////////////////////////
2446 
2447 /*@}*/
2448 
2449 
2450 //////////////////////////////////////////////////////////////////////////////
2451 //////////////////////////////////////////////////////////////////////////////
2452 //////////////////////////////////////////////////////////////////////////////
2453 //! return tru if input is power of 2
2454 //////////////////////////////////////////////////////////////////////////////
2455 bool is_pow2(int input){
2456  int y=0;
2457  for(int x=0;x<32;x++){
2458  y = 1<<x;
2459  if(y==input){return true;}
2460  }//endfor
2461  return false;
2462 }
2463 //////////////////////////////////////////////////////////////////////////////
void acceptRhoGradVksGxToRx(RhoGSFFTMsg *msg)
Double Transpose Bck Recv : A(gx,y,z) on the way to A(x,y,z) Recv (gx,z) parallel -> (y...
void acceptDensity(CkReductionMsg *)
Data comes from StateRspacePlane once an algorithm step.
void sendPartlyFFTtoRhoG(int)
The Tranpose to G-space : A(gx,gy,z) on the way to A(gx,gy,gz) Change parallel by gx...
holds the UberIndex and the offset for proxies
Definition: Uber.h:68
void addHartEextVks()
Add the VksHartEext to VksTot : Set the done flag.
int nfreq_xcfnctl
CPXCFNCTS::CP_exc_calc, CPXCFNCTS::CP_getGGAFunctional ////////////////////////////////=.
Definition: configure.h:214
void doRhoFFTGtoR_Rchare(complex *, double *, int, int, int, int)
Definition: fftCache.C:1541
void addWhiteByrdVks()
Add the VksWhiteByrd to VksTot : Set the done flag.
void doRhoFFTRxToGx_Rchare(complex *, double *, int, int, int, int)
Definition: fftCache.C:1482
void exitForDebugging()
Glenn's RhoReal exit.
void whiteByrdFFT()
The white-bird term : First fwfft redefined delrho(r) to delrho(g) then send to RhoGspacePlane.
void doMulticastCheck()
If all the parts of exc-eext-hart are done, invoke blast of vks to states.
void acceptWhiteByrd(RhoRSFFTMsg *msg)
The white bird vks correction is returned from RhoG : VksW(gx,gy,z) This routine recvs the transpose ...
void launchEextRNlG()
The density is here : Launch ees NL and ees Ext routines.
void fftRhoRyToGy(int iopt)
Double Transpose Fwd FFT : A(gx,y,z) -> A(gx,gy,z)
void GradCorr()
The gradient of the density is now completed.
CkGroupID mCastGrpId
Multicast manager group that handles many mcast/redns in the code. Grep for info. ...
Definition: cpaimd.C:216
Add type declarations for simulationConstants class (readonly vars) and once class for each type of o...
void doRhoFFTGyToRy_Rchare(complex *, double *, int, int, int, int)
Definition: fftCache.C:1586
void doMulticast()
Send vks back to the states /////////////////////////////////////////////////////////////////////////...
void acceptHartVks(RhoHartRSFFTMsg *)
Accept hartExt transpose data : receive VksHartEext(gx,gy,z) gx,z is parallel.
void acceptGradRhoVks(RhoRSFFTMsg *)
Accept transpose data from RhoG : receive grad_rho(gy,gx,z)
void launchNLRealFFT()
Launch ees-nonlocal real here.
void handleDensityReduction()
Handle the memory cleanup and setting of flags when density has all arrived.
void init()
post constructor initialization
void sendPartlyFFTGxToRx(int)
Double Transpose Bck Send : A(gx,y,z) on the way to A(x,y,z) Send so (gx,z) parallel -> (y...
bool is_pow2(int)
return tru if input is power of 2
void energyComputation()
Compute one part of the EXC energy using PINY CP_exc_calc.
Some basic data structures and the array map classes are defined here.
void acceptRhoGradVksRyToGy(RhoGSFFTMsg *msg)
Double Transpose Fwd Recv : A(gx,y,z) on the way to A(gx,gy,z) Recv so that (y,z) parallel switched t...
void fftRhoRtoRhoG()
1) Perform FFT of density: Single Transpose method rho(x,y,z) —> rho(gx,gy,z) Double Transpose method...
void acceptGradRhoVksAll(RhoRSFFTMsg *msg)
Accept transpose data from RhoG : receive grad_rho(gy,gx,z)
void sendPartlyFFTRyToGy(int iopt)
Double Transpose Fwd Send : A(gx,y,z) on the way to A(gx,gy,z) Send so that (y,z) parallelism is swit...
Useful debugging flags.
void doRhoFFTGxToRx_Rchare(complex *, double *, int, int, int, int)
Definition: fftCache.C:1611
void doRhoFFTRtoG_Rchare(complex *, double *, int, int, int, int)
Definition: fftCache.C:1437
void RHartReport()
Under ees-eext Rhart chare reports its completion : Set the done flag.
void doRhoFFTRyToGy_Rchare(complex *, double *, int, int, int, int)
Definition: fftCache.C:1514
void pup(PUP::er &)
Pup my variables for migration.