CP_Rho_RealSpacePlane.C

Go to the documentation of this file.
00001 /*****************************************************************************
00002  * $Source: /cvsroot/leanCP/src_charm_driver/cp_density_ctrl/CP_Rho_RealSpacePlane.C,v $
00003  * $Author: bhatele $
00004  * $Date: 2007/12/05 08:33:19 $
00005  * $Revision: 1.70 $
00006  *****************************************************************************/
00007 
00008 //////////////////////////////////////////////////////////////////////////////
00009 //////////////////////////////////////////////////////////////////////////////
00010 //////////////////////////////////////////////////////////////////////////////
00011 /** \file CP_Rho_RealSpacePlane.C
00012  * This is the description of the "life" of a CP_Rho_RealSpacePlane object.
00013  *
00014  * At the start of the program, the constructor CP_Rho_RealSpacePlane() is called.
00015  * 
00016  * The CP_State_RealSpacePlanes send the squared magnitudes of the psi_r values 
00017  * using the acceptData() method. The squared magnitudes are summed across states.
00018  * A copy of this data is made, inverse fft is done in the z and x directions 
00019  * and sent to rhoGDensity. The other copy is processed using CP_exc_calc. 
00020  * Then the CP_Rho_RealSpacePlane object waits for a reply from the RhoGDensity 
00021  * object.
00022  *
00023  * The reply from RhoGDensity comes in the form of the method 
00024  * acceptDensityForSumming(). The data obtained from this reply is taken and
00025  * forward fft in z and x directions is performed. The resulting data is 
00026  * summed with the result of CP_exc_calc. The sum is sent to the 
00027  * CP_State_RealSpacePlane objects.
00028  */
00029 //////////////////////////////////////////////////////////////////////////////
00030 
00031 #include "charm++.h"
00032 #include <iostream.h>
00033 #include <fstream.h>
00034 #include <math.h>
00035 
00036 #include "../../include/debug_flags.h"
00037 #include "util.h"
00038 #include "cpaimd.h"
00039 #include "groups.h"
00040 #include "fftCacheSlab.h"
00041 #include "CP_State_Plane.h"
00042 #include "../../src_piny_physics_v1.0/include/class_defs/CP_OPERATIONS/class_cpxcfnctls.h"
00043 
00044 //////////////////////////////////////////////////////////////////////////////
00045 extern CProxy_TimeKeeper                 TimeKeeperProxy;
00046 extern CProxy_CP_State_RealSpacePlane    realSpacePlaneProxy;
00047 extern CProxy_CP_State_RealParticlePlane realParticlePlaneProxy;
00048 extern CProxy_CP_Rho_RealSpacePlane      rhoRealProxy;
00049 extern CProxy_CP_Rho_GSpacePlane         rhoGProxy;
00050 extern CProxy_CPcharmParaInfoGrp         scProxy;
00051 extern CProxy_FFTcache                   fftCacheProxy;
00052 extern CProxy_CP_Rho_RHartExt            rhoRHartExtProxy;
00053 extern CProxy_CP_State_GSpacePlane       gSpacePlaneProxy;
00054 
00055 extern ComlibInstanceHandle commRealInstance;
00056 extern ComlibInstanceHandle commRealIGXInstance;
00057 extern ComlibInstanceHandle commRealIGYInstance;
00058 extern ComlibInstanceHandle commRealIGZInstance;
00059 extern ComlibInstanceHandle mcastInstance;
00060 extern CkGroupID            mCastGrpId;
00061 
00062 extern Config    config;
00063 extern int       nstates;
00064 
00065 bool is_pow2(int );
00066 
00067 
00068 //#define  _CP_RHO_RSP_VERBOSE_OFF
00069 
00070 //////////////////////////////////////////////////////////////////////////////
00071 //////////////////////////////////////////////////////////////////////////////
00072 //////////////////////////////////////////////////////////////////////////////
00073 ///
00074 /// This class (array) accepts the real space densities from all the states
00075 /// Performs lots of operations to get exc, eext energies and vks
00076 ///
00077 //////////////////////////////////////////////////////////////////////////////
00078 CP_Rho_RealSpacePlane::CP_Rho_RealSpacePlane(int xdim, size2d yzdim,bool _useCommlib, 
00079                                              int _ees_eext_on,int _ngridcEext,
00080                                              int _rhokeeperid)
00081 //////////////////////////////////////////////////////////////////////////////
00082    {//begin routine
00083 //////////////////////////////////////////////////////////////////////////////
00084 
00085 #ifdef _CP_DEBUG_RHOR_VERBOSE_
00086     CkPrintf("[%d %d] RhoR constructs \n",thisIndex.x, thisIndex.y);
00087 #endif
00088 
00089 //////////////////////////////////////////////////////////////////////////////
00090 /// Get parameters from the globals/groups
00091 
00092     CPcharmParaInfo *sim = (scProxy.ckLocalBranch ())->cpcharmParaInfo;
00093 
00094     ngridcEext           = _ngridcEext;    // FFT sizes for rharteext
00095     ngrida               = sim->sizeX;     // FFT sizes for rho
00096     ngridb               = sim->sizeY;
00097     ngridc               = sim->sizeZ;
00098     nplane_rho_x         = sim->nplane_rho_x; // gx_max 
00099     listSubFlag          = sim->listSubFlag;
00100 
00101     iplane_ind           = thisIndex.y*ngridc + thisIndex.x;
00102 
00103     rhoRsubplanes        = config.rhoRsubplanes;
00104     CkAssert(nplane_rho_x >= rhoRsubplanes); // safety : should already be checked.
00105 
00106     cp_grad_corr_on      = sim->cp_grad_corr_on;
00107     ees_eext_on          = _ees_eext_on;
00108     rhoKeeperId          = _rhokeeperid;
00109 
00110     double vol           = sim->vol;
00111     int numFFT           = config.numFFTPoints;
00112     FFTscale     = 1.0/((double)numFFT);  // these are based on the full size
00113     volumeFactor = vol*FFTscale;
00114     probScale    = 1.0/vol;
00115 
00116 //////////////////////////////////////////////////////////////////////////////
00117 /// Compute number of messages to be received 
00118 
00119     int nchareRhoG=sim->nchareRhoG;
00120     if(rhoRsubplanes>1){
00121         recvCountFromGRho = 0;
00122         for(int i=0;i<nchareRhoG;i++){
00123           if(sim->nline_send_rho_y[i][thisIndex.y]>0){recvCountFromGRho++;}
00124         }//endfor
00125     }else{
00126         recvCountFromGRho=nchareRhoG;
00127     }//endif
00128 
00129     int nchareRhoGEext=sim->nchareRhoGEext;
00130     if(rhoRsubplanes>1){
00131        recvCountFromGHartExt = 0;
00132        for(int i=0;i<nchareRhoGEext;i++){
00133          if(sim->nline_send_eext_y[i][thisIndex.y]>0)recvCountFromGHartExt++;
00134        }//endfor
00135     }else{
00136       recvCountFromGHartExt=nchareRhoGEext;
00137     }//endif
00138 
00139 //////////////////////////////////////////////////////////////////////////////
00140 /// Parallelization 
00141 
00142    // Before transpose : rho(x,y,z) : parallelize y and z
00143     int div      = (ngridb/rhoRsubplanes); 
00144     int rem      = (ngridb % rhoRsubplanes);
00145     int max      = (thisIndex.y < rem ? thisIndex.y : rem);
00146     myNgridb     = (thisIndex.y<rem ? div+1 : div);  // number of y values/lines of x
00147     myBoff       = div*thisIndex.y + max;            // offset into y 
00148     nptsB        =  ngrida*myNgridb;                 // size of plane without extra room
00149     nptsExpndB   = (ngrida+2)*myNgridb;              // extra memory for RealToComplex FFT
00150 
00151    // After transpose : rho(gx,y,z) : parallelize gx and z
00152     if(rhoRsubplanes>1){
00153       myNplane_rho = sim->numSubGx[thisIndex.y];
00154     }else{
00155       myNplane_rho = nplane_rho_x;
00156     }//endif
00157     nptsA        = 2*myNplane_rho*ngridb;            // memory size fft in doubles
00158     nptsExpndA   = 2*myNplane_rho*ngridb;            // memory size fft in doubles
00159 
00160 //////////////////////////////////////////////////////////////////////////////    
00161 /// Set up the data class : mallocs rho,gradrho, etc.
00162 
00163     initRhoRealSlab(&rho_rs,ngrida,myNgridb,ngridc,myNplane_rho,ngridb,
00164                     thisIndex.x,thisIndex.y,rhoRsubplanes);
00165 
00166 //////////////////////////////////////////////////////////////////////////////
00167 /// Initialize counters, set booleans.myTime
00168 
00169     myTime          = 0;
00170     countDebug      = 0; // does nothing in the working code.
00171     countWhiteByrd  = 0;
00172     doneGradRhoVks  = 0;
00173     countRHart      = 0;
00174     countFFTRyToGy  = 0;
00175 
00176     countRHartValue = 1; if(thisIndex.x<(ngridcEext-rho_rs.sizeZ)){countRHartValue=2;}
00177     countRHartValue*=(config.nchareHartAtmT);
00178 
00179     doneHartVks     = false;
00180     doneRHart       = false;
00181     doneWhiteByrd   = false;
00182     for(int i=0;i<5;i++){countGradVks[i]=0;}///trls bkc-transpose rho(gx,gy,z):gxy->gx/z
00183     for(int i=0;i<5;i++){countIntGtoR[i]=0;}///trls bkc-int-transpose rho(gx,y,z):gx/z->yz
00184     for(int i=0;i<5;i++){countIntRtoG[i]=0;}///trls fwd-int-transpose rho(gx,y,z):yz->gx/z
00185 
00186 
00187 //////////////////////////////////////////////////////////////////////////////
00188 /// Migration
00189 
00190     usesAtSync = CmiTrue;
00191    //    if(config.lbdensity){
00192    //      setMigratable(true);
00193    //    }else{
00194     setMigratable(false);
00195    //    }//endif
00196 
00197 //////////////////////////////////////////////////////////////////////////////
00198    }//end routine
00199 //////////////////////////////////////////////////////////////////////////////
00200 
00201 /** post constructor initialization */
00202 //////////////////////////////////////////////////////////////////////////////
00203 //////////////////////////////////////////////////////////////////////////////
00204 //////////////////////////////////////////////////////////////////////////////
00205 void CP_Rho_RealSpacePlane::init(){
00206 
00207 /// make sections in the realSpacePlane array. These will be used when 
00208 /// computing real-space densities and multicasting v_ks values 
00209 
00210     CkMulticastMgr *mcastGrp = CProxy_CkMulticastMgr(mCastGrpId).ckLocalBranch();
00211     CkArrayIndexMax *elems   = new CkArrayIndexMax[nstates];
00212 
00213     int j;
00214     // section i has al the portions with all 
00215     CkArrayIndex2D idx(0, thisIndex.x);
00216     if(is_pow2(nstates)){
00217         for (j = 0; j < nstates; j++) {
00218           idx.index[0] = j^((thisIndex.x+thisIndex.y)%nstates);
00219             elems[j] = idx;
00220         }//endfor
00221     }else{
00222         for (j = 0; j < nstates; j++) {
00223             idx.index[0] = (j+thisIndex.x+thisIndex.y)%nstates;
00224             elems[j] = idx;
00225         }//endfor
00226     }//endif
00227 
00228     realSpaceSectionProxy = CProxySection_CP_State_RealSpacePlane::
00229         ckNew(realSpacePlaneProxy.ckGetArrayID(), elems, nstates);
00230 
00231     realSpaceSectionCProxy = CProxySection_CP_State_RealSpacePlane::
00232         ckNew(realSpacePlaneProxy.ckGetArrayID(), elems, nstates);
00233 
00234     realSpaceSectionProxy.ckDelegate
00235         (CProxy_CkMulticastMgr(mCastGrpId).ckLocalBranch());
00236 
00237     mcastGrp->setSection(realSpaceSectionProxy);
00238 
00239     ComlibAssociateProxy(&mcastInstance,realSpaceSectionCProxy);
00240 
00241     delete [] elems;    
00242 
00243     ProductMsg *dummyProductMessage = new (0) ProductMsg;    
00244     // inform realspace element of this section proxy.
00245     dummyProductMessage->subplane=thisIndex.y;
00246     realSpaceSectionProxy.init(dummyProductMessage);
00247 
00248     rhoGProxy_com    = rhoGProxy;
00249     rhoGProxyIGX_com = rhoGProxy;
00250     rhoGProxyIGY_com = rhoGProxy;
00251     rhoGProxyIGZ_com = rhoGProxy;
00252     if (config.useRInsRhoGP) 
00253         ComlibAssociateProxy(&commRealInstance,rhoGProxy_com);          
00254     if (config.useRInsIGXRhoGP) 
00255         ComlibAssociateProxy(&commRealIGXInstance,rhoGProxyIGX_com);          
00256     if (config.useRInsIGYRhoGP) 
00257         ComlibAssociateProxy(&commRealIGYInstance,rhoGProxyIGY_com);          
00258     if (config.useRInsIGZRhoGP) 
00259         ComlibAssociateProxy(&commRealIGZInstance,rhoGProxyIGZ_com);          
00260 }
00261 
00262 //////////////////////////////////////////////////////////////////////////////
00263 //////////////////////////////////////////////////////////////////////////////
00264 //////////////////////////////////////////////////////////////////////////////
00265 CP_Rho_RealSpacePlane::~CP_Rho_RealSpacePlane(){
00266 }
00267 //////////////////////////////////////////////////////////////////////////////
00268 
00269 
00270 //////////////////////////////////////////////////////////////////////////////
00271 //////////////////////////////////////////////////////////////////////////////
00272 //////////////////////////////////////////////////////////////////////////////
00273 ///  Pup my variables for migration
00274 //////////////////////////////////////////////////////////////////////////////
00275 void CP_Rho_RealSpacePlane::pup(PUP::er &p){
00276   ArrayElement2D::pup(p);
00277   p|listSubFlag;
00278   p|myTime;
00279   p|nplane_rho_x;
00280   p|ngrida;
00281   p|myNgridb;
00282   p|myNplane_rho;
00283   p|ngridb;
00284   p|ngridc;
00285   p|iplane_ind;
00286   p|nptsExpndA;
00287   p|nptsExpndB;
00288   p|nptsA;
00289   p|nptsB;
00290   p|ees_eext_on;
00291   p|ngridcEext;
00292   p|cp_grad_corr_on;
00293   p|FFTscale;        
00294   p|volumeFactor;        
00295   p|probScale;             
00296   p|rhoGHelpers;
00297   p|doneGradRhoVks;
00298   p|countWhiteByrd;
00299   p|doneWhiteByrd;
00300   p|doneHartVks;
00301   p|countDebug;
00302   p|rhoRsubplanes;
00303 
00304   PUParray(p,countGradVks,5);
00305   PUParray(p,countIntGtoR,5);
00306   PUParray(p,countIntGtoR,5);
00307 
00308   rho_rs.pup(p);   // pup your data class
00309 
00310  // Pupping Proxies???
00311   p|realSpaceSectionProxy;
00312   p|realSpaceSectionCProxy;
00313   if(p.isUnpacking()){
00314     rhoGProxy_com = rhoGProxy;
00315     rhoGProxyIGX_com = rhoGProxy;
00316     rhoGProxyIGY_com = rhoGProxy;
00317     rhoGProxyIGZ_com = rhoGProxy;
00318     if (config.useRInsRhoGP) 
00319         ComlibAssociateProxy(&commRealInstance,rhoGProxy_com);          
00320     if (config.useRInsIGXRhoGP) 
00321         ComlibAssociateProxy(&commRealIGXInstance,rhoGProxyIGX_com);          
00322     if (config.useRInsIGYRhoGP) 
00323         ComlibAssociateProxy(&commRealIGYInstance,rhoGProxyIGY_com);          
00324     if (config.useRInsIGZRhoGP) 
00325         ComlibAssociateProxy(&commRealIGZInstance,rhoGProxyIGZ_com);          
00326 
00327     }//endif
00328 
00329 //---------------------------------------------------------------------------
00330    }//end routine
00331 //////////////////////////////////////////////////////////////////////////////
00332 
00333 
00334 //////////////////////////////////////////////////////////////////////////////
00335 //////////////////////////////////////////////////////////////////////////////
00336 //////////////////////////////////////////////////////////////////////////////
00337 ///
00338 ///  Data comes from StateRspacePlane once an algorithm step.
00339 /** 
00340  * Here the density from all the states is added up. The data from all the
00341  * states is received via an array section reduction. Nothing happens in this
00342  * chare until the density arrives.
00343  */
00344 //////////////////////////////////////////////////////////////////////////////
00345 void CP_Rho_RealSpacePlane::acceptDensity(CkReductionMsg *msg) {
00346 //////////////////////////////////////////////////////////////////////////////
00347 
00348 #ifdef _CP_DEBUG_RHOR_VERBOSE_
00349     CkPrintf("RhoReal accepting Density %d %d %d\n",
00350               thisIndex.x,thisIndex.y,CkMyPe());
00351 #endif
00352 
00353 #ifdef _NAN_CHECK_
00354   for(int i=0;i<msg->getSize()/sizeof(double) ;i++){
00355       CkAssert(isnan(((double*) msg->getData())[i])==0);
00356   }//endif
00357 #endif
00358 #ifdef _CP_SUBSTEP_TIMING_
00359   if(rhoKeeperId>0){
00360       double rhostart=CmiWallTimer();
00361       CkCallback cb(CkIndex_TimeKeeper::collectStart(NULL),TimeKeeperProxy);
00362       contribute(sizeof(double),&rhostart,CkReduction::min_double, cb ,rhoKeeperId);
00363   }//endif
00364 #endif
00365 
00366 //////////////////////////////////////////////////////////////////////////////
00367 /// Set the flags : you are not done unless certain conditions apply.
00368 
00369    myTime++;
00370    doneHartVks   = false;
00371    doneWhiteByrd = false;
00372    doneRHart     = false;
00373    if(cp_grad_corr_on==0){doneWhiteByrd = true;}
00374    if(ees_eext_on==0)    {doneRHart     = true;}
00375 
00376 #ifdef _CP_DEBUG_HARTEEXT_OFF_
00377    doneHartVks    = true;
00378    doneRHart      = true;
00379 #endif
00380 
00381 //////////////////////////////////////////////////////////////////////////////
00382 /// Unpack into spread out form and delete the message
00383 
00384    double *realValues = (double *) msg->getData(); 
00385    double *density    = rho_rs.density;
00386    CkAssert(msg->getSize() == nptsB * sizeof(double));
00387 
00388    rho_rs.uPackScaleGrow(density,realValues,probScale);
00389 
00390    delete msg;
00391 
00392 //////////////////////////////////////////////////////////////////////////////
00393 /// If debugging, generate output!
00394 
00395 #ifdef _CP_DEBUG_RHOR_RHO_
00396       char myFileName[MAX_CHAR_ARRAY_LENGTH];
00397       sprintf(myFileName, "Rho_Real_%d_%d.out", thisIndex.x,thisIndex.y);
00398       FILE *fp = fopen(myFileName,"w");
00399         for (int i = 0; i <nptsB; i++){
00400           fprintf(fp,"%g\n",realValues[i]*probScale);
00401         }//endfor
00402       fclose(fp);
00403 #endif
00404 
00405 //////////////////////////////////////////////////////////////////////////////
00406 /// Compute the exchange correlation energy (density no-grad part)
00407 
00408   energyComputation();
00409 
00410 //////////////////////////////////////////////////////////////////////////////
00411 /// 2nd Launch real-space external-hartree and the G-space non-local
00412 /// The energy comp through RhoG is he more expensive critical path.
00413    launchEextRNlG();
00414 
00415 
00416 //////////////////////////////////////////////////////////////////////////////
00417   }//end routine
00418 //////////////////////////////////////////////////////////////////////////////
00419 
00420 
00421 //////////////////////////////////////////////////////////////////////////////
00422 //////////////////////////////////////////////////////////////////////////////
00423 //////////////////////////////////////////////////////////////////////////////
00424 ///
00425 /// The density is here : Launch ees NL and ees Ext routines
00426 ///
00427 /// Do this once an algorithm step
00428 ///
00429 //////////////////////////////////////////////////////////////////////////////
00430 void CP_Rho_RealSpacePlane::launchEextRNlG() {
00431 //////////////////////////////////////////////////////////////////////////////
00432 /// Launch the external energy computation in r-space :
00433 ///   rhart has the same rhoRsubplanes for simplicity.
00434 
00435 #ifndef _CP_DEBUG_HARTEEXT_OFF_
00436   if(ees_eext_on==1){
00437     int div    = (ngridcEext / ngridc);
00438     int rem    = (ngridcEext % ngridc);
00439     int ind    = thisIndex.x+ngridc;
00440     if(div>1){
00441       CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
00442       CkPrintf("Eext Grid size too large for launch Scheme\n");
00443       CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
00444       CkExit();
00445     }//endif
00446 #ifdef _CP_RHO_RSP_VERBOSE_
00447     CkPrintf("HI, I am r-rho chare %d lauchning %d : ngrids %d %d : %d\n",
00448              thisIndex.x,thisIndex.x,ngridcEext,ngridc,rem);
00449 #endif
00450     for(int j=0;j<config.nchareHartAtmT;j++){
00451       rhoRHartExtProxy(thisIndex.x,thisIndex.y,j).startEextIter();
00452       if(thisIndex.x<rem){
00453 #ifdef _CP_RHO_RSP_VERBOSE_
00454         CkPrintf("HI, I am r-rho chare %d also lauchning %d\n",thisIndex.x,ind);
00455 #endif
00456         rhoRHartExtProxy(ind,thisIndex.y,j).startEextIter();
00457       }//endif : the launch
00458     }//endfor : atmTyp parallism
00459   }//endif : Launch is needed
00460 #endif
00461 
00462 //////////////////////////////////////////////////////////////////////////////
00463 /// Launch nonlocal g space if it wasn't done in RS
00464 ///  Spread the launch over all the rhoRchares you can.
00465 
00466   CPcharmParaInfo *sim  = (scProxy.ckLocalBranch ())->cpcharmParaInfo;
00467   if(sim->ees_nloc_on==1 && config.launchNLeesFromRho==1){ 
00468 
00469       CkAssert(rho_rs.sizeZ>=config.nchareG);
00470       if(thisIndex.x<config.nchareG){
00471           int nstates = config.nstates; 
00472          int div     = (nstates/rhoRsubplanes);
00473          int rem     = (nstates % rhoRsubplanes);
00474          int add     = (thisIndex.y < rem ? 1 : 0);
00475          int max     = (thisIndex.y < rem ? thisIndex.y : rem);
00476          int ist     = div*thisIndex.y + max;
00477          int iend    = ist + div + add;
00478           for(int ns=ist;ns<iend;ns++){
00479            //           CkPrintf("RhoRP[%d,%d] triggering NL %d %d \n",
00480            //                    thisIndex.x, thisIndex.y, ns, thisIndex.x);
00481            CkAssert(ns<config.nstates);
00482            //           CkAssert(thisIndex.x<32);
00483            gSpacePlaneProxy(ns,thisIndex.x).startNLEes(false,myTime);
00484          }//endfor
00485        }//endif
00486   }//endif
00487 
00488 //----------------------------------------------------------------------------
00489    }//end routine 
00490 //////////////////////////////////////////////////////////////////////////////
00491 
00492 
00493 //////////////////////////////////////////////////////////////////////////////
00494 //////////////////////////////////////////////////////////////////////////////
00495 //////////////////////////////////////////////////////////////////////////////
00496 /** 
00497  * Compute one part of the EXC energy using PINY CP_exc_calc.
00498  * This is done once an algorithm step
00499  */
00500 //////////////////////////////////////////////////////////////////////////////
00501 void CP_Rho_RealSpacePlane::energyComputation(){
00502 //////////////////////////////////////////////////////////////////////////////
00503 
00504 #ifdef _CP_DEBUG_RHOR_VERBOSE_
00505    CkPrintf("In RhoRealSpacePlane[%d] energyComp %d %d\n",thisIndex.x, 
00506             CkMyPe(),CmiMemoryUsage());
00507 #endif
00508 
00509 //////////////////////////////////////////////////////////////////////////////
00510 
00511    double *density  = rho_rs.density;
00512    double *Vks      = rho_rs.Vks;
00513    int nf1          = ngrida;
00514    int nf2          = myNgridb;
00515    int nf3          = ngridc;
00516    int npts         = config.numFFTPoints;  // total number of points
00517    double *exc_ret  = &(rho_rs.exc_ret);
00518    double *muxc_ret = &(rho_rs.muxc_ret);
00519 
00520 //////////////////////////////////////////////////////////////////////////////
00521 /// Perform exchange correlation computation (no grad corr here).
00522 
00523    CPXCFNCTS::CP_exc_calc(npts,nf1,nf2,nf3,density,Vks,exc_ret,muxc_ret);
00524 
00525 #ifdef CMK_VERSION_BLUEGENE
00526    CmiNetworkProgress();
00527 #endif
00528 
00529    if(cp_grad_corr_on==0){
00530      double exc[2];
00531      exc[0]=rho_rs.exc_ret;
00532      exc[1]=0.0;
00533      contribute(2*sizeof(double),exc,CkReduction::sum_double);
00534    }//endif
00535 
00536 //////////////////////////////////////////////////////////////////////////////
00537 /// Invoke FFT to take rho(r) to rho(g) : do not over-write the density!!
00538 
00539    fftRhoRtoRhoG();
00540 
00541 //////////////////////////////////////////////////////////////////////////////
00542   }//end routine
00543 //////////////////////////////////////////////////////////////////////////////
00544 
00545 
00546 //////////////////////////////////////////////////////////////////////////////
00547 //////////////////////////////////////////////////////////////////////////////
00548 //////////////////////////////////////////////////////////////////////////////
00549 ///
00550 /// 1) Perform FFT of density: Single Transpose method rho(x,y,z) ---> rho(gx,gy,z)
00551 ///                            Double Transpose method rho(x,y,z) ---> rho(gx,y,z)
00552 ///
00553 /// 2) launch the real space part of the non-local 
00554 ///
00555 ///    Routine invoked once an algorithm step.
00556 ///
00557 //////////////////////////////////////////////////////////////////////////////
00558 void CP_Rho_RealSpacePlane::fftRhoRtoRhoG(){
00559 //////////////////////////////////////////////////////////////////////////////
00560 
00561 #ifdef _CP_DEBUG_RHOR_VERBOSE_
00562   CkPrintf("In RhoRealSpacePlane[%d %d] FFT_RSpacetoGSpace %d %d\n",thisIndex.x, 
00563                    thisIndex.y, CkMyPe(),CmiMemoryUsage());
00564 #endif
00565 
00566 //////////////////////////////////////////////////////////////////////////////
00567 /// FFT myself back to G-space part way
00568 
00569   FFTcache *fftcache = fftCacheProxy.ckLocalBranch();  
00570   double  *density   = rho_rs.density;  // we need to save the density and vks.
00571   double  *dataR     = rho_rs.rhoIRX;   // rhoirx is around doing nothing now
00572   complex *dataC     = rho_rs.rhoIRXC;  // so we can use it to store the FFT
00573 
00574 #ifndef CMK_OPTIMIZE
00575   double StartTime=CmiWallTimer();
00576 #endif
00577 
00578   rho_rs.uPackScale(dataR,density,volumeFactor);  // can't avoid the CmiMemcpy-scaling
00579   if(rhoRsubplanes>1){
00580     fftcache->doRhoFFTRxToGx_Rchare(dataC,dataR,nplane_rho_x,ngrida,myNgridb,iplane_ind);
00581   }else{
00582     fftcache->doRhoFFTRtoG_Rchare(dataC,dataR,nplane_rho_x,ngrida,ngridb,iplane_ind);
00583   }//endif
00584 
00585 #ifndef CMK_OPTIMIZE
00586   traceUserBracketEvent(RhoRtoGFFT_, StartTime, CmiWallTimer());    
00587 #endif
00588 
00589 #ifdef CMK_VERSION_BLUEGENE
00590   CmiNetworkProgress();
00591 #endif
00592 
00593 
00594 //////////////////////////////////////////////////////////////////////////////
00595 /// Launch the transpose :go directly to rhog if the 1 transpose method is on.
00596 ///                     : do an internal transpose if the 2 transpose method is on.
00597 
00598 //#define DEBUG_INT_TRANS_FWD
00599   int iopt = 0;
00600   if(rhoRsubplanes>1){
00601     sendPartlyFFTRyToGy(iopt);    // double transpose method (yz ---> gx,z)
00602   }else{ 
00603 #ifndef DEBUG_INT_TRANS_FWD
00604     sendPartlyFFTtoRhoG(iopt);    // single transpose method (z ---> gx,gy)
00605 #else
00606     char name[100];
00607     sprintf(name,"partFFTGxGyZ%d.out.%d.%d",rhoRsubplanes,thisIndex.x,thisIndex.y);
00608     FILE *fp = fopen(name,"w");
00609     for(int ix =0;ix<nplane_rho_x;ix++){
00610       for(int iy =0;iy<ngridb;iy++){
00611         int i = iy*(ngrida/2+1) + ix;
00612         fprintf(fp,"%d %d : %g %g\n",iy,ix,dataC[i].re,dataC[i].im);
00613       }//endfor
00614     }//endof
00615     fclose(fp);
00616     rhoRealProxy(0,0).exitForDebugging();
00617 #endif
00618   }//endif
00619 
00620 //////////////////////////////////////////////////////////////////////////////
00621 /// Launch non-local real space FFT : allow NL to advance after density works a bit
00622 /// Now that we have previously done our bit for the critical path through rhog
00623 
00624   launchNLRealFFT();
00625 
00626 //////////////////////////////////////////////////////////////////////////////
00627   }//end routine
00628 //////////////////////////////////////////////////////////////////////////////
00629 
00630 
00631 //////////////////////////////////////////////////////////////////////////////
00632 //////////////////////////////////////////////////////////////////////////////
00633 //////////////////////////////////////////////////////////////////////////////
00634 /** 
00635  *  Launch ees-nonlocal real here. This routine can be called from any
00636  *  other routine except gradCorr(). GradCorr comp is optional (PINY option)
00637  *  e.g. it is not always computed.
00638  */
00639 //////////////////////////////////////////////////////////////////////////////
00640 void CP_Rho_RealSpacePlane::launchNLRealFFT(){
00641 //////////////////////////////////////////////////////////////////////////////
00642 /// Tell NLeesR its ok to compute its FFT. Otherwise we get no overlap
00643 ///   Spread the launch over all the RhoR chares
00644 
00645   CPcharmParaInfo *sim  = (scProxy.ckLocalBranch ())->cpcharmParaInfo;
00646   if(sim->ees_nloc_on==1){ 
00647     CkAssert(rho_rs.sizeZ>=sim->ngrid_nloc_c);;
00648     if(thisIndex.x < sim->ngrid_nloc_c){
00649         int nstates =  config.nstates; 
00650         int div     = (nstates/rhoRsubplanes);
00651         int rem     = (nstates % rhoRsubplanes);
00652         int add     = (thisIndex.y < rem ? 1 : 0);
00653         int max     = (thisIndex.y < rem ? thisIndex.y : rem);
00654         int ist     = div*thisIndex.y + max;
00655         int iend    = ist + div + add;
00656         for(int ns=ist;ns<iend;ns++){
00657           CkAssert(ns<config.nstates);
00658           realParticlePlaneProxy(ns,thisIndex.x).launchFFTControl(myTime);
00659           if(ns%4)
00660             CmiNetworkProgress();
00661         }//endfor
00662     }//endif
00663   }//endif
00664 
00665 //----------------------------------------------------------------------------
00666    }//end routine
00667 //////////////////////////////////////////////////////////////////////////////
00668 
00669 
00670 
00671 //////////////////////////////////////////////////////////////////////////////
00672 //////////////////////////////////////////////////////////////////////////////
00673 //////////////////////////////////////////////////////////////////////////////
00674 ///
00675 /// Double Transpose Fwd Send : A(gx,y,z) on the way to A(gx,gy,z)
00676 ///                             Send so that (y,z) parallelism is 
00677 ///                             switched to (gx,z)
00678 ///
00679 ///  Invoked 4 times per algorithm step : case 0   density(gx,y,z)
00680 ///                                     : cast 1-3 gradients(gx,y,z)
00681 ///
00682 //////////////////////////////////////////////////////////////////////////////
00683 void CP_Rho_RealSpacePlane::sendPartlyFFTRyToGy(int iopt){
00684 //////////////////////////////////////////////////////////////////////////////
00685 
00686     CPcharmParaInfo *sim  = (scProxy.ckLocalBranch ())->cpcharmParaInfo;
00687     int **listSubGx       = sim->listSubGx;
00688     int  *numSubGx        = sim->numSubGx;
00689 
00690     CkAssert(rhoRsubplanes>1);
00691     complex *FFTresult;
00692     switch(iopt){   
00693       case 0 : FFTresult = rho_rs.rhoIRXC;  break; //Scr variable for rho send
00694       case 1 : FFTresult = rho_rs.rhoIRXC;  break;
00695       case 2 : FFTresult = rho_rs.rhoIRYC;  break;
00696       case 3 : FFTresult = rho_rs.rhoIRZC;  break;
00697       default: CkAbort("impossible iopt");  break;
00698     }//end switch
00699 
00700 //////////////////////////////////////////////////////////////////////////////
00701 /// Launch the communication
00702 
00703   //-----------------------------------------------------------------------------
00704   // Commlib launch : 
00705 
00706 #ifdef _ERIC_SETS_UP_COMMLIB_
00707     switch(iopt){
00708        case 0 : if(config.useRInsRhoRP)    commRealInstanceRx.beginIteration();    break;
00709        case 1 : if(config.useRInsIGXRhoRP) commRealIGXInstanceRx.beginIteration(); break;
00710        case 2 : if(config.useRInsIGYRhoRP) commRealIGYInstanceRx.beginIteration(); break;
00711        case 3 : if(config.useRInsIGZRhoRP) commRealIGZInstanceRx.beginIteration(); break;
00712        default: CkAbort("impossible iopt");break;
00713     }//end switch
00714 #endif
00715 
00716    //-----------------------------------------------------------------------------
00717    // Send the data : I have myNgridB values of y  (gx,y) y=1...myNgridB and all gx
00718    //                 Send all the `y' I have for the gx range desired after transpose
00719 
00720     int stride = ngrida/2+1;
00721     int ix     = thisIndex.x;
00722     for(int ic = 0; ic < rhoRsubplanes; ic ++) { // chare arrays to which we will send
00723       int num   = numSubGx[ic]; // number of gx values chare ic wants
00724       int size  = num*myNgridb; // num*(all the y's i have)
00725 
00726       int sendFFTDataSize = size;
00727       RhoGSFFTMsg *msg = new (sendFFTDataSize, 8 * sizeof(int)) RhoGSFFTMsg; 
00728       msg->size        = size;
00729       msg->iopt        = iopt;
00730       msg->offset      = myBoff;      // where the myNgridB y-lines start.
00731       msg->num         = myNgridb;    // number of y-lines I have. 
00732       complex *data    = msg->data;   // data
00733 
00734       if(config.prioFFTMsg){
00735           CkSetQueueing(msg, CK_QUEUEING_IFIFO);
00736           *(int*)CkPriorityPtr(msg) = config.rhogpriority+thisIndex.y;
00737       }//endif
00738 
00739       if(listSubFlag==1){
00740         for(int i=0,koff=0;i<num;i++,koff+=myNgridb){
00741           for(int k=koff,ii=listSubGx[ic][i];k<myNgridb+koff;k++,ii+=stride){
00742             data[k] = FFTresult[ii];  // all y's of this gx
00743           }//endfor
00744         }//endfor
00745       }else{
00746         int nst=listSubGx[ic][0];
00747         for(int i=0,ist=nst,koff=0;i<num;i++,koff+=myNgridb,ist++){
00748           for(int k=koff,ii=ist;k<myNgridb+koff;k++,ii+=stride){
00749             data[k] = FFTresult[ii];  // all y's of this gx
00750           }//endfor
00751         }//endfor
00752       }//endif
00753 
00754       switch(iopt){
00755         case 0 : rhoRealProxy(ix,ic).acceptRhoGradVksRyToGy(msg);      break;
00756         case 1 : rhoRealProxy(ix,ic).acceptRhoGradVksRyToGy(msg);      break;
00757         case 2 : rhoRealProxy(ix,ic).acceptRhoGradVksRyToGy(msg);      break;
00758         case 3 : rhoRealProxy(ix,ic).acceptRhoGradVksRyToGy(msg);      break;
00759         default: CkAbort("impossible iopt");break;
00760       }//end switch
00761 
00762 #ifdef _ERIC_SETS_UP_COMMLIB_
00763       switch(iopt){
00764         case 0 : rhoRealProxy(ix,ic).acceptRhoGradVksRyToGy(msg);      break;
00765         default: CkAbort("impossible iopt");break;
00766       }//end switch
00767 #endif
00768 
00769 #ifdef CMK_VERSION_BLUEGENE
00770       CmiNetworkProgress();
00771 #endif
00772     }//end for : chare sending
00773 
00774   //-----------------------------------------------------------------------------
00775   // Commlib stop
00776 
00777 #ifdef _ERIC_SETS_UP_COMMLIB_
00778     switch(iopt){
00779        case 0 : if(config.useRInsRhoRP)    commRealInstanceRx.endIteration();    break;
00780        case 1 : if(config.useRInsIGXRhoRP) commRealIGXInstanceRx.endIteration(); break;
00781        case 2 : if(config.useRInsIGYRhoRP) commRealIGYInstanceRx.endIteration(); break;
00782        case 3 : if(config.useRInsIGZRhoRP) commRealIGZInstanceRx.endIteration(); break;
00783        default: CkAbort("impossible iopt");break;
00784     }//end switch
00785 #endif
00786 
00787 //---------------------------------------------------------------------------
00788   }//end routine
00789 //////////////////////////////////////////////////////////////////////////////
00790 
00791 
00792 
00793 //////////////////////////////////////////////////////////////////////////////
00794 //////////////////////////////////////////////////////////////////////////////
00795 //////////////////////////////////////////////////////////////////////////////
00796 ///
00797 /// Double Transpose Fwd Recv : A(gx,y,z) on the way to A(gx,gy,z)
00798 ///                             Recv so that (y,z) parallel switched to (gx,z)
00799 ///
00800 /// Invoked 4 times per algorithm step : case 0   rho(gx,y,z)
00801 ///                                    : cast 1-3 gradRho(gx,y,z)
00802 ///
00803 //////////////////////////////////////////////////////////////////////////////
00804 void CP_Rho_RealSpacePlane::acceptRhoGradVksRyToGy(RhoGSFFTMsg *msg){
00805 //////////////////////////////////////////////////////////////////////////////
00806 
00807   int size         = msg->size;  // msg size
00808   int iopt         = msg->iopt;  //
00809   int num          = msg->num;
00810   int offset       = msg->offset;
00811   complex *msgData = msg->data;
00812 
00813   CkAssert(size==myNplane_rho*num);
00814   CkAssert(rhoRsubplanes>1);
00815 
00816   complex *dataC;
00817   switch(iopt){
00818     case  0: dataC   = rho_rs.rhoIRXCint;  break;
00819     case  1: dataC   = rho_rs.rhoIRXCint;  break;
00820     case  2: dataC   = rho_rs.rhoIRYCint;  break;
00821     case  3: dataC   = rho_rs.rhoIRZCint;  break;
00822     default: CkAbort("Impossible option\n"); break;
00823   }//endif
00824 
00825   for(int js=0,j=offset;js<size;js+=num,j+=ngridb){
00826    for(int is=js,i=j;is<num+js;is++,i++){
00827      dataC[i] = msgData[is];
00828    }//endfor
00829   }//endfor
00830 
00831   delete msg;
00832 
00833   countIntRtoG[iopt]++;
00834   if(countIntRtoG[iopt]==rhoRsubplanes){
00835     countIntRtoG[iopt]=0;
00836     fftRhoRyToGy(iopt);
00837   }//endfor
00838 
00839 //////////////////////////////////////////////////////////////////////////////
00840   }//end routine
00841 //////////////////////////////////////////////////////////////////////////////
00842 
00843 
00844 
00845 //////////////////////////////////////////////////////////////////////////////
00846 //////////////////////////////////////////////////////////////////////////////
00847 //////////////////////////////////////////////////////////////////////////////
00848 /// Double Transpose Fwd FFT : A(gx,y,z) -> A(gx,gy,z)
00849 ///
00850 ///  Invoked 4 times per algorithm step : 
00851 ///               case 0   rho(gx,y,z)    -> rho(gx,gy,z)
00852 ///               cast 1-3 gradRho(gx,y,z)-> gradRho(gx,gy,z)
00853 ///
00854 //////////////////////////////////////////////////////////////////////////////
00855 void CP_Rho_RealSpacePlane::fftRhoRyToGy(int iopt){
00856 //////////////////////////////////////////////////////////////////////////////
00857 /// FFT myself back to G-space part way : e.g. along gy here
00858 
00859   CkAssert(rhoRsubplanes>1);
00860   complex *dataC;
00861   double *dataR;
00862   switch(iopt){
00863     case 0: dataC = rho_rs.rhoIRXCint; dataR = rho_rs.rhoIRXint; break;
00864     case 1: dataC = rho_rs.rhoIRXCint; dataR = rho_rs.rhoIRXint; break;
00865     case 2: dataC = rho_rs.rhoIRYCint; dataR = rho_rs.rhoIRYint; break;
00866     case 3: dataC = rho_rs.rhoIRZCint; dataR = rho_rs.rhoIRZint; break;
00867     default: CkAbort("Impossible option\n"); break;
00868   }//endif
00869 
00870   FFTcache *fftcache = fftCacheProxy.ckLocalBranch();  
00871 #ifndef CMK_OPTIMIZE
00872     double StartTime=CmiWallTimer();
00873 #endif
00874   fftcache->doRhoFFTRyToGy_Rchare(dataC,dataR,myNplane_rho,ngrida,ngridb,iplane_ind);
00875 #ifndef CMK_OPTIMIZE
00876   traceUserBracketEvent(doRhoFFTRytoGy_, StartTime, CmiWallTimer());    
00877 #endif
00878 
00879 //////////////////////////////////////////////////////////////////////////////
00880 /// Send chunk to RhoGDensity 
00881 
00882   int igo=0;
00883   if(iopt>=1 && iopt <= 3){countFFTRyToGy++; igo=1;}
00884 
00885 #ifndef DEBUG_INT_TRANS_FWD
00886   if(config.rhoGToRhoRMsgComb==0 || iopt==0){sendPartlyFFTtoRhoG(iopt);}
00887   if(config.rhoGToRhoRMsgComb==1 && countFFTRyToGy==3 && igo==1){
00888     countFFTRyToGy=0;
00889     sendPartlyFFTtoRhoGall();
00890   }//endif
00891 #else
00892   CPcharmParaInfo *sim = (scProxy.ckLocalBranch ())->cpcharmParaInfo;
00893   int **listSubGx = sim->listSubGx;
00894   int ic          = thisIndex.y;
00895   CkPrintf("%d %d : %d\n",thisIndex.x,thisIndex.y,myNplane_rho);
00896   char name[100];
00897   sprintf(name,"partFFTGxGyZ%d.out.%d.%d",rhoRsubplanes,thisIndex.x,thisIndex.y);
00898   FILE *fp = fopen(name,"w");
00899   for(int ix =0;ix<myNplane_rho;ix++){
00900    for(int iy =0;iy<ngridb;iy++){
00901      int i = ix*ngridb + iy;
00902      fprintf(fp,"%d %d : %g %g\n",iy,listSubGx[ic][ix],dataC[i].re,dataC[i].im);
00903    }//endfor
00904   }//endof
00905   fclose(fp);
00906   rhoRealProxy(0,0).exitForDebugging();
00907 #endif
00908 
00909 //////////////////////////////////////////////////////////////////////////////
00910    }//end routine
00911 //////////////////////////////////////////////////////////////////////////////
00912 
00913 
00914 
00915 //////////////////////////////////////////////////////////////////////////////
00916 //////////////////////////////////////////////////////////////////////////////
00917 //////////////////////////////////////////////////////////////////////////////
00918 ///
00919 /// The Tranpose to G-space : A(gx,gy,z) on the way to A(gx,gy,gz)
00920 ///                           Change parallel by gx,z to parallel by {gx,gy}
00921 ///                           We switch chare arrays here from RhoR to RhoG
00922 ///
00923 ///  Invoked 4 times per algorithm step : 
00924 ///               case 0   send rho(gx,gy,z)     -> rho(gx,gy,z) in Rhog
00925 ///               cast 1-3 send gradRho(gx,gy,z) -> gradRho(gx,gy,z) in Rhog
00926 ///
00927 /// Send the partly FFTed array A(gx,gy,z), to rhoGSpacePlane :  
00928 /// You then wait while RHOG works, sends back RhoIRalpha which you whitebyrdize
00929 /// You send back the whitebyrdized RhoIRalpha puppies to RhoGspacePlane.
00930 /// Whilst all this is going on, HartG is churning and will send you another
00931 /// part of Vks.  All this requires keeping density, vks, div_rho_alpha and
00932 /// vkshart memory available at all times to receive messages.
00933 ///
00934 //////////////////////////////////////////////////////////////////////////////
00935 void CP_Rho_RealSpacePlane::sendPartlyFFTtoRhoG(int iopt){
00936 //////////////////////////////////////////////////////////////////////////////
00937 /// Local pointers and variables
00938 
00939     CPcharmParaInfo *sim        = (scProxy.ckLocalBranch ())->cpcharmParaInfo;
00940     int nchareRhoG              = sim->nchareRhoG;
00941     int **tranpack_rho          = sim->index_tran_upack_rho;
00942     int *nlines_per_chareRhoG   = sim->nlines_per_chareRhoG;
00943     int ***tranupack_rhoY       = sim->index_tran_upack_rho_y;
00944     int **nlines_per_chareRhoGY = sim->nline_send_rho_y;
00945 
00946     complex *FFTresult;
00947     if(rhoRsubplanes==1){
00948      switch(iopt){   
00949       case 0 : FFTresult = rho_rs.rhoIRXC;  break; //Scr variable for rho send
00950       case 1 : FFTresult = rho_rs.rhoIRXC;  break;
00951       case 2 : FFTresult = rho_rs.rhoIRYC;  break;
00952       case 3 : FFTresult = rho_rs.rhoIRZC;  break;
00953       default: CkAbort("impossible iopt");  break;
00954      }//end switch
00955     }else{
00956      switch(iopt){   
00957       case 0 : FFTresult = rho_rs.rhoIRXCint;  break; //Scr variable for rho send
00958       case 1 : FFTresult = rho_rs.rhoIRXCint;  break;
00959       case 2 : FFTresult = rho_rs.rhoIRYCint;  break;
00960       case 3 : FFTresult = rho_rs.rhoIRZCint;  break;
00961       default: CkAbort("impossible iopt");  break;
00962      }//end switch
00963     }//endif
00964 
00965 //////////////////////////////////////////////////////////////////////////////
00966 /// Commlib launch
00967 
00968     if(rhoRsubplanes==1){
00969      switch(iopt){
00970        case 0 : if(config.useRInsRhoGP)    commRealInstance.beginIteration();    break;
00971        case 1 : if(config.useRInsIGXRhoGP) commRealIGXInstance.beginIteration(); break;
00972        case 2 : if(config.useRInsIGYRhoGP) commRealIGYInstance.beginIteration(); break;
00973        case 3 : if(config.useRInsIGZRhoGP) commRealIGZInstance.beginIteration(); break;
00974        default: CkAbort("impossible iopt");break;
00975      }//end switch
00976     }//endif
00977 
00978 //////////////////////////////////////////////////////////////////////////////
00979 /// Send the data
00980 
00981     int iy = thisIndex.y;
00982     for(int ic = 0; ic < nchareRhoG; ic ++) { // chare arrays to which we will send
00983 
00984      //---------------------------
00985      //malloc the message
00986       int sendFFTDataSize = nlines_per_chareRhoG[ic];
00987       if(rhoRsubplanes!=1){sendFFTDataSize = nlines_per_chareRhoGY[ic][iy];}
00988       if(sendFFTDataSize>0)
00989         {
00990           RhoGSFFTMsg *msg = new (sendFFTDataSize, 8 * sizeof(int)) RhoGSFFTMsg; 
00991 
00992           //---------------------------
00993           //Pack the message
00994           msg->size        = sendFFTDataSize;
00995           msg->iopt        = iopt;
00996           msg->offset      = thisIndex.x;    // z-index
00997           msg->offsetGx    = thisIndex.y;    // gx parallelization index
00998           complex *data    = msg->data;
00999           if(config.prioFFTMsg){
01000             CkSetQueueing(msg, CK_QUEUEING_IFIFO);
01001             *(int*)CkPriorityPtr(msg) = config.rhogpriority+thisIndex.x;
01002           }//endif
01003 
01004           if(rhoRsubplanes==1){
01005             for(int i=0;i<sendFFTDataSize;i++){
01006               data[i] = FFTresult[tranpack_rho[ic][i]];
01007             }//endfor
01008           }else{
01009             for(int i=0;i<sendFFTDataSize;i++){
01010               data[i] = FFTresult[tranupack_rhoY[ic][iy][i]];
01011             }//endfor
01012           }//endif
01013 
01014           //---------------------------
01015           // Send the message
01016           if(rhoRsubplanes==1){
01017             switch(iopt){
01018             case 0 : rhoGProxy_com(ic,0).acceptRhoData(msg);      break;
01019             case 1 : rhoGProxyIGX_com(ic,0).acceptWhiteByrd(msg); break;
01020             case 2 : rhoGProxyIGY_com(ic,0).acceptWhiteByrd(msg); break;
01021             case 3 : rhoGProxyIGZ_com(ic,0).acceptWhiteByrd(msg); break;
01022             default: CkAbort("impossible iopt");break;
01023             }//end switch
01024           }else{
01025             switch(iopt){
01026             case 0 : rhoGProxy(ic,0).acceptRhoData(msg);   break;
01027             case 1 : rhoGProxy(ic,0).acceptWhiteByrd(msg); break;
01028             case 2 : rhoGProxy(ic,0).acceptWhiteByrd(msg); break;
01029             case 3 : rhoGProxy(ic,0).acceptWhiteByrd(msg); break;
01030             default: CkAbort("impossible iopt");break;
01031             }//end switch
01032           }//endif
01033         }//end if nonzero
01034 #ifdef CMK_VERSION_BLUEGENE
01035       if(ic%4==0)
01036         CmiNetworkProgress();
01037 #endif
01038 
01039     }//end for : chare sending
01040 
01041 //////////////////////////////////////////////////////////////////////////////
01042 /// Commlib stop
01043 
01044     if(rhoRsubplanes==1){
01045      switch(iopt){
01046        case 0 : if(config.useRInsRhoGP)    commRealInstance.endIteration(); break;
01047        case 1 : if(config.useRInsIGXRhoGP) commRealIGXInstance.endIteration(); break;
01048        case 2 : if(config.useRInsIGYRhoGP) commRealIGYInstance.endIteration(); break;
01049        case 3 : if(config.useRInsIGZRhoGP) commRealIGZInstance.endIteration(); break;
01050        default: CkAbort("impossible iopt");break;
01051      }//end switch
01052     }//endif
01053 
01054 //---------------------------------------------------------------------------
01055   }//end routine
01056 //////////////////////////////////////////////////////////////////////////////
01057 
01058 
01059 //////////////////////////////////////////////////////////////////////////////
01060 //////////////////////////////////////////////////////////////////////////////
01061 //////////////////////////////////////////////////////////////////////////////
01062 void CP_Rho_RealSpacePlane::sendPartlyFFTtoRhoGall(){
01063 //////////////////////////////////////////////////////////////////////////////
01064 /// Local pointers and variables
01065 
01066     CPcharmParaInfo *sim        = (scProxy.ckLocalBranch ())->cpcharmParaInfo;
01067     int nchareRhoG              = sim->nchareRhoG;
01068     int **tranpack_rho          = sim->index_tran_upack_rho;
01069     int *nlines_per_chareRhoG   = sim->nlines_per_chareRhoG;
01070     int ***tranupack_rhoY       = sim->index_tran_upack_rho_y;
01071     int **nlines_per_chareRhoGY = sim->nline_send_rho_y;
01072 
01073     complex *FFTresultX;
01074     complex *FFTresultY;
01075     complex *FFTresultZ;
01076     if(rhoRsubplanes==1){
01077       FFTresultX = rho_rs.rhoIRXC;
01078       FFTresultY = rho_rs.rhoIRYC;
01079       FFTresultZ = rho_rs.rhoIRZC;
01080     }else{
01081       FFTresultX = rho_rs.rhoIRXCint;
01082       FFTresultY = rho_rs.rhoIRYCint;
01083       FFTresultZ = rho_rs.rhoIRZCint;
01084     }//endif
01085 
01086 //////////////////////////////////////////////////////////////////////////////
01087 /// Send the data
01088 
01089     int iy = thisIndex.y;
01090     for(int ic = 0; ic < nchareRhoG; ic ++) { // chare arrays to which we will send
01091 
01092      //---------------------------
01093      //malloc the message
01094       int sendFFTDataSize = nlines_per_chareRhoG[ic];
01095       if(rhoRsubplanes!=1){sendFFTDataSize = nlines_per_chareRhoGY[ic][iy];}
01096 
01097       if(sendFFTDataSize>0){
01098         //---------------------------
01099         //Pack the message
01100           RhoGSFFTMsg *msg = new (3*sendFFTDataSize, 8 * sizeof(int)) RhoGSFFTMsg; 
01101           msg->size        = sendFFTDataSize;
01102           msg->iopt        = 1;
01103           msg->offset      = thisIndex.x;    // z-index
01104           msg->offsetGx    = thisIndex.y;    // gx parallelization index
01105           complex *data    = msg->data;
01106           if(config.prioFFTMsg){
01107             CkSetQueueing(msg, CK_QUEUEING_IFIFO);
01108             *(int*)CkPriorityPtr(msg) = config.rhogpriority+thisIndex.x;
01109           }//endif
01110 
01111           if(rhoRsubplanes==1){
01112             for(int i=0,ii=0;ii<sendFFTDataSize;i+=3,ii++){
01113               int j = tranpack_rho[ic][ii];
01114               data[i]   = FFTresultX[j];
01115               data[i+1] = FFTresultY[j];
01116               data[i+2] = FFTresultZ[j];
01117             }//endfor
01118           }else{
01119             for(int i=0,ii=0;ii<sendFFTDataSize;i+=3,ii++){
01120               int j = tranupack_rhoY[ic][iy][ii];
01121               data[i]   = FFTresultX[j];
01122               data[i+1] = FFTresultY[j];
01123               data[i+2] = FFTresultZ[j];
01124             }//endfor
01125           }//endif
01126          //---------------------------
01127          // Send the message
01128           rhoGProxy(ic,0).acceptWhiteByrdAll(msg);
01129       }//end if : nonzero msg
01130 #ifdef CMK_VERSION_BLUEGENE
01131       if(ic%4==0){CmiNetworkProgress();}
01132 #endif
01133    }//end for : send to rhog(ic)
01134 
01135 //---------------------------------------------------------------------------
01136   }//end routine
01137 //////////////////////////////////////////////////////////////////////////////
01138 
01139 
01140 //////////////////////////////////////////////////////////////////////////////
01141 //////////////////////////////////////////////////////////////////////////////
01142 //////////////////////////////////////////////////////////////////////////////
01143 /**
01144  *
01145  * Accept transpose data from RhoG : receive grad_rho(gy,gx,z)
01146  *
01147  * Invoked 3 times per algorithm step : once for each grad_rho
01148  * 
01149  * Memory required is : rho_igx,rho_igy,rho_igz so stuff can come in any order
01150  *                    : density and vks are needed later so no reusing for you.
01151  *                    : VksHart can also arrive at any time and cannot be used
01152  *                      here.
01153  */
01154 //////////////////////////////////////////////////////////////////////////////
01155 void CP_Rho_RealSpacePlane::acceptGradRhoVks(RhoRSFFTMsg *msg){
01156 //////////////////////////////////////////////////////////////////////////////
01157 /// Unpack the message
01158 
01159   CPcharmParaInfo *sim        = (scProxy.ckLocalBranch ())->cpcharmParaInfo;
01160   int nchareG                 = sim->nchareRhoG;
01161   int **tranUnpack            = sim->index_tran_upack_rho;
01162   int *nlines_per_chareG      = sim->nlines_per_chareRhoG;
01163   int ***tranupack_rhoY       = sim->index_tran_upack_rho_y;
01164   int **nlines_per_chareRhoGY = sim->nline_send_rho_y;
01165   int iy                      = thisIndex.y;
01166 
01167   int size               = msg->size; 
01168   int Index              = msg->senderIndex;
01169   int iopt               = msg->iopt;
01170   complex *partiallyFFTd = msg->data;
01171 
01172   int mySize;
01173   int nptsExpnd;
01174   if(rhoRsubplanes==1){
01175      mySize = nlines_per_chareG[Index];
01176      nptsExpnd = nptsExpndB;
01177   }else{
01178      mySize = nlines_per_chareRhoGY[Index][iy];
01179      nptsExpnd = nptsExpndA;
01180   }//endif
01181 
01182 #ifdef _NAN_CHECK_
01183   for(int i=0;i<msg->size ;i++){
01184       CkAssert(isnan(msg->data[i].re)==0);
01185       CkAssert(isnan(msg->data[i].im)==0);
01186   }
01187 #endif
01188 #ifdef _CP_DEBUG_RHOR_VERBOSE_
01189   CkPrintf("Data from RhoG arriving at RhoR : %d %d %d %d\n",
01190            thisIndex.x,thisIndex.y,iopt,countGradVks[iopt]);
01191 #endif
01192 
01193 //////////////////////////////////////////////////////////////////////////////
01194 /// Perform some error checking
01195 
01196   countGradVks[iopt]++;
01197   if (countGradVks[iopt] > recvCountFromGRho) {
01198      CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
01199      CkPrintf("Mismatch in allowed rho_gspace chare arrays : %d %d %d %d\n",
01200                countGradVks[iopt],nchareG,thisIndex.x,thisIndex.y);
01201      CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
01202      CkExit();
01203   }//endif
01204 
01205   if(size!=mySize){
01206      CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
01207      CkPrintf("Dude.1, %d != %d for rho chare %d %d %d\n",size,mySize,
01208                   thisIndex.y,Index,iopt);
01209      CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
01210      CkExit();
01211   }//endif
01212 
01213   if(1> iopt || iopt >3){
01214      CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
01215      CkPrintf("Wrong option in rhoR \n",iopt);
01216      CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
01217      CkExit();
01218   }//endif
01219 
01220 //////////////////////////////////////////////////////////////////////////////
01221 /// unpack the data and delete the message
01222 
01223   complex *dataC;
01224   double  *dataR;
01225   if(rhoRsubplanes==1){
01226    switch(iopt){
01227      case 1 : dataC = rho_rs.rhoIRXC; dataR = rho_rs.rhoIRX; break;
01228      case 2 : dataC = rho_rs.rhoIRYC; dataR = rho_rs.rhoIRY; break;
01229      case 3 : dataC = rho_rs.rhoIRZC; dataR = rho_rs.rhoIRZ; break;
01230      default: CkAbort("impossible iopt");break;
01231    }//end switch
01232   }else{
01233    switch(iopt){
01234      case 1 : dataC = rho_rs.rhoIRXCint; dataR = rho_rs.rhoIRXint; break;
01235      case 2 : dataC = rho_rs.rhoIRYCint; dataR = rho_rs.rhoIRYint; break;
01236      case 3 : dataC = rho_rs.rhoIRZCint; dataR = rho_rs.rhoIRZint; break;
01237      default: CkAbort("impossible iopt");break;
01238    }//end switch
01239   }//endif
01240 
01241   // you must zero because messages don't fill the plane
01242   if(countGradVks[iopt]==1){bzero(dataR,sizeof(double)*nptsExpnd);}
01243 
01244   if(rhoRsubplanes==1){
01245     for(int i=0;i<size;i++){ 
01246       dataC[tranUnpack[Index][i]] = partiallyFFTd[i]*probScale;
01247     }//endfor
01248   }else{
01249     for(int i=0;i<size;i++){
01250       dataC[tranupack_rhoY[Index][iy][i]] = partiallyFFTd[i]*probScale;
01251     }//endfor
01252   }//endif
01253 
01254   delete msg;
01255 
01256 //////////////////////////////////////////////////////////////////////////////
01257 /// When you have all the data : finish the FFT back to real space
01258 
01259   if (countGradVks[iopt] == recvCountFromGRho){
01260 
01261     countGradVks[iopt]=0;
01262     if(rhoRsubplanes==1){doneGradRhoVks++;}
01263 
01264 #ifndef CMK_OPTIMIZE
01265     double StartTime=CmiWallTimer();
01266 #endif
01267 
01268     FFTcache *fftcache = fftCacheProxy.ckLocalBranch();  
01269     if(rhoRsubplanes==1){
01270       fftcache->doRhoFFTGtoR_Rchare(dataC,dataR,nplane_rho_x,ngrida,ngridb,iplane_ind);
01271     }else{
01272       fftcache->doRhoFFTGyToRy_Rchare(dataC,dataR,myNplane_rho,ngrida,ngridb,iplane_ind);
01273       sendPartlyFFTGxToRx(iopt);
01274     }//endif
01275 
01276 #ifndef CMK_OPTIMIZE
01277     traceUserBracketEvent(fwFFTGtoRnot0_, StartTime, CmiWallTimer());    
01278 #endif
01279 
01280   }//endif : I captured a divRho
01281 
01282 //////////////////////////////////////////////////////////////////////////////
01283 /// When you have rhoiRX,rhoiRY,rhoiRZ and Vks invoke gradient correction
01284 
01285   if(doneGradRhoVks==3 && rhoRsubplanes==1){ 
01286     doneGradRhoVks = 0;
01287     GradCorr();         // if rhosubplanes>1 you have a transpose 
01288                         // and another fft to do before you can GradCorr);
01289   }//endif
01290 
01291 //----------------------------------------------------------------------------
01292    }//end routine
01293 //////////////////////////////////////////////////////////////////////////////
01294 
01295 
01296 //////////////////////////////////////////////////////////////////////////////
01297 //////////////////////////////////////////////////////////////////////////////
01298 //////////////////////////////////////////////////////////////////////////////
01299 /**
01300  *
01301  * Accept transpose data from RhoG : receive grad_rho(gy,gx,z)
01302  *
01303  * Invoked 3 times per algorithm step : once for each grad_rho
01304  * 
01305  * Memory required is : rho_igx,rho_igy,rho_igz so stuff can come in any order
01306  *                    : density and vks are needed later so no reusing for you.
01307  *                    : VksHart can also arrive at any time and cannot be used
01308  *                      here.
01309  */
01310 //////////////////////////////////////////////////////////////////////////////
01311 void CP_Rho_RealSpacePlane::acceptGradRhoVksAll(RhoRSFFTMsg *msg){
01312 //////////////////////////////////////////////////////////////////////////////
01313 /// Unpack the message
01314 
01315   CPcharmParaInfo *sim        = (scProxy.ckLocalBranch ())->cpcharmParaInfo;
01316   int nchareG                 = sim->nchareRhoG;
01317   int **tranUnpack            = sim->index_tran_upack_rho;
01318   int *nlines_per_chareG      = sim->nlines_per_chareRhoG;
01319   int ***tranupack_rhoY       = sim->index_tran_upack_rho_y;
01320   int **nlines_per_chareRhoGY = sim->nline_send_rho_y;
01321   int iy                      = thisIndex.y;
01322 
01323   int size               = msg->size; 
01324   int Index              = msg->senderIndex;
01325   int iopt               = msg->iopt;
01326   complex *partiallyFFTd = msg->data;
01327 
01328   int mySize;
01329   int nptsExpnd;
01330   if(rhoRsubplanes==1){
01331      mySize = nlines_per_chareG[Index];
01332      nptsExpnd = nptsExpndB;
01333   }else{
01334      mySize = nlines_per_chareRhoGY[Index][iy];
01335      nptsExpnd = nptsExpndA;
01336   }//endif
01337 
01338 #ifdef _NAN_CHECK_
01339   for(int i=0;i<msg->size ;i++){
01340       CkAssert(isnan(msg->data[i].re)==0);
01341       CkAssert(isnan(msg->data[i].im)==0);
01342   }
01343 #endif
01344 
01345 #ifdef _CP_DEBUG_RHOR_VERBOSE_
01346   CkPrintf("Data from RhoG arriving at RhoR : %d %d %d %d\n",
01347            thisIndex.x,thisIndex.y,iopt,countGradVks[iopt]);
01348 #endif
01349 
01350 //////////////////////////////////////////////////////////////////////////////
01351 /// Perform some error checking
01352 
01353   countGradVks[iopt]++;
01354 
01355   if (countGradVks[iopt] > recvCountFromGRho) {
01356      CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
01357      CkPrintf("Mismatch in allowed rho_gspace chare arrays : %d %d %d %d\n",
01358                countGradVks[iopt],nchareG,thisIndex.x,thisIndex.y);
01359      CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
01360      CkExit();
01361   }//endif
01362 
01363   if(size!=mySize){
01364      CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
01365      CkPrintf("Dude.1, %d != %d for rho chare %d %d %d\n",size,mySize,
01366                   thisIndex.y,Index,iopt);
01367      CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
01368      CkExit();
01369   }//endif
01370 
01371   if(iopt!=1){
01372      CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
01373      CkPrintf("Wrong option in rhoR \n",iopt);
01374      CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
01375      CkExit();
01376   }//endif
01377 
01378 //////////////////////////////////////////////////////////////////////////////
01379 /// unpack the data and delete the message
01380 
01381   complex *dataCX,*dataCY,*dataCZ;
01382   double  *dataRX,*dataRY,*dataRZ;
01383   if(rhoRsubplanes==1){
01384      dataCX = rho_rs.rhoIRXC;    dataRX = rho_rs.rhoIRX;
01385      dataCY = rho_rs.rhoIRYC;    dataRY = rho_rs.rhoIRY;
01386      dataCZ = rho_rs.rhoIRZC;    dataRZ = rho_rs.rhoIRZ;
01387   }else{
01388      dataCX = rho_rs.rhoIRXCint; dataRX = rho_rs.rhoIRXint;
01389      dataCY = rho_rs.rhoIRYCint; dataRY = rho_rs.rhoIRYint;
01390      dataCZ = rho_rs.rhoIRZCint; dataRZ = rho_rs.rhoIRZint;
01391   }//endif
01392 
01393   // you must zero because messages don't fill the plane
01394   if(countGradVks[iopt]==1){
01395      bzero(dataRX,sizeof(double)*nptsExpnd);
01396      bzero(dataRY,sizeof(double)*nptsExpnd);
01397      bzero(dataRZ,sizeof(double)*nptsExpnd);
01398   }//endif
01399 
01400   if(rhoRsubplanes==1){
01401     for(int i=0,ii=0;ii<size;i+=3,ii++){ 
01402       int j = tranUnpack[Index][ii];
01403       dataCX[j] = partiallyFFTd[i]*probScale;
01404       dataCY[j] = partiallyFFTd[i+1]*probScale;
01405       dataCZ[j] = partiallyFFTd[i+2]*probScale;
01406     }//endfor
01407   }else{
01408     for(int i=0,ii=0;ii<size;i+=3,ii++){ 
01409       int j = tranupack_rhoY[Index][iy][ii];
01410       dataCX[j] = partiallyFFTd[i]*probScale;
01411       dataCY[j] = partiallyFFTd[i+1]*probScale;
01412       dataCZ[j] = partiallyFFTd[i+2]*probScale;
01413     }//endfor
01414   }//endif
01415 
01416   delete msg;
01417 
01418 //////////////////////////////////////////////////////////////////////////////
01419 /// When you have all the data : finish the FFT back to real space
01420 
01421   if (countGradVks[iopt] == recvCountFromGRho){
01422 
01423     countGradVks[iopt]=0;
01424     if(rhoRsubplanes==1){doneGradRhoVks+=3;}
01425 
01426 #ifndef CMK_OPTIMIZE
01427     double StartTime=CmiWallTimer();
01428 #endif
01429 
01430     FFTcache *fftcache = fftCacheProxy.ckLocalBranch();  
01431     if(rhoRsubplanes==1){
01432       fftcache->doRhoFFTGtoR_Rchare(dataCX,dataRX,nplane_rho_x,ngrida,ngridb,iplane_ind);
01433       fftcache->doRhoFFTGtoR_Rchare(dataCY,dataRY,nplane_rho_x,ngrida,ngridb,iplane_ind);
01434       fftcache->doRhoFFTGtoR_Rchare(dataCZ,dataRZ,nplane_rho_x,ngrida,ngridb,iplane_ind);
01435     }else{
01436       fftcache->doRhoFFTGyToRy_Rchare(dataCX,dataRX,myNplane_rho,ngrida,ngridb,iplane_ind);
01437       fftcache->doRhoFFTGyToRy_Rchare(dataCY,dataRY,myNplane_rho,ngrida,ngridb,iplane_ind);
01438       fftcache->doRhoFFTGyToRy_Rchare(dataCZ,dataRZ,myNplane_rho,ngrida,ngridb,iplane_ind);
01439       sendPartlyFFTGxToRx(1);
01440       sendPartlyFFTGxToRx(2);
01441       sendPartlyFFTGxToRx(3);
01442     }//endif
01443 
01444 #ifndef CMK_OPTIMIZE
01445     traceUserBracketEvent(fwFFTGtoRnot0_, StartTime, CmiWallTimer());    
01446 #endif
01447 
01448   }//endif : I captured a divRho
01449 
01450 //////////////////////////////////////////////////////////////////////////////
01451 /// When you have rhoiRX,rhoiRY,rhoiRZ and Vks invoke gradient correction
01452 
01453   if(doneGradRhoVks==3 && rhoRsubplanes==1){ 
01454     doneGradRhoVks = 0;
01455     GradCorr();         // if rhosubplanes>1 you have a transpose 
01456                         // and another fft to do before you can GradCorr);
01457   }//endif
01458 
01459 //----------------------------------------------------------------------------
01460    }//end routine
01461 //////////////////////////////////////////////////////////////////////////////
01462 
01463 
01464 //////////////////////////////////////////////////////////////////////////////
01465 //////////////////////////////////////////////////////////////////////////////
01466 //////////////////////////////////////////////////////////////////////////////
01467 ///
01468 /// Double Transpose Bck Send :  A(gx,y,z) on the way to A(x,y,z)
01469 ///                              Send so (gx,z) parallel -> (y,z) parallel 
01470 ///
01471 /// Invoked 5 times an algorithm step:
01472 ///             case 1-3: Gradients
01473 ///             case  0 : VksWhiteByrd
01474 ///             case  4 : VksHarteext
01475 ///
01476 //////////////////////////////////////////////////////////////////////////////
01477 void CP_Rho_RealSpacePlane::sendPartlyFFTGxToRx(int iopt){
01478 //////////////////////////////////////////////////////////////////////////////
01479   CkAssert(rhoRsubplanes>1);
01480 
01481     complex *FFTresult;
01482     switch(iopt){   
01483       case 0 : FFTresult = rho_rs.rhoIRXCint; break;
01484       case 1 : FFTresult = rho_rs.rhoIRXCint; break;
01485       case 2 : FFTresult = rho_rs.rhoIRYCint; break;
01486       case 3 : FFTresult = rho_rs.rhoIRZCint; break;
01487       case 4 : FFTresult = rho_rs.VksHartCint; break;
01488       default: CkAbort("impossible iopt");  break;
01489     }//end switch
01490 
01491 //////////////////////////////////////////////////////////////////////////////
01492 /// Launch the communication
01493 
01494   //-----------------------------------------------------------------------------
01495   // Commlib launch : 
01496 
01497 #ifdef _ERIC_SETS_UP_COMMLIB_
01498     switch(iopt){
01499        case 0 : if(config.useRInsRhoRP)    commRealInstanceRx.beginIteration();    break;
01500        case 1 : if(config.useRInsIGXRhoRP) commRealIGXInstanceRx.beginIteration(); break;
01501        case 2 : if(config.useRInsIGYRhoRP) commRealIGYInstanceRx.beginIteration(); break;
01502        case 3 : if(config.useRInsIGZRhoRP) commRealIGZInstanceRx.beginIteration(); break;
01503        default: CkAbort("impossible iopt");break;
01504     }//end switch
01505 #endif
01506 
01507    //-----------------------------------------------------------------------------
01508    // Send the data : I have myNgridB values of y  (gx,y) y=1...myNgridB and all gx
01509    //                 Send all the `y' I have for the gx range desired after transpose
01510 
01511     int ix = thisIndex.x;
01512     for(int ic = 0; ic < rhoRsubplanes; ic ++) { // chare arrays to which we will send
01513       int div     = (ngridb/rhoRsubplanes);   //parallelize y
01514       int rem     = (ngridb % rhoRsubplanes);
01515       int add     = (ic < rem ? 1 : 0);
01516       int max     = (ic < rem ? ic : rem);
01517       int ist     = div*ic + max;        // start of y desired by chare ic
01518       int iend    = ist + div + add;     // end   of y desired by chare ic
01519       int size    = (iend-ist)*myNplane_rho; // data size : send all the gx I have
01520 
01521       int sendFFTDataSize = size;
01522       RhoGSFFTMsg *msg = new (sendFFTDataSize, 8 * sizeof(int)) RhoGSFFTMsg; 
01523       msg->size        = size;
01524       msg->iopt        = iopt;
01525       msg->offset      = thisIndex.y;   // my chare index
01526       msg->num         = myNplane_rho;  // number of gx-lines I have. 
01527       complex *data    = msg->data;     // data
01528 
01529       if(config.prioFFTMsg){
01530           CkSetQueueing(msg, CK_QUEUEING_IFIFO);
01531           *(int*)CkPriorityPtr(msg) = config.rhogpriority+thisIndex.y;
01532       }//endif
01533 
01534       for(int i=ist,koff=0;i<iend;i++,koff+=myNplane_rho){ 
01535         for(int k=koff,ii=i;k<myNplane_rho+koff;k++,ii+=ngridb){
01536           data[k] = FFTresult[ii]; 
01537         }//endfor
01538       }//endfor
01539 
01540       switch(iopt){
01541         case 0 : rhoRealProxy(ix,ic).acceptRhoGradVksGxToRx(msg);break;
01542         case 1 : rhoRealProxy(ix,ic).acceptRhoGradVksGxToRx(msg); break;
01543         case 2 : rhoRealProxy(ix,ic).acceptRhoGradVksGxToRx(msg); break;
01544         case 3 : rhoRealProxy(ix,ic).acceptRhoGradVksGxToRx(msg); break;
01545         case 4 : rhoRealProxy(ix,ic).acceptRhoGradVksGxToRx(msg); break;
01546         default: CkAbort("impossible iopt");break;
01547       }//end switch
01548 
01549 #ifdef _ERIC_SETS_UP_COMMLIB_
01550       switch(iopt){
01551         case 0 : rhoGProxy_com(ic,0).acceptRhoGradVksGxToRx(msg);break;
01552         case 1 : rhoGProxyIGX_com(ic,0).acceptRhoGradVksGxToRx(msg); break;
01553         case 2 : rhoGProxyIGY_com(ic,0).acceptRhoGradVksGxToRx(msg); break;
01554         case 3 : rhoGProxyIGZ_com(ic,0).acceptRhoGradVksGxToRx(msg); break;
01555         default: CkAbort("impossible iopt");break;
01556       }//end switch
01557 #endif
01558 
01559 #ifdef CMK_VERSION_BLUEGENE
01560       if(ic%3==0)
01561        CmiNetworkProgress();
01562 #endif
01563     }//end for : chare sending
01564 
01565   //-----------------------------------------------------------------------------
01566   // Commlib stop
01567 
01568 #ifdef _ERIC_SETS_UP_COMMLIB_
01569     switch(iopt){
01570        case 0 : if(config.useRInsRhoRP)    commRealInstanceRx.endIteration();    break;
01571        case 1 : if(config.useRInsIGXRhoRP) commRealIGXInstanceRx.endIteration(); break;
01572        case 2 : if(config.useRInsIGYRhoRP) commRealIGYInstanceRx.endIteration(); break;
01573        case 3 : if(config.useRInsIGZRhoRP) commRealIGZInstanceRx.endIteration(); break;
01574        default: CkAbort("impossible iopt");break;
01575     }//end switch
01576 #endif
01577 
01578 //---------------------------------------------------------------------------
01579   }//end routine
01580 //////////////////////////////////////////////////////////////////////////////
01581 
01582 
01583 
01584 //////////////////////////////////////////////////////////////////////////////
01585 //////////////////////////////////////////////////////////////////////////////
01586 //////////////////////////////////////////////////////////////////////////////
01587 ///
01588 /// Double Transpose Bck Recv :  A(gx,y,z) on the way to A(x,y,z)
01589 ///                              Recv (gx,z) parallel -> (y,z) parallel 
01590 ///
01591 /// Invoked 5 times an algorithm step:
01592 ///             case 1-3: Gradients
01593 ///             case  0 : VksWhiteByrd
01594 ///             case  4 : VksHarteext
01595 ///
01596 //////////////////////////////////////////////////////////////////////////////
01597 void CP_Rho_RealSpacePlane::acceptRhoGradVksGxToRx(RhoGSFFTMsg *msg){
01598 //////////////////////////////////////////////////////////////////////////////
01599 
01600   CPcharmParaInfo *sim = (scProxy.ckLocalBranch ())->cpcharmParaInfo;
01601   int **listSubGx      = sim->listSubGx;
01602   int  *numSubGx       = sim->numSubGx;
01603 
01604   int size         = msg->size;  // msg size
01605   int iopt         = msg->iopt;  
01606   int num          = msg->num;    // number of lines along `a' sent
01607   int offset       = msg->offset; // chare array that sent the data
01608   complex *msgData = msg->data;
01609 
01610   CkAssert(size==myNgridb*num);
01611   CkAssert(rhoRsubplanes>1);
01612 
01613   complex *dataC;
01614   double  *dataR;
01615   switch(iopt){
01616     case  0: dataC = rho_rs.densityC; dataR = rho_rs.density; break;
01617     case  1: dataC = rho_rs.rhoIRXC;  dataR = rho_rs.rhoIRX; break;
01618     case  2: dataC = rho_rs.rhoIRYC;  dataR = rho_rs.rhoIRY; break;
01619     case  3: dataC = rho_rs.rhoIRZC;  dataR = rho_rs.rhoIRZ; break;
01620     case  4: dataC = rho_rs.VksHartC; dataR = rho_rs.VksHart; break;
01621     default: CkAbort("Impossible option\n"); break;
01622   }//endif
01623 
01624 //////////////////////////////////////////////////////////////////////////////
01625 /// Unpack the message 
01626 
01627   countIntGtoR[iopt]++;
01628   if(countIntGtoR[iopt]==1){bzero(dataR,sizeof(double)*nptsExpndB);}
01629 
01630   int stride = ngrida/2+1;
01631   if(listSubFlag==1){
01632     for(int js=0,j=0;js<size;js+=num,j++){
01633       int jj = j*stride;
01634       for(int is=js,i=0;is<(num+js);is++,i++){
01635        dataC[(listSubGx[offset][i]+jj)] = msgData[is];
01636      }//endfor
01637     }//endfor
01638   }else{
01639     int nst = listSubGx[offset][0];
01640     for(int js=0,j=0;js<size;js+=num,j++){
01641       int jj = j*stride+nst;
01642       for(int is=js,i=jj;is<(num+js);is++,i++){
01643        dataC[i] = msgData[is];
01644      }//endfor
01645     }//endfor
01646   }//endif
01647 
01648   delete msg;
01649 
01650 //////////////////////////////////////////////////////////////////////////////
01651 /// Do the FFT when you have all the parts
01652 
01653   int done = 0;
01654   if(countIntGtoR[iopt]==rhoRsubplanes){
01655     done = 1;
01656     countIntGtoR[iopt]=0;
01657     FFTcache *fftcache = fftCacheProxy.ckLocalBranch();  
01658 #ifndef CMK_OPTIMIZE
01659     double StartTime=CmiWallTimer();
01660 #endif
01661     fftcache->doRhoFFTGxToRx_Rchare(dataC,dataR,nplane_rho_x,ngrida,myNgridb,iplane_ind);
01662 #ifndef CMK_OPTIMIZE
01663   traceUserBracketEvent(doRhoFFTGxtoRx_, StartTime, CmiWallTimer());    
01664 #endif
01665 
01666   }//endif
01667 
01668 //////////////////////////////////////////////////////////////////////////////
01669 /// When you have completed an FFT, you have some choices of what to do next
01670 
01671   if(done == 1){ 
01672 
01673    //---------------------------------------------------
01674    // If you have finished a gradient, increment counter.
01675    // If you have completed all gradients : Gradcorr.
01676 
01677     if(1 <= iopt && iopt <=3){
01678       doneGradRhoVks++;
01679       if(doneGradRhoVks==3){
01680        doneGradRhoVks=0;
01681        GradCorr();
01682       }//endif
01683     }//endif
01684 
01685    //---------------------------------------------------
01686    // if you have finished the whiteByrd : add it to vks
01687 
01688     if(iopt==0){addWhiteByrdVks();}
01689 
01690    //---------------------------------------------------
01691    // if you have finished the HartEext : add it to vks
01692 
01693     if(iopt==4){addHartEextVks();}
01694 
01695   }//endif : you have completed an fft
01696 
01697 //////////////////////////////////////////////////////////////////////////////
01698   }//end routine
01699 //////////////////////////////////////////////////////////////////////////////
01700 
01701 
01702 
01703 
01704 //////////////////////////////////////////////////////////////////////////////
01705 //////////////////////////////////////////////////////////////////////////////
01706 //////////////////////////////////////////////////////////////////////////////
01707 ///
01708 /// The gradient of the density is now completed. You can compute the 
01709 /// GGA-DFT functional now. Density is now available to be used as scratch.
01710 ///
01711 /// Invoked once per algorithm step
01712 ///
01713 //////////////////////////////////////////////////////////////////////////////
01714 void CP_Rho_RealSpacePlane::GradCorr(){
01715 //////////////////////////////////////////////////////////////////////////////
01716 
01717   if(cp_grad_corr_on==0){
01718      CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
01719      CkPrintf("Don't come in the grad corr routines when\n");
01720      CkPrintf("gradient corrections are off\n");
01721      CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
01722      CkExit();
01723   }//endif
01724 
01725    double *density            = rho_rs.density;
01726    double *Vks                = rho_rs.Vks;
01727    double *rhoIRX             = rho_rs.rhoIRX;
01728    double *rhoIRY             = rho_rs.rhoIRY;
01729    double *rhoIRZ             = rho_rs.rhoIRZ;
01730 
01731    int nf1                    = ngrida;
01732    int nf2                    = myNgridb;
01733    int nf3                    = ngridc;
01734    int npts         = config.numFFTPoints;  // total number of points
01735    double *exc_gga_ret        = &(rho_rs.exc_gga_ret);
01736 
01737 //////////////////////////////////////////////////////////////////////////////
01738 
01739 #ifdef _CP_DEBUG_RHOR_VKSC_
01740     char myFileName[MAX_CHAR_ARRAY_LENGTH];
01741     sprintf(myFileName, "BGradRho_Real_%d_%d.out", thisIndex.x,thisIndex.y);
01742     FILE *fp = fopen(myFileName,"w");
01743       for (int i = 0; i <ngridb*ngrida; i++){
01744         fprintf(fp,"%g %g %g %g\n",rho_rs.rhoIRX[i],rho_rs.rhoIRY[i],rho_rs.rhoIRZ[i],
01745                                    rho_rs.Vks[i]);
01746       }//endfor
01747     fclose(fp);
01748 #endif
01749 
01750 //////////////////////////////////////////////////////////////////////////////
01751 /// Compute the gradient corrected functional : Density is toast after this.
01752 
01753     rho_rs.exc_gga_ret = 0.0;
01754 #define GGA_ON
01755 #ifdef GGA_ON
01756 
01757 #ifndef CMK_OPTIMIZE
01758   double StartTime=CmiWallTimer();
01759 #endif
01760     CPXCFNCTS::CP_getGGAFunctional(npts,nf1,nf2,nf3,density,rhoIRX,rhoIRY,rhoIRZ,
01761                                    Vks,thisIndex.x,exc_gga_ret);
01762 #ifndef CMK_OPTIMIZE
01763     traceUserBracketEvent(GradCorrGGA_, StartTime, CmiWallTimer());    
01764 #endif
01765 
01766 #ifdef CMK_VERSION_BLUEGENE
01767      CmiNetworkProgress();
01768 #endif
01769 
01770 #endif // GGA ON
01771 
01772 //////////////////////////////////////////////////////////////////////////////
01773 /// Reduce the exchange correlation energy
01774 
01775    double exc[2];
01776    exc[0]=rho_rs.exc_ret;
01777    exc[1]=rho_rs.exc_gga_ret;
01778    contribute(2*sizeof(double),exc,CkReduction::sum_double);
01779 
01780 //////////////////////////////////////////////////////////////////////////////
01781 /// output
01782 
01783 #ifdef _CP_DEBUG_RHOR_VKSD_
01784     myFileName[MAX_CHAR_ARRAY_LENGTH];
01785     sprintf(myFileName, "AGradRho_Real_%d_%d.out", thisIndex.x,thisIndex.y);
01786     fp = fopen(myFileName,"w");
01787       for (int i = 0; i <ngridb*ngrida; i++){
01788         fprintf(fp,"%g %g %g %g\n",rho_rs.rhoIRX[i],rho_rs.rhoIRY[i],rho_rs.rhoIRZ[i],
01789                                    rho_rs.Vks[i]);
01790       }//endfor
01791     fclose(fp);
01792 #endif
01793 
01794 //////////////////////////////////////////////////////////////////////////////
01795 /// Start the white bird puppy : back fft of rhoirx, rhoiry, rhoirz
01796 
01797     whiteByrdFFT();
01798 
01799 //---------------------------------------------------------------------------
01800    }//end routine
01801 //////////////////////////////////////////////////////////////////////////////
01802 
01803 
01804 //////////////////////////////////////////////////////////////////////////////
01805 ///
01806 /// The white-bird term : First fwfft redefined delrho(r) to delrho(g)
01807 /// then send to RhoGspacePlane. RhoGspacePlane sends you back back another term.
01808 /// After this routine, rhoIRX, rhoIRY and rhoIRZ are `free'.
01809 ///
01810 /// Invoked once per algorithm step
01811 ///
01812 ///
01813 //////////////////////////////////////////////////////////////////////////////
01814 //////////////////////////////////////////////////////////////////////////////
01815 //////////////////////////////////////////////////////////////////////////////
01816 void CP_Rho_RealSpacePlane::whiteByrdFFT(){
01817 //////////////////////////////////////////////////////////////////////////////
01818 /// Constants and pointers
01819 
01820    FFTcache *fftcache = fftCacheProxy.ckLocalBranch();  
01821    double  *rhoIRX    = rho_rs.rhoIRX;
01822    double  *rhoIRY    = rho_rs.rhoIRY;
01823    double  *rhoIRZ    = rho_rs.rhoIRZ;
01824    complex *rhoIRXC   = rho_rs.rhoIRXC;
01825    complex *rhoIRYC   = rho_rs.rhoIRYC;
01826    complex *rhoIRZC   = rho_rs.rhoIRZC;
01827 
01828    int ioptx          = 1;
01829    int iopty          = 2;
01830    int ioptz          = 3;
01831 
01832 //////////////////////////////////////////////////////////////////////////////
01833 /// I) rhoIRX : Scale, Real to complex FFT, perform FFT, transpose
01834 
01835 #ifndef CMK_OPTIMIZE
01836   double StartTime=CmiWallTimer();
01837 #endif
01838 
01839   rho_rs.scale(rhoIRX,FFTscale);
01840 
01841   if(rhoRsubplanes==1){
01842     fftcache->doRhoFFTRtoG_Rchare(rhoIRXC,rhoIRX,nplane_rho_x,ngrida,ngridb,iplane_ind);
01843     if(config.rhoGToRhoRMsgComb==0){sendPartlyFFTtoRhoG(ioptx);}
01844   }else{
01845     fftcache->doRhoFFTRxToGx_Rchare(rhoIRXC,rhoIRX,nplane_rho_x,ngrida,myNgridb,iplane_ind);
01846     sendPartlyFFTRyToGy(ioptx);// transpose and do the y-gy fft
01847   }//endif
01848 
01849 #ifndef CMK_OPTIMIZE
01850     traceUserBracketEvent(WhiteByrdFFTX_, StartTime, CmiWallTimer());    
01851 #endif
01852 
01853 #ifdef CMK_VERSION_BLUEGENE
01854   CmiNetworkProgress();
01855 #endif
01856 
01857 //////////////////////////////////////////////////////////////////////////////
01858 /// II) rhoIRY : Scale, real to complex FFT, perform FFT, transpose
01859 
01860 #ifndef CMK_OPTIMIZE
01861   StartTime=CmiWallTimer();
01862 #endif
01863 
01864   rho_rs.scale(rhoIRY,FFTscale);
01865 
01866   if(rhoRsubplanes==1){
01867     fftcache->doRhoFFTRtoG_Rchare(rhoIRYC,rhoIRY,nplane_rho_x,ngrida,ngridb,iplane_ind);
01868     if(config.rhoGToRhoRMsgComb==0){sendPartlyFFTtoRhoG(iopty);}
01869   }else{
01870     fftcache->doRhoFFTRxToGx_Rchare(rhoIRYC,rhoIRY,nplane_rho_x,ngrida,myNgridb,iplane_ind);
01871     sendPartlyFFTRyToGy(iopty); // transpose and do the y-gy fft
01872   }//endif
01873 
01874 #ifndef CMK_OPTIMIZE
01875     traceUserBracketEvent(WhiteByrdFFTY_, StartTime, CmiWallTimer());    
01876 #endif
01877 
01878 #ifdef CMK_VERSION_BLUEGENE
01879   CmiNetworkProgress();
01880 #endif
01881 
01882 //////////////////////////////////////////////////////////////////////////////
01883 /// III) rhoIRZ : Scale, real to complex FFT, perform FFT, transpose
01884 
01885 #ifndef CMK_OPTIMIZE
01886   StartTime=CmiWallTimer();
01887 #endif
01888 
01889   rho_rs.scale(rhoIRZ,FFTscale);
01890   if(rhoRsubplanes==1){
01891     fftcache->doRhoFFTRtoG_Rchare(rhoIRZC,rhoIRZ,nplane_rho_x,ngrida,ngridb,iplane_ind);
01892     if(config.rhoGToRhoRMsgComb==0){sendPartlyFFTtoRhoG(ioptz);}
01893   }else{
01894     fftcache->doRhoFFTRxToGx_Rchare(rhoIRZC,rhoIRZ,nplane_rho_x,ngrida,myNgridb,iplane_ind);
01895     sendPartlyFFTRyToGy(ioptz);// transpose and do the y-gy fft
01896   }//endif
01897 
01898 #ifndef CMK_OPTIMIZE
01899   traceUserBracketEvent(WhiteByrdFFTZ_, StartTime, CmiWallTimer());    
01900 #endif
01901 
01902 #ifdef CMK_VERSION_BLUEGENE
01903   CmiNetworkProgress();
01904 #endif
01905 
01906 //////////////////////////////////////////////////////////////////////////////
01907 /// Send all 3 components at once
01908 
01909   if(rhoRsubplanes==1 && config.rhoGToRhoRMsgComb==1){
01910      sendPartlyFFTtoRhoGall();
01911   }//endif
01912 
01913 //////////////////////////////////////////////////////////////////////////////
01914    }//end routine
01915 //////////////////////////////////////////////////////////////////////////////
01916 
01917 
01918 
01919 //////////////////////////////////////////////////////////////////////////////
01920 //////////////////////////////////////////////////////////////////////////////
01921 //////////////////////////////////////////////////////////////////////////////
01922 ///
01923 /// The white bird vks correction is returned from RhoG : VksW(gx,gy,z)
01924 ///                This routine recvs the transpose {gx,gy} to (gx,z)
01925 ///
01926 /// Invoked once per algorithm step
01927 ///
01928 /// After this routine, VksW(gx,gy,z) -> VksW(x,y,gz) and is added vksTot();
01929 ///
01930 //////////////////////////////////////////////////////////////////////////////
01931 void CP_Rho_RealSpacePlane::acceptWhiteByrd(RhoRSFFTMsg *msg){
01932 //////////////////////////////////////////////////////////////////////////////
01933 
01934 #ifdef _CP_DEBUG_RHOR_VERBOSE_
01935   CkPrintf("WhiteByrd Data from RhoG arriving at RhoR : %d %d\n",
01936            thisIndex.x,thisIndex.y);
01937 #endif
01938 
01939 //////////////////////////////////////////////////////////////////////////////
01940 /// Local Pointers
01941 
01942   CPcharmParaInfo *sim   = (scProxy.ckLocalBranch ())->cpcharmParaInfo;
01943   int nchareG            = sim->nchareRhoG;
01944   int **tranUnpack       = sim->index_tran_upack_rho;
01945   int *nlines_per_chareG = sim->nlines_per_chareRhoG;
01946   int ***tranupack_rhoY       = sim->index_tran_upack_rho_y;
01947   int **nlines_per_chareRhoGY = sim->nline_send_rho_y;
01948   int iy                      = thisIndex.y;
01949  
01950   int size               = msg->size; 
01951   int Index              = msg->senderIndex;
01952   int iopt               = msg->iopt;
01953   complex *partiallyFFTd = msg->data;
01954 
01955   int mySize;
01956   int nptsExpnd;
01957   if(rhoRsubplanes==1){
01958      mySize    = nlines_per_chareG[Index];
01959      nptsExpnd = nptsExpndB;
01960   }else{
01961      mySize    = nlines_per_chareRhoGY[Index][iy];
01962      nptsExpnd = nptsExpndA;
01963   }//endif
01964 
01965 //////////////////////////////////////////////////////////////////////////////
01966 /// Perform some error checking
01967 
01968   countWhiteByrd++;
01969   if (countWhiteByrd > recvCountFromGRho) {
01970      CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
01971      CkPrintf("Mismatch in allowed rho_gspace chare arrays : %d %d %d %d\n",
01972                countWhiteByrd,nchareG,thisIndex.x,thisIndex.y);
01973      CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
01974      CkExit();
01975   }//endif
01976 
01977   if(size!=mySize){
01978      CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
01979      CkPrintf("Dude.2, %d != %d for rho chare %d %d\n",size,mySize,
01980                thisIndex.y,Index);
01981      CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
01982      CkExit();
01983   }//endif
01984 
01985 //////////////////////////////////////////////////////////////////////////////
01986 /// unpack the data and delete the message : The density is free for use as scr
01987 
01988   double *dataR;
01989   complex *dataC;
01990   if(rhoRsubplanes==1){
01991     dataR = rho_rs.density; 
01992     dataC = rho_rs.densityC; 
01993   }else{
01994     dataR = rho_rs.rhoIRXint;
01995     dataC = rho_rs.rhoIRXCint;
01996   }//endif
01997 
01998   // zero because input data does not fill the plane
01999   if(countWhiteByrd==1){bzero(dataR,sizeof(double)*nptsExpnd);}
02000 
02001   if(rhoRsubplanes==1){
02002     for(int i=0;i<size;i++){ 
02003       dataC[tranUnpack[Index][i]] = partiallyFFTd[i];
02004     }//endfor
02005   }else{
02006     for(int i=0;i<size;i++){
02007       dataC[tranupack_rhoY[Index][iy][i]] = partiallyFFTd[i];
02008     }//endfor
02009   }//endif
02010 
02011   delete msg;
02012 
02013 //////////////////////////////////////////////////////////////////////////////
02014 /// When you have all the messages, do the last fft, and add in the correction
02015 
02016   if(countWhiteByrd == recvCountFromGRho){
02017     countWhiteByrd=0;
02018 
02019 #ifndef CMK_OPTIMIZE
02020     double StartTime=CmiWallTimer();
02021 #endif
02022     FFTcache *fftcache = fftCacheProxy.ckLocalBranch();  
02023     if(rhoRsubplanes==1){
02024       fftcache->doRhoFFTGtoR_Rchare(dataC,dataR,nplane_rho_x,ngrida,ngridb,iplane_ind);
02025       addWhiteByrdVks();
02026     }else{
02027       fftcache->doRhoFFTGyToRy_Rchare(dataC,dataR,myNplane_rho,ngrida,ngridb,iplane_ind);
02028       sendPartlyFFTGxToRx(0);
02029     }
02030 #ifndef CMK_OPTIMIZE
02031     traceUserBracketEvent(PostByrdfwFFTGtoR_, StartTime, CmiWallTimer());    
02032 #endif
02033 
02034   }//endif : communication from rhog has all arrived safely
02035 
02036 //////////////////////////////////////////////////////////////////////////////
02037    }//end routine
02038 //////////////////////////////////////////////////////////////////////////////
02039 
02040 
02041 
02042 //////////////////////////////////////////////////////////////////////////////
02043 //////////////////////////////////////////////////////////////////////////////
02044 //////////////////////////////////////////////////////////////////////////////
02045 ///
02046 ///  Add the VksWhiteByrd to VksTot : Set the done flag.
02047 ///
02048 //////////////////////////////////////////////////////////////////////////////
02049 void CP_Rho_RealSpacePlane::addWhiteByrdVks(){
02050 //////////////////////////////////////////////////////////////////////////////
02051 /// Add the whitebyrd contrib to vks
02052 
02053     int nptsExpnd  = nptsExpndB;
02054     double *dataR  = rho_rs.density; 
02055     double *Vks    = rho_rs.Vks;
02056     for(int i=0;i<nptsExpnd;i++){Vks[i] -= dataR[i];}
02057 
02058 //////////////////////////////////////////////////////////////////////////////
02059 /// Our we done yet?
02060 
02061     doneWhiteByrd = true;
02062     doMulticastCheck();
02063 
02064 //////////////////////////////////////////////////////////////////////////////
02065    }//end routine
02066 //////////////////////////////////////////////////////////////////////////////
02067 
02068 
02069 
02070 //////////////////////////////////////////////////////////////////////////////
02071 //////////////////////////////////////////////////////////////////////////////
02072 //////////////////////////////////////////////////////////////////////////////
02073 /**
02074  *
02075  * Accept hartExt transpose data : receive VksHartEext(gx,gy,z) gx,z is parallel.
02076  * Since the message can come at any time memory, VksHart has to be ready to
02077  * receive it.
02078  *
02079  * Invoked once per algorithm step.
02080  *
02081  * After the routine, VksHartEext(gx,gy,z)--> VksHarteext(x,y,z) and is added
02082  *                    to Vkstot(x,y,z)
02083  *
02084  */
02085 //////////////////////////////////////////////////////////////////////////////
02086 void CP_Rho_RealSpacePlane::acceptHartVks(RhoHartRSFFTMsg *msg){
02087 //////////////////////////////////////////////////////////////////////////////
02088 /// Local pointers
02089 
02090   CPcharmParaInfo *sim        = (scProxy.ckLocalBranch ())->cpcharmParaInfo;
02091   int nchareG                 = sim->nchareRhoG;
02092   int **tranUnpack            = sim->index_tran_upack_rho;
02093   int *nlines_per_chareG      = sim->nlines_per_chareRhoG;
02094   int ***tranupack_rhoY       = sim->index_tran_upack_eext_ys;
02095   int **nlines_per_chareRhoGY = sim->nline_send_eext_y;
02096   int iy                      = thisIndex.y;
02097    
02098   int size               = msg->size; 
02099   int IndexS             = msg->index;
02100   int Index              = msg->senderBigIndex;
02101   int istrt_lines        = msg->senderStrtLine;
02102   int iopt               = msg->iopt;
02103   complex *partiallyFFTd = msg->data;
02104 
02105   double  *dataR;
02106   complex *dataC;
02107   int nptsExpnd;
02108   int mySize;
02109   if(rhoRsubplanes==1){
02110     dataR     = rho_rs.VksHart;
02111     dataC     = rho_rs.VksHartC;
02112     nptsExpnd = nptsExpndB;
02113     mySize    = size;
02114   }else{
02115     dataR     = rho_rs.VksHartint;
02116     dataC     = rho_rs.VksHartCint;
02117     nptsExpnd = nptsExpndA;
02118     mySize    = nlines_per_chareRhoGY[IndexS][iy];
02119   }//endif
02120   
02121   CkAssert(size==mySize);
02122 
02123 //////////////////////////////////////////////////////////////////////////////
02124 
02125 #ifdef _CP_DEBUG_RHOR_VERBOSE_
02126   CkPrintf("Data from RhoG arriving at RhoR : %d %d %d %d\n",
02127            thisIndex.x,thisIndex.y,iopt,countGradVks[iopt]);
02128 #endif
02129 
02130   CkAssert(iopt==0);
02131   if(size!=mySize){
02132      CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
02133      CkPrintf("Dude.2, %d != %d for rho chare %d %d : %d\n",size,mySize,
02134                thisIndex.y,IndexS,Index);
02135      CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
02136      CkExit();
02137   }//endif
02138 
02139 //////////////////////////////////////////////////////////////////////////////
02140 /// Copy out the hart-eext contrib to vks : iopt==0
02141 
02142   countGradVks[iopt]++; 
02143   if(countGradVks[iopt]==1){bzero(dataR,sizeof(double)*nptsExpnd);}
02144    
02145   if(rhoRsubplanes==1){
02146     for(int i=0,j=istrt_lines;i<size;i++,j++){
02147       dataC[tranUnpack[Index][j]] = partiallyFFTd[i];
02148     }//endfor
02149   }else{
02150     for(int i=0;i<size;i++){
02151       dataC[tranupack_rhoY[IndexS][iy][i]] = partiallyFFTd[i];
02152     }//endfor
02153   }//endif
02154 
02155   delete msg;  
02156 
02157 //////////////////////////////////////////////////////////////////////////////
02158 /// fft the puppy if you've got it all : only atmtyp index=1 of ghart sends rho_real
02159 
02160   if (countGradVks[iopt] == recvCountFromGHartExt){
02161       countGradVks[iopt]=0;
02162 
02163       FFTcache *fftcache = fftCacheProxy.ckLocalBranch();  
02164 #ifndef CMK_OPTIMIZE
02165       double StartTime=CmiWallTimer();
02166 #endif
02167       if(rhoRsubplanes==1){
02168         fftcache->doRhoFFTGtoR_Rchare(dataC,dataR,nplane_rho_x,ngrida,ngridb,iplane_ind);
02169         addHartEextVks();
02170       }else{
02171         fftcache->doRhoFFTGyToRy_Rchare(dataC,dataR,myNplane_rho,ngrida,ngridb,iplane_ind);
02172         sendPartlyFFTGxToRx(4);
02173       }//endif
02174 
02175 #ifndef CMK_OPTIMIZE
02176       traceUserBracketEvent(fwFFTGtoR0_, StartTime, CmiWallTimer());    
02177 #endif
02178   }//endif : communication from rhog 
02179 
02180   
02181 //////////////////////////////////////////////////////////////////////////////
02182   }//end routine 
02183 //////////////////////////////////////////////////////////////////////////////
02184 
02185 
02186 //////////////////////////////////////////////////////////////////////////////
02187 //////////////////////////////////////////////////////////////////////////////
02188 //////////////////////////////////////////////////////////////////////////////
02189 ///
02190 ///  Add the VksHartEext to VksTot : Set the done flag.
02191 ///
02192 //////////////////////////////////////////////////////////////////////////////
02193 void CP_Rho_RealSpacePlane::addHartEextVks(){
02194 //////////////////////////////////////////////////////////////////////////////
02195 /// Add the whitebyrd contrib to vks
02196 
02197     int nptsExpnd  = nptsExpndB;
02198     double *dataR  = rho_rs.VksHart; 
02199     double *Vks    = rho_rs.Vks;
02200     for(int i=0;i<nptsExpnd;i++){Vks[i]+=dataR[i];}
02201 
02202 //////////////////////////////////////////////////////////////////////////////
02203 /// Our we done yet?
02204 
02205     doneHartVks = true;
02206     doMulticastCheck();
02207 
02208 //////////////////////////////////////////////////////////////////////////////
02209    }//end routine
02210 //////////////////////////////////////////////////////////////////////////////
02211 
02212 
02213 //////////////////////////////////////////////////////////////////////////////
02214 //////////////////////////////////////////////////////////////////////////////
02215 //////////////////////////////////////////////////////////////////////////////
02216 ///
02217 ///  Under ees-eext Rhart chare reports its completion :  Set the done flag.
02218 ///
02219 //////////////////////////////////////////////////////////////////////////////
02220 void CP_Rho_RealSpacePlane::RHartReport(){
02221   countRHart++;
02222   if(countRHart==countRHartValue){
02223       doneRHart=true;
02224       doMulticastCheck();
02225       countRHart=0;
02226   }//endif
02227 }
02228 //////////////////////////////////////////////////////////////////////////////
02229 
02230 
02231 //////////////////////////////////////////////////////////////////////////////
02232 //////////////////////////////////////////////////////////////////////////////
02233 //////////////////////////////////////////////////////////////////////////////
02234 ///
02235 /// If all the parts of exc-eext-hart are done, invoke blast of vks to states
02236 ///
02237 //////////////////////////////////////////////////////////////////////////////
02238   void CP_Rho_RealSpacePlane::doMulticastCheck(){
02239 //////////////////////////////////////////////////////////////////////////////
02240 
02241   if(doneWhiteByrd && doneRHart && doneHartVks){doMulticast();}
02242 
02243 //////////////////////////////////////////////////////////////////////////////
02244   }//end routine
02245 //////////////////////////////////////////////////////////////////////////////
02246 
02247 
02248 
02249 //////////////////////////////////////////////////////////////////////////////
02250 /// Send vks back to the states
02251 //////////////////////////////////////////////////////////////////////////////
02252 //////////////////////////////////////////////////////////////////////////////
02253 //////////////////////////////////////////////////////////////////////////////
02254 void CP_Rho_RealSpacePlane::doMulticast(){
02255 //////////////////////////////////////////////////////////////////////////////
02256 
02257   if(!doneWhiteByrd || !doneHartVks || !doneRHart){
02258     CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
02259     CkPrintf("Flow of Control Error : Attempting to rho multicast\n");
02260     CkPrintf("without harteext or gradcorr (whitebyrd) \n");
02261     CkPrintf("without rhodensity or rhart \n");
02262     CkPrintf("@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
02263     CkExit();
02264   }//endif
02265 
02266  // overkill on the resetting
02267   countRHart=0;
02268   doneWhiteByrd    = false;
02269   doneHartVks      = false;
02270   doneRHart        = false;
02271     
02272 //////////////////////////////////////////////////////////////////////////////
02273 /// Send vks back to the states in real space
02274 
02275    int dataSize    = ngrida*myNgridb;
02276    double *Vks     = rho_rs.Vks;
02277 
02278    if ((config.useGMulticast+config.useCommlibMulticast)!=1) {
02279      CkAbort("No multicast strategy\n");
02280    }//endif
02281 
02282    if (config.useGMulticast || config.useCommlibMulticast) {
02283 
02284       ProductMsg *msg = new (dataSize, 0) ProductMsg;
02285 
02286       // ADD new index here for Y
02287       msg->idx        = thisIndex.x;
02288       msg->datalen    = dataSize;
02289       msg->hops       = 0;
02290       msg->subplane   = thisIndex.y;
02291       double *dataR   = msg->data;
02292 
02293       rho_rs.uPackShrink(dataR,Vks); // down pack Vks for the send
02294 //#define _CP_DEBUG_RHOR_VKSE_
02295 #ifdef _CP_DEBUG_RHOR_VKSE_
02296        char myFileName[MAX_CHAR_ARRAY_LENGTH];
02297        sprintf(myFileName, "vks_rho_Real_%d_%d.out", thisIndex.x,thisIndex.y);
02298        FILE *fp = fopen(myFileName,"w");
02299        for (int j = 0, iii=0; j <myNgridb; j++){
02300        for (int i = 0; i <ngrida; i++,iii++){
02301          fprintf(fp,"%d %d %g\n",i,j+myBoff,dataR[iii]);
02302        }}//endfor
02303        fclose(fp);
02304        rhoRealProxy(0,0).exitForDebugging();
02305 #else
02306       if(config.useCommlibMulticast){mcastInstance.beginIteration();}
02307       if(config.useCommlibMulticast){
02308         realSpaceSectionCProxy.doProduct(msg);
02309       }else{
02310         realSpaceSectionProxy.doProduct(msg);
02311       }//enddif
02312      if(config.useCommlibMulticast){mcastInstance.endIteration();}
02313 #ifdef _CP_SUBSTEP_TIMING_
02314      if(rhoKeeperId>0)
02315        {
02316          double rhoend=CmiWallTimer();
02317          contribute(sizeof(double), &rhoend, CkReduction::max_double,  CkCallback(CkIndex_TimeKeeper::collectEnd(NULL),TimeKeeperProxy),rhoKeeperId);
02318        }
02319 #endif
02320 
02321 #endif
02322    }//endif
02323 
02324 //////////////////////////////////////////////////////////////////////////////
02325    }//end routine
02326 //////////////////////////////////////////////////////////////////////////////
02327 
02328 
02329 //////////////////////////////////////////////////////////////////////////////
02330 ///////////////////////////////////////////////////////////////////////////cc
02331 //////////////////////////////////////////////////////////////////////////////
02332 /// Glenn's RhoReal exit 
02333 //////////////////////////////////////////////////////////////////////////////
02334 void CP_Rho_RealSpacePlane::exitForDebugging(){
02335   countDebug++;  
02336   if(countDebug==(rhoRsubplanes*ngridc)){
02337     countDebug=0;
02338     CkPrintf("I am in the exitfordebuging rhoreal puppy. Bye-bye\n");
02339     CkExit();
02340   }//endif
02341 }
02342 //////////////////////////////////////////////////////////////////////////////
02343 
02344 
02345 
02346 //////////////////////////////////////////////////////////////////////////////
02347 //////////////////////////////////////////////////////////////////////////////
02348 //////////////////////////////////////////////////////////////////////////////
02349 void CP_Rho_RealSpacePlane::ResumeFromSync(){
02350 
02351     if(config.useCommlibMulticast)
02352         ComlibResetSectionProxy(&realSpaceSectionCProxy);
02353     if(config.useRInsRhoGP)
02354         ComlibResetProxy(&rhoGProxy_com);
02355 }
02356 //////////////////////////////////////////////////////////////////////////////
02357 
02358 
02359 
02360 //////////////////////////////////////////////////////////////////////////////
02361 //////////////////////////////////////////////////////////////////////////////
02362 //////////////////////////////////////////////////////////////////////////////
02363 //! return tru if input is power of 2
02364 //////////////////////////////////////////////////////////////////////////////
02365 bool is_pow2(int input){
02366     int y=0;
02367     for(int x=0;x<32;x++){
02368       y = 1<<x;
02369       if(y==input){return true;}
02370     }//endfor
02371     return false;
02372 }
02373 //////////////////////////////////////////////////////////////////////////////
02374 

Generated on Thu Dec 6 18:25:29 2007 for leanCP by  doxygen 1.5.3