31 #include "CP_State_GSpacePlane.h"
32 #include "CP_State_ParticlePlane.h"
35 #include "main/startupMessages.h"
42 #include "structure_factor/StructFactorCache.h"
43 #include "main/CPcharmParaInfoGrp.h"
45 #include "main/InstanceController.h"
47 #define ENABLE_RDMA_HANDSHAKES
49 #include "paircalc/RDMAMessages.h"
59 #include "../../src_piny_physics_v1.0/include/class_defs/CP_OPERATIONS/class_cpnonlocal.h"
60 #include "../../src_piny_physics_v1.0/include/class_defs/CP_OPERATIONS/class_cpintegrate.h"
61 #include "../../src_piny_physics_v1.0/include/class_defs/CP_OPERATIONS/class_cprspaceion.h"
62 #include "../../src_piny_physics_v1.0/include/class_defs/CP_OPERATIONS/class_cplocal.h"
63 #include "../../src_piny_physics_v1.0/include/class_defs/piny_constants.h"
64 #include "../../src_piny_physics_v1.0/include/class_defs/allclass_gen.h"
65 #include "../../src_piny_physics_v1.0/include/class_defs/allclass_cp.h"
66 #include "../../src_piny_physics_v1.0/include/class_defs/PINY_INIT/PhysicsParamTrans.h"
68 #if CMK_PERSISTENT_COMM
69 #define USE_PERSISTENT 1
74 extern CProxy_main mainProxy;
75 extern CProxy_InstanceController instControllerProxy;
76 extern CProxy_TimeKeeper TimeKeeperProxy;
77 extern CkVec <CProxy_CP_State_RealSpacePlane> UrealSpacePlaneProxy;
78 extern CkVec <CProxy_CP_State_GSpacePlane> UgSpacePlaneProxy;
79 extern CkVec <CProxy_GSpaceDriver> UgSpaceDriverProxy;
80 extern CkVec <CProxy_CP_State_ParticlePlane> UparticlePlaneProxy;
81 extern CkVec <CProxy_AtomsCache> UatomsCacheProxy;
82 extern CkVec <CProxy_StructureFactor> UsfCompProxy;
83 extern CkVec <CProxy_EnergyGroup> UegroupProxy;
84 extern CkVec <CProxy_FFTcache> UfftCacheProxy;
85 extern CkVec <CProxy_StructFactCache> UsfCacheProxy;
86 extern CkVec <CProxy_eesCache> UeesCacheProxy;
89 extern CProxy_ComlibManager mgrProxy;
90 extern ComlibInstanceHandle gssInstance;
118 AtomsCache *ag = UatomsCacheProxy[thisInstance.proxyOffset].ckLocalBranch();
120 int cp_min_opt = sim->cp_min_opt;
121 double tol_cp_min = sim->tol_cp_min;
122 double tol_cp_dyn = sim->tol_cp_dyn;
124 double rnatm = ((double)natm)/96.0;
126 double d0 = ((
double *)msg->getData())[0];
127 double d1 = ((
double *)msg->getData())[1];
136 fmagPsi_total_old = fmagPsi_total;
144 if(thisIndex.x==0 && thisIndex.y==0){
145 int iprintout = iteration-1;
147 fprintf(temperScreenFile,
"Iter [%d] MagForPsi = %5.8lf | %5.8lf per entity\n", iprintout,d1,d1/rnatm);
148 fprintf(temperScreenFile,
"Iter [%d] Memory = %ld\n",iprintout,CmiMemoryUsage());
152 if(cp_min_opt==0 && fmagPsi_total>rnatm*tol_cp_dyn){
154 if(thisIndex.x==0 && thisIndex.y==0){
155 fprintf(temperScreenFile,
"@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
156 fprintf(temperScreenFile,
"Mag psi force %.10g > %.10g too large for CP dynamics. Ciao! \n",
157 fmagPsi_total/rnatm,tol_cp_dyn);
158 fprintf(temperScreenFile,
"@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
161 int numBeads=config.UberImax;
162 int numTempers=config.UberKmax;
163 #ifndef _CP_DEBUG_ORTHO_OFF_
166 if(numBeads==1 && numTempers==1)
168 if(fmagPsi_total<=tol_cp_min){
169 #ifndef _CP_DEBUG_SCALC_ONLY_
170 exitFlag=1; outputFlag=1;
171 if(thisIndex.x==0 && thisIndex.y==0){
172 CkPrintf(
"----------------------------------------------\n");
173 CkPrintf(
" CP wavefunction force tolerence reached! \n");
174 CkPrintf(
"----------------------------------------------\n");
176 #endif // _CP_DEBUG_SCALC_ONLY_
181 #ifndef _CP_DEBUG_SCALC_ONLY_
182 if(fmagPsi_total<=tol_cp_min)
186 exitFlag=1; outputFlag=1;
187 if(thisIndex.x==0 && thisIndex.y==0){
188 CkPrintf(
"----------------------------------------------\n");
189 CkPrintf(
" CP wavefunction force tolerence reached! \n");
190 CkPrintf(
"----------------------------------------------\n");
193 if(thisIndex.x==0 && thisIndex.y==0){
196 CkMulticastMgr *mcastGrp = CProxy_CkMulticastMgr(
mCastGrpId).ckLocalBranch();
197 int result=(fmagPsi_total <= tol_cp_min);
199 mcastGrp->contribute(
sizeof(
int), &result, CkReduction::logical_and,
202 #endif // _CP_DEBUG_SCALC_ONLY_
211 if(thisIndex.x==0 && thisIndex.y==0){
212 #ifdef _CP_DEBUG_ORTHO_OFF_
213 CkPrintf(
"/////////////////////////////////////////////=\n");
214 CkPrintf(
" Running with PC/Ortho Off \n");
216 double cpuTimeOld = cpuTimeNow;
217 cpuTimeNow = CkWallTimer();
219 fprintf(temperScreenFile,
"Iter [%d] CpuTime(GSP)= %g\n",iteration-1,cpuTimeNow-cpuTimeOld);
221 int heavyside = 1-(iteration-iterRotation >= 1 ? 1 : 0);
222 fprintf(temperScreenFile,
"Iter [%d] Step = %d : Step Last Rot = %d : Interval Rot = %d : Num Rot = %d : %d\n",iteration,
223 iteration,iterRotation,iteration-iterRotation,nrotation,heavyside);
230 void CP_State_GSpacePlane::initBeadCookie(
ICCookieMsg *m)
233 CkGetSectionInfo(beadCookie,m);
237 void CP_State_GSpacePlane::minimizeSync(
ICCookieMsg *m)
240 CkGetSectionInfo(beadCookie,m);
242 thisProxy.setExitFlag();
245 void CP_State_GSpacePlane::setExitFlag()
252 void printForce(
void *param,
void *msg){
254 CkReductionMsg *m=(CkReductionMsg *)msg;
255 double d = ((
double *)m->getData())[0];
257 CkPrintf(
"printForces = %5.8f \n", d);
285 forwardTimeKeep(_gforward), backwardTimeKeep(_gbackward),
286 thisInstance(_thisInstance)
292 #ifdef USE_PERSISTENT
296 int cp_min_opt = sim->cp_min_opt;
297 int gen_wave = sim->gen_wave;
299 if(thisIndex.x==0 && thisIndex.y==0 && config.maxIter<30){
300 wallTimeArr =
new double[config.maxIter+2];
302 wallTimeArr =
new double[30];
307 myBeadIndex = thisInstance.idxU.x;
308 myKptIndex = thisInstance.idxU.y;
309 myTemperIndex = thisInstance.idxU.z;
310 mySpinIndex = thisInstance.idxU.s;
316 ibead_ind = thisInstance.idxU.x;
317 kpoint_ind = thisInstance.idxU.y;
318 itemper_ind = thisInstance.idxU.z;
337 fmagPsi_total0 = 0.0;
338 fmagPsi_total_old = 0.0;
353 finishedCpIntegrate = 0;
354 if(cp_min_opt==0){finishedCpIntegrate = 1;}
355 if(gen_wave==1){finishedCpIntegrate = 1;}
356 doneDoingIFFT =
false;
359 acceptedLambda =
false;
372 countPsiO =
new int[config.numChunksSym];
374 for(
int i=0;i<config.numChunksSym;i++)
377 countVPsiO=
new int[config.numChunksSym];
378 for(
int i=0;i<config.numChunksSym;i++)
384 countLambdaO=
new int[config.numChunksAsym];
385 for(
int i=0;i<config.numChunksAsym;i++)
388 numRecvRedPsi = sim->RCommPkg[thisIndex.y].num_recv_tot;
392 int remainder=nstates%s_grain;
393 int sizeoflastgrain=s_grain+remainder;
394 int lastgrain=nstates-sizeoflastgrain;
395 int ourgrain = thisIndex.x/s_grain*s_grain;
396 if(nstates == s_grain){
400 if(ourgrain<(nstates-s_grain)){
406 AllPsiExpected*=config.numChunksSym;
408 int numGrains=nstates/s_grain;
409 if(config.gSpaceSum){
410 AllPsiExpected=numGrains*config.numChunksSym;
416 if(nstates == s_grain){
425 if(config.gSpaceSum){
426 if(cp_min_opt==0 && numGrains>1)
427 { AllLambdaExpected=(2*numGrains-1)*config.numChunksAsym;}
429 { AllLambdaExpected=numGrains*config.numChunksAsym*AllLambdaExpected;}
433 AllLambdaExpected*=config.numChunksAsym;
440 numRDMAlinksAsymm = numGrains * config.numChunksAsym * 2;
443 numRDMAlinksAsymm = 0;
458 CPINTEGRATE::fetchNHCsize(&len_nhc_cp,&num_nhc_cp,&nck_nhc_cp);
459 initGStateSlab(&gs,sizeX,sim->sizeY,sim->sizeZ,gSpaceUnits,realSpaceUnits,s_grain,
460 thisIndex.y,thisIndex.x,len_nhc_cp,num_nhc_cp,nck_nhc_cp);
470 setMigratable(
false);
473 #ifdef _CP_GS_DEBUG_COMPARE_VKS_
477 #ifdef _CP_GS_DEBUG_COMPARE_PSI_
491 #ifdef _FANCY_PLANES_
492 if(config.numPes>2*nstates){
494 CkVec <int> peUsedByNLZ;
495 CkVec <int> planeUsedByNLZ;
496 for(
int state=0; state<thisIndex.x;state++){
500 int thisstateplaneproc=GSImaptable.get(state,redPlane)%CkNumPes();
501 for(
int i=0;i<usedVec.size();i++){
502 if(usedVec[i]==thisstateplaneproc){used=
true;}
504 if(!used || redPlane==0){
505 peUsedByNLZ.push_back(thisstateplaneproc);
506 planeUsedByNLZ.push_back(redPlane);
507 usedVec.push_back(thisstateplaneproc);
514 redPlane = (thisIndex.x % (nchareG-1));
517 redPlane = (thisIndex.x % (nchareG-1));
519 redPlane = (redPlane < 0 ? redPlane+nchareG : redPlane);
520 redPlane = (redPlane > nchareG-1 ? redPlane-nchareG : redPlane);
539 CP_State_GSpacePlane::~CP_State_GSpacePlane(){
541 delete [] lambdaproxyother;
542 delete [] lambdaproxy;
543 delete [] psiproxyother;
555 ArrayElement2D::pup(p);
556 UgSpaceDriverProxy[thisInstance.proxyOffset](thisIndex.x,thisIndex.y).
pup(p);
560 p|ibead_ind; p|kpoint_ind; p|itemper_ind; p|ispin_ind;
570 p|finishedCpIntegrate;
620 if (p.isUnpacking()) {
621 lambdaproxy=
new CProxySection_PairCalculator[config.numChunksAsym];
622 lambdaproxyother=
new CProxySection_PairCalculator[config.numChunksAsym];
623 psiproxy=
new CProxySection_PairCalculator[config.numChunksSym];
624 psiproxyother=
new CProxySection_PairCalculator[config.numChunksSym];
625 countPsiO=
new int[config.numChunksSym];
626 countVPsiO=
new int[config.numChunksSym];
627 countLambdaO=
new int[config.numChunksAsym];
629 PUParray(p,lambdaproxy,config.numChunksAsym);
630 PUParray(p,lambdaproxyother,config.numChunksAsym);
631 PUParray(p,psiproxy,config.numChunksSym);
632 PUParray(p,psiproxyother,config.numChunksSym);
633 PUParray(p,countPsiO,config.numChunksSym);
634 PUParray(p,countVPsiO,config.numChunksSym);
635 PUParray(p,countLambdaO,config.numChunksAsym);
647 myOrtho = CProxy_Ortho(msg->orthoAID);
653 contribute(
sizeof(
int), &constructed, CkReduction::sum_int,
654 CkCallback(CkIndex_InstanceController::doneInit(NULL),CkArrayIndex1D(thisInstance.proxyOffset),instControllerProxy), thisInstance.proxyOffset);
671 #include "../class_defs/allclass_strip_cp.h"
673 int numData = config.numData;
674 int ibinary_opt = sim->ibinary_opt;
675 int istart_typ_cp = sim->istart_typ_cp;
676 int gen_wave = sim->gen_wave;
677 int ncoef = sim->ncoef;
678 CkVec <RunDescriptor> *sortedRunDescriptors = sim->sortedRunDescriptors;
679 int *npts_lgrp = sim->npts_per_chareG;
680 int *nline_lgrp = sim->nlines_per_chareG;
681 int *istrt_lgrp = NULL;
682 int *iend_lgrp = NULL;
687 int ngridaNL = sim->ngrid_nloc_a;
688 int ngridbNL = sim->ngrid_nloc_b;
689 int ngridcNL = sim->ngrid_nloc_c;
690 int nkpoint = sim->nkpoint;
691 int cp_force_complex_psi = sim->cp_force_complex_psi;
697 int ind_state=thisIndex.x;
702 complex *vcomplexPoints = NULL;
703 if(istart_typ_cp>=3){vcomplexPoints = (
complex *)fftw_malloc(numData*
sizeof(
complex));}
705 int *kx= (
int *)fftw_malloc(numData*
sizeof(
int));
706 int *ky= (
int *)fftw_malloc(numData*
sizeof(
int));
707 int *kz= (
int *)fftw_malloc(numData*
sizeof(
int));
708 int nlines_tot,nplane;
710 if(istart_typ_cp>=3){
711 sprintf(fname,
"%s/Spin.%d_Kpt.%d_Bead.%d_Temper.%d/vState%d.out",
712 config.dataPath,mySpinIndex,myKptIndex,myBeadIndex,myTemperIndex,ind_state+1);
713 readState(numData,vcomplexPoints,fname,ibinary_opt,&nlines_tot,&nplane,
714 kx,ky,kz,&nx,&ny,&nz,istrt_lgrp,iend_lgrp,npts_lgrp,nline_lgrp,0,1);
718 sprintf(fname,
"%s/Spin.%d_Kpt.%d_Bead.%d_Temper.%d/state%d.out",
719 config.dataPath,mySpinIndex,myKptIndex,myBeadIndex,myTemperIndex,ind_state+1);
720 readState(numData,complexPoints,fname,ibinary_opt,&nlines_tot,&nplane,
721 kx,ky,kz,&nx,&ny,&nz,istrt_lgrp,iend_lgrp,npts_lgrp,nline_lgrp,0,0);
722 if(cp_force_complex_psi==1){
724 CkPrintf(
"\n$$$$$$$$$$$$$$$$$$$$_warning_$$$$$$$$$$$$$$$$$$$$\n");
725 CkPrintf(
"Adding a phase to the states to debug kpt code!!\n");
726 CkPrintf(
"in routine CP_State_GspacePlane.C\n");
727 CkPrintf(
"$$$$$$$$$$$$$$$$$$$$_warning_$$$$$$$$$$$$$$$$$$$$\n\n");
729 double phase = M_PI*((double)(ind_state+1))/((
double)(nstates+1));
730 for(
int i=0;i<numData;i++){
731 double re = cos(phase);
double im = sin(phase);
732 double ore = re*complexPoints[i].re - im*complexPoints[i].im;
733 double oim = re*complexPoints[i].im + im*complexPoints[i].re;
734 complexPoints[i].re = ore;
735 complexPoints[i].im = oim;
739 kx -= 1; ky -= 1; kz -=1;
740 PhysicsParamTransfer::fetch_state_kvecs(kx,ky,kz,ncoef,config.doublePack);
741 kx += 1; ky += 1; kz +=1;
742 processState(numData,ncoef,complexPoints,fname,ibinary_opt,&nlines_tot,
743 &nplane,kx,ky,kz,istrt_lgrp,iend_lgrp,npts_lgrp,nline_lgrp,
745 double *xfull =UatomsCacheProxy[thisInstance.proxyOffset].ckLocalBranch()->fastAtoms.x-1;
746 double *yfull =UatomsCacheProxy[thisInstance.proxyOffset].ckLocalBranch()->fastAtoms.y-1;
747 double *zfull =UatomsCacheProxy[thisInstance.proxyOffset].ckLocalBranch()->fastAtoms.z-1;
748 cpgen_wave->create_coefs(kx,ky,kz,numData,ind_state,complexPoints,
749 xfull,yfull,zfull,kpoint_ind);
752 if(config.nGplane_x != nplane && config.doublePack){
753 CkPrintf(
"@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
754 CkPrintf(
"Mismatch in planesize\n");
755 CkPrintf(
"@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
766 for(
int x = 0; x < nchareG; x ++){
769 for (
int j = 0; j < sortedRunDescriptors[x].size(); j++){
770 numPoints += sortedRunDescriptors[x][j].length;
774 complex *temp = complexPoints+ioff;
775 CmiMemcpy(dataToBeSent,temp,(
sizeof(
complex) * numPoints));
779 if(istart_typ_cp>=3){
780 numPointsV = numPoints;
781 vdataToBeSent = (
complex *)fftw_malloc(numPointsV*
sizeof(
complex));
782 complex *vtemp = vcomplexPoints+ioff;
783 CmiMemcpy(vdataToBeSent,vtemp,(
sizeof(
complex) * numPoints));
786 vdataToBeSent = (
complex *)fftw_malloc(numPointsV*
sizeof(
complex));
787 vdataToBeSent[0].re = 0.0;
788 vdataToBeSent[0].im = 0.0;
792 CkPrintf(
"@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
793 CkPrintf(
"Error reading\n");
794 CkPrintf(
"@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
798 UgSpacePlaneProxy[thisInstance.proxyOffset](ind_state, x).
initGSpace(
799 numPoints,dataToBeSent,numPointsV,vdataToBeSent,
800 nx,ny,nz,ngridaNL,ngridbNL,ngridcNL,istart_typ_cp);
801 fftw_free(dataToBeSent);
802 fftw_free(vdataToBeSent);
805 CmiNetworkProgress();
808 CkAssert(numData==ioff);
813 fftw_free(complexPoints);
814 if(istart_typ_cp>=3){fftw_free(vcomplexPoints);}
844 int nx,
int ny,
int nz,
845 int nxNL,
int nyNL,
int nzNL,
850 #ifdef _CP_DEBUG_STATEG_VERBOSE_
851 CkPrintf(
"GSpace[%d,%d] initGSpace %d\n",thisIndex.x,thisIndex.y,size);
854 temperScreenFile = UatomsCacheProxy[thisInstance.proxyOffset].ckLocalBranch()->temperScreenFile;
857 registrationFlag = 1;
858 eesCache *eesData = UeesCacheProxy[thisInstance.proxyOffset].ckLocalBranch ();
859 eesData->registerCacheGSP(thisIndex.x,thisIndex.y);
864 real_proxy = UrealSpacePlaneProxy[thisInstance.proxyOffset];
867 if (config.useGssInsRealP){
868 ComlibAssociateProxy(gssInstance,real_proxy);
876 int cp_min_opt = sim->cp_min_opt;
877 int cp_min_update = sim->cp_min_update;
879 if (
true == initialized) {
880 ckerr <<
"Attempt to initialize a plane twice" << endl;
888 istart_typ_cp = istart_cp;
891 gs.fictEke_ret = 0.0;
893 gs.cp_min_opt = cp_min_opt;
895 gs.numRuns = eesData->GspData[
iplane_ind]->numRuns;
896 gs.numLines = gs.numRuns/2;
897 gs.numFull = (gs.numLines)*nz;
898 gs.numFullNL = (gs.numLines)*nzNL;
899 gs.istate_ind = thisIndex.x;
900 gs.iplane_ind = thisIndex.y;
912 gs.numPoints = eesData->GspData[
iplane_ind]->ncoef;;
913 CkAssert(gs.numPoints == size);
915 gs.ees_nonlocal = sim->ees_nloc_on;
917 gs.packedPlaneData = (
complex *)fftw_malloc(gs.numPoints*
sizeof(
complex));
918 gs.packedForceData = (
complex *)fftw_malloc(gs.numFull*
sizeof(
complex));
919 gs.packedVelData = (
complex *)fftw_malloc(gs.numPoints*
sizeof(
complex));
920 CmiMemcpy(gs.packedPlaneData, points,
sizeof(
complex)*gs.numPoints);
921 bzero(gs.packedForceData,
sizeof(
complex)*gs.numFull);
924 gs.packedPlaneDataScr = (
complex *)fftw_malloc(gs.numPoints*
sizeof(
complex));
925 gs.packedPlaneDataTemp = (
complex *)fftw_malloc(gs.numPoints*
sizeof(
complex));
926 memset(gs.packedPlaneDataScr, 0,
sizeof(
complex)*gs.numPoints);
927 CmiMemcpy(gs.packedPlaneDataTemp, points,
sizeof(
complex)*gs.numPoints);
930 if(cp_min_opt==1 && cp_min_update==0){
931 gs.packedPlaneDataTemp = (
complex *)fftw_malloc(gs.numPoints*
sizeof(
complex));
932 CmiMemcpy(gs.packedPlaneDataTemp, points,
sizeof(
complex)*gs.numPoints);
935 #ifdef _CP_DEBUG_SCALC_ONLY_
937 gs.packedPlaneDataTemp2 = (
complex *)fftw_malloc(gs.numPoints*
sizeof(
complex));
938 bzero(gs.packedPlaneDataTemp2,
sizeof(
complex)*gs.numPoints);
943 if(istart_typ_cp>=3 && cp_min_opt==0){
944 CkAssert(vsize == size);
945 CmiMemcpy(gs.packedVelData, vpoints,
sizeof(
complex)*gs.numPoints);
947 memset(gs.packedVelData, 0,
sizeof(
complex)*gs.numPoints);
953 CkAssert(UgSpacePlaneProxy[thisInstance.proxyOffset](thisIndex.x, thisIndex.y).ckLocal());
954 CP_State_ParticlePlane *localParticlePlaneChare = UparticlePlaneProxy[thisInstance.proxyOffset](thisIndex.x, thisIndex.y).ckLocal();
955 CkAssert(localParticlePlaneChare);
956 localParticlePlaneChare->initKVectors();
964 double *coef_mass = eesData->GspData[
iplane_ind]->coef_mass;
965 int mycoef = eesData->GspData[
iplane_ind]->ncoef;
966 gSpaceNumPoints = gs.numPoints;
968 if(eesData->allowedGspChares[
iplane_ind]==0 || mycoef != gSpaceNumPoints){
969 CkPrintf(
"Plane %d of state %d toasy %d %d\n",
iplane_ind,thisIndex.x,
970 mycoef,gSpaceNumPoints);
974 gs.
setKRange(gSpaceNumPoints,k_x,k_y,k_z);
977 if(numRecvRedPsi==0 && gs.nkx0_red>0){
978 CkPrintf(
"Error in GSchare(%d %d) on proc %d : numRecvRedPsi=%d gs.nkx0_red=%d\n",thisIndex.x,thisIndex.y,
979 CkMyPe(),numRecvRedPsi,gs.nkx0_red);
988 int ncoef_use = gs.numPoints-gs.nkx0_red;
989 int maxLenNHC = gs.len_nhc_cp;
990 int maxNumNHC = gs.num_nhc_cp;
991 int maxNckNHC = gs.nck_nhc_cp;
993 CPINTEGRATE::initCPNHC(ncoef_use,gs.nkx0_zero,maxLenNHC,maxNumNHC,maxNckNHC,
994 &gs.kTCP,&gs.tauNHCCP,&gs.degfree,&gs.degfreeNHC,
995 gs.mNHC,gs.v0NHC,gs.a2NHC,gs.a4NHC,
996 gs.degFreeSplt,gs.istrNHC,gs.iendNHC);
999 CPINTEGRATE::CPSmplVel(gs.numPoints,coef_mass,gs.packedVelData,
1000 gs.len_nhc_cp,gs.num_nhc_cp,gs.nck_nhc_cp,
1001 gs.mNHC,gs.vNHC,gs.xNHC,gs.xNHCP,gs.a2NHC,
1002 gs.kTCP,istart_typ_cp,gs.nkx0_red,gs.nkx0_uni,
1003 gs.nkx0_zero,gs.degfree,gs.degfreeNHC);
1006 #ifdef _CP_DEBUG_PSI_OFF_
1007 memset(gs.packedPlaneData, 0,
sizeof(
complex)*gs.numPoints);
1010 #define _CP_DEBUG_DYNAMICS_ZVEL_OFF_
1011 #ifdef _CP_DEBUG_DYNAMICS_ZVEL_
1012 bzero(gs.packedVelData,
sizeof(
complex)*gs.numPoints);
1018 if(sim->ees_nloc_on == 0){
1019 for(
int atm=0;atm<config.numSfGrps; atm++){
1020 for(
int dup=0;dup<config.numSfDups;dup++){
1021 if(dup==thisIndex.x){
1022 #ifdef _CP_DEBUG_SF_CACHE_
1023 CkPrintf(
"GSP [%d,%d] on PE %d sending KVectors to SF[%d,%d,%d]\n",
1024 thisIndex.x, thisIndex.y, CkMyPe(), atm, thisIndex.y, dup);
1026 UsfCompProxy[thisInstance.proxyOffset](atm,thisIndex.y,dup).acceptKVectors(gSpaceNumPoints, k_x, k_y, k_z);
1046 int rdmaConfirmEP = CkIndex_CP_State_GSpacePlane::completeRDMAhandshake(0);
1048 CkCallback rdmaConfirmCB( rdmaConfirmEP, CkArrayIndex2D(thisIndex.x,thisIndex.y), thisProxy.ckGetArrayID() );
1051 handshakeToken.gspIndex = thisIndex;
1054 handshakeToken.symmetric =
false;
1055 handshakeToken.shouldSendLeft =
true;
1056 sendLeftRDMARequest (handshakeToken,gs.numPoints,rdmaConfirmCB);
1057 handshakeToken.shouldSendLeft =
false;
1058 sendRightRDMARequest(handshakeToken,gs.numPoints,rdmaConfirmCB);
1061 handshakeToken.symmetric =
true;
1062 handshakeToken.shouldSendLeft =
true;
1063 sendLeftRDMARequest (handshakeToken,gs.numPoints,rdmaConfirmCB);
1064 handshakeToken.shouldSendLeft =
false;
1065 sendRightRDMARequest(handshakeToken,gs.numPoints,rdmaConfirmCB);
1072 contribute(
sizeof(
int), &i, CkReduction::sum_int,
1073 CkCallback(CkIndex_InstanceController::doneInit(NULL),CkArrayIndex1D(thisInstance.proxyOffset),instControllerProxy), thisInstance.proxyOffset);
1076 #ifdef _CP_SUBSTEP_TIMING_
1078 (TimeKeeperProxy.ckLocalBranch())->initHPM();
1080 #endif // _CP_SUBSTEP_TIMING_
1087 void CP_State_GSpacePlane::acceptNewTemperature(
double temp)
1094 contribute(
sizeof(
int), &i, CkReduction::sum_int,
1095 CkCallback(CkIndex_InstanceController::gspDoneNewTemp(NULL),CkArrayIndex1D(thisInstance.proxyOffset),instControllerProxy), thisInstance.proxyOffset);
1103 void CP_State_GSpacePlane::makePCproxies(){
1105 lambdaproxy=
new CProxySection_PairCalculator[config.numChunksAsym];
1106 lambdaproxyother=
new CProxySection_PairCalculator[config.numChunksAsym];
1107 psiproxy=
new CProxySection_PairCalculator[config.numChunksSym];
1108 psiproxyother=
new CProxySection_PairCalculator[config.numChunksSym];
1110 if(!config.gSpaceSum){
1111 for(
int chunk=0;chunk<config.numChunksAsym;chunk++){
1113 if(AllLambdaExpected/config.numChunksAsym == 2)
1116 for(
int chunk=0; chunk < config.numChunksSym ;chunk++){
1118 if(AllPsiExpected / config.numChunksSym > 1)
1136 #ifdef _CP_DEBUG_SF_CACHE_
1137 CkPrintf(
"GSP [%d,%d] StartNewIter\n",thisIndex.x, thisIndex.y);
1139 #if CMK_TRACE_ENABLED
1140 traceUserSuppliedData(iteration);
1143 if(UegroupProxy[thisInstance.proxyOffset].ckLocalBranch()->iteration_gsp != iteration ||
1144 UatomsCacheProxy[thisInstance.proxyOffset].ckLocalBranch()->iteration != iteration){
1145 CkPrintf(
"{%d} @@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n",thisInstance.proxyOffset);
1146 CkPrintf(
"{%d} Flow of Control Error : Starting new iter before\n",thisInstance.proxyOffset);
1147 CkPrintf(
"{%d} finishing atom integrate or iteration mismatch.\n",thisInstance.proxyOffset);
1148 CkPrintf(
"{%d} iter_gsp %d iter_energy %d iter_atm %d and %d and %d\n",
1149 thisInstance.proxyOffset,
1150 iteration,UegroupProxy[thisInstance.proxyOffset].ckLocalBranch()->iteration_gsp,
1151 UatomsCacheProxy[thisInstance.proxyOffset].ckLocalBranch()->iteration,
1152 config.maxIter,cleanExitCalled);
1153 CkPrintf(
"{%d} chare %d %d\n",thisInstance.proxyOffset,thisIndex.x,thisIndex.y);
1154 CkPrintf(
"{%d} @@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n",thisInstance.proxyOffset);
1160 CkPrintf(
"GSpace[%d,%d] Error: Flow of Control. Starting new iter (%d) before finishing Vpsi.\n",thisIndex.x,thisIndex.y,iteration+1);
1161 CkAbort(
"Error: GSpace cannot startNewIter() before finishing the PsiV loop\n");
1166 if(thisIndex.x==0 && thisIndex.y==0 ){
1167 if(!(sim->cp_min_opt==1)){
1168 fprintf(temperScreenFile,
"-------------------------------------------------------------------------------\n");
1169 int iii = iteration;
1170 if(!sim->gen_wave){iii+=1;}
1171 fprintf(temperScreenFile,
"Iteration %d done\n",iii);
1172 fprintf(temperScreenFile,
"/////////////////////////////////////////////////////////////////////////////==\n");
1173 fprintf(temperScreenFile,
"/////////////////////////////////////////////////////////////////////////////==\n");
1176 fprintf(temperScreenFile,
"/////////////////////////////////////////////////////////////////////////////==\n");
1177 fprintf(temperScreenFile,
"/////////////////////////////////////////////////////////////////////////////==\n");
1179 if(iteration<config.maxIter){
1180 fprintf(temperScreenFile,
"Beginning Iteration %d \n", iteration);
1182 fprintf(temperScreenFile,
"Completing Iteration %d \n", iteration-1);
1184 fprintf(temperScreenFile,
"-------------------------------------------------------------------------------\n");
1193 int cp_min_opt = sim->cp_min_opt;
1197 finishedCpIntegrate = 0;
1198 iRecvRedPsi = 1;
if(numRecvRedPsi>0){iRecvRedPsi = 0;}
1199 iRecvRedPsiV = 1;
if(cp_min_opt==0 && numRecvRedPsi>0){iRecvRedPsiV = 0;}
1201 iSentRedPsiV = 1;
if(cp_min_opt==0){iSentRedPsiV = 0;}
1205 #ifdef _CP_DEBUG_STATE_GPP_VERBOSE_
1206 CkPrintf(
"GSP [%d,%d] StartNewIter : %d\n",thisIndex.x, thisIndex.y,iteration);
1221 #if CMK_TRACE_ENABLED
1222 if(iteration==TRACE_ON_STEP ){(TimeKeeperProxy.ckLocalBranch())->startTrace();}
1223 if(iteration==TRACE_OFF_STEP){(TimeKeeperProxy.ckLocalBranch())->stopTrace();}
1226 if(config.lbgspace || config.lbpaircalc ||config.lbdensity){
1227 if((iteration % (FIRST_BALANCE_STEP - PRE_BALANCE_STEP) == 0) ||
1228 (iteration % (LOAD_BALANCE_STEP - PRE_BALANCE_STEP) == 0)){
1229 LBTurnInstrumentOn();
1237 #ifdef _CP_SUBSTEP_TIMING_
1238 if(forwardTimeKeep>0){
1239 double gstart=CmiWallTimer();
1240 CkCallback cb(CkIndex_TimeKeeper::collectStart(NULL),0,TimeKeeperProxy);
1241 contribute(
sizeof(
double),&gstart,CkReduction::min_double, cb , forwardTimeKeep);
1244 if(iteration==HPM_ON_STEP ){(TimeKeeperProxy.ckLocalBranch())->startHPM(
"OneStep");}
1245 if(iteration==HPM_OFF_STEP ){(TimeKeeperProxy.ckLocalBranch())->stopHPM(
"OneStep");}
1252 void CP_State_GSpacePlane::screenPrintWallTimes()
1256 if (thisIndex.x==0 && thisIndex.y==0 &&
1257 thisInstance.idxU.x==0 && thisInstance.idxU.y==0 &&
1258 thisInstance.idxU.z==0 && thisInstance.idxU.s==0 ) {
1259 int iprintout = config.maxIter;
1260 if(!(sim->cp_min_opt==1) && !sim->gen_wave){iprintout-=1;}
1261 int itime = iteration;
1262 if(config.maxIter>=30){itime=1; wallTimeArr[0]=wallTimeArr[1];}
1263 wallTimeArr[itime] = CkWallTimer();
1264 if (iteration == iprintout && config.maxIter<30) {
1265 CkPrintf(
"-------------------------------------------------------------------------------\n");
1266 CkPrintf(
"Wall Times from within GSP\n\n");
1267 for (
int t = 1; t < iprintout; t++) {
1268 CkPrintf(
"%g\n",wallTimeArr[t] - wallTimeArr[t-1]);
1271 CkPrintf(
"%g\n", wallTimeArr[itime] - wallTimeArr[itime-1]);
1272 CkPrintf(
"-------------------------------------------------------------------------------\n");
1274 }
else if(iteration>0) {
1275 CkPrintf(
"Iteration time (GSP) : %g\n",
1276 wallTimeArr[itime] - wallTimeArr[itime-1]);
1293 #ifdef _CP_DEBUG_STATEG_VERBOSE_
1294 CkPrintf(
"dofft %d.%d \n",thisIndex.x,thisIndex.y);
1298 if (gs.numNonZeroPlanes == 0 || gs.fftReqd ==
false){
1299 CkPrintf(
"@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
1300 CkPrintf(
"Dude, no data to send : Why launch the stategpsaceplane %d %d %d\n",
1301 thisIndex.x,thisIndex.y, gs.numNonZeroPlanes);
1302 CkPrintf(
"@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
1308 #if CMK_TRACE_ENABLED
1309 double StartTime=CmiWallTimer();
1315 eesCache *eesData = UeesCacheProxy[thisInstance.proxyOffset].ckLocalBranch ();
1318 UfftCacheProxy[thisInstance.proxyOffset].ckLocalBranch()->doStpFFTGtoR_Gchare(gs.packedPlaneData,gs.packedForceData,
1319 gs.numFull,gs.numPoints,gs.numLines,gs.numRuns,runs,gs.zdim,
iplane_ind);
1321 #if CMK_TRACE_ENABLED
1322 traceUserBracketEvent(GspaceFwFFT_, StartTime, CmiWallTimer());
1337 void CP_State_GSpacePlane::setupFFTPersistent() {
1339 int numLines = gs.numLines;
1340 int sizeZ = gs.planeSize[1];
1341 CkArray *real_proxy_local = real_proxy.ckLocalBranch();
1342 fftHandler = (PersistentHandle*) malloc(
sizeof(PersistentHandle) * sizeZ);
1344 for(
int z=0; z < sizeZ; z++) {
1345 int peer = real_proxy_local->homePe(CkArrayIndex2D(thisIndex.x, z));
1346 int compress_start =
sizeof(envelope) +
sizeof(
RSFFTMsg);
1347 int compress_size = numLines*
sizeof(
complex);
1348 size = compress_start + compress_size +
sizeof(int);
1349 fftHandler[z] = CmiCreatePersistent(peer, size, compress_start);
1352 CkPrintf(
"persistent handler message count %d, size %d\n", sizeZ, size);
1358 if(fftHandler == NULL) setupFFTPersistent();
1361 #ifdef _CP_DEBUG_STATEG_VERBOSE_
1362 CkPrintf(
"sendfft %d.%d \n",thisIndex.x,thisIndex.y);
1365 complex *data_out = gs.packedForceData;
1366 int numLines = gs.numLines;
1367 int sizeZ = gs.planeSize[1];
1373 if (config.useGssInsRealP){gssInstance.beginIteration();}
1388 for(
int z=0; z < sizeZ; z++) {
1392 msg->size = numLines;
1393 msg->senderIndex = thisIndex.y;
1394 msg->senderJndex = thisIndex.x;
1395 msg->senderKndex = z;
1396 msg->numPlanes = gs.numNonZeroPlanes;
1398 if(config.prioFFTMsg){
1399 CkSetQueueing(msg, CK_QUEUEING_IFIFO);
1400 *(
int*)CkPriorityPtr(msg) = config.rsfftpriority +
1401 thisIndex.x*gs.planeSize[0]+thisIndex.y;
1407 for (
int i=0,j=z; i<numLines; i++,j+=sizeZ){data[i] = data_out[j]; }
1410 CmiUsePersistentHandle(&fftHandler[z], 1);
1412 real_proxy(thisIndex.x, z).acceptFFT(msg);
1414 CmiUsePersistentHandle(NULL, 0);
1417 CmiNetworkProgress();
1425 if (config.useGssInsRealP){gssInstance.endIteration();}
1431 #ifdef _CP_SUBSTEP_TIMING_
1432 if(forwardTimeKeep>0)
1434 double gend=CmiWallTimer();
1435 CkCallback cb(CkIndex_TimeKeeper::collectEnd(NULL),0,TimeKeeperProxy);
1436 contribute(
sizeof(
double),&gend,CkReduction::max_double, cb , forwardTimeKeep);
1451 #ifdef _CP_SUBSTEP_TIMING_
1452 if(backwardTimeKeep>0)
1454 double gstart=CmiWallTimer();
1455 CkCallback cb(CkIndex_TimeKeeper::collectStart(NULL),0,TimeKeeperProxy);
1456 contribute(
sizeof(
double),&gstart,CkReduction::min_double, cb , backwardTimeKeep);
1460 #ifdef _CP_DEBUG_STATEG_VERBOSE_
1461 CkPrintf(
"GSpace[%d,%d] acceptIFFT\n",thisIndex.x,thisIndex.y);
1464 for(
int i=0;i<msg->size ;i++)
1466 CkAssert(finite(msg->data[i].re));
1467 CkAssert(finite(msg->data[i].im));
1471 int size = msg->size;
1472 int offset = msg->offset;
1473 complex *partlyIFFTd = msg->data;
1475 complex *data_in = gs.packedForceData;
1476 int numLines = gs.numLines;
1477 int sizeZ = gs.planeSize[1];
1478 int expandedDataSize = numLines*sizeZ;
1480 CkAssert(numLines == size);
1489 for(
int i=0,j=offset; i< numLines; i++,j+=sizeZ){data_in[j] = partlyIFFTd[i];}
1502 #if CMK_TRACE_ENABLED
1503 double StartTime=CmiWallTimer();
1505 eesCache *eesData = UeesCacheProxy[thisInstance.proxyOffset].ckLocalBranch ();
1507 FFTcache *fftcache = UfftCacheProxy[thisInstance.proxyOffset].ckLocalBranch();
1509 fftcache->getCacheMem(
"CP_State_GSpacePlane::doIFFT");
1510 complex *forcTmp = fftcache->tmpData;
1512 gs.numFull,gs.numPoints,gs.numLines,gs.numRuns,runs,gs.zdim,
iplane_ind);
1514 #if CMK_TRACE_ENABLED
1515 traceUserBracketEvent(GspaceBwFFT_, StartTime, CmiWallTimer());
1520 double scaleFactor = -2.0/double(sim->sizeX *
1523 complex *forces = gs.packedForceData;
1524 for(
int index=0; index<gs.numPoints; index++)
1525 forces[index] = forcTmp[index]*scaleFactor;
1526 fftcache->freeCacheMem(
"CP_State_GSpacePlane::doIFFT");
1527 CmiNetworkProgress();
1530 #ifdef _CP_DEBUG_STATEG_VERBOSE_
1532 CkPrintf(
"Done doing Ifft gsp : %d %d\n",thisIndex.x,thisIndex.y);
1536 #ifdef BARRIER_CP_GSPACE_IFFT
1539 contribute(
sizeof(
int),&wehaveours,CkReduction::sum_int,
1540 CkCallback(CkIndex_CP_State_GSpacePlane::allDoneIFFT(NULL),thisProxy));
1552 #ifdef _CP_DEBUG_VKS_OFF_
1553 if(thisIndex.x==0 && thisIndex.y==0){
1554 CkPrintf(
"EHART = OFF FOR DEBUGGING\n");
1555 CkPrintf(
"EExt = OFF FOR DEBUGGING\n");
1556 CkPrintf(
"EWALD_recip = OFF FOR DEBUGGING\n");
1557 CkPrintf(
"EEXC = OFF FOR DEBUGGING\n");
1558 CkPrintf(
"EGGA = OFF FOR DEBUGGING\n");
1559 CkPrintf(
"EEXC+EGGA = OFF FOR DEBUGGING\n");
1567 eesCache *eesData = UeesCacheProxy[thisInstance.proxyOffset].ckLocalBranch ();
1568 int ncoef = gs.numPoints;
1572 double **g2 = eesData->GspData[
iplane_ind]->g2;
1577 #ifdef _CP_DEBUG_VKS_FORCES_
1578 if(thisIndex.x==0 && thisIndex.y==0){
1579 FILE *fp = fopen(
"vks_forces_s0_p0.out",
"w");
1580 int ncoef = gs.numPoints;
1581 complex *forces = gs.packedForceData;
1582 for(
int i=0;i<ncoef;i++)
1583 fprintf(fp,
"%d %d %d : %g %g\n",k_x[i],k_y[i],k_z[i],forces[i].re,forces[i].im);
1588 complex *forces = gs.packedForceData;
1589 complex *ppForces = UparticlePlaneProxy[thisInstance.proxyOffset](thisIndex.x,thisIndex.y).ckLocal()->myForces;
1591 #ifdef _CP_DEBUG_SFNL_OFF_
1592 bzero(ppForces,ncoef*
sizeof(
complex));
1594 #ifdef _CP_DEBUG_VKS_OFF_
1595 bzero(forces,ncoef*
sizeof(
complex));
1596 bzero(ppForces,ncoef*
sizeof(
complex));
1600 #ifdef _CP_GS_DUMP_VKS_
1601 dumpMatrix(
"vksBf",(
double *)ppForces, 1,
1602 gs.numPoints*2,thisIndex.y,thisIndex.x,thisIndex.x,0,
false);
1603 dumpMatrix(
"forceBf",(
double *)forces, 1,
1604 gs.numPoints*2,thisIndex.y,thisIndex.x,thisIndex.x,0,
false);
1607 #ifdef _CP_GS_DEBUG_COMPARE_VKS_
1608 if(savedvksBf==NULL){
1609 savedvksBf=
new complex[gs.numPoints];
1610 loadMatrix(
"vksBf",(
double *)savedvksBf, 1,
1611 gs.numPoints*2,thisIndex.y,thisIndex.x,thisIndex.x,0,
false);
1613 if(savedforceBf==NULL){
1614 savedforceBf=
new complex[gs.numPoints];
1615 loadMatrix(
"forceBf",(
double *)savedforceBf, 1,
1616 gs.numPoints*2,thisIndex.y,thisIndex.x,thisIndex.x,0,
false);
1619 for(
int i=0;i<gs.numPoints;i++){
1620 if(fabs(ppForces[i].re-savedvksBf[i].re)>0.0001){
1621 fprintf(stderr,
"GSP [%d,%d] %d element vks %.10g not %.10g\n",
1622 thisIndex.x, thisIndex.y,i, ppForces[i].re, savedvksBf[i].re);
1624 CkAssert(fabs(ppForces[i].re-savedvksBf[i].re)<0.0001);
1625 CkAssert(fabs(ppForces[i].im-savedvksBf[i].im)<0.0001);
1627 for(
int i=0;i<gs.numPoints;i++){
1628 if(fabs(forces[i].re-savedforceBf[i].re)>0.0001){
1629 fprintf(stderr,
"GSP [%d,%d] %d element force %.10g not %.10g\n",
1630 thisIndex.x, thisIndex.y,i, forces[i].re, savedforceBf[i].re);
1632 CkAssert(fabs(forces[i].re-savedforceBf[i].re)<0.0001);
1633 CkAssert(fabs(forces[i].im-savedforceBf[i].im)<0.0001);
1637 gs.addForces(ppForces,k_x);
1638 bzero(ppForces,gs.numPoints*
sizeof(
complex));
1639 CmiNetworkProgress();
1645 int istate = gs.istate_ind;
1647 complex *psi_g = gs.packedPlaneData;
1648 double *eke_ret = &(gs.eke_ret);
1650 CPNONLOCAL::CP_eke_calc(ncoef,istate,forces,psi_g,k_x,k_y,k_z,g2,eke_ret,config.doublePack,nkx0,
1652 contribute(
sizeof(
double), &gs.eke_ret, CkReduction::sum_double,
1653 CkCallback(CkIndex_InstanceController::printEnergyEke(NULL),CkArrayIndex1D(thisInstance.proxyOffset),instControllerProxy));
1656 #ifdef _CP_DEBUG_SCALC_ONLY_
1657 bzero(forces,ncoef*
sizeof(
complex));
1663 #ifdef _CP_DEBUG_OLDFORCE_
1665 FILE *fp = fopen(
"force_old.out",
"a+");
1666 for(
int i = 0; i < ncoef; i++){
1667 if(k_x[i]==0 && k_y[i]==1 && k_z[i]==4){
1669 "old force H+Ext+Exc+Eke+Enl : is=%d %d %d %d : %g %g\n",
1670 istate,k_x[i],k_y[i],k_z[i],forces[i].re,forces[i].im);
1694 #ifdef _CP_DEBUG_PSI_OFF_
1696 if(iteration==config.maxIter+1){
1698 #ifdef _CP_SUBSTEP_TIMING_
1700 (TimeKeeperProxy.ckLocalBranch())->printHPM();
1703 cleanExitCalled = 1;
1705 contribute(
sizeof(
int),&cleanExitCalled,CkReduction::sum_int, CkCallback(CkIndex_InstanceController::cleanExit(NULL),CkArrayIndex1D(thisInstance.proxyOffset),instControllerProxy));
1709 contribute(
sizeof(
int),&i,CkReduction::sum_int, CkCallback(CkIndex_InstanceController::allDoneCPForces(NULL),CkArrayIndex1D(thisInstance.proxyOffset),instControllerProxy));
1710 #ifdef _CP_DEBUG_PSI_OFF_
1727 acceptedLambda =
false;
1728 doneDoingIFFT =
false;
1729 doneNewIter =
false;
1736 complex *psi = gs.packedPlaneData;
1737 complex *force = gs.packedForceData;
1738 int cp_min_opt = sim->cp_min_opt;
1741 int ncoef = gs.numPoints;
1742 complex *psi_g = gs.packedPlaneData;
1743 complex *psi_g_tmp = gs.packedPlaneDataTemp;
1744 CmiMemcpy(psi_g_tmp,psi_g,
sizeof(
complex)*ncoef);
1754 #define _CP_GSPACE_DUMP_LMAT_DIAGONAL_VALS_OFF_
1756 #ifdef _CP_GSPACE_DUMP_LMAT_DIAGONAL_VALS_
1774 for(
int i=0; i<gs.numPoints; i++){
1775 mylambda_diag += force[i] * psi[i].conj();
1777 if(config.doublePack==0){
1778 CkPrintf(
"lambda[%d, %d] = %.12g %.12g at plane %d\n",
1779 thisIndex.x, thisIndex.x, mylambda_diag.re, mylambda_diag.im, thisIndex.y);
1781 CkPrintf(
"lambda[%d, %d] = %.12g %.12g at plane %d\n",
1782 thisIndex.x, thisIndex.x, mylambda_diag.re,0.0, thisIndex.y);
1792 #define _CP_GSPACE_PSI_FORCE_OUTPUT_BEFORE_LAMBDA_OFF_
1793 #ifdef _CP_GSPACE_PSI_FORCE_OUTPUT_BEFORE_LAMBDA_
1794 eesCache *eesData = UeesCacheProxy[thisInstance.proxyOffset].ckLocalBranch ();
1800 sprintf(fname,
"psi_forces_before_lambda_state%d_plane%d.out",thisIndex.x,thisIndex.y);
1801 FILE *fp = cfopen(fname,
"w");
1802 for(
int i=0; i<gs.numPoints; i++){
1803 int igo = 0;
double wght = 0.5;
1804 if (ka[i] > 0) igo=1;
1805 if (ka[i] == 0 && kb[i]>0){ igo=1; wght=1.0; }
1806 if (ka[i] == 0 && kb[i]==0 && kc[i]>=0){ igo=1; wght=1.0; }
1807 if(config.doublePack==0) wght=1.0;
1809 fprintf(fp,
"%d %d %d : %.10g %.10g\n",ka[i],kb[i],kc[i],force[i].re*wght,force[i].im*wght);
1821 #ifndef PAIRCALC_TEST_DUMP
1822 #ifndef _CP_DEBUG_ORTHO_OFF_
1824 double rad2i = 1.0/sqrt(2.0);
1825 double rad2 = sqrt(2.0);
1826 if(gs.ihave_kx0==1 && config.doublePack==1){
1827 for(
int i=gs.kx0_strt; i<gs.kx0_end; i++){psi[i] *= rad2i;}
1828 for(
int i=gs.kx0_strt; i<gs.kx0_end; i++){force[i] *= rad2;}
1830 for(
int i=0; i<gs.numPoints; i++){
1843 #ifdef _PAIRCALC_DEBUG_PARANOID_FW_
1844 for(
int i=0;i<config.numChunksAsym;i++)
1845 CkAssert(countLambdaO[i]==0);
1847 #ifdef _CP_GS_DUMP_LAMBDA_
1850 sprintf(name1,
"lambdaBf.iter.%d", iteration);
1851 sprintf(name2,
"psiBf.iter.%d", iteration);
1852 dumpMatrix(name1, (
double *)force, 1,
1853 gs.numPoints*2,thisIndex.y,thisIndex.x,thisIndex.x,0,
false);
1854 dumpMatrix(name2,(
double *)psi, 1,
1855 gs.numPoints*2,thisIndex.y,thisIndex.x,thisIndex.x,0,
false);
1858 #ifdef _CP_GS_DEBUG_COMPARE_PSI_
1859 if(savedlambdaBf==NULL){
1860 savedlambdaBf=
new complex[gs.numPoints];
1861 loadMatrix(
"lambdaBf",(
double *)savedlambdaBf, 1,
1862 gs.numPoints*2,thisIndex.y,thisIndex.x,thisIndex.x,0,
false);
1864 if(savedpsiBf==NULL){
1865 savedpsiBf=
new complex[gs.numPoints];
1866 loadMatrix(
"psiBf",(
double *)savedpsiBf, 1,
1867 gs.numPoints*2,thisIndex.y,thisIndex.x,thisIndex.x,0,
false);
1869 double testvalue=0.00000001;
1870 for(
int i=0;i<gs.numPoints;i++){
1871 if(fabs(force[i].re-savedlambdaBf[i].re)>testvalue){
1872 fprintf(stderr,
"GSP [%d,%d] %d element lambda %.10g not %.10g\n",
1873 thisIndex.x, thisIndex.y,i, force[i].re, savedlambdaBf[i].re);
1875 CkAssert(fabs(force[i].re-savedlambdaBf[i].re)<testvalue);
1876 CkAssert(fabs(force[i].im-savedlambdaBf[i].im)<testvalue);
1878 for(
int i=0;i<gs.numPoints;i++){
1879 CkAssert(fabs(psi[i].re-savedpsiBf[i].re)<testvalue);
1880 CkAssert(fabs(psi[i].im-savedpsiBf[i].im)<testvalue);
1888 int numPoints = gs.numPoints;
1889 #ifndef _CP_DEBUG_ORTHO_OFF_
1891 CmiNetworkProgress();
1894 acceptedLambda=
true;
1895 bzero(force,
sizeof(
complex)*numPoints);
1897 #ifdef _CP_DEBUG_STATEG_VERBOSE_
1898 if(thisIndex.x==0){CkPrintf(
"Sent Lambda %d %d\n",thisIndex.y,cleanExitCalled);}
1905 #ifdef _CP_SUBSTEP_TIMING_
1906 if(backwardTimeKeep>0){
1907 double gend=CmiWallTimer();
1908 CkCallback cb(CkIndex_TimeKeeper::collectEnd(NULL),0,TimeKeeperProxy);
1909 contribute(
sizeof(
double),&gend,CkReduction::max_double, cb , backwardTimeKeep);
1924 int cp_min_opt = sim->cp_min_opt;
1925 int cp_lsda = sim->nspin - 1;
1926 eesCache *eesData = UeesCacheProxy[thisInstance.proxyOffset].ckLocalBranch ();
1929 int N = msg->getSize()/
sizeof(
complex);
1931 int offset = msg->getUserFlag();
if(offset<0){offset=0;}
1933 complex *force = gs.packedForceData;
1934 int chunksize = gs.numPoints/config.numChunksAsym;
1935 int chunkoffset=offset*chunksize;
1938 for(
int i=0;i<msg->getSize()/
sizeof(double) ;i++){
1939 CkAssert(finite(((
double*) msg->getData())[i]));
1955 if(config.doublePack==1){
1957 double overlap = (cp_lsda==0 ? 2.0 : 1.0);
1958 double ws = 1.0/overlap;
double wd = 2.0/overlap;
1959 #ifndef PAIRCALC_TEST_DUMP
1960 for(
int i=0,idest=chunkoffset; i<N; i++,idest++){
1961 double wght = (k_x[idest]==0 ? ws : wd);
1962 force[idest].re -= wght*data[i].re;
1963 force[idest].im -= wght*data[i].im;
1967 if(countLambdaO[offset]<1){
1968 for(
int i=0,idest=chunkoffset; i<N; i++,idest++){force[idest] = data[i]*(-1.0);}
1970 for(
int i=0,idest=chunkoffset; i<N; i++,idest++){force[idest] += data[i]*(-1.0);}
1978 if(config.doublePack==0){
1980 double overlap = (cp_lsda==0 ? 2.0 : 1.0);
1981 double ws = 1.0/overlap;
1982 for(
int i=0,idest=chunkoffset; i<N; i++,idest++){
1983 force[idest].re -= ws*data[i].re;
1984 force[idest].im -= ws*data[i].im;
1987 if(countLambdaO[offset]<1){
1988 for(
int i=0,idest=chunkoffset; i<N; i++,idest++){force[idest] = data[i]*(-1.0);}
1990 for(
int i=0,idest=chunkoffset; i<N; i++,idest++){force[idest] += data[i]*(-1.0);}
2003 countLambdaO[offset]++;
2004 if(countLambda==AllLambdaExpected){
2005 #ifdef _CP_DEBUG_STATEG_VERBOSE_
2007 CkPrintf(
"doLambda %d %d\n",thisIndex.y,cleanExitCalled);
2024 int offset = msg->myoffset;
2025 if(offset<0){offset=0;}
2028 for(
int i=0; i < N; i++)
2030 CkAssert(finite(data[i].re));
2031 CkAssert(finite(data[i].im));
2036 int cp_min_opt = sim->cp_min_opt;
2037 eesCache *eesData = UeesCacheProxy[thisInstance.proxyOffset].ckLocalBranch ();
2040 complex *force = gs.packedForceData;
2041 int chunksize = gs.numPoints/config.numChunksAsym;
2042 int chunkoffset = offset*chunksize;
2057 if(config.doublePack==1){
2059 #ifndef PAIRCALC_TEST_DUMP
2060 for(
int i=0,idest=chunkoffset; i<N; i++,idest++){
2061 double wght = (k_x[idest]==0 ? 0.5 : 1);
2062 force[idest].re -= wght*data[i].re;
2063 force[idest].im -= wght*data[i].im;
2067 if(countLambdaO[offset]<1){
2068 for(
int i=0,idest=chunkoffset; i<N; i++,idest++){force[idest] = data[i]*(-1.0);}
2070 for(
int i=0,idest=chunkoffset; i<N; i++,idest++){force[idest] += data[i]*(-1.0);}
2078 if(config.doublePack==0){
2079 for(
int i=0,idest=chunkoffset; i<N; i++,idest++){
2080 force[idest].re -= 0.5*data[i].re;
2081 force[idest].im -= 0.5*data[i].im;
2094 countLambdaO[offset]++;
2095 if(countLambda==AllLambdaExpected){
2096 #ifdef _CP_DEBUG_STATEG_VERBOSE_
2098 CkPrintf(
"doLambda %d %d\n",thisIndex.y,cleanExitCalled);
2117 CkAssert(countLambda==AllLambdaExpected);
2119 int cp_min_opt = sim->cp_min_opt;
2120 complex *force = gs.packedForceData;
2122 for(
int i=0;i<gs.numPoints ;i++){
2123 CkAssert(finite(force[i].re));
2124 CkAssert(finite(force[i].im));
2130 if(gs.ihave_kx0==1){
2131 double rad2i = 1.0/sqrt(2.0);
2132 for(
int i=gs.kx0_strt; i<gs.kx0_end; i++){force[i] *= rad2i;}
2141 int ncoef = gs.numPoints;
2142 complex *psi_g_scr = gs.packedPlaneDataScr;
2143 complex *psi_g = gs.packedPlaneData;
2144 CmiMemcpy(psi_g,psi_g_scr,
sizeof(
complex)*ncoef);
2149 acceptedLambda=
true;
2151 bzero(countLambdaO,config.numChunksAsym*
sizeof(
int));
2157 #ifdef _CP_GS_DUMP_LAMBDA_
2158 dumpMatrix(
"lambdaAf",(
double *)force, 1, gs.numPoints*2,
2159 thisIndex.y,thisIndex.x,thisIndex.x,0,
false);
2162 #ifdef _CP_GS_DEBUG_COMPARE_PSI_
2163 double testvalue=0.00000001;
2164 if(savedlambdaAf==NULL){
2165 savedlambdaAf=
new complex[gs.numPoints];
2166 loadMatrix(
"lambdaAf",(
double *)savedlambdaAf, 1,
2167 gs.numPoints*2,thisIndex.y,thisIndex.x,thisIndex.x,0,
false);
2169 for(
int i=0;i<gs.numPoints;i++){
2170 CkAssert(fabs(force[i].re-savedlambdaAf[i].re)<testvalue);
2171 CkAssert(fabs(force[i].im-savedlambdaAf[i].im)<testvalue);
2175 #ifdef _CP_DEBUG_STATEG_VERBOSE_
2177 CkPrintf(
"doLambda %d %d\n",thisIndex.y,cleanExitCalled);
2185 #define _CP_GSPACE_PSI_FORCE_OUTPUT_AFTER_LAMBDA_OFF_
2186 #ifdef _CP_GSPACE_PSI_FORCE_OUTPUT_AFTER_LAMBDA_
2187 eesCache *eesData = UeesCacheProxy[thisInstance.proxyOffset].ckLocalBranch ();
2193 sprintf(fname,
"psi_forces_after_lambda_state%d_plane%d.out",thisIndex.x,thisIndex.y);
2194 FILE *fp = cfopen(fname,
"w");
2195 for(
int i=0; i<gs.numPoints; i++){
2196 int igo = 0;
double wght = 0.5;
2197 if (ka[i] > 0) igo=1;
2198 if (ka[i] == 0 && kb[i]>0){ igo=1; wght=1.0; }
2199 if (ka[i] == 0 && kb[i]==0 && kc[i]>=0){ igo=1; wght=1.0; }
2200 if(config.doublePack==0) wght=1.0;
2202 fprintf(fp,
"%d %d %d : %.10g %.10g\n",ka[i],kb[i],kc[i],force[i].re*wght,force[i].im*wght);
2223 if(!acceptedLambda){
2224 CkPrintf(
"GSpace[%d,%d] Flow of Control Error : Attempting to Cg ovlap without lambda correction\n",thisIndex.x,thisIndex.y);
2225 CkAbort(
"Error: Attempting to compute cg overlap without lambda correction\n");
2229 int istate = gs.istate_ind;
2230 int ncoef = gs.numPoints;
2231 complex *forces = gs.packedForceData;
2232 int cp_min_opt = sim->cp_min_opt;
2233 int cp_min_cg = sim->cp_min_cg;
2237 double fovlap_loc = 0.0;
2238 if(cp_min_opt==1 && cp_min_cg==1){
2239 CPINTEGRATE::CP_fovlap_calc(ncoef,istate,forces,&fovlap_loc);
2241 double force_sq_sum_loc=0.0;
2242 for(
int i=0; i<gs.numPoints; i++){force_sq_sum_loc+= forces[i].getMagSqr();}
2245 redforc[0] = fovlap_loc;
2246 redforc[1] = force_sq_sum_loc;
2247 contribute(2*
sizeof(
double),redforc,CkReduction::sum_double,
2248 CkCallback(CkIndex_CP_State_GSpacePlane::acceptCgOverlap(NULL),thisProxy));
2250 #ifdef _CP_DEBUG_STATEG_VERBOSE_
2252 CkPrintf(
"ovlpa %d %d\n",thisIndex.y,cleanExitCalled);
2268 if(!acceptedPsi || !acceptedLambda || !acceptedVPsi)
2269 CkAbort(
"{%d} Flow of Control Error : Attempting to write states without completing psi, vpsi and Lambda\n");
2273 int cp_min_opt = (sim->cp_min_opt);
2275 int ind_state = (thisIndex.x+1);
2276 int ind_chare = (thisIndex.y+1);
2277 int ncoef = gSpaceNumPoints;
2279 eesCache *eesData = UeesCacheProxy[thisInstance.proxyOffset].ckLocalBranch ();
2283 double *coef_mass = eesData->GspData[
iplane_ind]->coef_mass;
2285 complex *psi = gs.packedPlaneData;
2286 complex *vpsi = gs.packedVelData;
2287 complex *forces = gs.packedForceData;
2288 double ***xNHC = gs.xNHC;
2289 double ***xNHCP = gs.xNHCP;
2290 double ***vNHC = gs.vNHC;
2291 double ***fNHC = gs.fNHC;
2292 double *mNHC = gs.mNHC;
2293 int len_nhc_cp = gs.len_nhc_cp;
2294 int num_nhc_cp = gs.num_nhc_cp;
2295 int nck_nhc_cp = gs.nck_nhc_cp;
2296 int nkx0_red = gs.nkx0_red;
2297 int nkx0_uni = gs.nkx0_uni;
2298 int nkx0_zero = gs.nkx0_zero;
2299 double kTCP = gs.kTCP;
2301 int myiteration = iteration;
2302 if(cp_min_opt==0){myiteration=iteration-1;}
2306 if(ind_state==1 && ind_chare==1){
2307 CkPrintf(
"-----------------------------------\n");
2308 CkPrintf(
"Writing states to disk at time %d\n",myiteration);
2309 CkPrintf(
"-----------------------------------\n");
2313 if(cp_min_opt==0 && halfStepEvolve ==1){
2315 CPINTEGRATE::cp_evolve_vel(ncoef,forces,vpsi,coef_mass,
2316 len_nhc_cp,num_nhc_cp,nck_nhc_cp,fNHC,vNHC,xNHC,xNHCP,mNHC,
2317 gs.v0NHC,gs.a2NHC,gs.a4NHC,kTCP,nkx0_red,nkx0_uni,nkx0_zero,
2318 2,iteration,gs.degfree,gs.degfreeNHC,gs.degFreeSplt,
2319 gs.istrNHC,gs.iendNHC,1);
2323 GStateOutMsg *msg =
new (ncoef,ncoef,ncoef,ncoef,ncoef,
2325 if(config.prioFFTMsg){
2326 CkSetQueueing(msg, CK_QUEUEING_IFIFO);
2327 *(
int*)CkPriorityPtr(msg) = config.rsfftpriority +
2328 thisIndex.x*gs.planeSize[0]+thisIndex.y;
2331 msg->senderIndex = thisIndex.y;
2334 int *mk_x = msg->k_x;
2335 int *mk_y = msg->k_y;
2336 int *mk_z = msg->k_z;
2338 for(
int i=0;i<ncoef;i++){vdata[i] = vpsi[i];}
2340 for(
int i=0;i<ncoef;i++){vdata[i] = 0.0;}
2342 for (
int i=0;i<ncoef; i++){
2344 mk_x[i] = k_x[i]; mk_y[i] = k_y[i]; mk_z[i] = k_z[i];
2346 UgSpacePlaneProxy[thisInstance.proxyOffset](thisIndex.x,redPlane).acceptFileOutput(msg);
2356 int npts_tot = (sim->npts_tot);
2357 int *ipacked_off = (sim->index_output_off);
2362 if(countFileOut==0){
2365 tk_x = (
int *)fftw_malloc(npts_tot*
sizeof(
int));
2366 tk_y = (
int *)fftw_malloc(npts_tot*
sizeof(
int));
2367 tk_z = (
int *)fftw_malloc(npts_tot*
sizeof(
int));
2371 int ncoef = msg->size;
2372 int myplane = msg->senderIndex;
2375 int *mk_x = msg->k_x;
2376 int *mk_y = msg->k_y;
2377 int *mk_z = msg->k_z;
2378 int ioff = ipacked_off[myplane];
2379 for(
int i=0,j=ioff;i<ncoef;i++,j++){
2381 tvpsi[j] = vdata[i];
2393 void CP_State_GSpacePlane::writeOutputFile() {
2396 int sizeX = (sim->sizeX);
2397 int sizeY = (sim->sizeY);
2398 int sizeZ = (sim->sizeZ);
2399 int npts_tot = (sim->npts_tot);
2400 int cp_min_opt = (sim->cp_min_opt);
2401 int ibinary_write_opt = sim->ibinary_write_opt;
2403 int myiteration = iteration;
2404 if(cp_min_opt==0){myiteration=iteration-1;}
2407 int ind_state = thisIndex.x+1;
2408 int ibead = myBeadIndex;
2409 int ikpt = myKptIndex;
2410 int itemper = myTemperIndex;
2411 int ispin = mySpinIndex;
2413 char psiName[400];
char vpsiName[400];
2415 sprintf(psiName,
"%s/Spin.%d_Kpt.%d_Bead.%d_Temper.%d/state%d.out",
2416 config.dataPathOut,ispin,ikpt,ibead,itemper,ind_state);
2417 sprintf(vpsiName,
"%s/Spin.%d_Kpt.%d_Bead.%d_Temper.%d/vState%d.out",
2418 config.dataPathOut,ispin,ikpt,ibead,itemper,ind_state);
2419 writeStateFile(npts_tot,tpsi,tvpsi,tk_x,tk_y,tk_z,cp_min_opt,
2420 sizeX,sizeY,sizeZ,psiName,vpsiName,ibinary_write_opt,
2421 myiteration,ind_state,ispin,ikpt,ibead,itemper);
2422 fftw_free(tpsi); tpsi = NULL;
2423 fftw_free(tvpsi);tvpsi = NULL;
2424 fftw_free(tk_x); tk_x = NULL;
2425 fftw_free(tk_y); tk_y = NULL;
2426 fftw_free(tk_z); tk_z = NULL;
2442 if(!acceptedLambda || !acceptedVPsi || !acceptedPsi){
2443 CkPrintf(
"@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
2444 CkPrintf(
"Flow of Control Error : Attempting to cp integrate\n");
2445 CkPrintf(
"without lambda correction to forces or psi or vpsi\n");
2446 CkPrintf(
"@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
2450 #ifdef _CP_DEBUG_STATEG_VERBOSE_
2452 CkPrintf(
"integrate %d %d\n",thisIndex.y,cleanExitCalled);
2459 eesCache *eesData = UeesCacheProxy[thisInstance.proxyOffset].ckLocalBranch ();
2463 double *coef_mass = eesData->GspData[
iplane_ind]->coef_mass;
2466 int cp_min_opt = sim->cp_min_opt;
2467 int cp_min_cg = sim->cp_min_cg;
2469 int istate = gs.istate_ind;
2470 int ncoef = gs.numPoints;
2471 int len_nhc = gs.len_nhc_cp;
2472 int num_nhc = gs.num_nhc_cp;
2473 int nck_nhc = gs.nck_nhc_cp;
2474 complex *psi_g = gs.packedPlaneData;
2475 complex *forces = gs.packedForceData;
2476 complex *forcesold = gs.packedVelData;
2477 complex *vpsi_g = gs.packedVelData;
2478 double ***fNHC = gs.fNHC;
2479 double ***vNHC = gs.vNHC;
2480 double ***xNHC = gs.xNHC;
2481 double ***xNHCP = gs.xNHCP;
2482 double *mNHC = gs.mNHC;
2483 double *v0NHC = gs.v0NHC;
2484 double *a2NHC = gs.a2NHC;
2485 double *a4NHC = gs.a4NHC;
2486 double kTCP = gs.kTCP;
2487 int nkx0_red = gs.nkx0_red;
2488 int nkx0_uni = gs.nkx0_uni;
2489 int nkx0_zero = gs.nkx0_zero;
2490 double degfree = gs.degfree;
2491 double degfreeNHC = gs.degfreeNHC;
2492 double *degFreeSplt= gs.degFreeSplt;
2493 int *istrNHC = gs.istrNHC;
2494 int *iendNHC = gs.iendNHC;
2496 double fictEke = 0.0;
2497 double ekeNhc = 0.0;
2498 double potNHC = 0.0;
2505 if(iteration==1){ireset_cg=1;numReset_cg=0;}
2506 if(iteration>1 && cp_min_opt==1 && cp_min_cg==1){
2507 if( (fmagPsi_total>1.05*fmagPsi_total_old && numReset_cg>=5) ||
2508 (fmagPsi_total>2.0*fmagPsi_total_old)){
2511 if(thisIndex.x==0 && thisIndex.y==0){
2512 CkPrintf(
"----------------------------------------------\n");
2513 CkPrintf(
" Resetting Conjugate Gradient! \n");
2514 CkPrintf(
"----------------------------------------------\n");
2519 double gamma_conj_grad = 0.0;
2520 if( (cp_min_opt==1) && (cp_min_cg == 1) && (ireset_cg==0)){
2521 gamma_conj_grad = fovlap/fovlap_old;
2534 #define _CP_DEBUG_DYNAMICS_OFF
2535 #ifdef _CP_DEBUG_DYNAMICS_
2539 snprintf(forcefile,80,
"Bpsi_force_%d_%d_%d.out",thisIndex.x,thisIndex.y,iteration);
2540 FILE *fp=fopen(forcefile,
"w");
2541 for(
int i=0;i <ncoef;i++){
2542 fprintf(fp,
"%d %d %d : %.10g %.10g : %g %g : %g %g : %g\n",
2543 k_x[i], k_y[i], k_z[i],psi_g[i].re,psi_g[i].im,
2544 vpsi_g[i].re,vpsi_g[i].im,
2545 forces[i].re,forces[i].im, coef_mass[i]);
2556 #if CMK_TRACE_ENABLED
2557 double StartTime=CmiWallTimer();
2560 #define _GLENN_CHECK_DYNAMICS_OFF_
2561 #ifdef _GLENN_CHECK_DYNAMICS_
2562 if(iteration==1){bzero(vpsi_g,ncoef*
sizeof(
complex));}
2563 if(thisIndex.x==0&&thisIndex.y==0){CkPrintf(
"Before Integrate : iteration %d\n",iteration);}
2566 #ifdef _GLENN_CHECK_INTEGRATE_
2567 for(
int i=0;i<ncoef;i++){
2568 if( (k_x[i]==0 &&k_y[i]==1 && k_z[i]==4) ||
2569 (k_x[i]==2 &&k_y[i]==1 && k_z[i]==3)){
2570 if(thisIndex.x==0 || thisIndex.x==127){
2571 CkPrintf(
" Psi[is=%d ka=%d kb=%d kc=%d] : %.15g %.15g %.15g %.15g\n",
2572 gs.istate_ind+1,k_x[i],k_y[i],k_z[i],
2573 psi_g[i].re,psi_g[i].im,
2574 forces[i].re,forces[i].im);
2580 #ifdef _CP_DEBUG_SCALC_ONLY_
2581 bzero(forces,ncoef*
sizeof(
complex));
2583 psi_g = gs.packedPlaneDataTemp2;
2588 for(
int i=0; i < ncoef; i++)
2590 CkAssert(finite(vpsi_g[i].re));
2591 CkAssert(finite(vpsi_g[i].im));
2592 CkAssert(finite(psi_g[i].re));
2593 CkAssert(finite(psi_g[i].im));
2594 CkAssert(finite(forces[i].re));
2595 CkAssert(finite(forces[i].im));
2599 fictEke = 0.0; ekeNhc=0.0; potNHC=0.0;
2600 CPINTEGRATE::CP_integrate(ncoef,istate,iteration,forces,forcesold,psi_g,
2601 coef_mass,k_x,k_y,k_z,len_nhc,num_nhc,nck_nhc,fNHC,vNHC,xNHC,xNHCP,
2602 mNHC,v0NHC,a2NHC,a4NHC,kTCP,gamma_conj_grad,&fictEke,
2603 nkx0_red,nkx0_uni,nkx0_zero,&ekeNhc,&potNHC,degfree,degfreeNHC,
2608 #ifdef _GLENN_CHECK_INTEGRATE_
2609 for(
int i=0;i<ncoef;i++){
2610 if( (k_x[i]==0 &&k_y[i]==1 && k_z[i]==4) ||
2611 (k_x[i]==2 &&k_y[i]==1 && k_z[i]==3)){
2612 if(thisIndex.x==0 || thisIndex.x==127){
2613 CkPrintf(
" Psi[is=%d ka=%d kb=%d kc=%d] : %.15g %.15g %.15g %.15g\n",
2614 gs.istate_ind+1,k_x[i],k_y[i],k_z[i],
2615 psi_g[i].re,psi_g[i].im,
2616 forces[i].re,forces[i].im);
2622 #if CMK_TRACE_ENABLED
2631 for(
int i=0; i < ncoef; i++)
2633 CkAssert(finite(vpsi_g[i].re));
2634 CkAssert(finite(vpsi_g[i].im));
2635 CkAssert(finite(psi_g[i].re));
2636 CkAssert(finite(psi_g[i].im));
2637 CkAssert(finite(forces[i].re));
2638 CkAssert(finite(forces[i].im));
2642 #ifdef _CP_DEBUG_DYNAMICS_
2646 snprintf(forcefile,80,
"Apsi_force_%d_%d_%d.out",thisIndex.x,thisIndex.y,iteration);
2647 FILE *fp=fopen(forcefile,
"w");
2648 for(
int i=0;i <ncoef;i++){
2649 fprintf(fp,
"%d %d %d : %.10g %.10g : %g %g : %g %g : %g\n",
2650 k_x[i], k_y[i], k_z[i],psi_g[i].re,psi_g[i].im,
2651 vpsi_g[i].re,vpsi_g[i].im,
2652 forces[i].re,forces[i].im, coef_mass[i]);
2657 if(iteration==2){CkPrintf(
"later debugging dyn\n");CkExit();}
2665 double redSize = ((double) (nchareG*nstates));
2667 sendme[0] = 2.0*BOLTZ*(fictEke/degfree)/redSize;
2668 sendme[1] = 2.0*BOLTZ*(ekeNhc/degfreeNHC)/redSize;
2669 sendme[2] = fictEke;
2672 contribute(5*
sizeof(
double),sendme,CkReduction::sum_double,
2673 CkCallback(CkIndex_InstanceController::printFictEke(NULL),CkArrayIndex1D(thisInstance.proxyOffset),instControllerProxy));
2679 gs.fictEke_ret = fictEke;
2680 gs.ekeNhc_ret = ekeNhc;
2681 gs.potNHC_ret = potNHC;
2682 finishedCpIntegrate = 1;
2699 complex *sendData = gs.packedPlaneData;
2700 int isend = thisIndex.y;
2701 int *num_send = RCommPkg[isend].num_send;
2702 int **lst_send = RCommPkg[isend].lst_send;
2703 int num_send_tot = RCommPkg[isend].num_send_tot;
2705 if(finishedCpIntegrate==0){
2706 CkPrintf(
"@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
2707 CkPrintf(
"Flow of Control Error : Attempting to sendredpsi\n");
2708 CkPrintf(
"without integration\n");
2709 CkPrintf(
"@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
2715 int iii=0;
int jjj=0;
2717 for(
int irecv = 0; irecv < nchareG; irecv ++){
2719 int ncoef = num_send[irecv];
2723 if(config.prioFFTMsg){
2724 CkSetQueueing(msg, CK_QUEUEING_IFIFO);
2725 *(
int*)CkPriorityPtr(msg) = config.rsfftpriority +
2726 thisIndex.x*gs.planeSize[0]+thisIndex.y;
2730 msg->senderIndex = isend;
2732 for (
int i=0;i<ncoef; i++){msgData[i] = sendData[lst_send[irecv][i]];}
2733 UgSpacePlaneProxy[thisInstance.proxyOffset](thisIndex.x,irecv).acceptRedPsi(msg);
2744 if(iii!=num_send_tot){
2745 CkPrintf(
"Error in GSchare %d %d : %d %d : sendredPsi.1\n",thisIndex.x,thisIndex.y,
2749 if(jjj != gs.nkx0_uni-gs.nkx0_zero){
2750 CkPrintf(
"Error in GSchare %d %d : %d %d\n sendredPsi.3",thisIndex.x,thisIndex.y,
2771 int ncoef = msg->size;
2772 int isend = msg->senderIndex;
2774 complex *recvData = gs.packedRedPsi;
2775 int irecv = thisIndex.y;
2776 int *num_recv = RCommPkg[irecv].num_recv;
2777 int **lst_recv = RCommPkg[irecv].lst_recv;
2783 if(num_recv[isend]!=ncoef){
2784 CkPrintf(
"@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
2785 CkPrintf(
"Number sent not equal to number reciever expected \n");
2786 CkPrintf(
"Sender %d size %d Reciever %d size %d \n",isend,ncoef,irecv,num_recv[isend]);
2787 CkPrintf(
"@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
2791 if(countRedPsi==0){jtemp=0;}
2794 for(
int i=0;i<ncoef;i++){
2795 recvData[lst_recv[isend][i]].re= msgData[i].re;
2796 recvData[lst_recv[isend][i]].im=-msgData[i].im;
2805 if(countRedPsi==numRecvRedPsi){
2808 if(jtemp!=gs.nkx0_red){
2809 CkPrintf(
"Error in GSchare recv cnt %d %d : %d %d\n",thisIndex.x,thisIndex.y,
2825 int ncoef = gs.nkx0_red;
2827 complex *psi = gs.packedPlaneData;
2828 complex *psi_red = gs.packedRedPsi;
2829 for(
int i=0;i<ncoef;i++){psi[i]=psi_red[i];}
2844 #ifdef _CP_DEBUG_STATEG_VERBOSE_
2846 CkPrintf(
"sendpsi %d %d\n",thisIndex.y,cleanExitCalled);
2850 int cp_min_opt = sim->cp_min_opt;
2852 if(finishedCpIntegrate==0){
2853 CkPrintf(
"@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
2854 CkPrintf(
"Dude, you can't sendPsi without completing integrate\n");
2855 CkPrintf(
"@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
2860 if(iRecvRedPsi!=1 || iSentRedPsi!=1){
2861 CkPrintf(
"@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
2862 CkPrintf(
"Dude, you can't sendPsi without receiving Redpsi\n");
2863 CkPrintf(
"@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
2871 acceptedPsi =
false;
2873 complex *psi = gs.packedPlaneData;
2874 int numPoints = gs.numPoints;
2877 int ncoef = gs.numPoints;
2878 complex *scr = gs.packedPlaneDataScr;
2879 CmiMemcpy(scr,psi,
sizeof(
complex)*ncoef);
2882 #ifndef PAIRCALC_TEST_DUMP
2883 #ifndef _CP_DEBUG_ORTHO_OFF_
2884 if(gs.ihave_kx0==1){
2885 double rad2i = 1.0/sqrt(2.0);
2886 for(
int i=gs.kx0_strt; i<gs.kx0_end; i++){psi[i] *= rad2i;}
2895 #ifdef _PAIRCALC_DEBUG_PARANOID_FW_
2896 for(
int i=0;i<config.numChunksSym;i++){CkAssert(countPsiO[i]==0);}
2899 #ifdef _CP_GS_DUMP_PSI_
2900 dumpMatrix(
"psiBfp",(
double *)psi, 1, gs.numPoints*2,
2901 thisIndex.y,thisIndex.x,thisIndex.x,0,
false);
2904 #ifdef _CP_GS_DEBUG_COMPARE_PSI_
2905 double testvalue=0.00000001;
2906 if(savedpsiBfp==NULL){
2907 savedpsiBfp=
new complex[gs.numPoints];
2908 loadMatrix(
"psiBfp",(
double *)savedpsiBfp, 1, gs.numPoints*2,
2909 thisIndex.y,thisIndex.x,thisIndex.x,0,
false);
2911 for(
int i=0;i<gs.numPoints;i++){
2912 if(fabs(psi[i].re-savedpsiBfp[i].re)>testvalue){
2913 fprintf(stderr,
"GSP [%d,%d] %d element psi %.10g not %.10g\n",
2914 thisIndex.x, thisIndex.y,i, psi[i].re, savedpsiBfp[i].re);
2916 CkAssert(fabs(psi[i].re-savedpsiBfp[i].re)<testvalue);
2917 CkAssert(fabs(psi[i].im-savedpsiBfp[i].im)<testvalue);
2925 #ifndef _CP_DEBUG_ORTHO_OFF_
2948 int cp_min_opt = sim->cp_min_opt;
2950 if(iRecvRedPsi!=1 || iSentRedPsi!=1){
2951 CkPrintf(
"@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
2952 CkPrintf(
"Dude, you can't acceptPsi without receiving Redpsi\n");
2953 CkPrintf(
"@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
2962 int N = msg->getSize()/
sizeof(
complex);
2964 int offset = msg->getUserFlag();
if(offset<0){offset=0;}
2965 complex *psi = gs.packedPlaneData;
2966 int chunksize = gs.numPoints/config.numChunksSym;
2967 int chunkoffset = offset*chunksize;
2970 for(
int i=0;i<N ;i++){
2971 CkAssert(finite(((
complex *) msg->getData())[i].re));
2972 CkAssert(finite(((
complex *) msg->getData())[i].im));
2980 int idest=chunkoffset;
2982 if(countPsiO[offset]<1){
2983 CmiMemcpy(&(psi[idest]), &(data[0]), N*
sizeof(
complex));
2987 fastAdd((
double *) &psi[idest],(
double *)data,N*2);
2997 countPsiO[offset]++;
2998 if(countPsi==AllPsiExpected){
2999 #ifdef _CP_DEBUG_STATEG_VERBOSE_
3001 CkPrintf(
"acceptpsi %d %d\n",thisIndex.y,cleanExitCalled);
3023 int cp_min_opt = sim->cp_min_opt;
3025 if(iRecvRedPsi!=1 || iSentRedPsi!=1){
3026 CkPrintf(
"@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
3027 CkPrintf(
"Dude, you can't acceptPsi without receiving Redpsi\n");
3028 CkPrintf(
"@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
3041 int offset = msg->myoffset;
if(offset<0){offset=0;}
3042 complex *psi = gs.packedPlaneData;
3043 int chunksize = gs.numPoints/config.numChunksSym;
3044 int chunkoffset = offset*chunksize;
3047 for(
int i=0;i<N ;i++){
3048 if((!finite(data[i].re)) || (!finite(data[i].im))){
3049 CkPrintf(
"GSP [%d,%d] acceptNewPsi offset %d %d of %d is nan\n",
3050 thisIndex.x,thisIndex.y, msg->myoffset, i,N);
3052 CkAssert(finite(data[i].re));
3053 CkAssert(finite(data[i].im));
3060 int idest = chunkoffset;
3061 if(countPsiO[offset]<1){
3062 CmiMemcpy(&(psi[idest]), &(data[0]), N*
sizeof(
complex));
3066 fastAdd((
double *) &psi[idest],(
double *)data,N*2);
3076 countPsiO[offset]++;
3078 if(countPsi==AllPsiExpected){
3079 #ifdef _CP_DEBUG_STATEG_VERBOSE_
3080 if(thisIndex.x==0){CkPrintf(
"aceeptpsi %d %d\n",thisIndex.y,cleanExitCalled);}
3101 int cp_min_opt = sim->cp_min_opt;
3102 int cp_min_update = sim->cp_min_update;
3105 if(iRecvRedPsi!=1 || iSentRedPsi!=1){
3106 CkPrintf(
"@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
3107 CkPrintf(
"Dude, you can't acceptPsi without receiving Redpsi\n");
3108 CkPrintf(
"@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
3116 #ifdef _CP_DEBUG_STATEG_VERBOSE_
3118 CkPrintf(
"donewpsi %d %d\n",thisIndex.y,cleanExitCalled);
3121 CkAssert(countPsi==AllPsiExpected);
3123 complex *psi = gs.packedPlaneData;
3125 for(
int i=0;i<gs.numPoints ;i++){
3126 CkAssert(finite(psi[i].re));
3127 CkAssert(finite(psi[i].im));
3136 bzero(countPsiO,config.numChunksSym*
sizeof(
int));
3138 #ifndef PAIRCALC_TEST_DUMP
3139 if(gs.ihave_kx0==1){
3140 double rad2 = sqrt(2.0);
3141 for(
int i=gs.kx0_strt; i<gs.kx0_end; i++){psi[i] *= rad2;}
3153 #ifdef _CP_GS_DUMP_PSI_
3154 dumpMatrix(
"psiAf",(
double *)psi, 1, gs.numPoints*2,thisIndex.y,thisIndex.x,
3155 thisIndex.x,0,
false);
3158 #ifdef _CP_GS_DEBUG_COMPARE_PSI_
3159 double testvalue=0.00000001;
3160 if(savedpsiAf==NULL){
3161 savedpsiAf=
new complex[gs.numPoints];
3162 loadMatrix(
"psiAf",(
double *)savedpsiAf, 1, gs.numPoints*2,
3163 thisIndex.y,thisIndex.x,thisIndex.x,0,
false);
3165 for(
int i=0;i<gs.numPoints;i++){
3166 if(fabs(psi[i].re-savedpsiAf[i].re)>testvalue){
3167 fprintf(stderr,
"GSP [%d,%d] %d element psi %.10g not %.10g\n",
3168 thisIndex.x, thisIndex.y,i, psi[i].re, savedpsiAf[i].re);
3170 CkAssert(fabs(psi[i].re-savedpsiAf[i].re)<testvalue);
3171 CkAssert(fabs(psi[i].im-savedpsiAf[i].im)<testvalue);
3178 if(cp_min_opt==1 && cp_min_update==0 && iteration>0){
3179 CmiMemcpy(gs.packedPlaneData,gs.packedPlaneDataTemp,
3180 sizeof(
complex)*gs.numPoints);
3181 memset(gs.packedVelData,0,
sizeof(
complex)*gs.numPoints);
3184 if(cp_min_opt==1 && cp_min_update==0 && iteration==0){
3185 CmiMemcpy(gs.packedPlaneDataTemp,gs.packedPlaneData,
3186 sizeof(
complex)*gs.numPoints);
3187 memset(gs.packedVelData,0,
sizeof(
complex)*gs.numPoints);
3192 #ifndef _CP_DEBUG_ORTHO_OFF_
3193 #ifdef BARRIER_CP_GSPACE_PSI
3195 contribute(
sizeof(
int),&wehaveours,CkReduction::sum_int,
3196 CkCallback(CkIndex_CP_State_GSpacePlane::allDonePsi(NULL),thisProxy));
3212 CkPrintf(
"[%d,%d] launchOrthoT \n",thisIndex.x, thisIndex.y);
3213 if(thisIndex.x==0 && thisIndex.y==0)
3225 #ifdef DEBUG_CP_GSPACE_PSIV
3226 CkPrintf(
"GSpace[%d,%d] sendRedPsiV: Going to send redundant PsiV data\n",thisIndex.x,thisIndex.y);
3232 eesCache *eesData = UeesCacheProxy[thisInstance.proxyOffset].ckLocalBranch ();
3233 double *coef_mass = eesData->GspData[
iplane_ind]->coef_mass;
3235 int ncoef = gSpaceNumPoints;
3236 complex *psi = gs.packedPlaneData;
3237 complex *vpsi = gs.packedVelData;
3238 complex *forces = gs.packedForceData;
3239 double ***xNHC = gs.xNHC;
3240 double ***xNHCP = gs.xNHCP;
3241 double ***vNHC = gs.vNHC;
3242 double ***fNHC = gs.fNHC;
3243 double *mNHC = gs.mNHC;
3244 int len_nhc_cp = gs.len_nhc_cp;
3245 int num_nhc_cp = gs.num_nhc_cp;
3246 int nck_nhc_cp = gs.nck_nhc_cp;
3247 int nkx0_red = gs.nkx0_red;
3248 int nkx0_uni = gs.nkx0_uni;
3249 int nkx0_zero = gs.nkx0_zero;
3250 double kTCP = gs.kTCP;
3261 CPINTEGRATE::cp_evolve_vel(ncoef,forces,vpsi,coef_mass,
3262 len_nhc_cp,num_nhc_cp,nck_nhc_cp,fNHC,vNHC,xNHC,xNHCP,mNHC,
3263 gs.v0NHC,gs.a2NHC,gs.a4NHC,kTCP,nkx0_red,nkx0_uni,nkx0_zero,
3264 2,iteration,gs.degfree,gs.degfreeNHC,gs.degFreeSplt,
3265 gs.istrNHC,gs.iendNHC,1);
3276 complex *sendData = gs.packedVelData;
3277 int isend = thisIndex.y;
3278 int *num_send = RCommPkg[isend].num_send;
3279 int **lst_send = RCommPkg[isend].lst_send;
3280 int num_send_tot = RCommPkg[isend].num_send_tot;
3282 int iii=0;
int jjj=0;
3284 for(
int irecv = 0; irecv < nchareG; irecv ++){
3286 int ncoef = num_send[irecv];
3290 if(config.prioFFTMsg){
3291 CkSetQueueing(msg, CK_QUEUEING_IFIFO);
3292 *(
int*)CkPriorityPtr(msg) = config.rsfftpriority +
3293 thisIndex.x*gs.planeSize[0]+thisIndex.y;
3297 msg->senderIndex = isend;
3299 for (
int i=0;i<ncoef; i++){msgData[i] = sendData[lst_send[irecv][i]];}
3300 UgSpacePlaneProxy[thisInstance.proxyOffset](thisIndex.x,irecv).acceptRedPsiV(msg);
3311 if(iii!=num_send_tot){
3312 CkPrintf(
"Error in GSchare %d %d : %d %d : sendRedPsiV.1\n",thisIndex.x,thisIndex.y,
3316 if(numRecvRedPsi==0 && gs.nkx0_red>0){
3317 CkPrintf(
"Error in GSchare %d %d : %d %d : sendRedPsiV.2\n",thisIndex.x,thisIndex.y,
3318 numRecvRedPsi,gs.nkx0_red);
3321 if(jjj != gs.nkx0_uni-gs.nkx0_zero){
3322 CkPrintf(
"Error in GSchare %d %d : %d %d : sendRedPsiV.3\n",thisIndex.x,thisIndex.y,
3332 acceptedVPsi =
false;
3347 int ncoef = msg->size;
3348 int isend = msg->senderIndex;
3350 complex *recvData = gs.packedRedPsiV;
3352 int irecv = thisIndex.y;
3353 int *num_recv = RCommPkg[irecv].num_recv;
3354 int **lst_recv = RCommPkg[irecv].lst_recv;
3356 #ifdef DEBUG_CP_GSPACE_PSIV
3357 CkPrintf(
"GSpace[%d,%d] acceptRedPsiV Received redundant PsiV values from sender %d\n",thisIndex.x,thisIndex.y,isend);
3363 if(num_recv[isend]!=ncoef){
3364 CkPrintf(
"@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
3365 CkPrintf(
"Number sent not equal to number reciever expected \n");
3366 CkPrintf(
"Sender %d size %d Reciever %d size %d \n",isend,ncoef,irecv,num_recv[isend]);
3367 CkPrintf(
"@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
3371 if(countRedPsiV==0){jtemp=0;}
3374 for(
int i=0;i<ncoef;i++){
3375 recvData[lst_recv[isend][i]].re= msgData[i].re;
3376 recvData[lst_recv[isend][i]].im=-msgData[i].im;
3386 if(countRedPsiV==numRecvRedPsi){
3387 #ifdef DEBUG_CP_GSPACE_PSIV
3388 CkPrintf(
"GSpace[%d,%d] acceptRedPsiV received all %d GSRedPsi messages carrying redundant PsiV data\n",thisIndex.x,thisIndex.y,countRedPsiV);
3392 if(jtemp!=gs.nkx0_red){
3393 CkPrintf(
"Error in GSchare recv cnt %d %d : %d %d\n",thisIndex.x,thisIndex.y,
3410 eesCache *eesData = UeesCacheProxy[thisInstance.proxyOffset].ckLocalBranch ();
3411 double *coef_mass = eesData->GspData[
iplane_ind]->coef_mass;
3412 int ncoef = gs.numPoints;
3413 int ncoef_red = gs.nkx0_red;
3414 complex *vpsi = gs.packedVelData;
3415 int istrt0 = gs.nkx0_red;
3416 int istrt = gs.nkx0_red+gs.nkx0_zero;
3417 int iend = gs.nkx0_red+gs.nkx0_uni;
3420 complex *vpsi_red = gs.packedRedPsiV;
3421 for(
int i=0;i<ncoef_red;i++){vpsi[i]=vpsi_red[i];}
3425 for(
int i=istrt0;i<istrt;i++){ake_old += vpsi[i].getMagSqr()*coef_mass[i];}
3426 for(
int i=istrt;i<iend;i++) {ake_old += vpsi[i].getMagSqr()*(2.0*coef_mass[i]);}
3427 for(
int i=iend;i<ncoef;i++) {ake_old += vpsi[i].getMagSqr()*coef_mass[i];}
3444 if(iRecvRedPsiV!=1 || iSentRedPsiV!=1){
3445 CkPrintf(
"GSpace[%d,%d] Error: You can't sendPsiV() without sending/receiving the redundant psiV values around: finished %d %d : %d %d\n",thisIndex.x,thisIndex.y,iRecvRedPsiV,iSentRedPsiV,numRecvRedPsi,gs.nkx0_red);
3446 CkAbort(
"Error: GSpace cannot sendPsiV() without sending/receiving the redundant psi values around\n");
3449 #ifdef DEBUG_CP_GSPACE_PSIV
3450 CkPrintf(
"GSpace[%d,%d] sendPsiV\n",thisIndex.x,thisIndex.y);
3453 acceptedVPsi =
false;
3458 iterRotation = iteration+1;
3460 int ncoef = gs.numPoints;
3461 complex *data = gs.packedVelData;
3462 complex *scr = gs.packedPlaneDataScr;
3463 complex *psi = gs.packedPlaneData;
3464 CmiMemcpy(scr,psi,
sizeof(
complex)*ncoef);
3466 #ifndef PAIRCALC_TEST_DUMP
3467 if(gs.ihave_kx0==1){
3468 double rad2i = 1.0/sqrt(2.0);
3469 for(
int i=gs.kx0_strt; i<gs.kx0_end; i++){data[i] *= rad2i;}
3473 int numPoints = gs.numPoints;
3492 int N = msg->getSize()/
sizeof(
complex);
3494 int offset = msg->getUserFlag();
if(offset<0){offset=0;}
3496 complex *vpsi = gs.packedVelData;
3497 int chunksize = gs.numPoints/config.numChunksSym;
3498 int chunkoffset = offset*chunksize;;
3500 #ifdef DEBUG_CP_GSPACE_PSIV
3501 CkPrintf(
"GSpace[%d,%d] acceptNewPsiV(reductionMsg) PCs have sent new PsiV data\n",thisIndex.x,thisIndex.y);
3503 if(iRecvRedPsiV!=1 || iSentRedPsiV!=1){
3504 CkPrintf(
"GSpace[%d,%d] Error: You can't acceptNewPsiV() without sending/receiving the redundant PsiV values around: finished %d %d : %d %d\n",thisIndex.x,thisIndex.y,iRecvRedPsiV,iSentRedPsiV,numRecvRedPsi,gs.nkx0_red);
3505 CkAbort(
"Error: GSpace cannot acceptNewPsiV() without sending/receiving the redundant PsiV values around\n");
3512 int idest=chunkoffset;
3513 if(countVPsiO[offset]<1){
3514 CmiMemcpy(&(vpsi[idest]), &(data[0]), N*
sizeof(
complex));
3518 fastAdd((
double *) &vpsi[idest],(
double *)data,N*2);
3528 countVPsiO[offset]++;
3530 if(countVPsi==AllPsiExpected){
3531 #ifdef DEBUG_CP_GSPACE_PSIV
3532 CkPrintf(
"GSpace[%d,%d] Received all PsiV data from PCs (%d reductions).\n",thisIndex.x,thisIndex.y,AllPsiExpected);
3550 int offset = msg->myoffset;
if(offset<0){offset=0;}
3553 for(
int i=0; i < N; i++)
3555 CkAssert(finite(data[i].re));
3556 CkAssert(finite(data[i].im));
3560 complex *vpsi = gs.packedVelData;
3561 int chunksize = gs.numPoints/config.numChunksSym;
3562 int chunkoffset = offset*chunksize;;
3564 #ifdef DEBUG_CP_GSPACE_PSIV
3565 CkPrintf(
"GSpace[%d,%d] acceptNewPsiV(partialResultMsg) Received new PsiV data (msg %d of %d) from PC [%d,%d,%d,%d] (offset %d)\n",thisIndex.x,thisIndex.y,countVPsi+1,AllPsiExpected,
3566 msg->sndr.w,msg->sndr.x,msg->sndr.y,msg->sndr.z,offset);
3568 if(iRecvRedPsiV!=1 || iSentRedPsiV!=1){
3569 CkPrintf(
"GSpace[%d,%d] Error: You can't acceptNewPsiV() without sending/receiving the redundant PsiV values around: finished %d %d : %d %d\n",thisIndex.x,thisIndex.y,iRecvRedPsiV,iSentRedPsiV,numRecvRedPsi,gs.nkx0_red);
3570 CkAbort(
"Error: GSpace cannot acceptNewPsiV() without sending/receiving the redundant PsiV values around\n");
3577 int idest=chunkoffset;
3578 if(countVPsiO[offset]<1){
3579 CmiMemcpy(&(vpsi[idest]), &(data[0]), N*
sizeof(
complex));
3583 fastAdd((
double *) &vpsi[idest],(
double *)data,N*2);
3593 countVPsiO[offset]++;
3595 if(countVPsi==AllPsiExpected){
3596 #ifdef DEBUG_CP_GSPACE_PSIV
3597 CkPrintf(
"GSpace[%d,%d] Received all PsiV data from PCs (%d messages).\n",thisIndex.x,thisIndex.y,AllPsiExpected);
3614 #ifdef DEBUG_CP_GSPACE_PSIV
3615 CkPrintf(
"GSpace[%d,%d] doNewPsiV\n",thisIndex.x,thisIndex.y);
3618 CkAssert(countVPsi==AllPsiExpected);
3620 if(iRecvRedPsiV!=1 || iSentRedPsiV!=1){
3621 CkPrintf(
"GSpace[%d,%d] Error: You can't doNewPsiV() without sending/receiving the redundant PsiV values around: finished %d %d : %d %d\n",thisIndex.x,thisIndex.y,iRecvRedPsiV,iSentRedPsiV,numRecvRedPsi,gs.nkx0_red);
3622 CkAbort(
"Error: GSpace cannot doNewPsiV() without sending the redundant/receiving PsiV values around\n");
3629 acceptedVPsi =
true;
3632 complex *vpsi = gs.packedVelData;
3634 #ifndef PAIRCALC_TEST_DUMP
3635 for(
int i=0;i<config.numChunksSym;i++){countVPsiO[i]=0;}
3636 if(gs.ihave_kx0==1){
3637 double rad2 = sqrt(2.0);
3638 for(
int i=gs.kx0_strt; i<gs.kx0_end; i++){vpsi[i] *= rad2;}
3647 eesCache *eesData = UeesCacheProxy[thisInstance.proxyOffset].ckLocalBranch ();
3649 double *coef_mass = eesData->GspData[
iplane_ind]->coef_mass;
3650 double dt = sim->dt;
3651 double dt2 = 2.0*dt;
3652 int ncoef = gs.numPoints;
3653 complex *psi_g = gs.packedPlaneData;
3654 complex *psi_g_tmp = gs.packedPlaneDataTemp;
3655 int istrt0 = gs.nkx0_red;
3656 int istrt = gs.nkx0_red+gs.nkx0_zero;
3657 int iend = gs.nkx0_red+gs.nkx0_uni;
3659 for(
int i=0;i<ncoef;i++){
3660 double vre = (psi_g[i].re-psi_g_tmp[i].re)/dt;
3661 double vim = (psi_g[i].im-psi_g_tmp[i].im)/dt;
3666 double ake_new = 0.0;
3667 for(
int i=istrt0;i<istrt;i++){ake_new += vpsi[i].getMagSqr()*coef_mass[i];}
3668 for(
int i=istrt;i<iend;i++) {ake_new += vpsi[i].getMagSqr()*(2.0*coef_mass[i]);}
3669 for(
int i=iend;i<ncoef;i++) {ake_new += vpsi[i].getMagSqr()*coef_mass[i];}
3671 if(sim->cp_norb_rot_kescal==1){
3672 double scale = sqrt(ake_old/ake_new);
3673 for(
int i=0;i<ncoef;i++){
3674 vpsi[i].re *= scale;
3675 vpsi[i].im *= scale;
3683 #ifdef BARRIER_CP_GSPACE_PSIV
3685 contribute(
sizeof(
int),&wehaveours,CkReduction::sum_int,
3686 CkCallback(CkIndex_CP_State_GSpacePlane::allDonePsiV(NULL),thisProxy));
3699 #ifdef _CP_DEBUG_STATEG_VERBOSE_
3700 if(thisIndex.x==0){CkPrintf(
"output %d %d\n",thisIndex.y,cleanExitCalled);}
3703 eesCache *eesData = UeesCacheProxy[thisInstance.proxyOffset].ckLocalBranch ();
3709 int cp_min_opt = sim->cp_min_opt;
3710 int nstates = sim->nstates;
3712 complex *vpsi = gs.packedVelData;
3713 complex *psi = gs.packedPlaneData;
3714 if(cp_min_opt==0){psi=gs.packedPlaneDataScr;}
3716 int ntime = config.maxIter;
3717 if(cp_min_opt==0){ntime-=1;}
3723 #ifdef _CP_DEBUG_COEF_SCREEN_
3724 if(iteration<=ntime){
3725 if(gs.istate_ind==0 || gs.istate_ind==nstates-1){
3726 for(
int i = 0; i < gs.numPoints; i++){
3727 if(k_x[i]==0 && k_y[i]==1 && k_z[i]==4 ){
3728 fprintf(temperScreenFile,
"-------------------------------------------------------------------------------\n");
3729 fprintf(temperScreenFile,
"Iter [%d] Psi[is=%d ka=%d kb=%d kc=%d] : %.15g %.15g\n",
3731 gs.istate_ind+1,k_x[i],k_y[i],k_z[i],psi[i].re,psi[i].im);
3733 double vre=vpsi[i].re;
3734 double vim=vpsi[i].im;
3735 fprintf(temperScreenFile,
"Iter [%d] VPsi[is=%d ka=%d kb=%d kc=%d] : %.15g %.15g\n",
3737 gs.istate_ind+1,k_x[i],k_y[i],k_z[i],vre,vim);
3739 fprintf(temperScreenFile,
"-------------------------------------------------------------------------------\n");
3741 if(k_x[i]==2 && k_y[i]==1 && k_z[i]==3){
3742 double vre=vpsi[i].re;
3743 double vim=vpsi[i].im;
3744 fprintf(temperScreenFile,
"-------------------------------------------------------------------------------\n");
3745 fprintf(temperScreenFile,
"Iter [%d] Psi[is=%d ka=%d kb=%d kc=%d] : %.15g %.15g\n",
3747 gs.istate_ind+1,k_x[i],k_y[i],k_z[i],psi[i].re,psi[i].im);
3749 fprintf(temperScreenFile,
"Iter [%d] VPsi[is=%d ka=%d kb=%d kc=%d] : %.15g %.15g\n",
3751 gs.istate_ind+1,k_x[i],k_y[i],k_z[i],vre,vim);
3753 fprintf(temperScreenFile,
"-------------------------------------------------------------------------------\n");
3764 #ifdef _CP_DEBUG_STATE_GPP_VERBOSE_
3765 CkPrintf(
"GSpace[%d,%d] screenwrite: %d\n",thisIndex.x, thisIndex.y,iteration);
3790 #ifdef _CP_DEBUG_STATE_GPP_VERBOSE_
3791 CkPrintf(
"Received Hart\n");
3799 #ifdef _CP_DEBUG_STATE_GPP_VERBOSE_
3800 CkPrintf(
"Received Enl\n");
3808 #ifdef _CP_DEBUG_STATE_GPP_VERBOSE_
3809 CkPrintf(
"Received Eke\n");
3817 #ifdef _CP_DEBUG_STATE_GPP_VERBOSE_
3818 CkPrintf(
"Received GGA\n");
3826 #ifdef _CP_DEBUG_STATE_GPP_VERBOSE_
3827 CkPrintf(
"Received EEXC\n");
3835 #ifdef _CP_DEBUG_STATE_GPP_VERBOSE_
3836 CkPrintf(
"Received EEXT\n");
3844 #ifdef _CP_DEBUG_STATE_GPP_VERBOSE_
3845 CkPrintf(
"Received EWD\n");
3852 case ENERGY_FICTEKE :
3853 #ifdef _CP_DEBUG_STATE_GPP_VERBOSE_
3854 CkPrintf(
"Received FICTEKE\n");
3861 #ifdef _CP_DEBUG_STATE_GPP_VERBOSE_
3862 CkPrintf(
"Received FMAG\n");
3869 CkAbort(
"unknown energy");
3878 int natm_nl = sim->natm_nl;
3879 if(natm_nl==0){isub++;}
3880 #ifdef _CP_DEBUG_SFNL_OFF_
3881 if(natm_nl!=0){isub++;}
3883 #ifdef _CP_DEBUG_RHO_OFF_
3886 #ifdef _CP_DEBUG_HARTEEXT_OFF_
3887 #ifndef _CP_DEBUG_RHO_OFF_
3892 if(thisInstance.idxU.y>0)
3897 int myid = CkMyPe();
3898 #ifdef _CP_DEBUG_STATE_GPP_VERBOSE_
3899 CkPrintf(
"ecount %d %d %d\n",ecount,NUM_ENERGIES-isub,myid);
3901 if(ecount == NUM_ENERGIES-isub){
3903 estruct.enl = enl_total;
3904 estruct.eke = eke_total;
3905 estruct.eext = eext_total;
3906 estruct.ehart = ehart_total;
3907 estruct.eewald_recip = ewd_total;
3908 estruct.egga = egga_total;
3909 estruct.eexc = eexc_total;
3910 estruct.fictEke = fictEke_total;
3911 estruct.totalElecEnergy = total_energy;
3912 estruct.fmagPsi = fmagPsi_total0;
3913 estruct.iteration_gsp = iteration;
3914 #ifdef _CP_DEBUG_STATE_GPP_VERBOSE_
3915 CkPrintf(
"Bcasting to energygrp %d\n",myid);
3917 UegroupProxy[thisInstance.proxyOffset].updateEnergiesFromGS(estruct,thisInstance);
3936 void testeke(
int ncoef,
complex *psi_g,
int *k_x,
int *k_y,
int *k_z,
int iflag,
int index)
3943 GENERAL_DATA *general_data = GENERAL_DATA::get();
3946 #include "../class_defs/allclass_strip_gen.h"
3947 #include "../class_defs/allclass_strip_cp.h"
3949 double gx, gy, gz, g2;
3950 double *hmati = gencell->hmati;
3951 double ecut = cpcoeffs_info->ecut_psi;
3952 double tpi = 2.0*M_PI;
3965 for(
int i=0; i<1000; i++){eke_i[i]=0.0;}
3969 for(
int i = 0; i < ncoef; i++){
3970 kxmax=(kxmax < k_x[i] ? k_x[i] : kxmax);
3971 kxmin=(kxmin > k_x[i] ? k_x[i] : kxmin);
3974 for(
int i = 0; i < ncoef; i++){
3976 gx = tpi*(k_x[i]*hmati[1] + k_y[i]*hmati[2] + k_z[i]*hmati[3]);
3977 gy = tpi*(k_x[i]*hmati[4] + k_y[i]*hmati[5] + k_z[i]*hmati[6]);
3978 gz = tpi*(k_x[i]*hmati[7] + k_y[i]*hmati[8] + k_z[i]*hmati[9]);
3979 g2 = gx*gx + gy*gy + gz*gz;
3982 double wght_now = 1.0;
3983 if(config.doublePack){
3985 if(k_x[i]==0 && k_y[i]<0){wght_now=0.0;}
3986 if(k_x[i]==0 && k_y[i]==0 && k_z[i]<0){wght_now=0.0;}
3987 if(k_x[i]==0 && k_y[i]==0 && k_z[i]==0){wght_now=1.0;}
3989 eke_i[k_x[i]-kxmin]+=(wght_now*g2)*psi_g[i].getMagSqr();
3990 eke += (wght_now*g2)*psi_g[i].getMagSqr();
3991 norm += (wght_now)*psi_g[i].getMagSqr();
3992 wght_now = (k_x[i]==0 ? 1.0 : wght);
3993 eke2 += (wght_now*g2)*psi_g[i].getMagSqr();
3994 norm2 += (wght_now)*psi_g[i].getMagSqr();
3996 CkPrintf(
"@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
3997 CkPrintf(
"Why the cutoff\n");
3998 CkPrintf(
"@@@@@@@@@@@@@@@@@@@@_error_@@@@@@@@@@@@@@@@@@@@\n");
4013 CkPrintf(
"%.12g %.12g %.12g %.12g: %d : eke\n",eke,eke2,norm,norm2,index);
4048 CkPrintf(
"GSpace[%d,%d] aborting because someone called an RDMA method when it has been turned off\n",thisIndex.x,thisIndex.y);
4049 CkAbort(
"GSpace aborting RDMA setup completion");
4055 #ifdef DEBUG_CP_PAIRCALC_RDMA
4056 std::stringstream dbgStr;
4058 CkPrintf(
"%s : Received RDMA setup confirmation from paircalc. Now have %d handles of %d (%d symm + %d asymm)\n", dbgStr.str().c_str(),
4063 if (token.symmetric)
4069 int chunkSize= gs.numPoints / pcMgr->pcCfg.numChunks;
4070 int offset = token.pcIndex.z * chunkSize;
4071 int dataSize = chunkSize;
4073 if( (pcMgr->pcCfg.numChunks > 1) && (token.pcIndex.z == pcMgr->pcCfg.numChunks-1) )
4074 dataSize += gs.numPoints % pcMgr->pcCfg.numChunks;
4077 if (token.shouldSendLeft)
4080 CmiDirect_assocLocalBuffer(&ourHandle,&(gs.packedPlaneData[offset]),dataSize*
sizeof(
complex));
4082 pcMgr->leftDestinationHandles.push_back(ourHandle);
4088 if (token.symmetric)
4089 CmiDirect_assocLocalBuffer(&ourHandle,&(gs.packedPlaneData[offset]),dataSize*
sizeof(
complex));
4091 CmiDirect_assocLocalBuffer(&ourHandle,&(gs.packedForceData[offset]),dataSize*
sizeof(
complex));
4093 pcMgr->rightDestinationHandles.push_back(ourHandle);
4096 #ifdef DEBUG_CP_PAIRCALC_RDMA
4097 CkPrintf(
"%s : Will RDMA-put %d units of data at an offset of %d units from %p on proc %d to %p on proc %d\n",
4098 dbgStr.str().c_str(), dataSize, offset,
4099 (!token.shouldSendLeft && !token.symmetric)? &(gs.packedForceData) : &(gs.packedPlaneData), ourHandle.senderNode,
4100 ourHandle.recverBuf,ourHandle.recverNode);
4104 if(++gotHandles == numRDMAlinksSymm + numRDMAlinksAsymm)
4106 #ifdef DEBUG_CP_PAIRCALC_RDMA
4107 CkPrintf(
"GSpace[%d,%d] received RDMA setup confirmation from all %d PCs (%d symm + %d asymm) I was expecting. Triggering reduction to indicate end of init phase.\n",
4108 thisIndex.x,thisIndex.y,gotHandles,numRDMAlinksSymm,numRDMAlinksAsymm);
4111 CkCallback cbDoneInit = CkCallback(CkIndex_InstanceController::doneInit(NULL),CkArrayIndex1D(thisInstance.proxyOffset),instControllerProxy);
4112 contribute(
sizeof(
int), &i, CkReduction::sum_int, cbDoneInit, thisInstance.proxyOffset);
4115 #endif // PC_USE_RDMA
4118 #include "gStatePlane.def.h"
void contributeFileOutput()
In this function data is written to files the simpliest way possible.
void unpackFileOutput(GStateOutMsg *msg)
void acceptPairCalcAIDs(pcSetupMsg *msg)
void pup(PUP::er &)
= data
holds the UberIndex and the offset for proxies
void sendRedPsi()
send red psi
void unpackNewPsiV(CkReductionMsg *msg)
void doStpFFTRtoG_Gchare(complex *, complex *, int, int, int, int, RunDescriptor *, int, int)
StatePlane : Gchare : data(gx,gy,z) -> data(gx,gy,gz) : backward ////////////////////////////////////...
void initGStateSlab(GStateSlab *gs, int sizeX, int sizeY, int sizeZ, int gSpaceUnits, int realSpaceUnits, int s_grain, int iplane_ind, int istate_ind, int len_nhc_cp, int num_nhc_cp, int nck_nhc_cp)
= slab initialization helpers
PCCommManager asymmPCmgr
Manages communication with the asymmetric paircalc array.
void readFile()
In this function data is read from files, and sent to the corresponding G-space planes.
void launchOrthoT()
= dynamics triggers send of orthoT to asymm calc when psi is done
void doneRedPsiIntegrate()
When all red psi have arrived, complete their integration.
void setKRange(int, int *, int *, int *)
void completeRDMAhandshake(RDMASetupConfirmationMsg< RDMApair_GSP_PC > *msg)
Gets called from the PairCalc data receivers to confirm the setup of an RDMA link.
void computeCgOverlap()
If minimization : compute lambda matrix (Lagrange multipliers) modify forces using the lambda matrix ...
void unpackRedPsiV(GSRedPsiMsg *msg)
CProxySection_PairCalculator makeOneResultSection_asym(int chunk)
Initialize an array section that is used to reduce the results from the PCs back to the GSP chares...
int istate_ind
My state index.
CProxy_Ortho myOrtho
A proxy for the my ortho chare array so I can interact with it.
CkGroupID mCastGrpId
Multicast manager group that handles many mcast/redns in the code. Grep for info. ...
void doNewPsi()
All Psi have arrived, finish the new Psi process.
void psiCgOvlap(CkReductionMsg *msg)
Entry method to resume execution after computing reduction over all planes and states to form psiCgOv...
void screenOutputPsi(int)
output a few select psi values for post processing validation
A place to collect substep times.
Add type declarations for simulationConstants class (readonly vars) and once class for each type of o...
void sendLambda()
After my CP forces have arrived : sendLambda.
int nfreq_cpintegrate
CPINTEGRATE::CP_integrate_min_STD, CPINTEGRATE::CP_integrate_min_CG.
paircalc::CreationManager returns relevant chare array handles via this msg
CProxySection_PairCalculator makeOneResultSection_sym1(int chunk)
initialize plane and row wise section reduction for psi->gspace
#define IntegrateModForces_
200-300 reserved for paircalculator
int grainSize
The grain size along the states dimensions (plural) (number of states per PC chare) ...
void testeke(int, complex *, int *, int *, int *, int, int)
Test does ReadFile() parse psi(g) and g-vectors properly.
== Index logic for lines of constant x,y in gspace.
void combineForcesGetEke()
void doLambda()
When all the inputs have arrived, complete the lambda computation.
void unpackLambda(CkReductionMsg *msg)
void doneRedPsiVIntegrate()
when all red psi velocities are in, integrate
void unpackRedPsi(GSRedPsiMsg *msg)
=
cp::paircalc::pcConfig pcCfg
Input configurations for the paircalcs.
Some basic data structures and the array map classes are defined here.
int iplane_ind
My plane index.
void sendPsiV()
send psi velocities
CProxySection_PairCalculator makeOneResultSection_sym2(int chunk)
initialize plane and column wise section reduction for psi->gspace
int nfreq_cpnonlocal_eke
CPNONLOCAL::CP_eke_calc.
void readState(int nPacked, complex *arrCP, const char *fromFile, int ibinary_opt, int *nline_tot_ret, int *nplane_ret, int *kx, int *ky, int *kz, int *nx_ret, int *ny_ret, int *nz_ret, int *istrt_lgrp, int *iend_lgrp, int *npts_lgrp, int *nline_lgrp, int iget_decomp, int iget_vstate)
Based on whether RDMA is enabled, the handle type is either the actual handle or just an empty struct...
void unpackNewPsi(CkReductionMsg *msg)
void sendLeftData(int numPoints, complex *ptr, bool psiV)
Starts the forward path work (Psi, Lambda and PsiV cases) by multicasting an entry method call to the...
CProxySection_PairCalculator makeOneResultSection_asym_column(int chunk)
initialize plane and column wise section reduction for lambda->gspace
void launchAtoms()
Atoms are launched by allDoneCpForces when all after ALL planes and states have reported.
void sendFFTData()
Send result to realSpacePlane : perform the transpose Force data cannot be overwritten due to all to ...
Manages communication with a single paircalc array.
A (hopefully) tiny token that is unique to every data sender-receiver pair, and is shared by them dur...
Group Container class : Only allowed chare data classes have data.
void computeEnergies(int p, double d)
Once All chares have completed energy computation and reduction, this storage routine is invoked...
void doNewPsiV()
process all the PsiV data
void sendPsi()
Send the psi out to symm PC.
PCCommManager symmPCmgr
Manages communication with the symmetric paircalc array.
void integrateModForce()
Integrate the forces.
Reply from data receiver to the data sender indicating completion of setup on the receiver side...
int numRDMAlinksSymm
The number of symmetric and asymmetric PCs that communicate with me.
void initGSpace(int, complex *, int, complex *, int, int, int, int, int, int, int)
This method is used to accept the state data from some initializing routine.
void doFFT()
This method is used to start the forward ffts in the CP_State_GSpacePlanes.
void unpackIFFT(GSIFFTMsg *)
Entry Method. This is used to receive data from all the corresponding RealSpacePlanes, upon which the inverse FFTs are triggered.
void processState(int nPacked, int nktot, complex *arrCP, const char *fromFile, int ibinary_opt, int *nline_tot_ret, int *nplane_ret, int *kx, int *ky, int *kz, int *istrt_lgrp, int *iend_lgrp, int *npts_lgrp, int *nline_lgrp, int iget_decomp, int iget_vstate, int iopt, int ny)
void sendRightData(int numPoints, complex *ptr, bool psiV)
Starts the forward path work (along with startPairCalcLeft()) in the asymmetric (Lambda) case...
CP_State_GSpacePlane(int, int, int, int, int, int, UberCollection)
Constructor for GSpacePlane.