49 #include "timeKeeper.decl.h"
52 #include "utility/matrix2file.h"
59 #include <trace-projections.h>
62 #include "src_mathlib/mathlib.h"
63 #include "src_piny_physics_v1.0/include/class_defs/CP_OPERATIONS/class_cporthog.h"
64 #include "src_piny_physics_v1.0/include/class_defs/piny_constants.h"
65 #define PRINTF CkPrintf
68 extern CProxy_TimeKeeper TimeKeeperProxy;
69 extern ComlibInstanceHandle orthoInstance;
83 if(thisIndex.x==0 && thisIndex.y==0)
98 CkPrintf(
"[%d,%d] Ortho::collect_error \n", thisIndex.x, thisIndex.y);
100 CmiAssert(thisIndex.x == 0 && thisIndex.y == 0);
102 double error = *((
double *) msg->getData());
105 if(error > invsqr_tolerance && iterations < invsqr_max_iter){
107 if(config.useOrthoSection)
110 multiproxy.do_iteration(tmsg);
113 thisProxy.do_iteration();
116 if(iterations>=config.invsqr_max_iter)
118 CkPrintf(
"Ortho reached max_iter %d with residual of %.10g, which is greater than invsqr_tolerance %.10g!\nEither increase invsqr_max_iter or invsqr_tolerance.\n If this is not the first step in a GenWave try lowering your timestep\n",config.invsqr_max_iter, error, config.invsqr_tolerance);
119 CkAbort(
"invsqr_tolerance not met within invsqr_max_iter\n");
123 if(config.useOrthoSection)
126 multiproxy.collect_results(tmsg);
129 thisProxy.collect_results();
142 CkPrintf(
"[%d,%d] Ortho::start_calc \n", thisIndex.x, thisIndex.y);
144 #ifdef _CP_SUBSTEP_TIMING_
147 double ostart=CmiWallTimer();
148 CkCallback cb(CkIndex_TimeKeeper::collectStart(NULL),0,TimeKeeperProxy);
149 contribute(
sizeof(
double),&ostart,CkReduction::min_double, cb , timeKeep);
153 internalType *S = (internalType*) msg->getData();
155 for(
int i=0;i<msg->getSize()/
sizeof(internalType) ;i++)
156 CkAssert( isfinite(S[i]) );
166 CkReductionMsg *omsg=CkReductionMsg::buildNew(msg->getSize(),msg->getData());
180 internalType *dest= (internalType*) omsg->getData();;
182 for(
int i = 0; i < m; i++)
183 for(
int j = 0; j < n; j++)
184 dest[j * m + i] = S[i *n + j];
186 thisProxy(thisIndex.y,thisIndex.x).start_calc(omsg);
193 else if((pc.x==pc.y) && (thisIndex.x > thisIndex.y))
198 #ifdef _CP_ORTHO_DUMP_SMAT_
199 dumpMatrix(
"smat",(
double *)S, m, n, numGlobalIter, thisIndex.x * cfg.
grainSize, thisIndex.y * cfg.
grainSize, 0,
false);
203 #ifdef _CP_ORTHO_DEBUG_COMPARE_SMAT_
206 savedsmat=
new double[m*n];
207 loadMatrix(
"smat",(
double *)savedsmat, m, n,numGlobalIter, thisIndex.x*cfg.
grainSize,thisIndex.y*cfg.
grainSize,0,
false);
209 for(
int i=0;i<m*n;i++)
211 if(fabs(S[i]-savedsmat[i])>0.0001)
213 fprintf(stderr,
"O [%d,%d] %d element ortho %.10g not %.10g\n",thisIndex.x, thisIndex.y,i, S[i], savedsmat[i]);
216 CkAssert(fabs(S[i]-savedsmat[i])<0.0001);
217 CkAssert(fabs(S[i]-savedsmat[i])<0.0001);
220 if(config.UberMmax ==1)
222 for(
int i = 0; i < m * n; i++){
228 for(
int i = 0; i < m * n; i++){
232 memset(A, 0,
sizeof(internalType) * m * n);
236 if(thisIndex.x == thisIndex.y){
237 for(
int i = 0; i < m; i++){
242 if(cfg.
isDynamics && (numGlobalIter % config.toleranceInterval)==0 && numGlobalIter>1){
244 contribute(
sizeof(
double),&max, CkReduction::max_double,
245 CkCallback(CkIndex_Ortho::maxCheck(NULL),CkArrayIndex2D(0,0),
246 thisProxy.ckGetArrayID()));
265 #ifdef _CP_DEBUG_TMAT_
276 if (numGlobalIter <= config.maxIter+1)
293 if(toleranceCheckOrthoT)
301 { orthoT =
new internalType[m * n];}
302 CmiMemcpy(orthoT,A,m*n*
sizeof(internalType));
307 #ifdef _CP_ORTHO_DUMP_TMAT_
308 dumpMatrix(
"tmat",(
double *)A, m, n,numGlobalIter,thisIndex.x * cfg.
grainSize, thisIndex.y * cfg.
grainSize, 0,
false);
311 #ifdef _CP_ORTHO_DEBUG_COMPARE_TMAT_
314 savedtmat=
new double[m*n];
315 loadMatrix(
"tmat",(
double *)savedtmat, m, n, numGlobalIter, thisIndex.x * cfg.
grainSize, thisIndex.y*cfg.
grainSize,0,
false);
317 for(
int i=0;i<m*n;i++)
319 if(fabs(A[i]-savedtmat[i])>0.0001)
321 fprintf(stderr,
"O [%d,%d] %d element ortho %.10g not %.10g\n",thisIndex.x, thisIndex.y,i, A[i], savedtmat[i]);
324 CkAssert(fabs(A[i]-savedtmat[i])<0.0001);
325 CkAssert(fabs(A[i]-savedtmat[i])<0.0001);
337 else if(thisIndex.y < thisIndex.x)
339 else if(thisIndex.y > thisIndex.x && config.phantomSym)
353 #ifdef _CP_ORTHO_DUMP_TMAT_
354 dumpMatrix(
"tmatT",(
double *)A, m, n,numGlobalIter,thisIndex.x * cfg.
grainSize, thisIndex.y * cfg.
grainSize, 0,
false);
360 #ifdef _CP_SUBSTEP_TIMING_
363 double oend=CmiWallTimer();
364 CkCallback cb(CkIndex_TimeKeeper::collectEnd(NULL),0,TimeKeeperProxy);
365 contribute(
sizeof(
double),&oend,CkReduction::max_double, cb , timeKeep);
398 double tolMax=fabs(((
double *) msg->getData())[0]);
400 CkPrintf(
"S matrix tolerance = %f while maxTolerance = %f\n",tolMax, cfg.
maxTolerance);
404 toleranceCheckOrthoT=
false;
405 thisProxy.do_iteration();
408 toleranceCheckOrthoT=
true;
409 CkPrintf(
"recalculating PsiV due to tolerance failure \n");
427 toleranceCheckOrthoT=
true;
441 internalType *lambda = (internalType*)msg->getData();
442 int lambdaCount = msg->getSize()/
sizeof(internalType);
444 #ifdef _CP_ORTHO_DUMP_LMAT_
445 dumpMatrix(
"lmat",lambda, m, n, numGlobalIter, thisIndex.x * cfg.
grainSize, thisIndex.y * cfg.
grainSize, 0,
false);
448 #ifdef _CP_ORTHO_DEBUG_COMPARE_LMAT_
451 savedlmat=
new double[m*n];
452 loadMatrix(
"lmat",(
double *)savedlmat, m, n,numGlobalIter, thisIndex.x*cfg.
grainSize,thisIndex.y*cfg.
grainSize,0,
false);
454 for(
int i=0;i<m*n;i++)
456 if(fabs(lambda[i]-savedlmat[i])>0.0001)
458 fprintf(stderr,
"O [%d,%d] %d element ortho %.10g not %.10g\n",thisIndex.x, thisIndex.y,i, lambda[i], savedlmat[i]);
461 CkAssert(fabs(lambda[i]-savedlmat[i])<0.0001);
462 CkAssert(fabs(lambda[i]-savedlmat[i])<0.0001);
469 if(toleranceCheckOrthoT)
471 if(thisIndex.x!=thisIndex.y)
473 bzero(orthoT,m*n*
sizeof(
double));
486 toleranceCheckOrthoT=
false;
489 ortho=
new internalType[m*n];
490 CmiMemcpy(ortho,orthoT,m*n*
sizeof(internalType));
491 matA1.multiply(1, 0, orthoT, Ortho::gamma_done_cb, (
void*)
this,
492 thisIndex.x, thisIndex.y);
493 matB1.multiply(1, 0, lambda, Ortho::gamma_done_cb, (
void*)
this,
494 thisIndex.x, thisIndex.y);
495 matC1.multiply(1, 0, B, Ortho::gamma_done_cb, (
void*)
this,
496 thisIndex.x, thisIndex.y);
504 CkAssert(lambdaCount==m*n);
505 for(
int i=0; i<m*n; i++)
506 CkAssert( isfinite(lambda[i]) );
511 #ifdef _CP_DEBUG_ORTHO_VERBOSE_
512 if(thisIndex.x==0 && thisIndex.y==0)
513 CkPrintf(
"[%d,%d] finishing asymm\n",thisIndex.x, thisIndex.y);
535 if( (thisIndex.x==0 && thisIndex.y==0) && (config.useOrthoSection || config.useOrthoSectionRed))
539 multiproxy = CProxySection_Ortho::ckNew(thisProxy.ckGetArrayID(), 0, numOrtho-1,1, 0, numOrtho-1, 1);
541 if(config.useOrthoSectionRed)
543 CProxySection_Ortho rproxy = multiproxy;
544 CkMulticastMgr *mcastGrp = CProxy_CkMulticastMgr(oRedGID).ckLocalBranch();
545 CkAssert(mcastGrp != NULL);
546 rproxy.ckSectionDelegate(mcastGrp);
549 rproxy.orthoCookieinit(redMsg);
552 if(config.useOrthoSection)
555 if( config.useCommlib && config.useOrthoDirect)
558 ComlibAssociateProxy(orthoInstance,multiproxy);
563 CkMulticastMgr *mcastGrp = CProxy_CkMulticastMgr(
oMCastGID).ckLocalBranch();
564 CkAssert(mcastGrp != NULL);
565 multiproxy.ckSectionDelegate(mcastGrp);
575 CkCallback orthoCB(CkIndex_Ortho::start_calc(NULL), thisProxy(thisIndex.x, thisIndex.y));
577 CkCallback orthoLambdaCB(CkIndex_Ortho::acceptSectionLambda(NULL), thisProxy(thisIndex.x, thisIndex.y));
595 #ifdef _CP_ORTHO_DUMP_GMAT_
596 dumpMatrix(
"gmat",(
double *)B, m, n, numGlobalIter, thisIndex.x * cfg.
grainSize, thisIndex.y * cfg.
grainSize, 0,
false);
602 #ifdef _CP_ORTHO_DEBUG_COMPARE_GMAT_
605 savedgmat=
new double[m*n];
606 loadMatrix(
"gmat",(
double *)savedgmat, m, n,numGlobalIter,thisIndex.x*cfg.
grainSize,thisIndex.y*cfg.
grainSize,0,
false);
608 for(
int i=0;i<m*n;i++)
610 if(fabs(S[i]-savedgmat[i])>0.0001)
612 fprintf(stderr,
"O [%d,%d] %d element ortho %.10g not %.10g\n",thisIndex.x, thisIndex.y,i, S[i], savedgmat[i]);
615 CkAssert(fabs(S[i]-savedgmat[i])<0.0001);
616 CkAssert(fabs(S[i]-savedgmat[i])<0.0001);
619 if(config.PCCollectTiles)
648 CkArrayID _step2Helper,
649 int timekeep, CkGroupID _oMCastGID, CkGroupID _oRedGID):
651 oMCastGID(_oMCastGID), oRedGID(_oRedGID),
652 step2Helper(_step2Helper)
656 parallelStep2=config.useOrthoHelpers;
658 invsqr_max_iter=config.invsqr_max_iter;
659 invsqr_tolerance=config.invsqr_tolerance;
660 if(invsqr_tolerance==0)
662 if(invsqr_max_iter==0)
663 invsqr_max_iter=INVSQR_MAX_ITER;
664 this->matA1 = _matA1; this->matB1 = _matB1; this->matC1 = _matC1;
665 this->matA2 = _matA2; this->matB2 = _matB2; this->matC2 = _matC2;
666 this->matA3 = _matA3; this->matB3 = _matB3; this->matC3 = _matC3;
670 if(thisIndex.x==borderOrtho)
671 this->m = _m + remOrtho;
674 if(thisIndex.y==borderOrtho)
675 this->n = _n + remOrtho;
678 A =
new internalType[this->m * this->n];
679 B =
new internalType[this->m * this->n];
680 C =
new internalType[this->m * this->n];
681 tmp_arr =
new internalType[this->m * this->n];
688 setMigratable(
false);
690 toleranceCheckOrthoT=
false;
695 #ifdef _CP_ORTHO_DEBUG_COMPARE_GMAT_
698 #ifdef _CP_ORTHO_DEBUG_COMPARE_SMAT_
701 #ifdef _CP_ORTHO_DEBUG_COMPARE_LMAT_
705 #ifdef _CP_ORTHO_DEBUG_COMPARE_TMAT_
719 CkPrintf(
"[%d,%d] Ortho::do_iteration \n", thisIndex.x, thisIndex.y);
722 memset(C, 0, m * n *
sizeof(internalType));
723 if(thisIndex.x == thisIndex.y){
724 for(
int i = 0; i < n; i++)
727 #ifdef _CP_ORTHO_DUMP_SMAT_
728 dumpMatrix(
"step1:A:",(
double *)A, m, n, iterations, thisIndex.x * cfg.
grainSize, thisIndex.y * cfg.
grainSize, 0,
false);
729 dumpMatrix(
"step1:B:",(
double *)B, m, n, iterations, thisIndex.x * cfg.
grainSize, thisIndex.y * cfg.
grainSize, 0,
false);
730 dumpMatrix(
"step1:C:",(
double *)C, m, n, iterations, thisIndex.x * cfg.
grainSize, thisIndex.y * cfg.
grainSize, 0,
false);
734 thisIndex.x, thisIndex.y);
735 CmiNetworkProgress();
737 thisIndex.x, thisIndex.y);
738 CmiNetworkProgress();
740 thisIndex.x, thisIndex.y);
754 if(config.useOrthoHelpers)
758 omsg->init(m*n, B,C,0.5, 0.5, 0.5);
764 #ifdef _CP_ORTHO_DUMP_SMAT_
765 dumpMatrix(
"step2:A:",(
double *)B, m, n, iterations, thisIndex.x * cfg.
grainSize, thisIndex.y * cfg.
grainSize, 0,
false);
766 dumpMatrix(
"step2:B:",(
double *)C, m, n, iterations, thisIndex.x * cfg.
grainSize, thisIndex.y * cfg.
grainSize, 0,
false);
770 matA2.multiply(0.5, 0, B, Ortho::step_3_cb, (
void*)
this,
771 thisIndex.x, thisIndex.y);
772 matB2.multiply(0.5, 0, C, Ortho::step_3_cb, (
void*)
this,
773 thisIndex.x, thisIndex.y);
774 matC2.multiply(0.5, 0, tmp_arr, Ortho::step_3_cb, (
void*)
this,
775 thisIndex.x, thisIndex.y);
787 CmiMemcpy(B, A, m * n *
sizeof(internalType));
788 #ifdef _CP_ORTHO_DUMP_SMAT_
789 dumpMatrix(
"step3:A:",(
double *)C, m, n, iterations, thisIndex.x * cfg.
grainSize, thisIndex.y * cfg.
grainSize, 0,
false);
790 dumpMatrix(
"step3:B:",(
double *)B, m, n, iterations, thisIndex.x * cfg.
grainSize, thisIndex.y * cfg.
grainSize, 0,
false);
791 dumpMatrix(
"step3:C:",(
double *)A, m, n, iterations, thisIndex.x * cfg.
grainSize, thisIndex.y * cfg.
grainSize, 0,
false);
794 matA3.multiply(0.5, 0, C, Ortho::tol_cb, (
void*)
this,
795 thisIndex.x, thisIndex.y);
796 matB3.multiply(0.5, 0, B, Ortho::tol_cb, (
void*)
this,
797 thisIndex.x, thisIndex.y);
798 matC3.multiply(0.5, 0, A, Ortho::tol_cb, (
void*)
this,
799 thisIndex.x, thisIndex.y);
813 for(
int i = 0; i < m * n; i++)
815 CkAssert( isfinite(A[i]) );
816 CkAssert( isfinite(B[i]) );
820 internalType ret = 0;
821 for(
int i = 0; i < m * n; i++){
822 internalType tmp = B[i] - A[i];
825 internalType *tmp = B;
828 if(config.useOrthoSectionRed)
830 CkCallback mycb(CkIndex_Ortho::collect_error(NULL), thisProxy(0, 0));
832 mcastGrp->contribute(
sizeof(internalType), &ret, CkReduction::sum_double, orthoCookie, mycb);
835 contribute(
sizeof(internalType), &ret, CkReduction::sum_double);
844 ArrayElement2D::pup(p);
855 p | toleranceCheckOrthoT;
858 if(p.isUnpacking() && thisIndex.x==0 &&thisIndex.y==0)
863 if(thisIndex.x==0 && thisIndex.y==0)
869 A =
new internalType[m * n];
870 B =
new internalType[m * n];
871 C =
new internalType[m * n];
872 tmp_arr =
new internalType[m * n];
874 PUParray(p,A, m * n);
875 PUParray(p,B, m * n);
876 PUParray(p,C, m * n);
877 PUParray(p,tmp_arr, m * n);
880 #include "orthoMap.h"
881 #include "ortho.def.h"
CkIndex2D computePCStateIndices(const int orthoX, const int orthoY)
Identify the state indices of the Paircalc chares this ortho chare needs to talk to.
void do_iteration(void)
Triggers the matrix multiplies in step 1 of an ortho iteration.
int grainSize
The block size for parallelization.
PCSectionManager asymmSectionMgr
Section of asymmetric PC chare array used by an Ortho chare.
virtual void pup(PUP::er &p)
pack/unpack method
int numStates
The number of states in the simulation (the dimension of the input square matrix) ...
CProxy_OrthoHelper step2Helper
The proxy of the step 2 helper chare array.
PCSectionManager symmSectionMgr
Section of symmetric PC chare array used by an Ortho chare.
static void step_2_cb(void *obj)
Static methods used as callbacks. Could be replaced by CkCallbacks.
double maxTolerance
The tolerance threshold for the S->T iterations in Ortho at which to trigger a PsiV update...
Ortho()
Default empty constructor. For?
void resumeV(CkReductionMsg *msg)
Once all GSpaceDriver chares are notified, they resume Ortho execution via a redn broadcast to this m...
void step_2()
Triggers step 2, and optionally step 3 (if ortho helpers are being used)
Configuration settings for the ortho world.
void collect_results(void)
Computes walltimes, prints simulation status msgs and calls resume() if more openatom iterations are ...
void print_results(void)
Dumps the T matrix to an appropriately named file.
void gamma_done()
Used in dynamics, to accept computed gamma and send it to the asymm PC instance. Also sends T if it h...
void step_3()
Triggers step 3 in the S->T process.
void sendMatrix(int n, internalType *ptr1, internalType *ptr2, int orthoX, int orthoY, int actionType, int priority)
Used to send OrthoT to the asymm instance. Replaces sendMatrix()
void sendOrthoTtoAsymm()
Used in dynamics, to send the results of S->T to the asymm paircalc instance which will use them...
bool isDynamics
Is this a minimization or dynamics run.
void collect_error(CkReductionMsg *msg)
Computes the RMS error and either launches the next ortho iteration (if needed) or calls collect_resu...
void tolerance_check()
Computes square of the residuals and contributes to a reduction rooted at Ortho(0,0)::collect_error()
#define INVSQR_TOLERANCE
<
Made by Eric Bohm Login bohm@localhost.localdomain
CkGroupID orthomCastGrpID
The multicast and reduction groups that handle comm.
void maxCheck(CkReductionMsg *msg)
Called on ortho(0,0). Checks if PsiV update is needed based on the max deviation in S received via a ...
void sendResults(int n, internalType *ptr1, internalType *ptr2, int orthoX, int orthoY, int actionType, int priority)
Sends out the results to the paircalc section. Replaces finishPairCalcSection()
void makeSections(const pc::pcConfig &cfgSymmPC, const pc::pcConfig &cfgAsymmPC, CkArrayID symAID, CkArrayID asymAID)
Trigger the creation of appropriate sections of paircalcs to talk to. Also setup internal comm sectio...
CkGroupID oMCastGID
Group IDs for the multicast manager groups.
double array_diag_max(int sizem, int sizen, internalType *array)
S should be equal to 2I. This returns max value of deviation from that in this ortho's portion of the...
CkCallback uponToleranceFailure
Callback to notify bubble owner (GSpace) that a tolerance update is needed.
void acceptSectionLambda(CkReductionMsg *msg)
Dumb structure that holds all the configuration inputs required for paircalc instantiation, functioning and interaction.
void resume()
Sends results (T matrix) to the symm PCs (and also the asymms if this is dynamics) ...
void start_calc(CkReductionMsg *msg)
Symmetric PCs contribute data that is summed via this reduction to deposit a portion of the S matrix ...
void init(const CkIndex2D orthoIdx, const pc::pcConfig &pcCfg, CkArrayID pcAID, CkGroupID oMCastGID, CkGroupID oRedGID)
An initializer method that fills this with data.
int msgPriority
The priority to use for messages to PC.
void setupArraySection(CkCallback cb, bool arePhantomsOn, bool useComlibForOrthoToPC)
Creates a paircalc array section given the necessary info. Replaces initOneRedSect() ...
Ortho is decomposed by orthoGrainSize.