00001
00065
00066 #include "ckcheckpoint.h"
00067 #include "ck.h"
00068 #include "trace.h"
00069 #include "ckrdma.h"
00070 #include "CkCheckpoint.decl.h"
00071 #include "ckmulticast.h"
00072 #include <sstream>
00073 #include <limits.h>
00074 #include "spanningTree.h"
00075 #if CMK_CHARMPY
00076 #include "GreedyRefineLB.h"
00077 #endif
00078
00079 #if CMK_CUDA
00080 #include "hapi_impl.h"
00081 #endif
00082
00083 void CkRestartMain(const char* dirname, CkArgMsg* args);
00084
00085 #define DEBUGF(x) //CmiPrintf x;
00086
00087 #define CMK_WITH_WARNINGS 0
00088
00089 #include "TopoManager.h"
00090
00091 UChar _defaultQueueing = CK_QUEUEING_FIFO;
00092
00093 UInt _printCS = 0;
00094 UInt _printSS = 0;
00095
00103 UInt _numExpectInitMsgs = 0;
00109 UInt _numInitMsgs = 0;
00115 CksvDeclare(UInt,_numInitNodeMsgs);
00116
00117 #if CMK_ONESIDED_IMPL
00118 UInt numZerocopyROops;
00119 UInt curROIndex;
00120 NcpyROBcastAckInfo *roBcastAckInfo;
00121 int _roRdmaDoneHandlerIdx;
00122 CksvDeclare(int, _numPendingRORdmaTransfers);
00123 #endif
00124
00125 int _infoIdx;
00126 int _charmHandlerIdx;
00127 int _initHandlerIdx;
00128 int _roRestartHandlerIdx;
00129 int _bocHandlerIdx;
00130 int _qdHandlerIdx;
00131 int _qdCommHandlerIdx;
00132 int _triggerHandlerIdx;
00133 bool _mainDone = false;
00134 CksvDeclare(bool, _triggersSent);
00135
00136 CkOutStream ckout;
00137 CkErrStream ckerr;
00138 CkInStream ckin;
00139
00140 CkpvDeclare(void*, _currentChare);
00141 CkpvDeclare(int, _currentChareType);
00142 CkpvDeclare(CkGroupID, _currentGroup);
00143 CkpvDeclare(void*, _currentNodeGroupObj);
00144 CkpvDeclare(CkGroupID, _currentGroupRednMgr);
00145 CkpvDeclare(GroupTable*, _groupTable);
00146 CkpvDeclare(GroupIDTable*, _groupIDTable);
00147 CkpvDeclare(CmiImmediateLockType, _groupTableImmLock);
00148 CkpvDeclare(UInt, _numGroups);
00149
00150 CkpvDeclare(CkCoreState *, _coreState);
00151
00152 CksvDeclare(UInt, _numNodeGroups);
00153 CksvDeclare(GroupTable*, _nodeGroupTable);
00154 CksvDeclare(GroupIDTable, _nodeGroupIDTable);
00155 CksvDeclare(CmiImmediateLockType, _nodeGroupTableImmLock);
00156 CksvDeclare(CmiNodeLock, _nodeLock);
00157 CksvStaticDeclare(PtrVec*,_nodeBocInitVec);
00158 CkpvDeclare(int, _charmEpoch);
00159
00160 CkpvDeclare(bool, _destroyingNodeGroup);
00161
00162
00163 CkpvDeclare(Stats*, _myStats);
00164 CkpvDeclare(MsgPool*, _msgPool);
00165
00166 CkpvDeclare(_CkOutStream*, _ckout);
00167 CkpvDeclare(_CkErrStream*, _ckerr);
00168
00169 CkpvStaticDeclare(int, _numInitsRecd);
00170 CkpvStaticDeclare(bool, _initdone);
00171 CkpvStaticDeclare(PtrQ*, _buffQ);
00172 CkpvStaticDeclare(PtrVec*, _bocInitVec);
00173
00174
00175 extern int userDrivenMode;
00176 extern void _libExitHandler(envelope *env);
00177 extern int _libExitHandlerIdx;
00178 CpvCExtern(int,interopExitFlag);
00179 void StopInteropScheduler();
00180
00181 #if CMK_SHRINK_EXPAND
00182
00183 int _ROGroupRestartHandlerIdx;
00184 const char* _shrinkexpand_basedir;
00185 #endif
00186
00187 #if CMK_FAULT_EVAC
00188 CpvExtern(char *, _validProcessors);
00189 CkpvDeclare(char ,startedEvac);
00190 #endif
00191
00192 int _exitHandlerIdx;
00193
00194 #if CMK_WITH_STATS
00195 static Stats** _allStats = 0;
00196 #endif
00197 static bool _exitStarted = false;
00198 static int _exitcode;
00199
00200 static InitCallTable _initCallTable;
00201
/*
 * _STATS_ON(x): turn on the statistics flag `x`.
 *
 * With CMK_WITH_STATS the flag is set to 1. Without it the flag is left
 * untouched and PE 0 prints a notice instead. The notice variant is wrapped in
 * do { } while (0) so the macro behaves as a single statement (safe in an
 * unbraced if/else, and the caller's trailing ';' is consumed cleanly).
 */
#if CMK_WITH_STATS
#define _STATS_ON(x) ((x) = 1)
#else
#define _STATS_ON(x) \
  do { \
    if (CkMyPe()==0) \
      CmiPrintf("stats unavailable in optimized version. ignoring...\n"); \
  } while (0)
#endif
00208
00209
00210 typedef void (*CkFtFn)(const char *, CkArgMsg *);
00211 static CkFtFn faultFunc = NULL;
00212 static char* _restartDir;
00213
00214 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
00215 int teamSize=1;
00216 int chkptPeriod=1000;
00217 bool fastRecovery = false;
00218 int parallelRecovery = 1;
00219 extern int BUFFER_TIME;
00220 #endif
00221
00222
00223 extern bool killFlag;
00224
00225 extern char *killFile;
00226
00227 void readKillFile();
00228 #if CMK_MESSAGE_LOGGING
00229
00230 extern bool diskCkptFlag;
00231 #endif
00232
00233 int _defaultObjectQ = 0;
00234 bool _ringexit = 0;
00235 int _ringtoken = 8;
00236 extern int _messageBufferingThreshold;
00237
00238 #if CMK_FAULT_EVAC
00239 static bool _raiseEvac=0;
00240 static char *_raiseEvacFile;
00241 void processRaiseEvacFile(char *raiseEvacFile);
00242 #endif
00243
00244 extern bool useNodeBlkMapping;
00245
00246 extern int quietMode;
00247 extern int quietModeRequested;
00248
00249
00250
00251
00252
00253
00254 std::set<std::string> _optSet;
00255 void _registerCommandLineOpt(const char* opt) {
00256
00257
00258 if (CkMyPe() == 0) {
00259 _optSet.insert(opt);
00260 }
00261 }
00262
00263 static inline void _parseCommandLineOpts(char **argv)
00264 {
00265 if (CmiGetArgFlagDesc(argv,"+cs", "Print extensive statistics at shutdown"))
00266 _STATS_ON(_printCS);
00267 if (CmiGetArgFlagDesc(argv,"+ss", "Print summary statistics at shutdown"))
00268 _STATS_ON(_printSS);
00269 if (CmiGetArgFlagDesc(argv,"+fifo", "Default to FIFO queuing"))
00270 _defaultQueueing = CK_QUEUEING_FIFO;
00271 if (CmiGetArgFlagDesc(argv,"+lifo", "Default to LIFO queuing"))
00272 _defaultQueueing = CK_QUEUEING_LIFO;
00273 if (CmiGetArgFlagDesc(argv,"+ififo", "Default to integer-prioritized FIFO queuing"))
00274 _defaultQueueing = CK_QUEUEING_IFIFO;
00275 if (CmiGetArgFlagDesc(argv,"+ilifo", "Default to integer-prioritized LIFO queuing"))
00276 _defaultQueueing = CK_QUEUEING_ILIFO;
00277 if (CmiGetArgFlagDesc(argv,"+bfifo", "Default to bitvector-prioritized FIFO queuing"))
00278 _defaultQueueing = CK_QUEUEING_BFIFO;
00279 if (CmiGetArgFlagDesc(argv,"+blifo", "Default to bitvector-prioritized LIFO queuing"))
00280 _defaultQueueing = CK_QUEUEING_BLIFO;
00281 if (CmiGetArgFlagDesc(argv,"+objq", "Default to use object queue for every obejct"))
00282 {
00283 #if CMK_OBJECT_QUEUE_AVAILABLE
00284 _defaultObjectQ = 1;
00285 if (CkMyPe()==0)
00286 CmiPrintf("Charm++> Create object queue for every Charm object.\n");
00287 #else
00288 CmiAbort("Charm++> Object queue not enabled, recompile Charm++ with CMK_OBJECT_QUEUE_AVAILABLE defined to 1.");
00289 #endif
00290 }
00291
00292 #if CMK_SHRINK_EXPAND
00293 if (!CmiGetArgStringDesc(argv, "+shrinkexpand_basedir", (char **)&_shrinkexpand_basedir,
00294 "Checkpoint directory used for shrink-expand (defaults to /dev/shm)"))
00295 # if defined __APPLE__
00296 _shrinkexpand_basedir = "/tmp";
00297 # else
00298 _shrinkexpand_basedir = "/dev/shm";
00299 # endif
00300 #endif
00301
00302 if(CmiGetArgString(argv,"+restart",&_restartDir))
00303 faultFunc = CkRestartMain;
00304 #if __FAULT__
00305 if (CmiGetArgIntDesc(argv,"+restartaftercrash",&CpvAccess(_curRestartPhase),"restarting this processor after a crash")){
00306 # if CMK_MEM_CHECKPOINT
00307 faultFunc = CkMemRestart;
00308 # endif
00309 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
00310 faultFunc = CkMlogRestart;
00311 #endif
00312 CmiPrintf("[%d] Restarting after crash \n",CmiMyPe());
00313 }
00314 #if CMK_MESSAGE_LOGGING
00315
00316 if (CmiGetArgFlagDesc(argv, "+ftc_disk", "Disk Checkpointing")) {
00317 diskCkptFlag = true;
00318 }
00319 #endif
00320
00321 if(CmiGetArgStringDesc(argv,"+killFile", &killFile,"Generates SIGKILL on specified processors")){
00322 if(faultFunc == NULL){
00323
00324 killFlag = true;
00325 if(CmiMyPe() == 0){
00326 printf("[%d] killFlag set to true for file %s\n",CkMyPe(),killFile);
00327 }
00328 }
00329 }
00330 #endif
00331
00332
00333 if (CmiGetArgIntDesc(argv,"+ringexit",&_ringtoken, "Program exits in a ring fashion"))
00334 {
00335 _ringexit = true;
00336 if (CkMyPe()==0)
00337 CkPrintf("Charm++> Program shutdown in token ring (%d).\n", _ringtoken);
00338 if (_ringtoken > CkNumPes()) _ringtoken = CkNumPes();
00339 }
00340 #if CMK_FAULT_EVAC
00341
00342 if(CmiGetArgStringDesc(argv,"+raiseevac", &_raiseEvacFile,"Generates processor evacuation on random processors")){
00343 _raiseEvac = 1;
00344 }
00345 #endif
00346 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
00347 if(!CmiGetArgIntDesc(argv,"+teamSize",&teamSize,"Set the team size for message logging")){
00348 teamSize = 1;
00349 }
00350 if(!CmiGetArgIntDesc(argv,"+chkptPeriod",&chkptPeriod,"Set the checkpoint period for the message logging fault tolerance algorithm in seconds")){
00351 chkptPeriod = 100;
00352 }
00353 if(CmiGetArgIntDesc(argv,"+fastRecovery", ¶llelRecovery, "Parallel recovery with message logging protocol")){
00354 fastRecovery = true;
00355 }
00356 #endif
00357
00358 if (!CmiGetArgIntDesc(argv, "+messageBufferingThreshold",
00359 &_messageBufferingThreshold,
00360 "Message size above which the runtime will buffer messages directed at unlocated array elements")) {
00361 _messageBufferingThreshold = INT_MAX;
00362 }
00363
00364
00365 _isAnytimeMigration = true;
00366 if (CmiGetArgFlagDesc(argv,"+noAnytimeMigration","The program does not require support for anytime migration")) {
00367 _isAnytimeMigration = false;
00368 }
00369
00370 _isNotifyChildInRed = true;
00371 if (CmiGetArgFlagDesc(argv,"+noNotifyChildInReduction","The program has at least one element per processor for each charm array created")) {
00372 _isNotifyChildInRed = false;
00373 }
00374
00375 _isStaticInsertion = false;
00376 if (CmiGetArgFlagDesc(argv,"+staticInsertion","Array elements are only inserted at construction")) {
00377 _isStaticInsertion = true;
00378 }
00379
00380 useNodeBlkMapping = false;
00381 if (CmiGetArgFlagDesc(argv,"+useNodeBlkMapping","Array elements are block-mapped in SMP-node level")) {
00382 useNodeBlkMapping = true;
00383 }
00384
00385 #if ! CMK_WITH_CONTROLPOINT
00386
00387 if( CmiGetArgFlag(argv,"+CPSamplePeriod") ||
00388 CmiGetArgFlag(argv,"+CPSamplePeriodMs") ||
00389 CmiGetArgFlag(argv,"+CPSchemeRandom") ||
00390 CmiGetArgFlag(argv,"+CPExhaustiveSearch") ||
00391 CmiGetArgFlag(argv,"+CPAlwaysUseDefaults") ||
00392 CmiGetArgFlag(argv,"+CPSimulAnneal") ||
00393 CmiGetArgFlag(argv,"+CPCriticalPathPrio") ||
00394 CmiGetArgFlag(argv,"+CPBestKnown") ||
00395 CmiGetArgFlag(argv,"+CPSteering") ||
00396 CmiGetArgFlag(argv,"+CPMemoryAware") ||
00397 CmiGetArgFlag(argv,"+CPSimplex") ||
00398 CmiGetArgFlag(argv,"+CPDivideConquer") ||
00399 CmiGetArgFlag(argv,"+CPLDBPeriod") ||
00400 CmiGetArgFlag(argv,"+CPLDBPeriodLinear") ||
00401 CmiGetArgFlag(argv,"+CPLDBPeriodQuadratic") ||
00402 CmiGetArgFlag(argv,"+CPLDBPeriodOptimal") ||
00403 CmiGetArgFlag(argv,"+CPDefaultValues") ||
00404 CmiGetArgFlag(argv,"+CPGatherAll") ||
00405 CmiGetArgFlag(argv,"+CPGatherMemoryUsage") ||
00406 CmiGetArgFlag(argv,"+CPGatherUtilization") ||
00407 CmiGetArgFlag(argv,"+CPSaveData") ||
00408 CmiGetArgFlag(argv,"+CPNoFilterData") ||
00409 CmiGetArgFlag(argv,"+CPLoadData") ||
00410 CmiGetArgFlag(argv,"+CPDataFilename") )
00411 {
00412 CkAbort("You specified a control point command line argument, but compiled charm++ without control point support.\n");
00413 }
00414 #endif
00415
00416 }
00417
00418 static void _bufferHandler(void *msg)
00419 {
00420 DEBUGF(("[%d] _bufferHandler called.\n", CkMyPe()));
00421 CkpvAccess(_buffQ)->enq(msg);
00422 }
00423
00424 static void _discardHandler(envelope *env)
00425 {
00426
00427
00428 DEBUGF(("[%d] _discardHandler called.\n", CkMyPe()));
00429 #if CMK_MEM_CHECKPOINT
00430
00431 if (CkInRestarting()) CpvAccess(_qd)->process();
00432 #endif
00433 CmiFree(env);
00434 }
00435
#if CMK_WITH_STATS
/**
 * Print the per-PE statistics stored in _allStats.
 *
 * When either _printSS or _printCS is set, a summary (chares created and
 * processed per PE plus the combined total) is printed. When _printCS is set,
 * a detailed requested/processed table is printed as well. The temporary
 * accumulator used for the totals is released before returning.
 */
static inline void _printStats(void)
{
  DEBUGF(("[%d] _printStats\n", CkMyPe()));
  if(_printSS || _printCS) {
    Stats *total = new Stats();
    _MEMCHECK(total);
    for(int i=0;i<CkNumPes();i++)
      total->combine(_allStats[i]);
    CkPrintf("Charm Kernel Summary Statistics:\n");
    for(int i=0;i<CkNumPes();i++) {
      CkPrintf("Proc %d: [%d created, %d processed]\n", i,
               _allStats[i]->getCharesCreated(),
               _allStats[i]->getCharesProcessed());
    }
    CkPrintf("Total Chares: [%d created, %d processed]\n",
             total->getCharesCreated(), total->getCharesProcessed());
    // The accumulator is only needed for the lines above.
    delete total;
  }
  if(_printCS) {
    CkPrintf("Charm Kernel Detailed Statistics (R=requested P=processed):\n\n");

    CkPrintf(" Create Mesgs Create Mesgs Create Mesgs\n");
    CkPrintf(" Chare for Group for Nodegroup for\n");
    CkPrintf("PE R/P Mesgs Chares Mesgs Groups Mesgs Nodegroups\n");
    CkPrintf("---- --- --------- --------- --------- --------- --------- ----------\n");

    for(int i=0;i<CkNumPes();i++) {
      CkPrintf("%4d R %9d %9d %9d %9d %9d %9d\n P %9d %9d %9d %9d %9d %9d\n",i,
               _allStats[i]->getCharesCreated(),
               _allStats[i]->getForCharesCreated(),
               _allStats[i]->getGroupsCreated(),
               _allStats[i]->getGroupMsgsCreated(),
               _allStats[i]->getNodeGroupsCreated(),
               _allStats[i]->getNodeGroupMsgsCreated(),
               _allStats[i]->getCharesProcessed(),
               _allStats[i]->getForCharesProcessed(),
               _allStats[i]->getGroupsProcessed(),
               _allStats[i]->getGroupMsgsProcessed(),
               _allStats[i]->getNodeGroupsProcessed(),
               _allStats[i]->getNodeGroupMsgsProcessed());
    }
  }
}
#else
/** Statistics are compiled out: nothing to print. */
static inline void _printStats(void) {}
#endif
00483
/**
 * Header placed at the start of a StatMsg payload. It is followed directly by
 * `n` Stats records (see _sendStats and mergeStats).
 */
typedef struct _statsHeader
{
  int n;  /* number of Stats records that follow this header */
} statsHeader;
00488
00489 static void * mergeStats(int *size, void *data, void **remote, int count)
00490 {
00491 envelope *newData;
00492 statsHeader *dataMsg = (statsHeader*)EnvToUsr((envelope*) data), *newDataMsg;
00493 int nPes = dataMsg->n, currentIndex = 0;
00494
00495 for (int i = 0; i < count; ++i)
00496 {
00497 nPes += ((statsHeader *)EnvToUsr((envelope *)remote[i]))->n;
00498 }
00499
00500 newData = _allocEnv(StatMsg, sizeof(statsHeader) + sizeof(Stats)*nPes);
00501 *size = newData->getTotalsize();
00502 newDataMsg = (statsHeader *)EnvToUsr(newData);
00503 newDataMsg->n = nPes;
00504
00505 statsHeader *current = dataMsg;
00506 Stats *currentStats = (Stats*)(current + 1), *destination = (Stats*)(newDataMsg + 1);
00507 memcpy(destination + currentIndex, currentStats, sizeof(Stats) * current->n);
00508 currentIndex += current->n;
00509
00510 for (int i = 0; i < count; ++i)
00511 {
00512 current = ((statsHeader *)EnvToUsr((envelope *)remote[i]));
00513 currentStats = (Stats *)(current + 1);
00514 memcpy(destination + currentIndex, currentStats, sizeof(Stats) * current->n);
00515 currentIndex += current->n;
00516 }
00517
00518 CmiFree(data);
00519 return newData;
00520 }
00521
00522 static inline void _sendStats(void)
00523 {
00524 DEBUGF(("[%d] _sendStats\n", CkMyPe()));
00525 envelope *env = _allocEnv(StatMsg, sizeof(statsHeader) + sizeof(Stats));
00526 statsHeader* msg = (statsHeader*)EnvToUsr(env);
00527 msg->n = 1;
00528 memcpy(msg+1, CkpvAccess(_myStats), sizeof(Stats));
00529 CmiSetHandler(env, _exitHandlerIdx);
00530 CmiReduce(env, env->getTotalsize(), mergeStats);
00531 }
00532
00533 #if CMK_LOCKLESS_QUEUE
/**
 * Payload of a WarnMsg: warning counters that are summed across PEs by
 * mergeWarningMsg.
 */
typedef struct _WarningMsg
{
  int queue_overflow_count;  /* filled from messageQueueOverflow in _sendWarnings */
} WarningMsg;
00537
00538
00539 static void *mergeWarningMsg(int * size, void * data, void ** remote, int count){
00540 int i;
00541
00542 WarningMsg *msg = (WarningMsg*)EnvToUsr((envelope*) data), *m;
00543
00544
00545 for(i = 0; i < count; ++i)
00546 {
00547 m = (WarningMsg*)EnvToUsr((envelope*) remote[i]);
00548 msg->queue_overflow_count += m->queue_overflow_count;
00549 }
00550
00551 return data;
00552 }
00553
00554
00555 extern int messageQueueOverflow;
00556 static inline void _sendWarnings(void)
00557 {
00558 DEBUGF(("[%d] _sendWarnings\n", CkMyPe()));
00559
00560 envelope *env = _allocEnv(WarnMsg, sizeof(WarningMsg));
00561 WarningMsg* msg = (WarningMsg*)EnvToUsr(env);
00562
00563
00564 msg->queue_overflow_count = messageQueueOverflow;
00565
00566 CmiSetHandler(env, _exitHandlerIdx);
00567 CmiReduce(env, env->getTotalsize(), mergeWarningMsg);
00568 }
00569
00570
00571 static inline void ReportWarnings(WarningMsg * msg)
00572 {
00573 if(msg->queue_overflow_count > 0)
00574 {
00575 CmiPrintf("WARNING: Message queues overflowed during execution, this can negatively impact performance.\n");
00576 CmiPrintf("\tModify the size of the message queues using: +MessageQueueNodes and +MessageQueueNodeSize\n");
00577 }
00578 }
00579 #endif
00580
00581 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
00582 extern void _messageLoggingExit();
00583 #endif
00584
00585 #if __FAULT__
00586
00587
00588 extern int index_skipCldHandler;
00589 extern void _skipCldHandler(void *converseMsg);
00590
00591 void _discard_charm_message()
00592 {
00593 CkNumberHandler(_charmHandlerIdx,_discardHandler);
00594
00595 CkNumberHandler(index_skipCldHandler, _discardHandler);
00596 }
00597
00598 void _resume_charm_message()
00599 {
00600 CkNumberHandlerEx(_charmHandlerIdx, _processHandler, CkpvAccess(_coreState));
00601
00602 CkNumberHandler(index_skipCldHandler, _skipCldHandler);
00603 }
00604 #endif
00605
00606 static void _exitHandler(envelope *env)
00607 {
00608 DEBUGF(("exitHandler called on %d msgtype: %d\n", CkMyPe(), env->getMsgtype()));
00609 switch(env->getMsgtype()) {
00610 case StartExitMsg:
00611 CkAssert(CkMyPe()==0);
00612 if(_exitStarted) {
00613 CmiFree(env);
00614 return;
00615 }
00616 _exitStarted = true;
00617
00618
00619 case ExitMsg:
00620 CkAssert(CkMyPe()==0);
00621 if (!_CkExitFnVec.isEmpty()) {
00622 CmiFree(env);
00623 CkExitFn fn = _CkExitFnVec.deq();
00624 fn();
00625 break;
00626 }
00627
00628 CkNumberHandler(_charmHandlerIdx,_discardHandler);
00629 CkNumberHandler(_bocHandlerIdx, _discardHandler);
00630 #if !CMK_BIGSIM_THREAD
00631 env->setMsgtype(ReqStatMsg);
00632 env->setSrcPe(CkMyPe());
00633
00634 if (_ringexit){
00635 DEBUGF(("[%d] Ring Exit \n",CkMyPe()));
00636 const int stride = CkNumPes()/_ringtoken;
00637 int pe = 0;
00638 while (pe<CkNumPes()) {
00639 CmiSyncSend(pe, env->getTotalsize(), (char *)env);
00640 pe += stride;
00641 }
00642 CmiFree(env);
00643 }else{
00644 CmiSyncBroadcastAllAndFree(env->getTotalsize(), (char *)env);
00645 }
00646 #else
00647 CmiFree(env);
00648 ConverseExit(_exitcode);
00649 #endif
00650 break;
00651 case ReqStatMsg:
00652 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
00653 _messageLoggingExit();
00654 #endif
00655 DEBUGF(("ReqStatMsg on %d\n", CkMyPe()));
00656 CkNumberHandler(_charmHandlerIdx,_discardHandler);
00657 CkNumberHandler(_bocHandlerIdx, _discardHandler);
00658 #if CMK_FAULT_EVAC
00659 if(CmiNodeAlive(CkMyPe()))
00660 #endif
00661 {
00662 #if CMK_WITH_STATS
00663 _sendStats();
00664 #endif
00665 #if CMK_WITH_WARNINGS
00666 _sendWarnings();
00667 #endif
00668 _mainDone = true;
00669
00670
00671
00672
00673
00674 #if CMK_TRACE_ENABLED
00675 if (_ringexit) traceClose();
00676 #endif
00677 }
00678 if (_ringexit) {
00679 int stride = CkNumPes()/_ringtoken;
00680 int pe = CkMyPe()+1;
00681 if (pe < CkNumPes() && pe % stride != 0)
00682 CmiSyncSendAndFree(pe, env->getTotalsize(), (char *)env);
00683 else
00684 CmiFree(env);
00685 }
00686 else
00687 CmiFree(env);
00688 #if CMK_SHRINK_EXPAND
00689 ConverseCleanup();
00690 #endif
00691
00692 #if !CMK_WITH_STATS && !CMK_WITH_WARNINGS
00693 DEBUGF(("[%d] Calling converse exit from ReqStatMsg \n",CkMyPe()));
00694 ConverseExit(_exitcode);
00695 if(CharmLibInterOperate)
00696 CpvAccess(interopExitFlag) = 1;
00697 #endif
00698 break;
00699 #if CMK_WITH_STATS
00700 case StatMsg:
00701 {
00702 CkAssert(CkMyPe()==0);
00703 statsHeader* header = (statsHeader*)EnvToUsr(env);
00704 int n = header->n;
00705 Stats* currentStats = (Stats*)(header + 1);
00706 for (int i = 0; i < n; ++i)
00707 {
00708 _allStats[currentStats->getPe()] = currentStats;
00709 currentStats++;
00710 }
00711 DEBUGF(("StatMsg on %d with %d\n", CkMyPe(), n));
00712 _printStats();
00713
00714 envelope* env = _allocEnv(StatDoneMsg);
00715 CmiSetHandler(env, _exitHandlerIdx);
00716 CmiSyncBroadcastAllAndFree(env->getTotalsize(), (char*)env);
00717 }
00718 break;
00719
00720 case StatDoneMsg:
00721 DEBUGF(("[%d] Calling converse exit from StatDoneMsg \n",CkMyPe()));
00722 ConverseExit(_exitcode);
00723 if (CharmLibInterOperate)
00724 CpvAccess(interopExitFlag) = 1;
00725 break;
00726 #endif
00727 #if CMK_WITH_WARNINGS
00728 case WarnMsg:
00729 {
00730 CkAssert(CkMyPe()==0);
00731 WarningMsg* msg = (WarningMsg*)EnvToUsr(env);
00732 ReportWarnings(msg);
00733
00734 envelope* env = _allocEnv(WarnDoneMsg);
00735 CmiSetHandler(env, _exitHandlerIdx);
00736 CmiSyncBroadcastAllAndFree(env->getTotalsize(), (char*)env);
00737 break;
00738 }
00739 case WarnDoneMsg:
00740 DEBUGF(("[%d] Calling converse exit from WarnDoneMsg \n",CkMyPe()));
00741 ConverseExit(_exitcode);
00742 if (CharmLibInterOperate)
00743 CpvAccess(interopExitFlag) = 1;
00744 break;
00745 #endif
00746 default:
00747 CmiAbort("Internal Error(_exitHandler): Unknown-msg-type. Contact Developers.\n");
00748 }
00749 }
00750
00751 #if CMK_SHRINK_EXPAND
00752 void _ROGroupRestartHandler(void * msg){
00753 CkResumeRestartMain((char *)msg);
00754 }
00755 #endif
00756
00762 static inline void _processBufferedBocInits(void)
00763 {
00764 CkCoreState *ck = CkpvAccess(_coreState);
00765 CkNumberHandlerEx(_bocHandlerIdx,_processHandler, ck);
00766 PtrVec &inits=*CkpvAccess(_bocInitVec);
00767 int len = inits.size();
00768 for(int i=1; i<len; i++) {
00769 envelope *env = inits[i];
00770 if(env==0) {
00771 #if CMK_SHRINK_EXPAND
00772 if(_inrestart){
00773 CkPrintf("_processBufferedBocInits: empty message in restart, ignoring\n");
00774 break;
00775 }
00776 else
00777 CkAbort("_processBufferedBocInits: empty message");
00778 #else
00779 CkAbort("_processBufferedBocInits: empty message");
00780 #endif
00781 }
00782 if(env->isPacked())
00783 CkUnpackMessage(&env);
00784 _processBocInitMsg(ck,env);
00785 }
00786 delete &inits;
00787 }
00788
00794 static inline void _processBufferedNodeBocInits(void)
00795 {
00796 CkCoreState *ck = CkpvAccess(_coreState);
00797 PtrVec &inits=*CksvAccess(_nodeBocInitVec);
00798 int len = inits.size();
00799 for(int i=1; i<len; i++) {
00800 envelope *env = inits[i];
00801 if(env==0) CkAbort("_processBufferedNodeBocInits: empty message");
00802 if(env->isPacked())
00803 CkUnpackMessage(&env);
00804 _processNodeBocInitMsg(ck,env);
00805 }
00806 delete &inits;
00807 }
00808
00809 static inline void _processBufferedMsgs(void)
00810 {
00811 CkNumberHandlerEx(_charmHandlerIdx, _processHandler, CkpvAccess(_coreState));
00812 envelope *env;
00813 while(NULL!=(env=(envelope*)CkpvAccess(_buffQ)->deq())) {
00814 if(env->getMsgtype()==NewChareMsg || env->getMsgtype()==NewVChareMsg) {
00815 if(env->isForAnyPE())
00816 _CldEnqueue(CLD_ANYWHERE, env, _infoIdx);
00817 else
00818 _processHandler((void *)env, CkpvAccess(_coreState));
00819 } else {
00820 _processHandler((void *)env, CkpvAccess(_coreState));
00821 }
00822 }
00823 }
00824
00825 static int _charmLoadEstimator(void)
00826 {
00827 return CkpvAccess(_buffQ)->length();
00828 }
00829
00838 static void _sendTriggers(void)
00839 {
00840 int i, num, first;
00841 CmiImmediateLock(CksvAccess(_nodeGroupTableImmLock));
00842 if (!CksvAccess(_triggersSent))
00843 {
00844 CksvAccess(_triggersSent) = true;
00845 num = CmiMyNodeSize();
00846 envelope *env = _allocEnv(RODataMsg);
00847 env->setSrcPe(CkMyPe());
00848 CmiSetHandler(env, _triggerHandlerIdx);
00849 first = CmiNodeFirst(CmiMyNode());
00850 for (i=0; i < num; i++)
00851 if(first+i != CkMyPe())
00852 CmiSyncSend(first+i, env->getTotalsize(), (char *)env);
00853 CmiFree(env);
00854 }
00855 CmiImmediateUnlock(CksvAccess(_nodeGroupTableImmLock));
00856 }
00857
00867 void _initDone(void)
00868 {
00869 if (CkpvAccess(_initdone)) return;
00870 CkpvAccess(_initdone) = true;
00871 DEBUGF(("[%d] _initDone.\n", CkMyPe()));
00872 if (!CksvAccess(_triggersSent)) _sendTriggers();
00873 CkNumberHandler(_triggerHandlerIdx, _discardHandler);
00874 CmiNodeBarrier();
00875 if(CkMyRank() == 0) {
00876 _processBufferedNodeBocInits();
00877 quietMode = 0;
00878 }
00879 CmiNodeBarrier();
00880 _processBufferedBocInits();
00881 DEBUGF(("Reached CmiNodeBarrier(), pe = %d, rank = %d\n", CkMyPe(), CkMyRank()));
00882 CmiNodeBarrier();
00883 DEBUGF(("Crossed CmiNodeBarrier(), pe = %d, rank = %d\n", CkMyPe(), CkMyRank()));
00884 _processBufferedMsgs();
00885 CkpvAccess(_charmEpoch)=1;
00886 if (userDrivenMode) {
00887 StopInteropScheduler();
00888 }
00889 }
00890
00898 static void _triggerHandler(envelope *env)
00899 {
00900 DEBUGF(("Calling Init Done from _triggerHandler\n"));
00901 checkForInitDone(true);
00902 if (env!=NULL) CmiFree(env);
00903 }
00904
00905 static inline void _processROMsgMsg(envelope *env)
00906 {
00907 if(!CmiMyRank()) {
00908 *((char **)(_readonlyMsgs[env->getRoIdx()]->pMsg))=(char *)EnvToUsr(env);
00909 }
00910 }
00911
00912 static inline void _processRODataMsg(envelope *env)
00913 {
00914
00915 if(!CmiMyRank()) {
00916 #if CMK_ONESIDED_IMPL && CMK_SMP
00917 if(CMI_IS_ZC_BCAST(env)) {
00918
00919 CmiForwardMsgToPeers(env->getTotalsize(), (char *)env);
00920 }
00921 #endif
00922
00923
00924 PUP::fromMem pu((char *)EnvToUsr(env));
00925 CmiSpanningTreeInfo &t = *_topoTree;
00926
00927 #if CMK_ONESIDED_IMPL
00928 pu|numZerocopyROops;
00929
00930
00931 CksvAccess(_numPendingRORdmaTransfers) = numZerocopyROops;
00932
00933
00934 if(numZerocopyROops > 0 && t.child_count != 0) {
00935 readonlyAllocateOnSource();
00936 }
00937 #endif
00938
00939 for(size_t i=0;i<_readonlyTable.size();i++) {
00940 _readonlyTable[i]->pupData(pu);
00941 }
00942 } else {
00943 CmiFree(env);
00944 }
00945 }
00946
00953 static void _roRestartHandler(void *msg)
00954 {
00955 CkAssert(CkMyPe()!=0);
00956 envelope *env = (envelope *) msg;
00957 CkpvAccess(_numInitsRecd)++;
00958 _numExpectInitMsgs = env->getCount();
00959 _processRODataMsg(env);
00960
00961
00962
00963
00964
00965
00966 _triggerHandler(NULL);
00967 }
00968
00969 #if CMK_ONESIDED_IMPL
00970 static void _roRdmaDoneHandler(envelope *env) {
00971
00972 switch(env->getMsgtype()) {
00973 case ROPeerCompletionMsg:
00974 checkForInitDone(true);
00975
00976 if (env!=NULL) CmiFree(env);
00977 break;
00978 case ROChildCompletionMsg:
00979 roBcastAckInfo->counter++;
00980 if(roBcastAckInfo->counter == roBcastAckInfo->numChildren) {
00981
00982 for(int i=0; i < roBcastAckInfo->numops; i++) {
00983 NcpyROBcastBuffAckInfo *buffAckInfo = &(roBcastAckInfo->buffAckInfo[i]);
00984 CmiDeregisterMem(buffAckInfo->ptr,
00985 buffAckInfo->layerInfo +CmiGetRdmaCommonInfoSize(),
00986 buffAckInfo->pe,
00987 buffAckInfo->regMode);
00988 }
00989
00990 if(roBcastAckInfo->isRoot != 1) {
00991 if(_topoTree == NULL) CkAbort("CkRdmaIssueRgets:: topo tree has not been calculated \n");
00992 CmiSpanningTreeInfo &t = *_topoTree;
00993
00994
00995
00996 envelope *compEnv = _allocEnv(ROChildCompletionMsg);
00997 compEnv->setSrcPe(CkMyPe());
00998 CmiSetHandler(compEnv, _roRdmaDoneHandlerIdx);
00999 CmiSyncSendAndFree(t.parent, compEnv->getTotalsize(), (char *)compEnv);
01000 }
01001
01002 CmiFree(roBcastAckInfo);
01003 }
01004 break;
01005 default:
01006 CmiAbort("Invalid msg type\n");
01007 break;
01008 }
01009 }
01010 #endif
01011
01012 void checkForInitDone(bool rdmaROCompleted) {
01013
01014 bool noPendingRORdmaTransfers = true;
01015 #if CMK_ONESIDED_IMPL
01016
01017 if(CmiMyRank() == 0 && numZerocopyROops > 0)
01018 noPendingRORdmaTransfers = rdmaROCompleted;
01019 #endif
01020 if (_numExpectInitMsgs && CkpvAccess(_numInitsRecd) + CksvAccess(_numInitNodeMsgs) == _numExpectInitMsgs && noPendingRORdmaTransfers)
01021 _initDone();
01022 }
01023
01035 static void _initHandler(void *msg, CkCoreState *ck)
01036 {
01037 CkAssert(CkMyPe()!=0);
01038 envelope *env = (envelope *) msg;
01039
01040 if (ck->watcher!=NULL) {
01041 if (!ck->watcher->processMessage(&env,ck)) return;
01042 }
01043
01044 switch (env->getMsgtype()) {
01045 case BocInitMsg:
01046 if (env->getGroupEpoch()==0) {
01047 CkpvAccess(_numInitsRecd)++;
01048
01049 if (CkpvAccess(_bocInitVec)->size() < env->getGroupNum().idx + 1) {
01050 CkpvAccess(_bocInitVec)->resize(env->getGroupNum().idx + 1);
01051 }
01052 (*CkpvAccess(_bocInitVec))[env->getGroupNum().idx] = env;
01053 } else _bufferHandler(msg);
01054 break;
01055 case NodeBocInitMsg:
01056 if (env->getGroupEpoch()==0) {
01057 CmiImmediateLock(CksvAccess(_nodeGroupTableImmLock));
01058 CksvAccess(_numInitNodeMsgs)++;
01059 if (CksvAccess(_nodeBocInitVec)->size() < env->getGroupNum().idx + 1) {
01060 CksvAccess(_nodeBocInitVec)->resize(env->getGroupNum().idx + 1);
01061 }
01062 (*CksvAccess(_nodeBocInitVec))[env->getGroupNum().idx] = env;
01063 CmiImmediateUnlock(CksvAccess(_nodeGroupTableImmLock));
01064
01065 } else _bufferHandler(msg);
01066 break;
01067 case ROMsgMsg:
01068 CkpvAccess(_numInitsRecd)++;
01069 CpvAccess(_qd)->process();
01070 if(env->isPacked()) CkUnpackMessage(&env);
01071 _processROMsgMsg(env);
01072 break;
01073 case RODataMsg:
01074 CkpvAccess(_numInitsRecd)++;
01075 CpvAccess(_qd)->process();
01076 _numExpectInitMsgs = env->getCount();
01077 _processRODataMsg(env);
01078 break;
01079 default:
01080 CmiAbort("Internal Error: Unknown-msg-type. Contact Developers.\n");
01081 }
01082 DEBUGF(("[%d,%.6lf] _numExpectInitMsgs %d CkpvAccess(_numInitsRecd)+CksvAccess(_numInitNodeMsgs) %d+%d\n",CmiMyPe(),CmiWallTimer(),_numExpectInitMsgs,CkpvAccess(_numInitsRecd),CksvAccess(_numInitNodeMsgs)));
01083 checkForInitDone(false);
01084 }
01085
01086 #if CMK_SHRINK_EXPAND
01087 void CkCleanup()
01088 {
01089
01090 envelope *env = _allocEnv(StartExitMsg);
01091 env->setSrcPe(CkMyPe());
01092 CmiSetHandler(env, _exitHandlerIdx);
01093 CmiSyncSendAndFree(0, env->getTotalsize(), (char *)env);
01094 }
01095 #endif
01096
01097 CkQ<CkExitFn> _CkExitFnVec;
01098
01099
01100
01101
01102 void CkExit(int exitcode)
01103 {
01104 DEBUGF(("[%d] CkExit called \n",CkMyPe()));
01105
01106
01107
01108 _exitcode = exitcode;
01109 envelope *env = _allocEnv(StartExitMsg);
01110 env->setSrcPe(CkMyPe());
01111 CmiSetHandler(env, _exitHandlerIdx);
01112 CmiSyncSendAndFree(0, env->getTotalsize(), (char *)env);
01113
01114 #if ! CMK_BIGSIM_THREAD
01115 _TRACE_END_EXECUTE();
01116
01117 if(!CharmLibInterOperate)
01118 CsdScheduler(-1);
01119 #endif
01120 }
01121
01122 void CkContinueExit()
01123 {
01124 envelope *env = _allocEnv(ExitMsg);
01125 env->setSrcPe(CkMyPe());
01126 CmiSetHandler(env, _exitHandlerIdx);
01127 CmiSyncSendAndFree(0, env->getTotalsize(), (char *)env);
01128 }
01129
01130
01131
01132
01133
01134 void EmergencyExit(void) {
01135 #ifndef __BIGSIM__
01136
01137 if (CkpvAccess(_coreState) != NULL) {
01138 delete CkpvAccess(_coreState);
01139 CkpvAccess(_coreState) = NULL;
01140 }
01141 #endif
01142 }
01143
01144 static void _nullFn(void *, void *)
01145 {
01146 CmiAbort("Null-Method Called. Program may have Unregistered Module!!\n");
01147 }
01148
01149 extern void _registerLBDatabase(void);
01150 extern void _registerMetaBalancer(void);
01151 extern void _registerPathHistory(void);
01152 #if CMK_WITH_CONTROLPOINT
01153 extern void _registerControlPoints(void);
01154 #endif
01155 extern void _registerTraceControlPoints();
01156 extern void _registerExternalModules(char **argv);
01157 extern void _ckModuleInit(void);
01158 extern void _loadbalancerInit();
01159 extern void _metabalancerInit();
01160 #if CMK_SMP && CMK_TASKQUEUE
01161 extern void _taskqInit();
01162 #endif
01163 #if CMK_SMP
01164 extern void LBTopoInit();
01165 #endif
01166 extern void _initChareTables();
01167 #if CMK_MEM_CHECKPOINT
01168 extern void init_memcheckpt(char **argv);
01169 #endif
01170 extern "C" void initCharmProjections();
01171 void CmiInitCPUTopology(char **argv);
01172 void CmiCheckAffinity();
01173 void CmiInitMemAffinity(char **argv);
01174 void CmiInitPxshm(char **argv);
01175
01176
01177
01178 void _registerInitCall(CkInitCallFn fn, int isNodeCall)
01179 {
01180 if (isNodeCall) _initCallTable.initNodeCalls.enq(fn);
01181 else _initCallTable.initProcCalls.enq(fn);
01182 }
01183
01184 void InitCallTable::enumerateInitCalls()
01185 {
01186 int i;
01187 #ifdef __BIGSIM__
01188 if(BgNodeRank()==0)
01189 #else
01190 if(CkMyRank()==0)
01191 #endif
01192 {
01193 for (i=0; i<initNodeCalls.length(); i++) initNodeCalls[i]();
01194 }
01195
01196 CmiNodeAllBarrier();
01197 for (i=0; i<initProcCalls.length(); i++) initProcCalls[i]();
01198 }
01199
01200 CpvCExtern(int, cpdSuspendStartup);
01201 void CpdFreeze(void);
01202
01203 extern int _dummy_dq;
01204
01205 void initQd(char **argv)
01206 {
01207 CpvInitialize(QdState*, _qd);
01208 CpvAccess(_qd) = new QdState();
01209 if (CmiMyRank() == 0) {
01210 #if !defined(CMK_CPV_IS_SMP) && !CMK_SHARED_VARS_UNIPROCESSOR
01211 CpvAccessOther(_qd, 1) = new QdState();
01212 #endif
01213 }
01214 CmiAssignOnce(&_qdHandlerIdx, CmiRegisterHandler((CmiHandler)_qdHandler));
01215 CmiAssignOnce(&_qdCommHandlerIdx, CmiRegisterHandler((CmiHandler)_qdCommHandler));
01216 if (CmiGetArgIntDesc(argv,"+qd",&_dummy_dq, "QD time in seconds")) {
01217 if (CmiMyPe()==0)
01218 CmiPrintf("Charm++> Fake QD using %d seconds.\n", _dummy_dq);
01219 }
01220 }
01221
/* Debugger initialization entry points used by _initCharm(). */
void CpdBreakPointInit();
#if CMK_BIGSIM_CHARM && CMK_CHARMDEBUG
void CpdBgInit();
#endif

/* Optional hook; in CMK_CHARMPY builds _initCharm() calls it in place of
   CkRegisterMainModule() and aborts when it is null. */
extern void (*CkRegisterMainModuleCallback)();
01228
01229 void _sendReadonlies() {
01230 for(int i=0;i<_readonlyMsgs.size();i++)
01231 {
01232 void *roMsg = (void *) *((char **)(_readonlyMsgs[i]->pMsg));
01233 if(roMsg==0)
01234 continue;
01235
01236 envelope *env = UsrToEnv(roMsg);
01237 env->setSrcPe(CkMyPe());
01238 env->setMsgtype(ROMsgMsg);
01239 env->setRoIdx(i);
01240 CmiSetHandler(env, _initHandlerIdx);
01241 CkPackMessage(&env);
01242 CmiSyncBroadcast(env->getTotalsize(), (char *)env);
01243 CpvAccess(_qd)->create(CkNumPes()-1);
01244
01245
01246 CkUnpackMessage(&env);
01247 _processROMsgMsg(env);
01248 _numInitMsgs++;
01249 }
01250
01251 #if CMK_ONESIDED_IMPL
01252 numZerocopyROops = 0;
01253 curROIndex = 0;
01254 #endif
01255
01256
01257 PUP::sizer ps;
01258
01259 #if CMK_ONESIDED_IMPL
01260 ps|numZerocopyROops;
01261 #endif
01262
01263 for(int i=0;i<_readonlyTable.size();i++) _readonlyTable[i]->pupData(ps);
01264
01265 #if CMK_ONESIDED_IMPL
01266 if(numZerocopyROops > 0) {
01267 readonlyAllocateOnSource();
01268 }
01269 #endif
01270
01271
01272 envelope *env = _allocEnv(RODataMsg, ps.size());
01273 PUP::toMem pp((char *)EnvToUsr(env));
01274 #if CMK_ONESIDED_IMPL
01275 pp|numZerocopyROops;
01276 #endif
01277 for(int i=0;i<_readonlyTable.size();i++) _readonlyTable[i]->pupData(pp);
01278
01279 env->setCount(++_numInitMsgs);
01280 env->setSrcPe(CkMyPe());
01281 CmiSetHandler(env, _initHandlerIdx);
01282 DEBUGF(("[%d,%.6lf] RODataMsg being sent of size %d \n",CmiMyPe(),CmiWallTimer(),env->getTotalsize()));
01283 CmiSyncBroadcast(env->getTotalsize(), (char *)env);
01284 #if CMK_ONESIDED_IMPL && CMK_SMP
01285 if(numZerocopyROops > 0) {
01286
01287 CmiForwardMsgToPeers(env->getTotalsize(), (char *)env);
01288 }
01289 #endif
01290 CmiFree(env);
01291 CpvAccess(_qd)->create(CkNumPes()-1);
01292 _initDone();
01293 }
01294
01304 void _initCharm(int unused_argc, char **argv)
01305 {
01306 int inCommThread = (CmiMyRank() == CmiMyNodeSize());
01307
01308 DEBUGF(("[%d,%.6lf ] _initCharm started\n",CmiMyPe(),CmiWallTimer()));
01309 std::set_terminate([](){ CkAbort("Unhandled C++ exception in user code.\n");});
01310
01311 CkpvInitialize(size_t *, _offsets);
01312 CkpvAccess(_offsets) = new size_t[32];
01313 CkpvInitialize(PtrQ*,_buffQ);
01314 CkpvInitialize(PtrVec*,_bocInitVec);
01315 CkpvInitialize(void*, _currentChare);
01316 CkpvInitialize(int, _currentChareType);
01317 CkpvInitialize(CkGroupID, _currentGroup);
01318 CkpvInitialize(void *, _currentNodeGroupObj);
01319 CkpvInitialize(CkGroupID, _currentGroupRednMgr);
01320 CkpvInitialize(GroupTable*, _groupTable);
01321 CkpvInitialize(GroupIDTable*, _groupIDTable);
01322 CkpvInitialize(CmiImmediateLockType, _groupTableImmLock);
01323 CkpvInitialize(bool, _destroyingNodeGroup);
01324 CkpvAccess(_destroyingNodeGroup) = false;
01325 CkpvInitialize(UInt, _numGroups);
01326 CkpvInitialize(int, _numInitsRecd);
01327 CkpvInitialize(bool, _initdone);
01328 CkpvInitialize(char**, Ck_argv); CkpvAccess(Ck_argv)=argv;
01329 CkpvInitialize(MsgPool*, _msgPool);
01330 CkpvInitialize(CkCoreState *, _coreState);
01331
01332 #if CMK_FAULT_EVAC
01333 #ifndef __BIGSIM__
01334 CpvInitialize(char *,_validProcessors);
01335 #endif
01336 CkpvInitialize(char ,startedEvac);
01337 #endif
01338 CpvInitialize(int,serializer);
01339
01340 _initChareTables();
01341
01342 CksvInitialize(UInt, _numNodeGroups);
01343 CksvInitialize(GroupTable*, _nodeGroupTable);
01344 CksvInitialize(GroupIDTable, _nodeGroupIDTable);
01345 CksvInitialize(CmiImmediateLockType, _nodeGroupTableImmLock);
01346 CksvInitialize(CmiNodeLock, _nodeLock);
01347 CksvInitialize(PtrVec*,_nodeBocInitVec);
01348 CksvInitialize(UInt,_numInitNodeMsgs);
01349 CkpvInitialize(int,_charmEpoch);
01350 CkpvAccess(_charmEpoch)=0;
01351 CksvInitialize(bool, _triggersSent);
01352 CksvAccess(_triggersSent) = false;
01353
01354 #if CMK_ONESIDED_IMPL
01355 CksvInitialize(int, _numPendingRORdmaTransfers);
01356 #endif
01357
01358 CkpvInitialize(_CkOutStream*, _ckout);
01359 CkpvInitialize(_CkErrStream*, _ckerr);
01360 CkpvInitialize(Stats*, _myStats);
01361
01362 CkpvAccess(_groupIDTable) = new GroupIDTable(0);
01363 CkpvAccess(_groupTable) = new GroupTable;
01364 CkpvAccess(_groupTable)->init();
01365 CkpvAccess(_groupTableImmLock) = CmiCreateImmediateLock();
01366 CkpvAccess(_numGroups) = 1;
01367 CkpvAccess(_buffQ) = new PtrQ();
01368 CkpvAccess(_bocInitVec) = new PtrVec();
01369
01370 CkpvAccess(_currentNodeGroupObj) = NULL;
01371
01372 if(CkMyRank()==0)
01373 {
01374 CksvAccess(_numNodeGroups) = 1;
01375 CksvAccess(_numInitNodeMsgs) = 0;
01376
01377 #if CMK_ONESIDED_IMPL
01378 CksvAccess(_numPendingRORdmaTransfers) = 0;
01379 #endif
01380
01381 CksvAccess(_nodeLock) = CmiCreateLock();
01382 CksvAccess(_nodeGroupTable) = new GroupTable();
01383 CksvAccess(_nodeGroupTable)->init();
01384 CksvAccess(_nodeGroupTableImmLock) = CmiCreateImmediateLock();
01385 CksvAccess(_nodeBocInitVec) = new PtrVec();
01386 }
01387
01388 CkCallbackInit();
01389
01390 CmiNodeAllBarrier();
01391
01392 #if ! CMK_BIGSIM_CHARM
01393 initQd(argv);
01394 #endif
01395
01396 CkpvAccess(_coreState)=new CkCoreState();
01397
01398 CkpvAccess(_numInitsRecd) = 0;
01399 CkpvAccess(_initdone) = false;
01400
01401 CkpvAccess(_ckout) = new _CkOutStream();
01402 CkpvAccess(_ckerr) = new _CkErrStream();
01403
01404 CmiAssignOnce(&_charmHandlerIdx, CkRegisterHandler(_bufferHandler));
01405 CmiAssignOnce(&_initHandlerIdx, CkRegisterHandlerEx(_initHandler, CkpvAccess(_coreState)));
01406 CmiAssignOnce(&_roRestartHandlerIdx, CkRegisterHandler(_roRestartHandler));
01407
01408 #if CMK_ONESIDED_IMPL
01409 CmiAssignOnce(&_roRdmaDoneHandlerIdx, CkRegisterHandler(_roRdmaDoneHandler));
01410 #endif
01411
01412 CmiAssignOnce(&_exitHandlerIdx, CkRegisterHandler(_exitHandler));
01413
01414 CmiAssignOnce(&_libExitHandlerIdx, CkRegisterHandler(_libExitHandler));
01415 CmiAssignOnce(&_bocHandlerIdx, CkRegisterHandlerEx(_initHandler, CkpvAccess(_coreState)));
01416 #if CMK_SHRINK_EXPAND
01417
01418 CmiAssignOnce(&_ROGroupRestartHandlerIdx, CkRegisterHandler(_ROGroupRestartHandler));
01419 #endif
01420
01421 #ifdef __BIGSIM__
01422 if(BgNodeRank()==0)
01423 #endif
01424 _infoIdx = CldRegisterInfoFn((CldInfoFn)_infoFn);
01425
01426 CmiAssignOnce(&_triggerHandlerIdx, CkRegisterHandler(_triggerHandler));
01427 _ckModuleInit();
01428
01429 CldRegisterEstimator((CldEstimator)_charmLoadEstimator);
01430
01431 _futuresModuleInit();
01432 _loadbalancerInit();
01433 _metabalancerInit();
01434
01435 #if CMK_SMP
01436 if (CmiMyRank() == 0) {
01437 LBTopoInit();
01438 }
01439 #endif
01440 #if CMK_MEM_CHECKPOINT
01441 init_memcheckpt(argv);
01442 #endif
01443
01444 initCharmProjections();
01445 #if CMK_TRACE_IN_CHARM
01446
01447 traceCharmInit(argv);
01448 #endif
01449
01450 CkpvInitialize(int, envelopeEventID);
01451 CkpvAccess(envelopeEventID) = 0;
01452 CkMessageWatcherInit(argv,CkpvAccess(_coreState));
01453
01454
01455 CmiSetDirectNcpyAckHandler(CkRdmaDirectAckHandler);
01456
01457 #if CMK_ONESIDED_IMPL
01458
01459 CmiSetEMNcpyAckHandler(CkRdmaEMAckHandler, CkRdmaEMBcastAckHandler, CkRdmaEMBcastPostAckHandler);
01460 #endif
01461
01474 #ifdef __BIGSIM__
01475 if(BgNodeRank()==0)
01476 #else
01477 if(CkMyRank()==0)
01478 #endif
01479 {
01480 SDAG::registerPUPables();
01481 CmiArgGroup("Charm++",NULL);
01482 _parseCommandLineOpts(argv);
01483 _registerInit();
01484 CkRegisterMsg("System", 0, 0, CkFreeMsg, sizeof(int));
01485 CkRegisterChareInCharm(CkRegisterChare("null", 0, TypeChare));
01486 CkIndex_Chare::__idx=CkRegisterChare("Chare", sizeof(Chare), TypeChare);
01487 CkRegisterChareInCharm(CkIndex_Chare::__idx);
01488 CkIndex_Group::__idx=CkRegisterChare("Group", sizeof(Group), TypeGroup);
01489 CkRegisterChareInCharm(CkIndex_Group::__idx);
01490 CkRegisterEp("null", (CkCallFnPtr)_nullFn, 0, 0, 0+CK_EP_INTRINSIC);
01491
01499 _registerCkFutures();
01500 _registerCkArray();
01501 _registerLBDatabase();
01502 _registerMetaBalancer();
01503 _registerCkCallback();
01504 _registerwaitqd();
01505 _registerCkCheckpoint();
01506 _registerCkMulticast();
01507 #if CMK_MEM_CHECKPOINT
01508 _registerCkMemCheckpoint();
01509 #endif
01510 #if CMK_CHARMPY
01511
01520 _registerGreedyRefineLB();
01521 #endif
01522
01528 #if !CMK_CHARMPY
01529 CkRegisterMainModule();
01530 #else
01531
01532
01533 if (CkRegisterMainModuleCallback)
01534 CkRegisterMainModuleCallback();
01535 else
01536 CkAbort("No callback for CkRegisterMainModule");
01537 #endif
01538
01549 #if !CMK_CHARMPY
01550 _registerExternalModules(argv);
01551 #endif
01552 }
01553
01554
01555 if (CkMyRank() == 0) {
01556 CpdBreakPointInit();
01557 }
01558 CmiNodeAllBarrier();
01559
01560
01561 _initCallTable.enumerateInitCalls();
01562
01563 #if CMK_CHARMDEBUG
01564 CpdFinishInitialization();
01565 #endif
01566 if (CkMyRank() == 0)
01567 _registerDone();
01568 CmiNodeAllBarrier();
01569
01570 CkpvAccess(_myStats) = new Stats();
01571 CkpvAccess(_msgPool) = new MsgPool();
01572
01573 CmiNodeAllBarrier();
01574
01575 #if !(__FAULT__)
01576 CmiBarrier();
01577 CmiBarrier();
01578 CmiBarrier();
01579 #endif
01580 #if CMK_SMP_TRACE_COMMTHREAD
01581 _TRACE_BEGIN_COMPUTATION();
01582 #else
01583 if (!inCommThread) {
01584 _TRACE_BEGIN_COMPUTATION();
01585 }
01586 #endif
01587
01588 #ifdef ADAPT_SCHED_MEM
01589 if(CkMyRank()==0){
01590 memCriticalEntries = new int[numMemCriticalEntries];
01591 int memcnt=0;
01592 for(int i=0; i<_entryTable.size(); i++){
01593 if(_entryTable[i]->isMemCritical){
01594 memCriticalEntries[memcnt++] = i;
01595 }
01596 }
01597 }
01598 #endif
01599
01600 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
01601 _messageLoggingInit();
01602 #endif
01603
01604 #if CMK_FAULT_EVAC
01605 #ifndef __BIGSIM__
01606 CpvAccess(_validProcessors) = new char[CkNumPes()];
01607 for(int vProc=0;vProc<CkNumPes();vProc++){
01608 CpvAccess(_validProcessors)[vProc]=1;
01609 }
01610 CmiAssignOnce(&_ckEvacBcastIdx, CkRegisterHandler(_ckEvacBcast));
01611 CmiAssignOnce(&_ckAckEvacIdx, CkRegisterHandler(_ckAckEvac));
01612 #endif
01613
01614 CkpvAccess(startedEvac) = 0;
01615 evacuate = 0;
01616 CcdCallOnCondition(CcdSIGUSR1,(CcdVoidFn)CkDecideEvacPe,0);
01617 #endif
01618 CpvAccess(serializer) = 0;
01619
01620 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
01621 CcdCallOnCondition(CcdSIGUSR2,(CcdVoidFn)CkMlogRestart,0);
01622 #endif
01623
01624 #if CMK_FAULT_EVAC
01625 if(_raiseEvac){
01626 processRaiseEvacFile(_raiseEvacFile);
01627
01628
01629
01630
01631
01632
01633
01634
01635 }
01636 #endif
01637
01638 if (CkMyRank() == 0) {
01639 TopoManager_init();
01640 }
01641 CmiNodeAllBarrier();
01642
01643 if (!_replaySystem) {
01644 CkFtFn faultFunc_restart = CkRestartMain;
01645 if (faultFunc == NULL || faultFunc == faultFunc_restart) {
01646
01647 #if ! CMK_BIGSIM_CHARM
01648 CmiInitCPUAffinity(argv);
01649 CmiInitMemAffinity(argv);
01650 #endif
01651 }
01652 CmiInitCPUTopology(argv);
01653 if (CkMyRank() == 0) {
01654 TopoManager_reset();
01655 #if !CMK_BIGSIM_CHARM
01656 _topoTree = ST_RecursivePartition_getTreeInfo(0);
01657 #endif
01658 }
01659 CmiNodeAllBarrier();
01660 #if CMK_SHARED_VARS_POSIX_THREADS_SMP
01661 if (CmiCpuTopologyEnabled()) {
01662 int *pelist;
01663 int num;
01664 CmiGetPesOnPhysicalNode(0, &pelist, &num);
01665 #if !CMK_MULTICORE && !CMK_SMP_NO_COMMTHD
01666
01667
01668 num += num/CmiMyNodeSize();
01669 #endif
01670 if (!_Cmi_forceSpinOnIdle && num > CmiNumCores())
01671 {
01672 if (CmiMyPe() == 0)
01673 CmiPrintf("\nCharm++> Warning: the number of SMP threads (%d) is greater than the number of physical cores (%d), so threads will sleep while idling. Use +CmiSpinOnIdle or +CmiSleepOnIdle to control this directly.\n\n", num, CmiNumCores());
01674 CmiLock(CksvAccess(_nodeLock));
01675 if (! _Cmi_sleepOnIdle) _Cmi_sleepOnIdle = 1;
01676 CmiUnlock(CksvAccess(_nodeLock));
01677 }
01678 }
01679 #endif
01680 }
01681
01682 #if CMK_CUDA
01683 if (CmiMyRank() == 0) {
01684 initHybridAPI();
01685 }
01686 else {
01687 setHybridAPIDevice();
01688 }
01689 initEventQueues();
01690
01691
01692 if (CmiMyRank() < CmiMyNodeSize()) {
01693 CmiNodeBarrier();
01694 }
01695 hapiRegisterCallbacks();
01696 #endif
01697
01698 if(CmiMyPe() == 0) {
01699 char *topoFilename;
01700 if(CmiGetArgStringDesc(argv,"+printTopo",&topoFilename,"topo file name"))
01701 {
01702 std::stringstream sstm;
01703 sstm << topoFilename << "." << CmiMyPartition();
01704 std::string result = sstm.str();
01705 FILE *fp;
01706 fp = fopen(result.c_str(), "w");
01707 if (fp == NULL) {
01708 CkPrintf("Error opening %s file, writing to stdout\n", topoFilename);
01709 fp = stdout;
01710 }
01711 TopoManager_printAllocation(fp);
01712 fclose(fp);
01713 }
01714 }
01715
01716 #if CMK_USE_PXSHM && ( CMK_CRAYXE || CMK_CRAYXC ) && CMK_SMP
01717
01718
01719 if (CkMyRank() == 0) {
01720 CmiInitPxshm(argv);
01721 }
01722 CmiNodeAllBarrier();
01723 #endif
01724
01725
01726 #if CMK_BIGSIM_CHARM && CMK_CHARMDEBUG
01727
01728
01729
01730 CpdBgInit();
01731 #endif
01732
01733 if (faultFunc) {
01734 #if CMK_WITH_STATS
01735 if (CkMyPe()==0) _allStats = new Stats*[CkNumPes()];
01736 #endif
01737 if (!inCommThread) {
01738 CkArgMsg *msg = (CkArgMsg *)CkAllocMsg(0, sizeof(CkArgMsg), 0, GroupDepNum{});
01739 msg->argc = CmiGetArgc(argv);
01740 msg->argv = argv;
01741 faultFunc(_restartDir, msg);
01742 CkFreeMsg(msg);
01743 }
01744 }else if(CkMyPe()==0){
01745 #if CMK_WITH_STATS
01746 _allStats = new Stats*[CkNumPes()];
01747 #endif
01748 size_t i, nMains=_mainTable.size();
01749
01750
01751
01752
01753 int count = 0;
01754 int argc = CmiGetArgc(argv);
01755 for (int i = 1; i < argc; i++) {
01756
01757
01758
01759 if (strncmp(argv[i],"+vp",3) == 0) {
01760 if (_optSet.count("+vp") == 0) {
01761 count++;
01762 CmiPrintf("WARNING: %s is a TCharm command line argument, but you have not compiled with TCharm\n", argv[i]);
01763 }
01764 } else if (strncmp(argv[i],"-vp",3) == 0) {
01765 CmiPrintf("WARNING: %s is no longer valid because -vp has been deprecated. Please use +vp.\n", argv[i]);
01766 } else if (argv[i][0] == '+' && _optSet.count(argv[i]) == 0) {
01767 count++;
01768 CmiPrintf("WARNING: %s is a command line argument beginning with a '+' but was not parsed by the RTS.\n", argv[i]);
01769 } else if (argv[i][0] == '+' && _optSet.count(argv[i]) != 0) {
01770 fprintf(stderr,"%s is used more than once. Please remove duplicate arguments.\n", argv[i]);
01771 CmiAbort("Bad command-line argument\n");
01772 }
01773 }
01774 if (count) {
01775 CmiPrintf("If any of the above arguments were intended for the RTS you may need to recompile Charm++ with different options.\n");
01776 }
01777
01778 CmiCheckAffinity();
01779
01780 for(i=0;i<nMains;i++)
01781 {
01782 size_t size = _chareTable[_mainTable[i]->chareIdx]->size;
01783 void *obj = malloc(size);
01784 _MEMCHECK(obj);
01785 _mainTable[i]->setObj(obj);
01786 CkpvAccess(_currentChare) = obj;
01787 CkpvAccess(_currentChareType) = _mainTable[i]->chareIdx;
01788 CkArgMsg *msg = (CkArgMsg *)CkAllocMsg(0, sizeof(CkArgMsg), 0, GroupDepNum{});
01789 msg->argc = CmiGetArgc(argv);
01790 msg->argv = argv;
01791 quietMode = 0;
01792 _entryTable[_mainTable[i]->entryIdx]->call(msg, obj);
01793 if (quietModeRequested) quietMode = 1;
01794 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
01795 CpvAccess(_currentObj) = (Chare *)obj;
01796 #endif
01797 }
01798 _mainDone = true;
01799
01800 _STATS_RECORD_CREATE_CHARE_N(nMains);
01801 _STATS_RECORD_PROCESS_CHARE_N(nMains);
01802
01803 if (!userDrivenMode) {
01804 _sendReadonlies();
01805 }
01806 } else {
01807
01808 CmiCheckAffinity();
01809
01810 }
01811
01812 DEBUGF(("[%d,%d%.6lf] inCommThread %d\n",CmiMyPe(),CmiMyRank(),CmiWallTimer(),inCommThread));
01813
01814 if (inCommThread) {
01815 CkNumberHandlerEx(_bocHandlerIdx, _processHandler, CkpvAccess(_coreState));
01816 CkNumberHandlerEx(_charmHandlerIdx, _processHandler, CkpvAccess(_coreState));
01817 _processBufferedMsgs();
01818 }
01819
01820 #if CMK_CHARMDEBUG
01821
01822 if (CpvAccess(cpdSuspendStartup))
01823 {
01824
01825 CpdFreeze();
01826 }
01827 #endif
01828
01829
01830 #if __FAULT__
01831 if(killFlag){
01832 readKillFile();
01833 }
01834 #endif
01835
01836 }
01837
01838 int charm_main(int argc, char **argv)
01839 {
01840 int stack_top=0;
01841 memory_stack_top = &stack_top;
01842
01843 ConverseInit(argc, argv, (CmiStartFn) _initCharm, 0, 0);
01844
01845 return 0;
01846 }
01847
01848 void FTN_NAME(CHARM_MAIN_FORTRAN_WRAPPER, charm_main_fortran_wrapper)(int *argc, char **argv)
01849 {
01850 charm_main(*argc, argv);
01851 }
01852
01853
01854
01855
01856
01857 void registerExitFn(CkExitFn fn)
01858 {
01859 #if CMK_SHRINK_EXPAND
01860 CkAbort("registerExitFn is called when shrink-expand is enabled!");
01861 #else
01862 _CkExitFnVec.enq(fn);
01863 #endif
01864 }
01865