#include <ckmemcheckpoint.h>
Definition at line 138 of file ckmemcheckpoint.h.
CkMemCheckPT::CkMemCheckPT | ( | int | w | ) |
Definition at line 328 of file ckmemcheckpoint.C.
References _memChkptOn, ackCount, CcdCallOnCondition(), Converse::CkMyPe(), Converse::CkNumPes(), CmiNumPhysicalNodes(), expectCount, initEntry(), inRestarting, peCount, pingBuddy(), pingCheckHandler(), recvChkpCount, recvCount, and where.
CkMemCheckPT::CkMemCheckPT | ( | CkMigrateMessage * | m | ) | [inline] |
Definition at line 141 of file ckmemcheckpoint.h.
CkMemCheckPT::~CkMemCheckPT | ( | ) | [virtual] |
void CkMemCheckPT::pup | ( | PUP::er & | p | ) |
Definition at line 380 of file ckmemcheckpoint.C.
References ackCount, CcdCallOnCondition(), Converse::CkNumPes(), cpCallback, cpStarter, expectCount, failedPes, inCheckpointing, PUP::er::isUnpacking(), peCount, pingBuddy(), pingCheckHandler(), recvChkpCount, recvCount, thisFailedPe, and where.
Definition at line 138 of file ckmemcheckpoint.C.
References Converse::CkNumPes(), CmiAbort(), CmiGetPesOnPhysicalNode(), CmiNumPhysicalNodes(), CmiPhysicalNodeID(), CmiPhysicalRank(), CmiPrintf(), and isFailed().
Referenced by isMaster().
void CkMemCheckPT::doItNow(int sp, CkCallback && cb)
Definition at line 513 of file ckmemcheckpoint.C.
References CkCheckPTInfo::aid, cb, checkpointed, Converse::CkMyPe(), CkSendMsgArray(), ckTable, CmiWallTimer(), cpCallback, cpFinish(), cpStarter, inCheckpointing, CkCheckPTInfo::index, isMaster(), len, msg, CkCheckPTInfo::pNo, sendProcData(), startArrayCheckpoint(), and startTime.
void CkMemCheckPT::restart | ( | int | diePe | ) |
void CkMemCheckPT::removeArrayElements | ( | ) |
Definition at line 467 of file ckmemcheckpoint.C.
References CkCheckPTInfo::aid, Converse::CkMyPe(), ckTable, CmiAbort(), idx, CkCheckPTInfo::index, len, CkCheckPTInfo::locMgr, and where.
Referenced by recoverEntry().
void CkMemCheckPT::recvData | ( | CkArrayCheckPTMessage * | msg | ) |
Definition at line 709 of file ckmemcheckpoint.C.
References ckTable, CmiAbort(), CkArrayCheckPTMessage::cp_flag, cpFinish(), cpStarter, idx, CkCheckPTInfo::index, CkArrayCheckPTMessage::index, len, CkCheckPTInfo::locMgr, CkArrayCheckPTMessage::locMgr, recvCount, syncFiles(), and where.
Referenced by recoverEntry().
void CkMemCheckPT::gotData | ( | ) |
void CkMemCheckPT::recvProcData | ( | CkProcCheckPTMessage * | msg | ) |
Definition at line 691 of file ckmemcheckpoint.C.
References Converse::CkMyPe(), cpFinish(), recvChkpCount, and CkProcCheckPTMessage::reportPe.
void CkMemCheckPT::cpFinish | ( | ) |
Definition at line 752 of file ckmemcheckpoint.C.
References Converse::CkMyPe(), CmiPrintf(), CmiWallTimer(), cpCallback, cpStarter, peCount, CkCallback::send(), and startTime.
Referenced by doItNow(), recvArrayCheckpoint(), recvData(), recvProcData(), and syncFiles().
void CkMemCheckPT::syncFiles | ( | void | ) |
Definition at line 740 of file ckmemcheckpoint.C.
References CmiAbort(), cpFinish(), and cpStarter.
Referenced by recvArrayCheckpoint(), and recvData().
void CkMemCheckPT::report | ( | ) |
Definition at line 767 of file ckmemcheckpoint.C.
References Converse::CkMyPe(), ckTable, CkCheckPTInfo::getSize(), inCheckpointing, and len.
void CkMemCheckPT::recoverBuddies | ( | ) |
void CkMemCheckPT::recoverEntry | ( | CkArrayCheckPTMessage * | msg | ) |
Definition at line 495 of file ckmemcheckpoint.C.
References CkArrayCheckPTMessage::aid, CkArrayCheckPTMessage::bud1, CkArrayCheckPTMessage::bud2, Converse::CkMyPe(), createEntry(), CkArrayCheckPTMessage::index, initEntry(), CkArrayCheckPTMessage::locMgr, recvArrayCheckpoint(), and recvData().
void CkMemCheckPT::recoverArrayElements | ( | ) |
void CkMemCheckPT::quiescence | ( | CkCallback && | ) |
void CkMemCheckPT::resetReductionMgr | ( | ) |
void CkMemCheckPT::finishUp | ( | ) |
void CkMemCheckPT::gotReply | ( | ) |
void CkMemCheckPT::inmem_restore | ( | CkArrayCheckPTMessage * | m | ) |
Definition at line 407 of file ckmemcheckpoint.C.
References CkArrayCheckPTMessage::aid, CkArrayCheckPTMessage::bud1, CkArrayCheckPTMessage::bud2, ArrayElement::budPEs, c, CkArrayID::ckLocalBranch(), CmiMyPe(), CkArrayCheckPTMessage::index, PUP::l, list, ArrayElement::listenerData, CkArrayCheckPTMessage::locMgr, CkArray::lookup(), CkLocMgr::lookupID(), CkLocMgr::migratableList(), CkMigratable::myRec, p, CkArrayCheckPTMessage::packData, contributorInfo::redNo, and CkLocMgr::resume().
Referenced by isMaster().
void CkMemCheckPT::resetLB | ( | int | diepe | ) |
Definition at line 447 of file ckmemcheckpoint.C.
References failedPes.
Referenced by BuddyPE(), failed(), and isMaster().
void CkMemCheckPT::pupAllElements | ( | PUP::er & | p | ) |
Definition at line 565 of file ckmemcheckpoint.C.
References CkCountArrayElements(), and PUP::er::isUnpacking().
Referenced by startArrayCheckpoint().
void CkMemCheckPT::startArrayCheckpoint | ( | ) |
Definition at line 579 of file ckmemcheckpoint.C.
References CkArrayCheckPTMessage::bud1, CkArrayCheckPTMessage::bud2, chkpTable, ChkptOnPe(), CkCopyMsg(), Converse::CkMyPe(), CkArrayCheckPTMessage::cp_flag, CkArrayCheckPTMessage::len, msg, p, CkArrayCheckPTMessage::packData, pupAllElements(), recvCount, PUP::sizer::size(), size, and CkCheckPTEntry::updateBuffer().
Referenced by doItNow().
void CkMemCheckPT::recvArrayCheckpoint | ( | CkArrayCheckPTMessage * | m | ) |
Definition at line 605 of file ckmemcheckpoint.C.
References CkArrayCheckPTMessage::bud1, chkpTable, Converse::CkMyPe(), CmiAbort(), CkArrayCheckPTMessage::cp_flag, cpFinish(), cpStarter, idx, recvChkpCount, recvCount, syncFiles(), CkCheckPTEntry::updateBuffer(), and where.
Referenced by recoverEntry().
void CkMemCheckPT::recoverAll(CkArrayCheckPTMessage * msg, std::vector< CkGroupID > * gmap = NULL, std::vector< CkArrayIndex > * imap = NULL)
void CkMemCheckPT::initEntry | ( | ) | [private] |
Definition at line 365 of file ckmemcheckpoint.C.
References chkpTable, CkCheckPTEntry::init(), and where.
Referenced by CkMemCheckPT(), and recoverEntry().
Function for killing a process.
Reads the file with the kill information.
Definition at line 791 of file ckmemcheckpoint.C.
References _discard_charm_message(), _handleProcData(), _initDone(), _memChkptOn, _resume_charm_message(), ackCount, argv, barrier, CkArrayCheckPTMessage::bud1, CkArrayCheckPTMessage::bud2, buddyDieHandlerIdx, BuddyPE(), cb, CcdCallFnAfter(), CcdCallOnCondition(), changePhaseHandlerIdx, checkpointed, chkpTable, ChkptOnPe(), CkDieNow(), CkFreeMsg(), CkHasCheckpoints(), CkInLdb(), CkInRestarting(), IrrGroup::ckJustMigrated(), CkMemRestart(), BGConverse::CkMyNode(), BGConverse::CkMyNodeSize(), Converse::CkMyPe(), BGConverse::CkMyRank(), Converse::CkNumPes(), CkPackMessage(), CkResetInLdb(), CkSetInLdb(), CkStartMemCheckpoint(), CkStartQD(), ckTable, CkUnpackMessage(), CmiAbort(), CmiAlloc(), CmiFree(), CmiGetArgFlagDesc(), CmiMyPe(), CmiMyRank(), CmiNodeFirst(), CmiNumPesOnPhysicalNode(), CmiNumPhysicalNodes(), CmiPhysicalNodeID(), CmiPrintf(), CmiReduce(), CmiResetGlobalReduceSeqID(), Converse::CmiSyncBroadcastAllAndFree(), Converse::CmiSyncSendAndFree(), CmiWallTimer(), count, CkArrayCheckPTMessage::cp_flag, cpCallback, CkProcCheckPTMessage::cur_restart_phase, data, doNothingMsg(), EnvToUsr(), expectCount, failed(), failedPes, find_spare_mpirank(), finishUp(), flag, IrrGroup::flushStates(), get_avail_vector(), CkCheckPTEntry::getCopy(), CkCheckPTInfo::getCopy(), CkLocation::getIndex(), envelope::getTotalsize(), gotData(), gotReply(), CkLocMgr::homePe(), _ckGroupID::idx, idx, inCheckpointing, CkArrayCheckPTMessage::index, init_memcheckpt(), inLoadbalancing, inmem_restore(), inRestarting, isFailed(), CkCallback::isInvalid(), PUP::er::isUnpacking(), killFile, killLocal(), killTime, lastPingTime, len, CkArrayCheckPTMessage::locMgr, PUP::m, mpi_restart_crashed(), msg, n, notify_crash_fn, p, CkProcCheckPTMessage::packData, CkArrayCheckPTMessage::packData, peCount, pingBuddy(), pingCheckHandler(), pingCheckHandlerIdx, pingHandlerIdx, CkCheckPTInfo::pNo, CkProcCheckPTMessage::pointer, quiescence(), quietModeRequested, rank, readKillFile(), recoverAll(), 
recoverArrayElements(), recoverBuddies(), recvChkpCount, recvCount, removeArrayElements(), replicaDieBcastHandlerIdx, replicaDieHandlerIdx, resetLB(), resetReductionMgr(), restart(), CkLocMgr::resume(), CkCallback::send(), set_avail_vector(), size, stage, startTime, thisFailedPe, totalFailed(), CkCheckPTInfo::updateBuddy(), CkLocMgr::updateLocation(), updateLocations(), and UsrToEnv().
Referenced by doItNow().
void CkMemCheckPT::failed | ( | int | pe | ) | [private] |
Definition at line 455 of file ckmemcheckpoint.C.
References failedPes, and isFailed().
Referenced by isMaster().
int CkMemCheckPT::totalFailed | ( | ) | [private] |
Definition at line 461 of file ckmemcheckpoint.C.
References failedPes.
Referenced by isMaster().
void CkMemCheckPT::sendProcData | ( | ) | [private] |
Definition at line 669 of file ckmemcheckpoint.C.
References _handleProcData(), ChkptOnPe(), Converse::CkMyPe(), cpStarter, CkProcCheckPTMessage::len, msg, p, CkProcCheckPTMessage::packData, CkProcCheckPTMessage::pe, CkProcCheckPTMessage::reportPe, PUP::sizer::size(), and size.
Referenced by doItNow().
CkCallback CkMemCheckPT::cpCallback [static] |
Definition at line 171 of file ckmemcheckpoint.h.
Referenced by cpFinish(), doItNow(), isMaster(), and pup().
bool CkMemCheckPT::inRestarting = false [static] |
Definition at line 173 of file ckmemcheckpoint.h.
Referenced by CkMemCheckPT(), CkRestartMain(), CkResumeRestartMain(), and isMaster().
bool CkMemCheckPT::inCheckpointing = false [static] |
Definition at line 174 of file ckmemcheckpoint.h.
Referenced by doItNow(), isMaster(), pup(), and report().
bool CkMemCheckPT::inLoadbalancing = false [static] |
double CkMemCheckPT::startTime [static] |
Definition at line 176 of file ckmemcheckpoint.h.
Referenced by cpFinish(), doItNow(), and isMaster().
char * CkMemCheckPT::stage [static] |
std::vector<CkCheckPTInfo *> CkMemCheckPT::ckTable [private] |
Definition at line 180 of file ckmemcheckpoint.h.
Referenced by createEntry(), doItNow(), isMaster(), recvData(), report(), and ~CkMemCheckPT().
CkCheckPTEntry CkMemCheckPT::chkpTable[2] [private] |
Definition at line 181 of file ckmemcheckpoint.h.
Referenced by initEntry(), isMaster(), recvArrayCheckpoint(), and startArrayCheckpoint().
int CkMemCheckPT::recvCount [private] |
Definition at line 183 of file ckmemcheckpoint.h.
Referenced by CkMemCheckPT(), isMaster(), pup(), recvArrayCheckpoint(), recvData(), and startArrayCheckpoint().
int CkMemCheckPT::peCount [private] |
Definition at line 183 of file ckmemcheckpoint.h.
Referenced by CkMemCheckPT(), cpFinish(), isMaster(), and pup().
int CkMemCheckPT::expectCount [private] |
Definition at line 184 of file ckmemcheckpoint.h.
Referenced by CkMemCheckPT(), isMaster(), and pup().
int CkMemCheckPT::ackCount [private] |
Definition at line 184 of file ckmemcheckpoint.h.
Referenced by CkMemCheckPT(), isMaster(), and pup().
int CkMemCheckPT::recvChkpCount [private] |
Definition at line 185 of file ckmemcheckpoint.h.
Referenced by CkMemCheckPT(), isMaster(), pup(), recvArrayCheckpoint(), and recvProcData().
int CkMemCheckPT::cpStarter [private] |
The processor that initiated the checkpointing.
Definition at line 187 of file ckmemcheckpoint.h.
Referenced by cpFinish(), doItNow(), pup(), recvArrayCheckpoint(), recvData(), sendProcData(), and syncFiles().
std::vector<int> CkMemCheckPT::failedPes [private] |
Definition at line 188 of file ckmemcheckpoint.h.
Referenced by failed(), isFailed(), isMaster(), pup(), and totalFailed().
int CkMemCheckPT::thisFailedPe [private] |
int CkMemCheckPT::where [private] |
Selects whether to use memory or disk checkpointing.
Definition at line 192 of file ckmemcheckpoint.h.
Referenced by CkMemCheckPT(), createEntry(), initEntry(), pup(), recvArrayCheckpoint(), and recvData().