PPL Logo

ck-core/ckmessagelogging.h File Reference

Go to the source code of this file.

Data Structures

class  RSSN
 Class that stores all received-sender-sequence-numbers (rssn) from another object. More...
class  ChareMlogData
 This file includes the definition of the class for storing the meta data associated with the message logging protocol. More...
class  MlogEntry
 Entry in a message log. More...
class  StoredCheckpoint
struct  CheckPointDataMsg
struct  DistributeObjectMsg
struct  RestartRequest
 Struct to request a particular action during restart. More...
struct  RestartProcessorData
struct  ResendRequest
struct  ReceivedTNData
struct  ReceivedDetData
struct  ResendData
struct  MigrationRecord
struct  MigrationNotice
struct  MigrationNoticeAck
struct  RetainedMigratedObject
struct  VerifyAckMsg
struct  CheckpointBarrierMsg
struct  CurrentLocationMsg
struct  LBStepMsg
struct  DummyMigrationMsg

Typedefs

typedef CheckPointDataMsg CheckPointAck
typedef RestartRequest CkPingMsg
typedef RestartRequest CheckpointRequest
typedef ResendRequest RemoveLogRequest
typedef void(* MlogFn )(void *, ChareMlogData *)

Functions

 CpvExtern (Chare *, _currentObj)
 CpvExtern (int, _numImmigrantRecObjs)
void _messageLoggingInit ()
 Initialize message logging data structures and register handlers.
void sendGroupMsg (envelope *env, int destPE, int _infoIdx)
 Sends a group message that might be a broadcast.
void sendArrayMsg (envelope *env, int destPE, int _infoIdx)
 Sends a message to an array element.
void sendChareMsg (envelope *env, int destPE, int _infoIdx, const CkChareID *pCid)
 Sends a message to a singleton chare.
void sendNodeGroupMsg (envelope *env, int destNode, int _infoIdx)
 Sends a nodegroup message that might be a broadcast.
void sendCommonMsg (CkObjID &recver, envelope *env, int destPE, int _infoIdx)
 A method to generate the actual ticket requests for groups, nodegroups or arrays.
void sendRemoteMsg (CkObjID &sender, CkObjID &recver, int destPE, MlogEntry *entry, MCount SN, int resend)
 Method that does the actual send by creating a ticket request filling it up and sending it.
void sendLocalMsg (envelope *env, int _infoIdx)
 Function to send a local message.
void _pingHandler (CkPingMsg *msg)
void _skipCldEnqueue (int pe, envelope *env, int infoFn)
void _noCldNodeEnqueue (int node, envelope *env)
void generalCldEnqueue (int destPE, envelope *env, int _infoIdx)
int preProcessReceivedMessage (envelope *env, Chare **objPointer, MlogEntry **localLogEntry)
void postProcessReceivedMessage (Chare *obj, CkObjID &sender, MCount SN, MlogEntry *entry)
 Updates a few variables once a message has been processed.
 CpvExtern (StoredCheckpoint *, _storedCheckpointData)
void CkStartMlogCheckpoint (CkCallback &cb)
 Starts checkpoint phase at PE 0.
void checkpointAlarm (void *_dummy, double curWallTime)
void startMlogCheckpoint (void *_dummy, double curWallTime)
 Starts the checkpoint phase after migration.
void pupArrayElementsSkip (PUP::er &p, bool create, MigrationRecord *listToSkip, int listSize=0)
 Pups all the array elements in this processor.
void _checkpointRequestHandler (CheckpointRequest *request)
void _storeCheckpointHandler (char *msg)
void _checkpointAckHandler (CheckPointAck *ackMsg)
void _removeProcessedLogHandler (char *requestMsg)
 Removes messages in the log according to the received ticket numbers.
void garbageCollectMlog ()
 Garbage collects the message log and other data structures.
void _startCheckpointHandler (CheckpointBarrierMsg *msg)
 Starts checkpoint: send its checkpoint to its partner.
void _endCheckpointHandler (char *msg)
 Finishes checkpoint process by making the callback.
void CkMlogRestart (const char *dummy, CkArgMsg *dummyMsg)
 Function for restarting the crashed processor.
void CkMlogRestartDouble (void *, double)
void initializeRestart (void *data, ChareMlogData *mlogData)
 Initializes variables and flags for restarting procedure.
void distributeRestartedObjects ()
 Distributes objects to accelerate recovery after a failure.
void sendDummyMigration (int restartPE, CkGroupID lbID, CkGroupID locMgrID, CkArrayIndexMax &idx, int locationPE)
 This method is used to send messages to a restarted processor to tell it that a particular expected object is not going to get to it.
void CkMlogRestartLocal ()
void _getCheckpointHandler (RestartRequest *restartMsg)
 Gets the stored checkpoint for its buddy processor.
void _recvCheckpointHandler (char *_restartData)
 Receives the checkpoint data from its buddy, restores the state of all the objects and asks everyone else to update its home.
void _resendMessagesHandler (char *msg)
 Resends messages since last checkpoint to the list of objects included in the request.
void _sendDetsHandler (char *msg)
 Send all remote determinants to a particular failed PE.
void _sendDetsReplyHandler (char *msg)
 Receives determinants stored on remote nodes.
void _receivedTNDataHandler (ReceivedTNData *msg)
 Receives a list of TNs coming from the home PE of a migrated object (parallel restart).
void _receivedDetDataHandler (ReceivedDetData *msg)
 Receives a list of determinants coming from the home PE of a migrated object (parallel restart).
void _distributedLocationHandler (char *receivedMsg)
 Handler to update information about an object just received.
void _sendBackLocationHandler (char *receivedMsg)
 Handler to receive back a location.
void _updateHomeRequestHandler (RestartRequest *updateRequest)
 Updates the homePe for all chares in this processor.
void _updateHomeAckHandler (RestartRequest *updateHomeAck)
 Receives the updateHome ACKs from all other processors.
void _verifyAckRequestHandler (VerifyAckMsg *verifyRequest)
void _verifyAckHandler (VerifyAckMsg *verifyReply)
void _dummyMigrationHandler (DummyMigrationMsg *msg)
 This handler is used to process a dummy migration msg.
void _restartHandler (RestartRequest *restartMsg)
 Function to restart this processor.
void _getRestartCheckpointHandler (RestartRequest *restartMsg)
 Gets the stored checkpoint but calls another function in the sender.
void _recvRestartCheckpointHandler (char *_restartData)
 Receives the checkpoint coming from its buddy.
void startLoadBalancingMlog (void(*fnPtr)(void *), void *_centralLb)
 Load Balancing.
void finishedCheckpointLoadBalancing ()
void sendMlogLocation (int targetPE, envelope *env)
void resumeFromSyncRestart (void *data, ChareMlogData *mlogData)
void restoreParallelRecovery (void(*fnPtr)(void *), void *_centralLb)
 Restores objects after parallel recovery, either by sending back the immigrant objects or by waiting for all emigrant objects to be back.
void _receiveMlogLocationHandler (void *buf)
void _receiveMigrationNoticeHandler (MigrationNotice *msg)
void _receiveMigrationNoticeAckHandler (MigrationNoticeAck *msg)
void _getGlobalStepHandler (LBStepMsg *msg)
void _recvGlobalStepHandler (LBStepMsg *msg)
 Receives the global step handler from PE 0.
void _checkpointBarrierHandler (CheckpointBarrierMsg *msg)
 Processor 0 receives a contribution from every other processor after checkpoint.
void _checkpointBarrierAckHandler (CheckpointBarrierMsg *msg)
int getCheckPointPE ()
 Getting the pe number of the current processor's buddy.
void forAllCharesDo (MlogFn fnPointer, void *data)
 Map function pointed by fnPointer over all the chares living in this processor.
envelope * copyEnvelope (envelope *env)
void _initDone (void)
 This function (not a handler) is called once and only once per processor.
void _resetNodeBocInitVec (void)
void informLocationHome (CkGroupID mgrID, CkArrayIndexMax idx, int homePE, int currentPE)
 Method that informs an array element's home processor of its current location. It is a converse method to bypass the charm++ message logging framework.
void _receiveLocationHandler (CurrentLocationMsg *data)
void CmiDeliverRemoteMsgHandlerRange (int lowerHandler, int higherHandler)

Variables

char objString [100]
int _checkpointRequestHandlerIdx
int _storeCheckpointHandlerIdx
int _checkpointAckHandlerIdx
int _removeProcessedLogHandlerIdx
int _getCheckpointHandlerIdx
int _recvCheckpointHandlerIdx
int _resendMessagesHandlerIdx
int _sendDetsHandlerIdx
int _sendDetsReplyHandlerIdx
int _receivedTNDataHandlerIdx
int _receivedDetDataHandlerIdx
int _distributedLocationHandlerIdx
int _updateHomeRequestHandlerIdx
int _updateHomeAckHandlerIdx
int _verifyAckRequestHandlerIdx
int _verifyAckHandlerIdx
int _dummyMigrationHandlerIdx
int onGoingLoadBalancing
 For testing on clusters we might carry out restarts on a processor without actually starting it: 1 -> false restart, 0 -> restart after an actual crash.
void * centralLb
void(* resumeLbFnPtr )(void *)
int _receiveMlogLocationHandlerIdx
int _receiveMigrationNoticeHandlerIdx
int _receiveMigrationNoticeAckHandlerIdx
int _getGlobalStepHandlerIdx
int _recvGlobalStepHandlerIdx
int _checkpointBarrierHandlerIdx
int _checkpointBarrierAckHandlerIdx
std::vector< MigrationRecord > migratedNoticeList
std::vector< RetainedMigratedObject * > retainedObjectList
int _receiveLocationHandlerIdx


Typedef Documentation

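typedef CheckPointDataMsg CheckPointAck

Definition at line 259 of file ckmessagelogging.h.

typedef RestartRequest CkPingMsg

Definition at line 270 of file ckmessagelogging.h.

typedef RestartRequest CheckpointRequest

Definition at line 271 of file ckmessagelogging.h.

typedef ResendRequest RemoveLogRequest

Definition at line 291 of file ckmessagelogging.h.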

typedef void(* MlogFn)(void *, ChareMlogData *)

Definition at line 386 of file ckmessagelogging.h.


Function Documentation

CpvExtern ( Chare * ,
_currentObj   
)

CpvExtern ( int  ,
_numImmigrantRecObjs   
)

void _messageLoggingInit (  ) 

Initialize message logging data structures and register handlers.

Definition at line 277 of file ckcausalmlog.C.

References _checkpointAckHandler(), _checkpointAckHandlerIdx, _checkpointBarrierAckHandler(), _checkpointBarrierAckHandlerIdx, _checkpointBarrierHandler(), _checkpointBarrierHandlerIdx, _checkpointRequestHandler(), _checkpointRequestHandlerIdx, _distributedLocationHandler(), _distributedLocationHandlerIdx, _dummyMigrationHandler(), _dummyMigrationHandlerIdx, _endCheckpointHandler(), _endCheckpointIdx, _getCheckpointHandler(), _getCheckpointHandlerIdx, _getGlobalStepHandler(), _getGlobalStepHandlerIdx, _getRestartCheckpointHandler(), _getRestartCheckpointHandlerIdx, _indexBufferedDets, _maxBufferedDets, _numBufferedDets, _phaseBufferedDets, _pingHandler(), _pingHandlerIdx, _receivedDetDataHandler(), _receivedDetDataHandlerIdx, _receivedTNDataHandler(), _receivedTNDataHandlerIdx, _receiveLocationHandler(), _receiveLocationHandlerIdx, _receiveMigrationNoticeAckHandler(), _receiveMigrationNoticeAckHandlerIdx, _receiveMigrationNoticeHandler(), _receiveMigrationNoticeHandlerIdx, _receiveMlogLocationHandler(), _receiveMlogLocationHandlerIdx, _recvCheckpointHandler(), _recvCheckpointHandlerIdx, _recvGlobalStepHandler(), _recvGlobalStepHandlerIdx, _recvRestartCheckpointHandler(), _recvRestartCheckpointHandlerIdx, _removeDeterminantsHandler(), _removeDeterminantsHandlerIdx, _removeProcessedLogHandler(), _removeProcessedLogHandlerIdx, _resendMessagesHandler(), _resendMessagesHandlerIdx, _restartHandler(), _restartHandlerIdx, _sendBackLocationHandler(), _sendBackLocationHandlerIdx, _sendDetsHandler(), _sendDetsHandlerIdx, _sendDetsReplyHandler(), _sendDetsReplyHandlerIdx, _startCheckpointHandler(), _startCheckpointIdx, _storeCheckpointHandler(), _storeCheckpointHandlerIdx, _storeDeterminantsHandler(), _storeDeterminantsHandlerIdx, _updateHomeAckHandler(), _updateHomeAckHandlerIdx, _updateHomeRequestHandler(), _updateHomeRequestHandlerIdx, _verifyAckHandler(), _verifyAckHandlerIdx, _verifyAckRequestHandler(), _verifyAckRequestHandlerIdx, bufferedDetsSize, CcdCallOnCondition(), 
Converse::CkMyPe(), Converse::CkNumPes(), CmiAlloc(), CmiWallTimer(), CqsCreate(), diskCkptFlag, fName, heartBeatCheckHandler(), heartBeatCheckHandlerIdx, heartBeatHandler(), heartBeatHandlerIdx, heartBeatPartner(), int, lastCompletedAlarm, lastRestart, msgLogSize, numDets, numDupDets, numMsgsTarget, numPiggyDets, partnerFailureHandler(), partnerFailureHandlerIdx, sizeMsgsTarget, storedDetsSize, totalMsgsSize, totalMsgsTarget, and traceRegisterUserEvent().

Referenced by _initCharm().

Here is the call graph for this function:

Here is the caller graph for this function:

void sendGroupMsg ( envelope * env,
int  destPE,
int  _infoIdx 
)

Sends a group message that might be a broadcast.

Definition at line 628 of file ckcausalmlog.C.

References CkCopyMsg(), Converse::CkMyPe(), CmiMyPe(), CkObjID::data, EnvToUsr(), envelope::getGroupNum(), _ObjectID::group, _ObjectID::id, _ObjectID::onPE, sendCommonMsg(), sendGroupMsg(), CkObjID::type, TypeGroup, TypeInvalid, and UsrToEnv().

Referenced by _sendMsgBranch(), and sendGroupMsg().

Here is the call graph for this function:

Here is the caller graph for this function:

void sendArrayMsg ( envelope * env,
int  destPE,
int  _infoIdx 
)

Sends a message to an array element.

Definition at line 693 of file ckcausalmlog.C.

References _ObjectID::array, CkArrayIndexBase::asChild(), Converse::CkMyPe(), CkObjID::data, envelope::getArrayMgr(), _ObjectID::s_array::id, _ObjectID::s_array::idx, sendCommonMsg(), CkObjID::toString(), CkObjID::type, and TypeArray.

Referenced by CkArrayManagerDeliver().

Here is the call graph for this function:

Here is the caller graph for this function:

void sendChareMsg ( envelope * env,
int  destPE,
int  _infoIdx,
const CkChareID * pCid 
)

Sends a message to a singleton chare.

Definition at line 715 of file ckcausalmlog.C.

References _ObjectID::chare, Converse::CkMyPe(), CkObjID::data, _ObjectID::id, sendCommonMsg(), CkObjID::toString(), CkObjID::type, TypeArray, and TypeChare.

Referenced by CkSendMsg().

Here is the call graph for this function:

Here is the caller graph for this function:

void sendNodeGroupMsg ( envelope * env,
int  destNode,
int  _infoIdx 
)

Sends a nodegroup message that might be a broadcast.

Definition at line 661 of file ckcausalmlog.C.

References CkCopyMsg(), Converse::CkMyPe(), CkObjID::data, EnvToUsr(), envelope::getGroupNum(), _ObjectID::group, _ObjectID::id, _ObjectID::onPE, sendCommonMsg(), sendNodeGroupMsg(), CkObjID::type, TypeInvalid, TypeNodeGroup, and UsrToEnv().

Referenced by _sendMsgNodeBranch(), and sendNodeGroupMsg().

Here is the call graph for this function:

Here is the caller graph for this function:

void sendCommonMsg ( CkObjID & recver,
envelope * env,
int  destPE,
int  _infoIdx 
)

A method to generate the actual ticket requests for groups, nodegroups or arrays.

Definition at line 736 of file ckcausalmlog.C.

References Converse::CkMyPe(), CmiMemoryCheck(), generalCldEnqueue(), SNToTicket::get(), CkHashtableT< KEY, OBJ >::get(), CkObjID::getObject(), isLocal(), isTeamLocal(), Chare::mlogData, ChareMlogData::nextSN(), sendLocalMsg(), sendMsg(), sendRemoteMsg(), teamSize, ChareMlogData::teamTable, Ticket::TN, CkObjID::toString(), and TypeInvalid.

Referenced by sendArrayMsg(), sendChareMsg(), sendGroupMsg(), and sendNodeGroupMsg().

Here is the call graph for this function:

Here is the caller graph for this function:

void sendRemoteMsg ( CkObjID & sender,
CkObjID & recver,
int  destPE,
MlogEntry * entry,
MCount  SN,
int  resend 
)

Method that does the actual send by creating a ticket request filling it up and sending it.

Definition at line 624 of file ckmessagelogging.C.

References MlogEntry::_infoIdx, ChareMlogData::addLogEntry(), Converse::CkMyPe(), CmiMemoryCheck(), MlogEntry::env, float, generalCldEnqueue(), envelope::getTotalsize(), Chare::mlogData, MLOGFT_totalLogSize, MLOGFT_totalMessages, msgLogSize, numMsgsTarget, sizeMsgsTarget, totalMsgsSize, and totalMsgsTarget.

Referenced by sendCommonMsg().

Here is the call graph for this function:

Here is the caller graph for this function:

void sendLocalMsg ( envelope * env,
int  _infoIdx 
)

Function to send a local message.

It first gets a ticket and then enqueues the message. If we are recovering, then the message is enqueued in a delay queue.

Definition at line 937 of file ckcausalmlog.C.

References _skipCldEnqueue(), CmiMemoryCheck(), and CmiMyPe().

Referenced by sendCommonMsg().

Here is the call graph for this function:

Here is the caller graph for this function:

void _pingHandler ( CkPingMsg * msg  ) 

Definition at line 1343 of file ckcausalmlog.C.

References Converse::CkMyPe(), CmiFree(), and RestartRequest::PE.

Referenced by _messageLoggingInit().

Here is the call graph for this function:

Here is the caller graph for this function:

void _skipCldEnqueue ( int  pe,
envelope * env,
int  infoFn 
)

void _noCldNodeEnqueue ( int  node,
envelope * env 
)

Definition at line 1556 of file ck.C.

References CkPackMessage(), CkRdmaPrepareBcastMsg(), CmiFree(), ConverseDeliver(), envelope::getTotalsize(), and len.

Referenced by _sendMsgNodeBranch(), CkSendMsgNodeBranchImmediate(), and generalCldEnqueue().

Here is the call graph for this function:

Here is the caller graph for this function:

void generalCldEnqueue ( int  destPE,
envelope * env,
int  _infoIdx 
)

Definition at line 1322 of file ckcausalmlog.C.

References _noCldNodeEnqueue(), _skipCldEnqueue(), and TypeNodeGroup.

Referenced by sendCommonMsg(), sendMsg(), and sendRemoteMsg().

Here is the call graph for this function:

Here is the caller graph for this function:

int preProcessReceivedMessage ( envelope * env,
Chare **  objPointer,
MlogEntry **  localLogEntry 
)

void postProcessReceivedMessage ( Chare * obj,
CkObjID & sender,
MCount  SN,
MlogEntry * entry 
)

Updates a few variables once a message has been processed.

Definition at line 1302 of file ckcausalmlog.C.

References Converse::CkMyPe(), CmiMemoryCheck(), MlogEntry::env, CkObjID::guessPE(), Chare::mlogData, and ChareMlogData::tProcessed.

Referenced by _processHandler().

Here is the call graph for this function:

Here is the caller graph for this function:

CpvExtern ( StoredCheckpoint * ,
_storedCheckpointData   
)

void CkStartMlogCheckpoint ( CkCallback & cb  ) 

Starts checkpoint phase at PE 0.

Definition at line 832 of file ckmessagelogging.C.

References _startCheckpointIdx, CmiAlloc(), and Converse::CmiSyncBroadcastAllAndFree().

Here is the call graph for this function:

void checkpointAlarm ( void *  _dummy,
double  curWallTime 
)

Definition at line 1358 of file ckcausalmlog.C.

References _checkpointRequestHandlerIdx, CcdCallFnAfter(), checkpointAlarm(), chkptPeriod, Converse::CkMyPe(), CmiInitMsgHeader(), Converse::CmiSyncBroadcastAll(), RestartRequest::header, lastCompletedAlarm, RestartRequest::PE, and request.

Referenced by checkpointAlarm(), and startMlogCheckpoint().

Here is the call graph for this function:

Here is the caller graph for this function:

void startMlogCheckpoint ( void *  _dummy,
double  curWallTime 
)

void pupArrayElementsSkip ( PUP::er & p,
bool  create,
MigrationRecord * listToSkip,
int  listSize = 0 
)

Pups all the array elements in this processor.

Definition at line 1498 of file ckcausalmlog.C.

References CkCountArrayElements(), Converse::CkMyPe(), CmiMyPe(), flag, CkLocMgr::homePe(), _ckGroupID::idx, idx, MigrationRecord::idx, idx2str(), informLocationHome(), PUP::er::isUnpacking(), CkLocMgr::numLocalElements(), and CkLocMgr::resume().

Referenced by _recvCheckpointHandler(), _recvRestartCheckpointHandler(), _startCheckpointHandler(), and startMlogCheckpoint().

Here is the call graph for this function:

Here is the caller graph for this function:

void _checkpointRequestHandler ( CheckpointRequest * request  ) 

Definition at line 1376 of file ckcausalmlog.C.

References CmiWallTimer(), and startMlogCheckpoint().

Referenced by _messageLoggingInit().

Here is the call graph for this function:

Here is the caller graph for this function:

void _storeCheckpointHandler ( char *  msg  ) 

void _checkpointAckHandler ( CheckPointAck * ackMsg  ) 

void _removeProcessedLogHandler ( char *  requestMsg  ) 

Removes messages in the log according to the received ticket numbers.

Definition at line 1783 of file ckcausalmlog.C.

References Converse::CkMyPe(), clearUpMigratedRetainedLists(), CmiFree(), CmiMemoryCheck(), forAllCharesDo(), ResendRequest::PE, populateDeterminantTable(), removeProcessedLogs(), request, and traceUserBracketEvent().

Referenced by _messageLoggingInit().

Here is the call graph for this function:

Here is the caller graph for this function:

void garbageCollectMlog (  ) 

Garbage collects the message log and other data structures.

In case of synchronized checkpoint, we use an optimization to avoid causal message logging protocol to communicate all determinants to the rest of the processors.

Definition at line 3580 of file ckcausalmlog.C.

References _indexBufferedDets, _numBufferedDets, _phaseBufferedDets, Converse::CkMyPe(), forAllCharesDo(), garbageCollectMlogForChare(), CkHashtableIterator::hasNext(), and CkHashtableIterator::next().

Referenced by _startCheckpointHandler(), and initMlogLBStep().

Here is the call graph for this function:

Here is the caller graph for this function:

void _startCheckpointHandler ( CheckpointBarrierMsg * startMsg  ) 

Starts checkpoint: send its checkpoint to its partner.

This checkpointing strategy is NOT connected to the load balancer, hence onGoingLoadBalancing==0.

Definition at line 847 of file ckmessagelogging.C.

References _storeCheckpointHandlerIdx, buf, bufferedDetsSize, checkpointCount, Converse::CkMyPe(), CkPupGroupData(), CkPupNodeGroupData(), CkPupROData(), CmiAlloc(), CmiFree(), CmiMemoryCheck(), CmiMyPe(), Converse::CmiSyncSendAndFree(), CmiTimer(), CmiWallTimer(), CheckPointDataMsg::dataSize, dataSize, garbageCollectMlog(), getCheckPointPE(), _ckGroupID::idx, inCkptFlag, msgLogSize, CheckPointDataMsg::PE, pupArrayElementsSkip(), PUP::sizer::size(), storedDetsSize, and unAckedCheckpoint.

Referenced by _messageLoggingInit().

Here is the call graph for this function:

Here is the caller graph for this function:

void _endCheckpointHandler ( char *  msg  ) 

Finishes checkpoint process by making the callback.

Definition at line 926 of file ckmessagelogging.C.

References CmiFree(), and CkCallback::send().

Referenced by _messageLoggingInit().

Here is the call graph for this function:

Here is the caller graph for this function:

void CkMlogRestart ( const char *  dummy,
CkArgMsg * dummyMsg 
)

Function for restarting the crashed processor.

It sets the restart flag and contacts the buddy processor to get the latest checkpoint.

Definition at line 1847 of file ckcausalmlog.C.

References _getCheckpointHandlerIdx, _numRestartResponses, _recoveryFlag, _restartFlag, _restartHandlerIdx, Converse::CkMyPe(), Converse::CkNumPes(), CmiInitMsgHeader(), Converse::CmiSyncSend(), CmiWallTimer(), getCheckPointPE(), RestartRequest::header, msg, RestartRequest::PE, and teamSize.

Referenced by _initCharm(), _parseCommandLineOpts(), CkMlogRestartDouble(), and CkMlogRestartLocal().

Here is the call graph for this function:

Here is the caller graph for this function:

void CkMlogRestartDouble ( void *  ,
double   
)

Definition at line 2063 of file ckcausalmlog.C.

References CkMlogRestart().

Here is the call graph for this function:

void initializeRestart ( void *  data,
ChareMlogData * mlogData 
)

Initializes variables and flags for restarting procedure.

Definition at line 2355 of file ckcausalmlog.C.

References ChareMlogData::receivedTNs, ChareMlogData::resendReplyRecvd, and ChareMlogData::restartFlag.

Referenced by _recvCheckpointHandler(), and _recvRestartCheckpointHandler().

Here is the caller graph for this function:

void distributeRestartedObjects (  ) 

Distributes objects to accelerate recovery after a failure.

Definition at line 3075 of file ckcausalmlog.C.

References Converse::CkMyPe().

Referenced by _recvGlobalStepHandler(), _sendDetsReplyHandler(), and _updateHomeAckHandler().

Here is the call graph for this function:

Here is the caller graph for this function:

void sendDummyMigration ( int  restartPE,
CkGroupID  lbID,
CkGroupID  locMgrID,
CkArrayIndexMax & idx,
int  locationPE 
)

This method is used to send messages to a restarted processor to tell it that a particular expected object is not going to get to it.

Definition at line 3168 of file ckcausalmlog.C.

References _dummyMigrationHandlerIdx, buf, CmiInitMsgHeader(), Converse::CmiSyncSend(), DummyMigrationMsg::flag, DummyMigrationMsg::header, DummyMigrationMsg::idx, DummyMigrationMsg::lbID, DummyMigrationMsg::locationPE, and DummyMigrationMsg::mgrID.

Here is the call graph for this function:

void CkMlogRestartLocal (  ) 

Definition at line 2068 of file ckcausalmlog.C.

References CkMlogRestart().

Here is the call graph for this function:

void _getCheckpointHandler ( RestartRequest * restartMsg  ) 

void _recvCheckpointHandler ( char *  _restartData  ) 

void _resendMessagesHandler ( char *  msg  ) 

Resends messages since last checkpoint to the list of objects included in the request.

It also sends stored remote determinants to the particular failed PE.

Definition at line 2663 of file ckcausalmlog.C.

References Converse::CkMyPe(), CmiFree(), CmiMemoryCheck(), CmiResetGlobalReduceSeqID(), CmiWallTimer(), PUP::d, fillTicketForChare(), forAllCharesDo(), isTeamLocal(), lastRestart, ResendData::listObjects, ResendRequest::numberObjects, ResendData::numberObjects, ResendRequest::PE, ResendData::PE, and resendMessageForChare().

Referenced by _messageLoggingInit(), and _recvRestartCheckpointHandler().

Here is the call graph for this function:

Here is the caller graph for this function:

void _sendDetsHandler ( char *  msg  ) 

Send all remote determinants to a particular failed PE.

It only sends determinants to those objects on the list.

Definition at line 2557 of file ckcausalmlog.C.

References _sendDetsReplyHandlerIdx, Converse::CkMyPe(), CmiAlloc(), CmiFree(), CmiMemoryCheck(), CmiResetGlobalReduceSeqID(), Converse::CmiSyncSendAndFree(), CmiWallTimer(), PUP::d, int, lastRestart, ResendData::listObjects, ResendRequest::numberObjects, ResendData::numberObjects, ResendRequest::PE, ResendData::PE, printDet(), TProcessedLog::recver, ResendData::ticketVecs, and TProcessedLog::tProcessed.

Referenced by _messageLoggingInit().

Here is the call graph for this function:

Here is the caller graph for this function:

void _sendDetsReplyHandler ( char *  msg  ) 

void _receivedTNDataHandler ( ReceivedTNData * msg  ) 

Receives a list of TNs coming from the home PE of a migrated object (parallel restart).

Definition at line 2846 of file ckcausalmlog.C.

References CmiFree(), CmiMyPe(), Converse::CmiSyncSendAndFree(), CkObjID::getObject(), CkObjID::guessPE(), Chare::mlogData, ReceivedTNData::numTNs, ChareMlogData::objID, processReceivedTN(), ReceivedTNData::recver, and CkObjID::toString().

Referenced by _messageLoggingInit().

Here is the call graph for this function:

Here is the caller graph for this function:

void _receivedDetDataHandler ( ReceivedDetData * msg  ) 

Receives a list of determinants coming from the home PE of a migrated object (parallel restart).

Definition at line 2828 of file ckcausalmlog.C.

References CmiFree(), CmiMyPe(), Converse::CmiSyncSendAndFree(), CkObjID::getObject(), CkObjID::guessPE(), Chare::mlogData, ReceivedDetData::numDets, ChareMlogData::objID, processReceivedDet(), ReceivedDetData::recver, and CkObjID::toString().

Referenced by _messageLoggingInit().

Here is the call graph for this function:

Here is the caller graph for this function:

void _distributedLocationHandler ( char *  receivedMsg  ) 

void _sendBackLocationHandler ( char *  receivedMsg  ) 

void _updateHomeRequestHandler ( RestartRequest * updateRequest  ) 

void _updateHomeAckHandler ( RestartRequest * updateHomeAck  ) 

Receives the updateHome ACKs from all other processors.

Once everybody has replied, it sends a request to resend the logged messages.

Definition at line 2307 of file ckcausalmlog.C.

References _resendMessagesHandlerIdx, Converse::CkMyPe(), CmiAlloc(), CmiFree(), Converse::CmiSyncBroadcastAllAndFree(), CmiWallTimer(), countUpdateHomeAcks, createObjIDList(), distributeRestartedObjects(), fastRecovery, forAllCharesDo(), lb, ResendRequest::numberObjects, ResendRequest::PE, CentralLB::ReceiveDummyMigration(), and restartDecisionNumber.

Referenced by _messageLoggingInit().

Here is the call graph for this function:

Here is the caller graph for this function:

void _verifyAckRequestHandler ( VerifyAckMsg * verifyRequest  ) 

void _verifyAckHandler ( VerifyAckMsg * verifyReply  ) 

Definition at line 2139 of file ckcausalmlog.C.

References CmiMyPe(), CmiPrintf(), idx, idx2str(), VerifyAckMsg::index, index, migratedNoticeList, VerifyAckMsg::migRecord, sendCheckpointData(), verifyAckCount, and verifyAckTotal.

Referenced by _messageLoggingInit().

Here is the call graph for this function:

Here is the caller graph for this function:

void _dummyMigrationHandler ( DummyMigrationMsg * msg  ) 

This handler is used to process a dummy migration msg.

It looks up the load balancer and calls migrated for it.

Definition at line 3203 of file ckcausalmlog.C.

References CmiFree(), CmiMyPe(), CmiPrintf(), DummyMigrationMsg::count, DummyMigrationMsg::flag, h, DummyMigrationMsg::idx, _ckGroupID::idx, idx2str(), lb, DummyMigrationMsg::lbID, DummyMigrationMsg::locationPE, DummyMigrationMsg::mgrID, CentralLB::Migrated(), and verifyAckedRequests.

Referenced by _messageLoggingInit().

Here is the call graph for this function:

Here is the caller graph for this function:

void _restartHandler ( RestartRequest * restartMsg  ) 

Function to restart this processor.

The handler is invoked by a member of its same team in message logging.

Definition at line 1879 of file ckcausalmlog.C.

References _getRestartCheckpointHandlerIdx, _numRestartResponses, _restartFlag, Converse::CkMyPe(), CmiInitMsgHeader(), Converse::CmiSyncSend(), CmiWallTimer(), getCheckPointPE(), RestartRequest::header, msg, and RestartRequest::PE.

Referenced by _messageLoggingInit().

Here is the call graph for this function:

Here is the caller graph for this function:

void _getRestartCheckpointHandler ( RestartRequest * restartMsg  ) 

Gets the stored checkpoint but calls another function in the sender.

Definition at line 1911 of file ckcausalmlog.C.

References _verifyAckRequestHandlerIdx, CmiInitMsgHeader(), CmiMyPe(), CmiPrintf(), Converse::CmiSyncSend(), VerifyAckMsg::fromPE, VerifyAckMsg::header, idx, idx2str(), VerifyAckMsg::index, migratedNoticeList, VerifyAckMsg::migRecord, msg, StoredCheckpoint::PE, RestartRequest::PE, sendCheckpointData(), verifyAckCount, and verifyAckTotal.

Referenced by _messageLoggingInit().

Here is the call graph for this function:

Here is the caller graph for this function:

void _recvRestartCheckpointHandler ( char *  _restartData  ) 

void startLoadBalancingMlog ( void(*)(void *)  fnPtr,
void *  _centralLb 
)

Load Balancing.

Definition at line 3394 of file ckcausalmlog.C.

References centralLb, CmiMyPe(), CmiWallTimer(), countLBMigratedAway, countLBToMigrate, migrationDoneCalled, resumeLbFnPtr, and startMlogCheckpoint().

Referenced by CentralLB::MigrationDoneImpl().

Here is the call graph for this function:

Here is the caller graph for this function:

void finishedCheckpointLoadBalancing (  ) 

Definition at line 3407 of file ckcausalmlog.C.

References _checkpointBarrierHandlerIdx, CmiAlloc(), CmiMyPe(), CmiReduce(), and doNothingMsg().

Referenced by _checkpointAckHandler().

Here is the call graph for this function:

Here is the caller graph for this function:

void sendMlogLocation ( int  targetPE,
envelope * env 
)

void resumeFromSyncRestart ( void *  data,
ChareMlogData * mlogData 
)

Definition at line 3513 of file ckcausalmlog.C.

void restoreParallelRecovery ( void(*)(void *)  fnPtr,
void *  _centralLb 
)

Restores objects after parallel recovery, either by sending back the immigrant objects or by waiting for all emigrant objects to be back.

Definition at line 3377 of file ckcausalmlog.C.

References centralLb, resumeLbFnPtr, and sendBackImmigrantRecObjs().

Referenced by CentralLB::ReceiveMigration().

Here is the call graph for this function:

Here is the caller graph for this function:

void _receiveMlogLocationHandler ( void *  buf  ) 

Definition at line 3499 of file ckcausalmlog.C.

References Converse::CkMyPe(), CkUnpackMessage(), EnvToUsr(), envelope::getTotalsize(), CkArrayElementMigrateMessage::gid, _ckGroupID::idx, and CkLocMgr::immigrate().

Referenced by _messageLoggingInit().

Here is the call graph for this function:

Here is the caller graph for this function:

void _receiveMigrationNoticeHandler ( MigrationNotice msg  ) 

void _receiveMigrationNoticeAckHandler ( MigrationNoticeAck msg  ) 

void _getGlobalStepHandler ( LBStepMsg msg  ) 

Definition at line 3668 of file ckcausalmlog.C.

References _recvGlobalStepHandlerIdx, CmiMyPe(), CmiPrintf(), Converse::CmiSyncSend(), LBStepMsg::fromPE, _ckGroupID::idx, lb, LBStepMsg::lbID, and LBStepMsg::step.

Referenced by _messageLoggingInit().

Here is the call graph for this function:

Here is the caller graph for this function:

void _recvGlobalStepHandler ( LBStepMsg msg  ) 

void _checkpointBarrierHandler ( CheckpointBarrierMsg msg  ) 

Processor 0 receives a contribution from every other processor after checkpoint.

Definition at line 3527 of file ckcausalmlog.C.

References _checkpointBarrierAckHandlerIdx, CmiAlloc(), CmiFree(), and Converse::CmiSyncBroadcastAllAndFree().

Referenced by _messageLoggingInit().

Here is the call graph for this function:

Here is the caller graph for this function:

void _checkpointBarrierAckHandler ( CheckpointBarrierMsg msg  ) 

Definition at line 3538 of file ckcausalmlog.C.

References centralLb, Converse::CkMyPe(), CmiFree(), CmiMyPe(), CmiPrintf(), inCkptFlag, and sendRemoveLogRequests().

Referenced by _messageLoggingInit().

Here is the call graph for this function:

Here is the caller graph for this function:

int getCheckPointPE (  ) 

Gets the PE number of the current processor's buddy.

In the team-based approach each processor might checkpoint in the next team, but currently teams are only meant to reduce memory overhead. Note: function getReverseCheckPointPE performs the reverse map. It must be changed accordingly.

Definition at line 4097 of file ckcausalmlog.C.

References CmiMyPe().

Referenced by _receiveMigrationNoticeHandler(), _restartHandler(), _startCheckpointHandler(), _updateHomeRequestHandler(), CkMlogRestart(), heartBeatPartner(), sendMlogLocation(), and startMlogCheckpoint().

Here is the call graph for this function:

Here is the caller graph for this function:

void forAllCharesDo ( MlogFn  fnPointer,
void *  data 
)

Maps the function pointed to by fnPointer over all the chares living on this processor.

Definition at line 3254 of file ckcausalmlog.C.

References caller, and Chare::mlogData.

Referenced by _recvCheckpointHandler(), _recvGlobalStepHandler(), _recvRestartCheckpointHandler(), _removeProcessedLogHandler(), _resendMessagesHandler(), _sendDetsReplyHandler(), _updateHomeAckHandler(), _updateHomeRequestHandler(), garbageCollectMlog(), and startMlogCheckpoint().

Here is the caller graph for this function:

envelope* copyEnvelope ( envelope env  ) 

Definition at line 4109 of file ckcausalmlog.C.

References CmiAlloc(), and envelope::getTotalsize().

Referenced by resendMessageForChare().

Here is the call graph for this function:

Here is the caller graph for this function:

void _resetNodeBocInitVec ( void   ) 

void informLocationHome ( CkGroupID  mgrID,
CkArrayIndexMax  idx,
int  homePE,
int  currentPE 
)

Method that informs an array element's home processor of its current location. It is a Converse method, used to bypass the Charm++ message logging framework.

Definition at line 3610 of file ckcausalmlog.C.

References _receiveLocationHandlerIdx, Converse::CkMyPe(), CmiInitMsgHeader(), CmiMyPe(), CmiPrintf(), Converse::CmiSyncSend(), CmiWallTimer(), CurrentLocationMsg::fromPE, CurrentLocationMsg::header, _ckGroupID::idx, CurrentLocationMsg::idx, idx2str(), CurrentLocationMsg::locationPE, CurrentLocationMsg::mgrID, and traceUserBracketEvent().

Referenced by _distributedLocationHandler(), _receiveMigrationNoticeAckHandler(), _sendBackLocationHandler(), pupArrayElementsSkip(), and updateHomePE().

Here is the call graph for this function:

Here is the caller graph for this function:

void _receiveLocationHandler ( CurrentLocationMsg data  ) 

void CmiDeliverRemoteMsgHandlerRange ( int  lowerHandler,
int  higherHandler 
)


Variable Documentation

char objString[100]

Definition at line 177 of file ckcausalmlog.C.

Referenced by buildProcessedTicketLog(), createObjIDList(), and processReceivedTN().

Definition at line 178 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and checkpointAlarm().

Definition at line 180 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and _storeCheckpointHandler().

Definition at line 183 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and sendRemoveLogRequests().

Definition at line 181 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and CkMlogRestart().

Definition at line 182 of file ckcausalmlog.C.

Referenced by _getCheckpointHandler(), _messageLoggingInit(), and sendCheckpointData().

Definition at line 196 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and _recvCheckpointHandler().

Definition at line 197 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and _sendDetsHandler().

Definition at line 198 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and _sendDetsReplyHandler().

Definition at line 199 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and _sendDetsReplyHandler().

Definition at line 200 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and ElementDistributor::addLocation().

Definition at line 193 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

Definition at line 194 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and _updateHomeRequestHandler().

Definition at line 186 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and _verifyAckRequestHandler().

For testing on clusters we might carry out restarts on a processor without actually starting it: 1 -> false restart; 0 -> restart after an actual crash.

Definition at line 227 of file ckcausalmlog.C.

Referenced by _checkpointAckHandler(), initMlogLBStep(), and startMlogCheckpoint().

void* centralLb

void(* resumeLbFnPtr)(void *)

Definition at line 230 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and _receiveMigrationNoticeAckHandler().

Definition at line 232 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and _receiveMigrationNoticeHandler().

Definition at line 190 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and getGlobalStep().

Definition at line 191 of file ckcausalmlog.C.

Referenced by _getGlobalStepHandler(), and _messageLoggingInit().

Definition at line 233 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and finishedCheckpointLoadBalancing().

Definition at line 234 of file ckcausalmlog.C.

Referenced by _checkpointBarrierHandler(), and _messageLoggingInit().

Definition at line 251 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and informLocationHome().


Generated on Mon Sep 21 07:58:22 2020 for Charm++ by  doxygen 1.5.5