PPL Logo

ck-core/ckcausalmlog.C File Reference

Go to the source code of this file.

Data Structures

class  ElementPacker
class  ElementDistributor
class  ElementCaller

Functions

const char * idx2str (const CkArrayIndex &ind)
const char * idx2str (const ArrayElement *el)
void getGlobalStep (CkGroupID gID)
bool fault_aware (CkObjID &recver)
void sendCheckpointData (int mode)
 Sends the checkpoint to its buddy.
void createObjIDList (void *data, ChareMlogData *mlogData)
bool isLocal (int destPE)
 Determines if the message is local or not.
bool isTeamLocal (int destPE)
 Determines if the message is group local or not.
void printLog (TProcessedLog *log)
 Prints a processed log.
void readKillFile ()
 CpvDeclare (Chare *, _currentObj)
 CpvDeclare (StoredCheckpoint *, _storedCheckpointData)
 CpvDeclare (CkQ< MlogEntry * > *, _delayedLocalMsgs)
 CpvDeclare (Queue, _outOfOrderMessageQueue)
 CpvDeclare (Queue, _delayedRemoteMessageQueue)
 CpvDeclare (char **, _bufferedTicketRequests)
 CpvDeclare (int *, _numBufferedTicketRequests)
 CpvDeclare (char *, _localDets)
 CpvDeclare (CkDeterminantHashtableT *, _remoteDets)
 CpvDeclare (char *, _incarnation)
 CpvDeclare (RemoveDeterminantsHeader *, _removeDetsHeader)
 CpvDeclare (StoreDeterminantsHeader *, _storeDetsHeader)
 CpvDeclare (int *, _storeDetsSizes)
 CpvDeclare (char **, _storeDetsPtrs)
 CpvDeclare (int, _numEmigrantRecObjs)
 CpvDeclare (int, _numImmigrantRecObjs)
 CpvDeclare (std::vector< CkLocation * > *, _immigrantRecObjs)
void setTeamRecovery (void *data, ChareMlogData *mlogData)
 Turns on the flag for team recovery that selectively restores particular metadata information.
void unsetTeamRecovery (void *data, ChareMlogData *mlogData)
 Turns off the flag for team recovery.
void mpi_restart_crashed (int pe, int rank)
int find_spare_mpirank (int pe, int partition)
void heartBeatPartner ()
 Pings buddy to let it know this PE is alive.
void heartBeatHandler (void *msg)
 Registers last time it knew about the PE that checkpoints on it.
void heartBeatCheckHandler ()
 Checks whether the PE that checkpoints on it is still alive.
void partnerFailureHandler (char *msg)
 Receives the notification of a failure and updates pe-to-rank mapping.
int getReverseCheckPointPE ()
 Getting the pe that checkpoints on this pe.
static void * doNothingMsg (int *size, void *data, void **remote, int count)
void _messageLoggingInit ()
 Initialize message logging data structures and register handlers.
void killLocal (void *_dummy, double curWallTime)
void readFaultFile ()
 Reads the PE that will be failing throughout the execution and the mean time between failures.
void CkDieNow ()
void addBufferedDeterminant (CkObjID sender, CkObjID receiver, MCount SN, MCount TN)
 Adds a determinant to the buffered determinants and checks whether the array of buffered determinants needs to be extended.
void sendGroupMsg (envelope *env, int destPE, int _infoIdx)
 Sends a group message that might be a broadcast.
void sendNodeGroupMsg (envelope *env, int destNode, int _infoIdx)
 Sends a nodegroup message that might be a broadcast.
void sendArrayMsg (envelope *env, int destPE, int _infoIdx)
 Sends a message to an array element.
void sendChareMsg (envelope *env, int destPE, int _infoIdx, const CkChareID *pCid)
 Sends a message to a singleton chare.
void sendCommonMsg (CkObjID &recver, envelope *_env, int destPE, int _infoIdx)
 A method to generate the actual ticket requests for groups, nodegroups or arrays.
void sendMsg (CkObjID &sender, CkObjID &recver, int destPE, MlogEntry *entry, MCount SN, MCount TN, int resend)
 Method that does the actual send by creating a ticket request filling it up and sending it.
void sendLocalMsg (envelope *env, int _infoIdx)
 Function to send a local message.
void _removeDeterminantsHandler (char *buffer)
 Removes the determinants after a particular index in the _localDets array.
void _storeDeterminantsHandler (char *buffer)
 Stores the determinants coming from other processor.
void _ticketRequestHandler (TicketRequest *ticketRequest)
 If there are any delayed requests, process them first before processing this request.
bool _getTicket (envelope *env, int *flag)
 Gets a ticket for a recently received message.
int preProcessReceivedMessage (envelope *env, Chare **objPointer, MlogEntry **logEntryPointer)
void postProcessReceivedMessage (Chare *obj, CkObjID &sender, MCount SN, MlogEntry *entry)
 Updates a few variables once a message has been processed.
void generalCldEnqueue (int destPE, envelope *env, int _infoIdx)
void _pingHandler (CkPingMsg *msg)
void buildProcessedTicketLog (void *data, ChareMlogData *mlogData)
 A chare adds the latest ticket number processed.
void clearUpMigratedRetainedLists (int PE)
void checkpointAlarm (void *_dummy, double curWallTime)
void _checkpointRequestHandler (CheckpointRequest *request)
void startMlogCheckpoint (void *_dummy, double curWallTime)
 Starts the checkpoint phase after migration.
void pupArrayElementsSkip (PUP::er &p, bool create, MigrationRecord *listToSkip, int listsize)
 Pups all the array elements in this processor.
void readCheckpointFromDisk (int size, char *data)
 Reads a checkpoint from disk.
void writeCheckpointToDisk (int size, char *data)
 Writes a checkpoint to disk.
void _storeCheckpointHandler (char *msg)
void sendRemoveLogRequests ()
 Sends out the messages asking senders to throw away message logs below a certain ticket number.
void _checkpointAckHandler (CheckPointAck *ackMsg)
void populateDeterminantTable (char *data)
 Inserts all the determinants into a hash table.
void removeProcessedLogs (void *_data, ChareMlogData *mlogData)
void _removeProcessedLogHandler (char *requestMsg)
 Removes messages in the log according to the received ticket numbers.
void CkMlogRestart (const char *dummy, CkArgMsg *dummyMsg)
 Function for restarting the crashed processor.
void _restartHandler (RestartRequest *restartMsg)
 Function to restart this processor.
void _getRestartCheckpointHandler (RestartRequest *restartMsg)
 Gets the stored checkpoint but calls another function in the sender.
void _recvRestartCheckpointHandler (char *_restartData)
 Receives the checkpoint coming from its buddy.
void CkMlogRestartDouble (void *, double)
void CkMlogRestartLocal ()
void _getCheckpointHandler (RestartRequest *restartMsg)
 Gets the stored checkpoint for its buddy processor.
void _verifyAckRequestHandler (VerifyAckMsg *verifyRequest)
void _verifyAckHandler (VerifyAckMsg *verifyReply)
void _recvCheckpointHandler (char *_restartData)
 Receives the checkpoint data from its buddy, restores the state of all the objects and asks everyone else to update its home.
void _updateHomeAckHandler (RestartRequest *updateHomeAck)
 Receives the updateHome ACKs from all other processors.
void initializeRestart (void *data, ChareMlogData *mlogData)
 Initializes variables and flags for restarting procedure.
void updateHomePE (void *data, ChareMlogData *mlogData)
 Updates the homePe of chare array elements.
void _updateHomeRequestHandler (RestartRequest *updateRequest)
 Updates the homePe for all chares in this processor.
void fillTicketForChare (void *data, ChareMlogData *mlogData)
 Fills up the ticket vector for each chare.
void printMsg (envelope *env, const char *par)
 Prints information about a message.
void printDet (Determinant *det, const char *par)
 Prints information about a determinant.
void resendMessageForChare (void *data, ChareMlogData *mlogData)
 Resends all the logged messages to a particular chare list.
void _sendDetsHandler (char *msg)
 Sends all remote determinants to a particular failed PE.
void _resendMessagesHandler (char *msg)
 Resends messages since last checkpoint to the list of objects included in the request.
MCount maxVec (std::vector< MCount > *TNvec)
 Returns the maximum ticket from a vector.
void sortVec (std::vector< MCount > *TNvec)
int searchVec (std::vector< MCount > *TNVec, MCount searchTN)
void processDelayedRemoteMsgQueue ()
 Processes the messages in the delayed remote message queue.
void _sendDetsReplyHandler (char *msg)
 Receives determinants stored on remote nodes.
void _receivedDetDataHandler (ReceivedDetData *msg)
 Receives a list of determinants coming from the home PE of a migrated object (parallel restart).
void _receivedTNDataHandler (ReceivedTNData *msg)
 Receives a list of TNs coming from the home PE of a migrated object (parallel restart).
void processReceivedDet (Chare *obj, int listSize, Determinant *listDets)
 Processes the received list of determinants from a particular PE.
void processReceivedTN (Chare *obj, int listSize, MCount *listTNs)
 Processes the received list of tickets from a particular PE.
void distributeRestartedObjects ()
 Distributes objects to accelerate recovery after a failure.
void _sendBackLocationHandler (char *receivedMsg)
 Handler to receive back a location.
void _distributedLocationHandler (char *receivedMsg)
 Handler to update information about an object just received.
void sendDummyMigration (int restartPE, CkGroupID lbID, CkGroupID locMgrID, CkArrayIndexMax &idx, int locationPE)
 This method is used to send messages to a restarted processor to tell it that a particular expected object is not going to get to it.
void sendDummyMigrationCounts (int *dummyCounts)
 This method is used by a restarted processor to tell other processors that they are not going to receive that many objects.
void _dummyMigrationHandler (DummyMigrationMsg *msg)
 This handler is used to process a dummy migration message.
void forAllCharesDo (MlogFn fnPointer, void *data)
 Map function pointed by fnPointer over all the chares living in this processor.
void initMlogLBStep (CkGroupID gid)
 This is the first time Converse is called after AtSync method has been called by every local object.
void pupLocation (CkLocation *loc, CkLocMgr *locMgr, PUP::er &p)
 Pups a location.
void sendBackImmigrantRecObjs ()
 Sends back the immigrant recovering object to their origin PE.
void restoreParallelRecovery (void(*_fnPtr)(void *), void *_centralLb)
 Restores objects after parallel recovery, either by sending back the immigrant objects or by waiting for all emigrant objects to be back.
void startLoadBalancingMlog (void(*_fnPtr)(void *), void *_centralLb)
 Load Balancing.
void finishedCheckpointLoadBalancing ()
void sendMlogLocation (int targetPE, envelope *env)
void _receiveMigrationNoticeHandler (MigrationNotice *msg)
void _receiveMigrationNoticeAckHandler (MigrationNoticeAck *msg)
void _receiveMlogLocationHandler (void *buf)
void resumeFromSyncRestart (void *data, ChareMlogData *mlogData)
void _checkpointBarrierHandler (CheckpointBarrierMsg *barrierMsg)
 Processor 0 receives a contribution from every other processor after checkpoint.
void _checkpointBarrierAckHandler (CheckpointBarrierMsg *msg)
void garbageCollectMlogForChare (void *data, ChareMlogData *mlogData)
 Function to remove all messages in the message log of a particular chare.
void garbageCollectMlog ()
 Garbage collects the message log and other data structures.
void informLocationHome (CkGroupID locMgrID, CkArrayIndexMax idx, int homePE, int currentPE)
 Method that informs an array element's home processor of its current location. It is a Converse method, used to bypass the Charm++ message logging framework.
void _receiveLocationHandler (CurrentLocationMsg *data)
void _getGlobalStepHandler (LBStepMsg *msg)
void _recvGlobalStepHandler (LBStepMsg *msg)
 Receives the global step handler from PE 0.
void _messageLoggingExit ()
 Function to wrap up performance information.
int getCheckPointPE ()
 Getting the pe number of the current processor's buddy.
envelope * copyEnvelope (envelope *env)
bool isSameDet (Determinant *first, Determinant *second)

Variables

bool _recoveryFlag = false
bool _restartFlag = false
int _numRestartResponses = 0
int countHashRefs = 0
int countHashCollisions = 0
char * checkpointDirectory = "."
int unAckedCheckpoint = 0
int countLocal = 0
int countBuffered = 0
int countPiggy = 0
int countClearBufferedLocalCalls = 0
int countUpdateHomeAcks = 0
int teamSize
int chkptPeriod
bool fastRecovery
int parallelRecovery
char * killFile
char * faultFile
int killFlag = 0
int faultFlag = 0
int restartingMlogFlag = 0
double killTime = 0.0
double faultMean
int checkpointCount = 0
int diskCkptFlag = 0
static char fName [100]
int _numBufferedDets
int _indexBufferedDets
int _phaseBufferedDets
int _maxBufferedDets
int * numMsgsTarget
int * sizeMsgsTarget
int totalMsgsTarget
float totalMsgsSize
int numPiggyDets
int numDets
int numDupDets
int msgLogSize
int bufferedDetsSize
int storedDetsSize
float MLOGFT_totalLogSize = 0.0
float MLOGFT_totalMessages = 0.0
float MLOGFT_totalMcastLogSize = 0.0
float MLOGFT_totalReductionLogSize = 0.0
static double adjustChkptPeriod = 0.0
static double nextCheckpointTime = 0.0
static CkHashtableT
< CkHashtableAdaptorT< CkObjID >
, CkHashtableT
< CkHashtableAdaptorT< CkObjID >
, SNToTicket * > * > 
detTable (1000, 0.3)
int _pingHandlerIdx
char objString [100]
int _checkpointRequestHandlerIdx
int _storeCheckpointHandlerIdx
int _checkpointAckHandlerIdx
int _getCheckpointHandlerIdx
int _recvCheckpointHandlerIdx
int _removeProcessedLogHandlerIdx
int _verifyAckRequestHandlerIdx
int _verifyAckHandlerIdx
int _dummyMigrationHandlerIdx
int _getGlobalStepHandlerIdx
int _recvGlobalStepHandlerIdx
int _updateHomeRequestHandlerIdx
int _updateHomeAckHandlerIdx
int _resendMessagesHandlerIdx
int _sendDetsHandlerIdx
int _sendDetsReplyHandlerIdx
int _receivedTNDataHandlerIdx
int _receivedDetDataHandlerIdx
int _distributedLocationHandlerIdx
int _sendBackLocationHandlerIdx
int _storeDeterminantsHandlerIdx
int _removeDeterminantsHandlerIdx
int _restartHandlerIdx
int _getRestartCheckpointHandlerIdx
int _recvRestartCheckpointHandlerIdx
int verifyAckTotal
int verifyAckCount
int verifyAckedRequests = 0
RestartRequest * storedRequest
int _falseRestart = 0
int onGoingLoadBalancing = 0
 For testing on clusters we might carry out restarts on a processor without actually crashing it (see _falseRestart): 1 -> false restart, 0 -> restart after an actual crash.
void * centralLb
void(* resumeLbFnPtr )(void *)
int _receiveMlogLocationHandlerIdx
int _receiveMigrationNoticeHandlerIdx
int _receiveMigrationNoticeAckHandlerIdx
int _checkpointBarrierHandlerIdx
int _checkpointBarrierAckHandlerIdx
std::vector< MigrationRecord > migratedNoticeList
std::vector
< RetainedMigratedObject * > 
retainedObjectList
int donotCountMigration = 0
int countLBMigratedAway = 0
int countLBToMigrate = 0
int migrationDoneCalled = 0
int checkpointBarrierCount = 0
int globalResumeCount = 0
CkGroupID globalLBID
int restartDecisionNumber = -1
double lastCompletedAlarm = 0
double lastRestart = 0
int _receiveLocationHandlerIdx
static int heartBeatHandlerIdx
static int heartBeatCheckHandlerIdx
static int partnerFailureHandlerIdx
static double lastPingTime = -1
int inCkptFlag = 0
int calledRetryTicketRequest = 0
 This method is used to retry the ticket requests that had been queued up earlier.
std::vector< TProcessedLog > processedTicketLog
double totalSearchRestoredTime = 0
double totalSearchRestoredCount = 0


Function Documentation

const char* idx2str ( const CkArrayIndex &  ind  ) 

const char* idx2str ( const ArrayElement *  el  ) 

void getGlobalStep ( CkGroupID  gID  ) 

Definition at line 3657 of file ckcausalmlog.C.

References _getGlobalStepHandlerIdx, CmiInitMsgHeader(), CmiMyPe(), Converse::CmiSyncSend(), LBStepMsg::fromPE, LBStepMsg::header, LBStepMsg::lbID, and LBStepMsg::step.

Referenced by _recvCheckpointHandler().

Here is the call graph for this function:

Here is the caller graph for this function:

bool fault_aware ( CkObjID &  recver  ) 

Definition at line 1165 of file ckcausalmlog.C.

References CkObjID::type, TypeArray, TypeChare, TypeGroup, TypeMainChare, and TypeNodeGroup.

Referenced by preProcessReceivedMessage().

Here is the caller graph for this function:

void sendCheckpointData ( int  mode  ) 

Sends the checkpoint to its buddy.

The mode distinguishes between the two cases: MLOG_RESTARTED: sending the checkpoint to a team member that did not crash but is restarting. MLOG_CRASHED: sending the checkpoint to the processor that crashed.

Definition at line 2155 of file ckcausalmlog.C.

References _recvCheckpointHandlerIdx, _recvRestartCheckpointHandlerIdx, StoredCheckpoint::buf, buf, StoredCheckpoint::bufSize, RestartProcessorData::checkPointSize, Converse::CkMyPe(), CmiAlloc(), CmiFree(), CmiMyPe(), Converse::CmiSyncSendAndFree(), CmiTimer(), diskCkptFlag, RestartProcessorData::lbGroupID, RestartProcessorData::migratedElementSize, migratedNoticeList, msg, RestartProcessorData::numMigratedAwayElements, RestartProcessorData::numMigratedInElements, RestartProcessorData::PE, RestartRequest::PE, readCheckpointFromDisk(), and RestartProcessorData::restartWallTime.

Referenced by _getCheckpointHandler(), _getRestartCheckpointHandler(), and _verifyAckHandler().

Here is the call graph for this function:

Here is the caller graph for this function:

void createObjIDList ( void *  data,
ChareMlogData *  mlogData 
)

bool isLocal ( int  destPE  )  [inline]

Determines if the message is local or not.

A message is local if: 1) Both the destination and origin are the same PE.

Definition at line 828 of file ckcausalmlog.C.

References Converse::CkMyPe().

Referenced by sendCommonMsg().

Here is the call graph for this function:

Here is the caller graph for this function:

bool isTeamLocal ( int  destPE  )  [inline]

Determines if the message is group local or not.

A message is group local if: 1) They belong to the same group in the group-based message logging.

A message is group local if: 1) They belong to the same team in the team-based message logging.

Definition at line 840 of file ckcausalmlog.C.

References Converse::CkMyPe(), and teamSize.

Referenced by _resendMessagesHandler(), sendCommonMsg(), and sendMsg().

Here is the call graph for this function:

Here is the caller graph for this function:

void printLog ( TProcessedLog *  log  ) 

Prints a processed log.

Definition at line 2475 of file ckcausalmlog.C.

References Converse::CkMyPe(), TProcessedLog::recver, CkObjID::toString(), and TProcessedLog::tProcessed.

Referenced by createObjIDList().

Here is the call graph for this function:

Here is the caller graph for this function:

void readKillFile (  ) 

Definition at line 525 of file ckcausalmlog.C.

References CcdCallFnAfter(), Converse::CkMyPe(), CmiWallTimer(), killFile, killLocal(), and killTime.

Referenced by _initCharm(), and CkMemCheckPT::isMaster().

Here is the call graph for this function:

Here is the caller graph for this function:

CpvDeclare ( Chare *  ,
_currentObj   
)

CpvDeclare ( StoredCheckpoint *  ,
_storedCheckpointData   
)

CpvDeclare ( CkQ< MlogEntry * > *  ,
_delayedLocalMsgs   
)

CpvDeclare ( Queue  ,
_outOfOrderMessageQueue   
)

CpvDeclare ( Queue  ,
_delayedRemoteMessageQueue   
)

CpvDeclare ( char **  ,
_bufferedTicketRequests   
)

CpvDeclare ( int *  ,
_numBufferedTicketRequests   
)

CpvDeclare ( char *  ,
_localDets   
)

Note:
All the determinants generated by a PE are stored in variable _localDets. As soon as a message is sent, then all the determinants are appended to the message, but those determinants are not deleted. We must wait until an ACK comes from the receiver to delete the determinants. In the meantime the same determinants may be appended to other messages and more determinants can be added to _localDets. A simple solution to this problem was to have a primitive array and keep adding determinants at the end. However, to avoid multiple copies of determinants, we will keep a pointer to the first 'valid' determinant in the array. Alternatively, we can keep a pointer to the latest determinant and a number of how many valid determinants there are behind it. We do not remove determinants until a checkpoint is made, since these determinants may have to be added to messages in case of a recovery.

CpvDeclare ( CkDeterminantHashtableT *  ,
_remoteDets   
)

CpvDeclare ( char *  ,
_incarnation   
)

CpvDeclare ( RemoveDeterminantsHeader *  ,
_removeDetsHeader   
)

CpvDeclare ( StoreDeterminantsHeader *  ,
_storeDetsHeader   
)

CpvDeclare ( int *  ,
_storeDetsSizes   
)

CpvDeclare ( char **  ,
_storeDetsPtrs   
)

CpvDeclare ( int  ,
_numEmigrantRecObjs   
)

CpvDeclare ( int  ,
_numImmigrantRecObjs   
)

CpvDeclare ( std::vector< CkLocation * > *  ,
_immigrantRecObjs   
)

void setTeamRecovery ( void *  data,
ChareMlogData *  mlogData 
)

Turns on the flag for team recovery that selectively restores particular metadata information.

Definition at line 2460 of file ckcausalmlog.C.

References name, and ChareMlogData::teamRecoveryFlag.

Referenced by _recvRestartCheckpointHandler().

Here is the caller graph for this function:

void unsetTeamRecovery ( void *  data,
ChareMlogData *  mlogData 
)

Turns off the flag for team recovery.

Definition at line 2468 of file ckcausalmlog.C.

References ChareMlogData::teamRecoveryFlag.

Referenced by _recvRestartCheckpointHandler().

Here is the caller graph for this function:

void mpi_restart_crashed ( int  pe,
int  rank 
)

Referenced by CkMemCheckPT::isMaster(), partnerFailureHandler(), and SendMsgBuf().

Here is the caller graph for this function:

int find_spare_mpirank ( int  pe,
int  partition 
)

Referenced by CkMemCheckPT::isMaster(), partnerFailureHandler(), and SendMsgBuf().

Here is the caller graph for this function:

void heartBeatPartner (  ) 

Pings buddy to let it know this PE is alive.

Used for failure detection.

Definition at line 511 of file ckcausalmlog.C.

References CcdCallOnCondition(), CmiAlloc(), CmiMyPe(), Converse::CmiSyncSendAndFree(), getCheckPointPE(), heartBeatHandlerIdx, and msg.

Referenced by _messageLoggingInit().

Here is the call graph for this function:

Here is the caller graph for this function:

void heartBeatHandler ( void *  msg  ) 

Registers last time it knew about the PE that checkpoints on it.

Definition at line 478 of file ckcausalmlog.C.

References CmiFree(), CmiWallTimer(), and lastPingTime.

Referenced by _messageLoggingInit().

Here is the call graph for this function:

Here is the caller graph for this function:

void heartBeatCheckHandler (  ) 

Checks whether the PE that checkpoints on it is still alive.

Definition at line 487 of file ckcausalmlog.C.

References CcdCallOnCondition(), CmiAlloc(), CmiMyPe(), CmiPrintf(), Converse::CmiSyncSendAndFree(), CmiWallTimer(), getReverseCheckPointPE(), inCkptFlag, lastPingTime, msg, and partnerFailureHandlerIdx.

Referenced by _messageLoggingInit(), and partnerFailureHandler().

Here is the call graph for this function:

Here is the caller graph for this function:

void partnerFailureHandler ( char *  msg  ) 

Receives the notification of a failure and updates pe-to-rank mapping.

Definition at line 462 of file ckcausalmlog.C.

References CcdCallOnCondition(), find_spare_mpirank(), getReverseCheckPointPE(), heartBeatCheckHandler(), and mpi_restart_crashed().

Referenced by _messageLoggingInit().

Here is the call graph for this function:

Here is the caller graph for this function:

int getReverseCheckPointPE (  ) 

Getting the pe that checkpoints on this pe.

Definition at line 4104 of file ckcausalmlog.C.

References CmiMyPe().

Referenced by heartBeatCheckHandler(), and partnerFailureHandler().

Here is the call graph for this function:

Here is the caller graph for this function:

static void* doNothingMsg ( int *  size,
void *  data,
void **  remote,
int  count 
) [static]

Definition at line 270 of file ckcausalmlog.C.

Referenced by _checkpointAckHandler(), finishedCheckpointLoadBalancing(), and CkMemCheckPT::isMaster().

Here is the caller graph for this function:

void _messageLoggingInit (  ) 

Initialize message logging data structures and register handlers.

Definition at line 277 of file ckcausalmlog.C.

Referenced by _initCharm().

Here is the caller graph for this function:

void killLocal ( void *  _dummy,
double  curWallTime 
)

Definition at line 562 of file ckcausalmlog.C.

References CcdCallFnAfter(), CkDieNow(), Converse::CkMyPe(), CmiWallTimer(), and killTime.

Referenced by CkDieNow(), CkMemCheckPT::isMaster(), readFaultFile(), and readKillFile().

Here is the call graph for this function:

Here is the caller graph for this function:

void readFaultFile (  ) 

Reads the PE that will be failing throughout the execution and the mean time between failures.

We assume an exponential distribution for the mean-time-between-failures.

Definition at line 546 of file ckcausalmlog.C.

References CcdCallFnAfter(), Converse::CkMyPe(), faultFile, faultMean, and killLocal().

Here is the call graph for this function:

void CkDieNow (  ) 

Definition at line 576 of file ckcausalmlog.C.

Referenced by CkMemCheckPT::isMaster(), killLocal(), and SendMsgBuf().

Here is the caller graph for this function:

void addBufferedDeterminant ( CkObjID  sender,
CkObjID  receiver,
MCount  SN,
MCount  TN 
) [inline]

Adds a determinant to the buffered determinants and checks whether the array of buffered determinants needs to be extended.

Definition at line 591 of file ckcausalmlog.C.

References _indexBufferedDets, _maxBufferedDets, _numBufferedDets, bufferedDetsSize, Converse::CkMyPe(), CmiAlloc(), CmiFree(), numDets, Determinant::receiver, Determinant::sender, Determinant::SN, and Determinant::TN.

Referenced by preProcessReceivedMessage().

Here is the call graph for this function:

Here is the caller graph for this function:

void sendGroupMsg ( envelope *  env,
int  destPE,
int  _infoIdx 
)

Sends a group message that might be a broadcast.

Definition at line 628 of file ckcausalmlog.C.

Referenced by _sendMsgBranch(), and sendGroupMsg().

Here is the caller graph for this function:

void sendNodeGroupMsg ( envelope *  env,
int  destNode,
int  _infoIdx 
)

Sends a nodegroup message that might be a broadcast.

Definition at line 661 of file ckcausalmlog.C.

Referenced by _sendMsgNodeBranch(), and sendNodeGroupMsg().

Here is the caller graph for this function:

void sendArrayMsg ( envelope *  env,
int  destPE,
int  _infoIdx 
)

Sends a message to an array element.

Definition at line 693 of file ckcausalmlog.C.

Referenced by CkArrayManagerDeliver().

Here is the caller graph for this function:

void sendChareMsg ( envelope *  env,
int  destPE,
int  _infoIdx,
const CkChareID *  pCid 
)

Sends a message to a singleton chare.

Definition at line 715 of file ckcausalmlog.C.

Referenced by CkSendMsg().

Here is the caller graph for this function:

void sendCommonMsg ( CkObjID &  recver,
envelope *  _env,
int  destPE,
int  _infoIdx 
)

A method to generate the actual ticket requests for groups, nodegroups or arrays.

Definition at line 736 of file ckcausalmlog.C.

Referenced by sendArrayMsg(), sendChareMsg(), sendGroupMsg(), and sendNodeGroupMsg().

Here is the caller graph for this function:

void sendMsg ( CkObjID &  sender,
CkObjID &  recver,
int  destPE,
MlogEntry *  entry,
MCount  SN,
MCount  TN,
int  resend 
)

void sendLocalMsg ( envelope *  env,
int  _infoIdx 
)

Function to send a local message.

It first gets a ticket and then enqueues the message. If we are recovering, then the message is enqueued in a delay queue.

Definition at line 937 of file ckcausalmlog.C.

Referenced by sendCommonMsg().

Here is the caller graph for this function:

void _removeDeterminantsHandler ( char *  buffer  ) 

Removes the determinants after a particular index in the _localDets array.

Definition at line 1006 of file ckcausalmlog.C.

References _indexBufferedDets, _numBufferedDets, _phaseBufferedDets, CmiFree(), RemoveDeterminantsHeader::index, index, and RemoveDeterminantsHeader::phase.

Referenced by _messageLoggingInit().

Here is the call graph for this function:

Here is the caller graph for this function:

void _storeDeterminantsHandler ( char *  buffer  ) 

void _ticketRequestHandler ( TicketRequest *  ticketRequest  )  [inline]

If there are any delayed requests, process them first before processing this request.

Definition at line 1097 of file ckcausalmlog.C.

References Converse::CkMyPe(), and CmiFree().

Here is the call graph for this function:

bool _getTicket ( envelope *  env,
int *  flag 
) [inline]

Gets a ticket for a recently received message.

Precondition:
env->recver has to be on this processor.
Returns:
Returns true if ticket assignment is successful, otherwise returns false. A false result is due to the fact that we are recovering.

Definition at line 1110 of file ckcausalmlog.C.

References Converse::CkMyPe(), CmiMemoryCheck(), CmiWallTimer(), CkObjID::getObject(), ChareMlogData::getTicket(), Chare::mlogData, ChareMlogData::next_ticket(), ChareMlogData::restartFlag, Ticket::state, teamSize, Ticket::TN, CkObjID::toString(), ChareMlogData::tProcessed, and ChareMlogData::verifyTicket().

Referenced by preProcessReceivedMessage().

Here is the call graph for this function:

Here is the caller graph for this function:

int preProcessReceivedMessage ( envelope *  env,
Chare **  objPointer,
MlogEntry **  logEntryPointer 
)

Definition at line 1181 of file ckcausalmlog.C.

Referenced by _processHandler().

Here is the caller graph for this function:

void postProcessReceivedMessage ( Chare *  obj,
CkObjID &  sender,
MCount  SN,
MlogEntry *  entry 
)

Updates a few variables once a message has been processed.

Definition at line 1302 of file ckcausalmlog.C.

Referenced by _processHandler().

Here is the caller graph for this function:

void generalCldEnqueue ( int  destPE,
envelope *  env,
int  _infoIdx 
)

Definition at line 1322 of file ckcausalmlog.C.

Referenced by sendCommonMsg(), sendMsg(), and sendRemoteMsg().

Here is the caller graph for this function:

void _pingHandler ( CkPingMsg *  msg  ) 

Definition at line 1343 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

Here is the caller graph for this function:

void buildProcessedTicketLog ( void *  data,
ChareMlogData *  mlogData 
)

A chare adds the latest ticket number processed.

Definition at line 1468 of file ckcausalmlog.C.

References Converse::CkMyPe(), ChareMlogData::objID, objString, TProcessedLog::recver, CkObjID::toString(), ChareMlogData::tProcessed, and TProcessedLog::tProcessed.

Referenced by startMlogCheckpoint().

Here is the call graph for this function:

Here is the caller graph for this function:

void clearUpMigratedRetainedLists ( int  PE  ) 

Definition at line 1809 of file ckcausalmlog.C.

References CmiFree(), CmiMemoryCheck(), CmiMyPe(), count, migratedNoticeList, RetainedMigratedObject::msg, and retainedObjectList.

Referenced by _removeProcessedLogHandler(), and sendRemoveLogRequests().

Here is the call graph for this function:

Here is the caller graph for this function:

void checkpointAlarm ( void *  _dummy,
double  curWallTime 
)

Definition at line 1358 of file ckcausalmlog.C.

Referenced by checkpointAlarm(), and startMlogCheckpoint().

Here is the caller graph for this function:

void _checkpointRequestHandler ( CheckpointRequest *  request  ) 

Definition at line 1376 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

Here is the caller graph for this function:

void startMlogCheckpoint ( void *  _dummy,
double  curWallTime 
)

Starts the checkpoint phase after migration.

Definition at line 1383 of file ckcausalmlog.C.

Referenced by _checkpointRequestHandler(), _receiveMigrationNoticeAckHandler(), _updateHomeRequestHandler(), and startLoadBalancingMlog().

Here is the caller graph for this function:

void pupArrayElementsSkip ( PUP::er &  p,
bool  create,
MigrationRecord *  listToSkip,
int  listsize 
)

Pups all the array elements in this processor.

Definition at line 1498 of file ckcausalmlog.C.

Referenced by _recvCheckpointHandler(), _recvRestartCheckpointHandler(), _startCheckpointHandler(), and startMlogCheckpoint().

Here is the caller graph for this function:

void readCheckpointFromDisk ( int  size,
char *  data 
)

Reads a checkpoint from disk.

Assumes variable fName contains the name of the file.

Definition at line 1557 of file ckcausalmlog.C.

References fName.

Referenced by _getCheckpointHandler(), and sendCheckpointData().

Here is the caller graph for this function:

void writeCheckpointToDisk ( int  size,
char *  data 
)

Writes a checkpoint to disk.

Assumes variable fName contains the name of the file.

Definition at line 1566 of file ckcausalmlog.C.

References fName.

Referenced by _storeCheckpointHandler().

Here is the caller graph for this function:

void _storeCheckpointHandler ( char *  msg  ) 

Definition at line 1574 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

Here is the caller graph for this function:

void sendRemoveLogRequests (  ) 

Sends out the messages asking senders to throw away message logs below a certain ticket number.

Note:
The remove log request message looks like |RemoveLogRequest||List of TProcessedLog||Number of Determinants||List of Determinants|

Definition at line 1632 of file ckcausalmlog.C.

References _removeProcessedLogHandlerIdx, Converse::CkMyPe(), Converse::CkNumPes(), clearUpMigratedRetainedLists(), CmiAbort(), CmiAlloc(), CmiFree(), CmiMemoryCheck(), CmiMyPe(), Converse::CmiSyncSend(), ResendRequest::numberObjects, ResendRequest::PE, processedTicketLog, request, and traceUserBracketEvent().

Referenced by _checkpointAckHandler(), and _checkpointBarrierAckHandler().

Here is the call graph for this function:

Here is the caller graph for this function:

void _checkpointAckHandler ( CheckPointAck ackMsg  ) 

Definition at line 1671 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

Here is the caller graph for this function:

void populateDeterminantTable ( char *  data  )  [inline]

Inserts all the determinants into a hash table.

Definition at line 1690 of file ckcausalmlog.C.

References CmiMemoryCheck(), detTable, CkHashtableT< KEY, OBJ >::get(), list, ResendRequest::numberObjects, numDets, SNToTicket::put(), CkHashtableTslow< KEY, OBJ >::put(), request, Determinant::TN, and Ticket::TN.

Referenced by _removeProcessedLogHandler().

Here is the call graph for this function:

Here is the caller graph for this function:

void removeProcessedLogs ( void *  _data,
ChareMlogData mlogData 
)

void _removeProcessedLogHandler ( char *  requestMsg  ) 

Removes messages in the log according to the received ticket numbers.

Definition at line 1783 of file ckcausalmlog.C.

References Converse::CkMyPe(), clearUpMigratedRetainedLists(), CmiFree(), CmiMemoryCheck(), forAllCharesDo(), ResendRequest::PE, populateDeterminantTable(), removeProcessedLogs(), request, and traceUserBracketEvent().

Referenced by _messageLoggingInit().

Here is the call graph for this function:

Here is the caller graph for this function:

void CkMlogRestart ( const char *  dummy,
CkArgMsg dummyMsg 
)

Function for restarting the crashed processor.

It sets the restart flag and contacts the buddy processor to get the latest checkpoint.

Definition at line 1847 of file ckcausalmlog.C.

Referenced by _initCharm(), _parseCommandLineOpts(), CkMlogRestartDouble(), and CkMlogRestartLocal().

Here is the caller graph for this function:

void _restartHandler ( RestartRequest restartMsg  ) 

Function to restart this processor.

The handler is invoked by a member of the same team in message logging.

Definition at line 1879 of file ckcausalmlog.C.

References _getRestartCheckpointHandlerIdx, _numRestartResponses, _restartFlag, Converse::CkMyPe(), CmiInitMsgHeader(), Converse::CmiSyncSend(), CmiWallTimer(), getCheckPointPE(), RestartRequest::header, msg, and RestartRequest::PE.

Referenced by _messageLoggingInit().

Here is the call graph for this function:

Here is the caller graph for this function:

void _getRestartCheckpointHandler ( RestartRequest restartMsg  ) 

Gets the stored checkpoint but calls another function in the sender.

Definition at line 1911 of file ckcausalmlog.C.

References _verifyAckRequestHandlerIdx, CmiInitMsgHeader(), CmiMyPe(), CmiPrintf(), Converse::CmiSyncSend(), VerifyAckMsg::fromPE, VerifyAckMsg::header, idx, idx2str(), VerifyAckMsg::index, migratedNoticeList, VerifyAckMsg::migRecord, msg, StoredCheckpoint::PE, RestartRequest::PE, sendCheckpointData(), verifyAckCount, and verifyAckTotal.

Referenced by _messageLoggingInit().

Here is the call graph for this function:

Here is the caller graph for this function:

void _recvRestartCheckpointHandler ( char *  _restartData  ) 

void CkMlogRestartDouble ( void *  ,
double   
)

Definition at line 2063 of file ckcausalmlog.C.

void CkMlogRestartLocal (  ) 

Definition at line 2068 of file ckcausalmlog.C.

References CkMlogRestart().

Here is the call graph for this function:

void _getCheckpointHandler ( RestartRequest restartMsg  ) 

Gets the stored checkpoint for its buddy processor.

Definition at line 2075 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

Here is the caller graph for this function:

void _verifyAckRequestHandler ( VerifyAckMsg verifyRequest  ) 

void _verifyAckHandler ( VerifyAckMsg verifyReply  ) 

Definition at line 2139 of file ckcausalmlog.C.

References CmiMyPe(), CmiPrintf(), idx, idx2str(), VerifyAckMsg::index, index, migratedNoticeList, VerifyAckMsg::migRecord, sendCheckpointData(), verifyAckCount, and verifyAckTotal.

Referenced by _messageLoggingInit().

Here is the call graph for this function:

Here is the caller graph for this function:

void _recvCheckpointHandler ( char *  _restartData  ) 

Receives the checkpoint data from its buddy, restores the state of all the objects and asks everyone else to update its home.

Definition at line 2234 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

Here is the caller graph for this function:

void _updateHomeAckHandler ( RestartRequest updateHomeAck  ) 

Receives the updateHome ACKs from all other processors.

Once everybody has replied, it sends a request to resend the logged messages.

Definition at line 2307 of file ckcausalmlog.C.

References _resendMessagesHandlerIdx, Converse::CkMyPe(), CmiAlloc(), CmiFree(), Converse::CmiSyncBroadcastAllAndFree(), CmiWallTimer(), countUpdateHomeAcks, createObjIDList(), distributeRestartedObjects(), fastRecovery, forAllCharesDo(), lb, ResendRequest::numberObjects, ResendRequest::PE, CentralLB::ReceiveDummyMigration(), and restartDecisionNumber.

Referenced by _messageLoggingInit().

Here is the call graph for this function:

Here is the caller graph for this function:

void initializeRestart ( void *  data,
ChareMlogData mlogData 
)

Initializes variables and flags for restarting procedure.

Definition at line 2355 of file ckcausalmlog.C.

Referenced by _recvCheckpointHandler(), and _recvRestartCheckpointHandler().

Here is the caller graph for this function:

void updateHomePE ( void *  data,
ChareMlogData mlogData 
)

Updates the homePe of chare array elements.

Definition at line 2364 of file ckcausalmlog.C.

References _ObjectID::array, CkArrayIndexBase::asChild(), CkArrayID::ckLocalBranch(), Converse::CkMyPe(), CkObjID::data, CkLocMgr::getGroupID(), CkArray::getLocMgr(), CkLocMgr::homePe(), _ObjectID::s_array::id, _ObjectID::s_array::idx, informLocationHome(), ChareMlogData::objID, RestartRequest::PE, CkObjID::type, and TypeArray.

Referenced by _updateHomeRequestHandler().

Here is the call graph for this function:

Here is the caller graph for this function:

void _updateHomeRequestHandler ( RestartRequest updateRequest  ) 

void fillTicketForChare ( void *  data,
ChareMlogData mlogData 
)

Fills up the ticket vector for each chare.

Definition at line 2418 of file ckcausalmlog.C.

References count, SNToTicket::get(), SNToTicket::getFinishSN(), SNToTicket::getStartSN(), CkHashtable::iterator(), ResendData::listObjects, name, CkHashtableIterator::next(), ResendData::numberObjects, ResendData::PE, ChareMlogData::teamTable, ResendData::ticketVecs, and Ticket::TN.

Referenced by _resendMessagesHandler().

Here is the call graph for this function:

Here is the caller graph for this function:

void printMsg ( envelope env,
const char *  par 
)

Prints information about a message.

Definition at line 2483 of file ckcausalmlog.C.

References Converse::CkMyPe().

Referenced by processDelayedRemoteMsgQueue(), and resendMessageForChare().

Here is the call graph for this function:

Here is the caller graph for this function:

void printDet ( Determinant det,
const char *  par 
)

Prints information about a determinant.

Definition at line 2492 of file ckcausalmlog.C.

References Converse::CkMyPe(), Determinant::receiver, Determinant::sender, Determinant::SN, Determinant::TN, and CkObjID::toString().

Referenced by _sendDetsHandler(), and processReceivedDet().

Here is the call graph for this function:

Here is the caller graph for this function:

void resendMessageForChare ( void *  data,
ChareMlogData mlogData 
)

Resends all the logged messages to a particular chare list.

Parameters:
data is of type ResendData which contains the array of objects on the restartedProcessor.
mlogData a particular chare living in this processor.

Definition at line 2503 of file ckcausalmlog.C.

References Converse::CkMyPe(), Converse::CmiSyncSend(), copyEnvelope(), count, CqsEnqueueGeneral(), MlogEntry::env, ChareMlogData::getMlog(), envelope::getPriobits(), envelope::getPrioPtr(), envelope::getQueueing(), envelope::getTotalsize(), CkQ< T >::length(), ResendData::listObjects, ResendData::numberObjects, ChareMlogData::objID, ResendData::PE, printMsg(), CkObjID::toString(), TypeInvalid, and TypeNodeGroup.

Referenced by _resendMessagesHandler().

Here is the call graph for this function:

Here is the caller graph for this function:

void _sendDetsHandler ( char *  msg  ) 

Send all remote determinants to a particular failed PE.

It only sends determinants to those objects on the list.

Definition at line 2557 of file ckcausalmlog.C.

References _sendDetsReplyHandlerIdx, Converse::CkMyPe(), CmiAlloc(), CmiFree(), CmiMemoryCheck(), CmiResetGlobalReduceSeqID(), Converse::CmiSyncSendAndFree(), CmiWallTimer(), PUP::d, int, lastRestart, ResendData::listObjects, ResendRequest::numberObjects, ResendData::numberObjects, ResendRequest::PE, ResendData::PE, printDet(), TProcessedLog::recver, ResendData::ticketVecs, and TProcessedLog::tProcessed.

Referenced by _messageLoggingInit().

Here is the call graph for this function:

Here is the caller graph for this function:

void _resendMessagesHandler ( char *  msg  ) 

Resends messages since last checkpoint to the list of objects included in the request.

It also sends stored remote determinants to the particular failed PE.

Definition at line 2663 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and _recvRestartCheckpointHandler().

Here is the caller graph for this function:

MCount maxVec ( std::vector< MCount > *  TNvec  ) 

Returns the maximum ticket from a vector.

Definition at line 2969 of file ckcausalmlog.C.

References max().

Referenced by processReceivedTN().

Here is the call graph for this function:

Here is the caller graph for this function:

void sortVec ( std::vector< MCount > *  TNvec  ) 

Definition at line 2978 of file ckcausalmlog.C.

References sort().

Referenced by processReceivedTN().

Here is the call graph for this function:

Here is the caller graph for this function:

int searchVec ( std::vector< MCount > *  TNVec,
MCount  searchTN 
)

Definition at line 2983 of file ckcausalmlog.C.

Referenced by processReceivedTN().

Here is the caller graph for this function:

void processDelayedRemoteMsgQueue (  ) 

Processes the messages in the delayed remote message queue.

Definition at line 2700 of file ckcausalmlog.C.

References Converse::CkMyPe(), CmiMemoryCheck(), CqsDequeue(), CqsEmpty(), CqsEnqueueGeneral(), envelope::getPriobits(), envelope::getPrioPtr(), and printMsg().

Here is the call graph for this function:

void _sendDetsReplyHandler ( char *  msg  ) 

void _receivedDetDataHandler ( ReceivedDetData msg  ) 

Receives a list of determinants coming from the home PE of a migrated object (parallel restart).

Definition at line 2828 of file ckcausalmlog.C.

References CmiFree(), CmiMyPe(), Converse::CmiSyncSendAndFree(), CkObjID::getObject(), CkObjID::guessPE(), Chare::mlogData, ReceivedDetData::numDets, ChareMlogData::objID, processReceivedDet(), ReceivedDetData::recver, and CkObjID::toString().

Referenced by _messageLoggingInit().

Here is the call graph for this function:

Here is the caller graph for this function:

void _receivedTNDataHandler ( ReceivedTNData msg  ) 

Receives a list of TNs coming from the home PE of a migrated object (parallel restart).

Definition at line 2846 of file ckcausalmlog.C.

References CmiFree(), CmiMyPe(), Converse::CmiSyncSendAndFree(), CkObjID::getObject(), CkObjID::guessPE(), Chare::mlogData, ReceivedTNData::numTNs, ChareMlogData::objID, processReceivedTN(), ReceivedTNData::recver, and CkObjID::toString().

Referenced by _messageLoggingInit().

Here is the call graph for this function:

Here is the caller graph for this function:

void processReceivedDet ( Chare obj,
int  listSize,
Determinant listDets 
)

Processes the received list of determinants from a particular PE.

Definition at line 2864 of file ckcausalmlog.C.

References Converse::CkMyPe(), CmiMemoryCheck(), Chare::mlogData, printDet(), Determinant::sender, Determinant::SN, Determinant::TN, and ChareMlogData::verifyTicket().

Referenced by _receivedDetDataHandler(), and _sendDetsReplyHandler().

Here is the call graph for this function:

Here is the caller graph for this function:

void processReceivedTN ( Chare obj,
int  listSize,
MCount *  listTNs 
)

void distributeRestartedObjects (  ) 

Distributes objects to accelerate recovery after a failure.

Definition at line 3075 of file ckcausalmlog.C.

Referenced by _recvGlobalStepHandler(), _sendDetsReplyHandler(), and _updateHomeAckHandler().

Here is the caller graph for this function:

void _sendBackLocationHandler ( char *  receivedMsg  ) 

Handler to receive back a location.

Definition at line 3085 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

Here is the caller graph for this function:

void _distributedLocationHandler ( char *  receivedMsg  ) 

Handler to update information about an object just received.

Definition at line 3122 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

Here is the caller graph for this function:

void sendDummyMigration ( int  restartPE,
CkGroupID  lbID,
CkGroupID  locMgrID,
CkArrayIndexMax idx,
int  locationPE 
)

This method is used to send messages to a restarted processor to tell it that a particular expected object is not going to get to it.

Definition at line 3168 of file ckcausalmlog.C.

void _dummyMigrationHandler ( DummyMigrationMsg msg  ) 

This handler is used to process a dummy migration msg.

It looks up the load balancer and calls migrated for it.

Definition at line 3203 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

Here is the caller graph for this function:

void forAllCharesDo ( MlogFn  fnPointer,
void *  data 
)

Maps the function pointed to by fnPointer over all the chares living in this processor.

Definition at line 3254 of file ckcausalmlog.C.

Referenced by _recvCheckpointHandler(), _recvGlobalStepHandler(), _recvRestartCheckpointHandler(), _removeProcessedLogHandler(), _resendMessagesHandler(), _sendDetsReplyHandler(), _updateHomeAckHandler(), _updateHomeRequestHandler(), garbageCollectMlog(), and startMlogCheckpoint().

Here is the caller graph for this function:

void pupLocation ( CkLocation loc,
CkLocMgr locMgr,
PUP::er p 
)

Pups a location.

Definition at line 3298 of file ckcausalmlog.C.

References IrrGroup::ckGetGroupID(), CkLocation::getIndex(), and idx.

Referenced by sendBackImmigrantRecObjs().

Here is the call graph for this function:

Here is the caller graph for this function:

void sendBackImmigrantRecObjs (  ) 

void restoreParallelRecovery ( void(*)(void *)  _fnPtr,
void *  _centralLb 
)

Restores objects after parallel recovery, either by sending back the immigrant objects or by waiting for all emigrant objects to be back.

Definition at line 3377 of file ckcausalmlog.C.

Referenced by CentralLB::ReceiveMigration().

Here is the caller graph for this function:

void startLoadBalancingMlog ( void(*)(void *)  _fnPtr,
void *  _centralLb 
)

Load Balancing.

Definition at line 3394 of file ckcausalmlog.C.

Referenced by CentralLB::MigrationDoneImpl().

Here is the caller graph for this function:

void finishedCheckpointLoadBalancing (  ) 

Definition at line 3407 of file ckcausalmlog.C.

Referenced by _checkpointAckHandler().

Here is the caller graph for this function:

void sendMlogLocation ( int  targetPE,
envelope env 
)

void _receiveMigrationNoticeHandler ( MigrationNotice msg  ) 

void _receiveMigrationNoticeAckHandler ( MigrationNoticeAck msg  ) 

void _receiveMlogLocationHandler ( void *  buf  ) 

Definition at line 3499 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

Here is the caller graph for this function:

void resumeFromSyncRestart ( void *  data,
ChareMlogData mlogData 
)

Definition at line 3513 of file ckcausalmlog.C.

void _checkpointBarrierHandler ( CheckpointBarrierMsg barrierMsg  ) 

Processor 0 receives a contribution from every other processor after checkpoint.

Definition at line 3527 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

Here is the caller graph for this function:

void _checkpointBarrierAckHandler ( CheckpointBarrierMsg msg  ) 

Definition at line 3538 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

Here is the caller graph for this function:

void garbageCollectMlogForChare ( void *  data,
ChareMlogData mlogData 
)

Function to remove all messages in the message log of a particular chare.

Definition at line 3561 of file ckcausalmlog.C.

References CkQ< T >::deq(), ChareMlogData::getMlog(), and CkQ< T >::length().

Referenced by garbageCollectMlog().

Here is the call graph for this function:

Here is the caller graph for this function:

void garbageCollectMlog (  ) 

Garbage collects the message log and other data structures.

In the case of a synchronized checkpoint, we use an optimization that avoids having the causal message logging protocol communicate all determinants to the rest of the processors.

Definition at line 3580 of file ckcausalmlog.C.

Referenced by _startCheckpointHandler(), and initMlogLBStep().

Here is the caller graph for this function:

void informLocationHome ( CkGroupID  locMgrID,
CkArrayIndexMax  idx,
int  homePE,
int  currentPE 
)

Method that informs an array element's home processor of its current location. It is a Converse method to bypass the Charm++ message logging framework.

Definition at line 3610 of file ckcausalmlog.C.

Referenced by _distributedLocationHandler(), _receiveMigrationNoticeAckHandler(), _sendBackLocationHandler(), pupArrayElementsSkip(), and updateHomePE().

Here is the caller graph for this function:

void _receiveLocationHandler ( CurrentLocationMsg data  ) 

Definition at line 3626 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

Here is the caller graph for this function:

void _getGlobalStepHandler ( LBStepMsg msg  ) 

Definition at line 3668 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

Here is the caller graph for this function:

void _recvGlobalStepHandler ( LBStepMsg msg  ) 

Receives the global step handler from PE 0.

Definition at line 3680 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

Here is the caller graph for this function:

int getCheckPointPE (  ) 

Getting the pe number of the current processor's buddy.

In the team-based approach each processor might checkpoint in the next team, but currently teams are only meant to reduce memory overhead. Note: function getReverseCheckPointPE performs the reverse map. It must be changed accordingly.

Definition at line 4097 of file ckcausalmlog.C.

Referenced by _receiveMigrationNoticeHandler(), _restartHandler(), _startCheckpointHandler(), _updateHomeRequestHandler(), CkMlogRestart(), heartBeatPartner(), sendMlogLocation(), and startMlogCheckpoint().

Here is the caller graph for this function:

envelope* copyEnvelope ( envelope env  ) 

Definition at line 4109 of file ckcausalmlog.C.

Referenced by resendMessageForChare().

Here is the caller graph for this function:

bool isSameDet ( Determinant first,
Determinant second 
) [inline]

Definition at line 4116 of file ckcausalmlog.C.

References Determinant::receiver, Determinant::sender, Determinant::SN, and Determinant::TN.

Referenced by _storeDeterminantsHandler().

Here is the caller graph for this function:


Variable Documentation

Definition at line 58 of file ckcausalmlog.C.

Referenced by CkMlogRestart(), preProcessReceivedMessage(), and startMlogCheckpoint().

Definition at line 63 of file ckcausalmlog.C.

Definition at line 64 of file ckcausalmlog.C.

char* checkpointDirectory = "."

Definition at line 66 of file ckcausalmlog.C.

Definition at line 69 of file ckcausalmlog.C.

Definition at line 69 of file ckcausalmlog.C.

Definition at line 70 of file ckcausalmlog.C.

Definition at line 71 of file ckcausalmlog.C.

Definition at line 73 of file ckcausalmlog.C.

Referenced by _updateHomeAckHandler().

char* faultFile

Definition at line 81 of file ckcausalmlog.C.

Referenced by readFaultFile().

Definition at line 83 of file ckcausalmlog.C.

Definition at line 84 of file ckcausalmlog.C.

double killTime = 0.0

Definition at line 86 of file ckcausalmlog.C.

Referenced by CkDieNow(), CkMemCheckPT::isMaster(), killLocal(), and readKillFile().

double faultMean

Definition at line 87 of file ckcausalmlog.C.

Referenced by readFaultFile().

char fName[100] [static]

Definition at line 125 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and addBufferedDeterminant().

Definition at line 148 of file ckcausalmlog.C.

Referenced by _messageLoggingExit(), _messageLoggingInit(), sendMsg(), and sendRemoteMsg().

Definition at line 149 of file ckcausalmlog.C.

Referenced by _messageLoggingExit(), _messageLoggingInit(), sendMsg(), and sendRemoteMsg().

Definition at line 150 of file ckcausalmlog.C.

Referenced by _messageLoggingExit(), _messageLoggingInit(), sendMsg(), and sendRemoteMsg().

Definition at line 151 of file ckcausalmlog.C.

Referenced by _messageLoggingExit(), _messageLoggingInit(), sendMsg(), and sendRemoteMsg().

Definition at line 154 of file ckcausalmlog.C.

Referenced by _messageLoggingExit(), _messageLoggingInit(), and sendMsg().

Definition at line 165 of file ckcausalmlog.C.

Referenced by _messageLoggingExit(), sendMsg(), and sendRemoteMsg().

Definition at line 166 of file ckcausalmlog.C.

Referenced by _messageLoggingExit(), sendMsg(), and sendRemoteMsg().

Definition at line 167 of file ckcausalmlog.C.

Referenced by _messageLoggingExit(), and sendMsg().

Definition at line 168 of file ckcausalmlog.C.

Referenced by _messageLoggingExit(), and sendMsg().

double adjustChkptPeriod = 0.0 [static]

Definition at line 171 of file ckcausalmlog.C.

Referenced by _recvCheckpointHandler(), and _recvRestartCheckpointHandler().

double nextCheckpointTime = 0.0 [static]

Definition at line 172 of file ckcausalmlog.C.

Definition at line 175 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

char objString[100]

Definition at line 177 of file ckcausalmlog.C.

Referenced by buildProcessedTicketLog(), createObjIDList(), and processReceivedTN().

Definition at line 178 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and checkpointAlarm().

Definition at line 180 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and _storeCheckpointHandler().

Definition at line 181 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and CkMlogRestart().

Definition at line 182 of file ckcausalmlog.C.

Referenced by _getCheckpointHandler(), _messageLoggingInit(), and sendCheckpointData().

Definition at line 183 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and sendRemoveLogRequests().

Definition at line 186 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and _verifyAckRequestHandler().

Definition at line 190 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and getGlobalStep().

Definition at line 191 of file ckcausalmlog.C.

Referenced by _getGlobalStepHandler(), and _messageLoggingInit().

Definition at line 193 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

Definition at line 194 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and _updateHomeRequestHandler().

Definition at line 196 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and _recvCheckpointHandler().

Definition at line 197 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and _sendDetsHandler().

Definition at line 198 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and _sendDetsReplyHandler().

Definition at line 199 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and _sendDetsReplyHandler().

Definition at line 200 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and ElementDistributor::addLocation().

Definition at line 201 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and sendBackImmigrantRecObjs().

Definition at line 202 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and sendMsg().

Definition at line 203 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and _storeDeterminantsHandler().

Definition at line 206 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and CkMlogRestart().

Definition at line 207 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and _restartHandler().

Definition at line 208 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and sendCheckpointData().

Definition at line 215 of file ckcausalmlog.C.

Referenced by _dummyMigrationHandler(), and _verifyAckRequestHandler().

Definition at line 217 of file ckcausalmlog.C.

Definition at line 219 of file ckcausalmlog.C.

For testing on clusters we might carry out restarts on a processor without actually starting it: 1 -> false restart, 0 -> restart after an actual crash.

Definition at line 227 of file ckcausalmlog.C.

Referenced by _checkpointAckHandler(), initMlogLBStep(), and startMlogCheckpoint().

void* centralLb

void(* resumeLbFnPtr)(void *)

Definition at line 230 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and _receiveMigrationNoticeAckHandler().

Definition at line 232 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and _receiveMigrationNoticeHandler().

Definition at line 233 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and finishedCheckpointLoadBalancing().

Definition at line 234 of file ckcausalmlog.C.

Referenced by _checkpointBarrierHandler(), and _messageLoggingInit().

Definition at line 242 of file ckcausalmlog.C.

Referenced by initMlogLBStep().

Definition at line 244 of file ckcausalmlog.C.

double lastCompletedAlarm = 0

Definition at line 247 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), checkpointAlarm(), and startMlogCheckpoint().

double lastRestart = 0

Definition at line 248 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), _resendMessagesHandler(), and _sendDetsHandler().

Definition at line 251 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and informLocationHome().

Definition at line 254 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and heartBeatPartner().

Definition at line 255 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

Definition at line 256 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and heartBeatCheckHandler().

double lastPingTime = -1 [static]

Definition at line 257 of file ckcausalmlog.C.

Referenced by heartBeatCheckHandler(), heartBeatHandler(), and CkMemCheckPT::isMaster().

This method is used to retry the ticket requests that had been queued up earlier.

Definition at line 1341 of file ckcausalmlog.C.

Definition at line 1354 of file ckcausalmlog.C.

Referenced by sendRemoveLogRequests(), and startMlogCheckpoint().

Definition at line 3903 of file ckcausalmlog.C.

Definition at line 3904 of file ckcausalmlog.C.


Generated on Mon Sep 21 07:57:41 2020 for Charm++ by  doxygen 1.5.5