PPL Logo

ck-core/ckcausalmlog.C File Reference

Go to the source code of this file.

Data Structures

class  ElementPacker
class  ElementDistributor
class  ElementCaller

Functions

const char * idx2str (const CkArrayIndex &ind)
const char * idx2str (const ArrayElement *el)
void getGlobalStep (CkGroupID gID)
bool fault_aware (CkObjID &recver)
void sendCheckpointData (int mode)
 Sends the checkpoint to its buddy.
void createObjIDList (void *data, ChareMlogData *mlogData)
bool isLocal (int destPE)
 Determines if the message is local or not.
bool isTeamLocal (int destPE)
 Determines if the message is group local or not.
void printLog (TProcessedLog *log)
 Prints a processed log.
void readKillFile ()
 CpvDeclare (Chare *, _currentObj)
 CpvDeclare (StoredCheckpoint *, _storedCheckpointData)
 CpvDeclare (CkQ< MlogEntry * > *, _delayedLocalMsgs)
 CpvDeclare (Queue, _outOfOrderMessageQueue)
 CpvDeclare (Queue, _delayedRemoteMessageQueue)
 CpvDeclare (char **, _bufferedTicketRequests)
 CpvDeclare (int *, _numBufferedTicketRequests)
 CpvDeclare (char *, _localDets)
 CpvDeclare (CkDeterminantHashtableT *, _remoteDets)
 CpvDeclare (char *, _incarnation)
 CpvDeclare (CkVec< LocationID * > *, _emigrantRecObjs)
 CpvDeclare (CkVec< CkLocRec_local * > *, _immigrantRecObjs)
void setTeamRecovery (void *data, ChareMlogData *mlogData)
 Turns on the flag for team recovery that selectively restores particular metadata information.
void unsetTeamRecovery (void *data, ChareMlogData *mlogData)
 Turns off the flag for team recovery.
void _messageLoggingInit ()
 Initialize message logging data structures and register handlers.
void killLocal (void *_dummy, double curWallTime)
void readFaultFile ()
 Reads the PE that will be failing throughout the execution and the mean time between failures.
void addBufferedDeterminant (CkObjID sender, CkObjID receiver, MCount SN, MCount TN)
 Adds a determinant to the buffered determinants and checks whether the array of buffered determinants needs to be extended.
void sendGroupMsg (envelope *env, int destPE, int _infoIdx)
 Sends a group message that might be a broadcast.
void sendNodeGroupMsg (envelope *env, int destNode, int _infoIdx)
 Sends a nodegroup message that might be a broadcast.
void sendArrayMsg (envelope *env, int destPE, int _infoIdx)
 Sends a message to an array element.
void sendChareMsg (envelope *env, int destPE, int _infoIdx, const CkChareID *pCid)
 Sends a message to a singleton chare.
void sendCommonMsg (CkObjID &recver, envelope *_env, int destPE, int _infoIdx)
 A method to generate the actual ticket requests for groups, nodegroups or arrays.
void sendMsg (CkObjID &sender, CkObjID &recver, int destPE, MlogEntry *entry, MCount SN, MCount TN, int resend)
 Method that does the actual send by creating a ticket request filling it up and sending it.
void sendLocalMsg (MlogEntry *entry)
 Function to send a local message.
void _removeDeterminantsHandler (char *buffer)
 Removes the determinants after a particular index in the _localDets array.
void _storeDeterminantsHandler (char *buffer)
 Stores the determinants coming from other processor.
void _ticketRequestHandler (TicketRequest *ticketRequest)
 If there are any delayed requests, process them first before processing this request.
bool _getTicket (envelope *env, int *flag)
 Gets a ticket for a recently received message.
int preProcessReceivedMessage (envelope *env, Chare **objPointer, MlogEntry **logEntryPointer)
void postProcessReceivedMessage (Chare *obj, CkObjID &sender, MCount SN, MlogEntry *entry)
 Updates a few variables once a message has been processed.
void generalCldEnqueue (int destPE, envelope *env, int _infoIdx)
void _pingHandler (CkPingMsg *msg)
void buildProcessedTicketLog (void *data, ChareMlogData *mlogData)
 A chare adds the latest ticket number processed.
void clearUpMigratedRetainedLists (int PE)
void checkpointAlarm (void *_dummy, double curWallTime)
void _checkpointRequestHandler (CheckpointRequest *request)
void startMlogCheckpoint (void *_dummy, double curWallTime)
 Starts the checkpoint phase after migration.
void pupArrayElementsSkip (PUP::er &p, CmiBool create, MigrationRecord *listToSkip, int listsize)
 Pups all the array elements in this processor.
void writeCheckpointToDisk (int size, char *chkpt)
void _storeCheckpointHandler (char *msg)
void sendRemoveLogRequests ()
 Sends out the messages asking senders to throw away message logs below a certain ticket number.
void _checkpointAckHandler (CheckPointAck *ackMsg)
void populateDeterminantTable (char *data)
 Inserts all the determinants into a hash table.
void removeProcessedLogs (void *_data, ChareMlogData *mlogData)
void _removeProcessedLogHandler (char *requestMsg)
 Removes messages in the log according to the received ticket numbers.
void CkMlogRestart (const char *dummy, CkArgMsg *dummyMsg)
 Function for restarting the crashed processor.
void _restartHandler (RestartRequest *restartMsg)
 Function to restart this processor.
void _getRestartCheckpointHandler (RestartRequest *restartMsg)
 Gets the stored checkpoint but calls another function in the sender.
void _recvRestartCheckpointHandler (char *_restartData)
 Receives the checkpoint coming from its buddy.
void CkMlogRestartDouble (void *, double)
void CkMlogRestartLocal ()
void _getCheckpointHandler (RestartRequest *restartMsg)
 Gets the stored checkpoint for its buddy processor.
void _verifyAckRequestHandler (VerifyAckMsg *verifyRequest)
void _verifyAckHandler (VerifyAckMsg *verifyReply)
void _recvCheckpointHandler (char *_restartData)
 Receives the checkpoint data from its buddy, restores the state of all the objects and asks everyone else to update its home.
void _updateHomeAckHandler (RestartRequest *updateHomeAck)
 Receives the updateHome ACKs from all other processors.
void initializeRestart (void *data, ChareMlogData *mlogData)
 Initializes variables and flags for restarting procedure.
void updateHomePE (void *data, ChareMlogData *mlogData)
 Updates the homePe of chare array elements.
void _updateHomeRequestHandler (RestartRequest *updateRequest)
 Updates the homePe for all chares in this processor.
void fillTicketForChare (void *data, ChareMlogData *mlogData)
 Fills up the ticket vector for each chare.
void printMsg (envelope *env, const char *par)
 Prints information about a message.
void printDet (Determinant *det, const char *par)
 Prints information about a determinant.
void resendMessageForChare (void *data, ChareMlogData *mlogData)
 Resends all the logged messages to a particular chare list.
void _sendDetsHandler (char *msg)
 Send all remote determinants to a particular failed PE.
void _resendMessagesHandler (char *msg)
 Resends messages since last checkpoint to the list of objects included in the request.
MCount maxVec (CkVec< MCount > *TNvec)
 Returns the maximum ticket from a vector.
void sortVec (CkVec< MCount > *TNvec)
int searchVec (CkVec< MCount > *TNVec, MCount searchTN)
void processDelayedRemoteMsgQueue ()
 Processes the messages in the delayed remote message queue.
void _sendDetsReplyHandler (char *msg)
 Receives determinants stored on remote nodes.
void _receivedDetDataHandler (ReceivedDetData *msg)
 Receives a list of determinants coming from the home PE of a migrated object (parallel restart).
void _receivedTNDataHandler (ReceivedTNData *msg)
 Receives a list of TNs coming from the home PE of a migrated object (parallel restart).
void processReceivedDet (Chare *obj, int listSize, Determinant *listDets)
 Processes the received list of determinants from a particular PE.
void processReceivedTN (Chare *obj, int listSize, MCount *listTNs)
 Processes the received list of tickets from a particular PE.
void distributeRestartedObjects ()
 Distributes objects to accelerate recovery after a failure.
void _distributedLocationHandler (char *receivedMsg)
 Handler to update information about an object just received.
void sendDummyMigration (int restartPE, CkGroupID lbID, CkGroupID locMgrID, CkArrayIndexMax &idx, int locationPE)
 this method is used to send messages to a restarted processor to tell it that a particular expected object is not going to get to it
void sendDummyMigrationCounts (int *dummyCounts)
 This method is used by a restarted processor to tell other processors that they are not going to receive this many objects.
void _dummyMigrationHandler (DummyMigrationMsg *msg)
 this handler is used to process a dummy migration msg.
void forAllCharesDo (MlogFn fnPointer, void *data)
 Map function pointed by fnPointer over all the chares living in this processor.
void initMlogLBStep (CkGroupID gid)
 This is the first time Converse is called after AtSync method has been called by every local object.
void startLoadBalancingMlog (void(*_fnPtr)(void *), void *_centralLb)
 Load Balancing.
void finishedCheckpointLoadBalancing ()
void sendMlogLocation (int targetPE, envelope *env)
void _receiveMigrationNoticeHandler (MigrationNotice *msg)
void _receiveMigrationNoticeAckHandler (MigrationNoticeAck *msg)
void _receiveMlogLocationHandler (void *buf)
void resumeFromSyncRestart (void *data, ChareMlogData *mlogData)
void checkAndSendCheckpointBarrierAcks (CheckpointBarrierMsg *msg)
 Processor 0 sends a broadcast to every other processor after checkpoint barrier.
void _checkpointBarrierHandler (CheckpointBarrierMsg *msg)
 Processor 0 receives a contribution from every other processor after checkpoint.
void _checkpointBarrierAckHandler (CheckpointBarrierMsg *msg)
void garbageCollectMlogForChare (void *data, ChareMlogData *mlogData)
 Function to remove all messages in the message log of a particular chare.
void garbageCollectMlog ()
 Garbage collects the message log and other data structures.
void informLocationHome (CkGroupID locMgrID, CkArrayIndexMax idx, int homePE, int currentPE)
 Method that informs an array element's home processor of its current location. It is a Converse method to bypass the Charm++ message logging framework.
void _receiveLocationHandler (CurrentLocationMsg *data)
void _getGlobalStepHandler (LBStepMsg *msg)
void _recvGlobalStepHandler (LBStepMsg *msg)
 Receives the global step handler from PE 0.
void _messageLoggingExit ()
 Function to wrap up performance information.
int getCheckPointPE ()
 Getting the pe number of the current processor's buddy.
envelope * copyEnvelope (envelope *env)
int isSameDet (Determinant *first, Determinant *second)

Variables

int _restartFlag = 0
int _numRestartResponses = 0
int countHashRefs = 0
int countHashCollisions = 0
char * checkpointDirectory = "."
int unAckedCheckpoint = 0
int countLocal = 0
int countBuffered = 0
int countPiggy = 0
int countClearBufferedLocalCalls = 0
int countUpdateHomeAcks = 0
int teamSize
int chkptPeriod
bool fastRecovery
int parallelRecovery
char * killFile
char * faultFile
int killFlag = 0
int faultFlag = 0
int restartingMlogFlag = 0
double killTime = 0.0
double faultMean
int checkpointCount = 0
int _numBufferedDets
int _indexBufferedDets
int _phaseBufferedDets
int _maxBufferedDets
int * numMsgsTarget
int * sizeMsgsTarget
int totalMsgsTarget
float totalMsgsSize
int numPiggyDets
int numDets
int numDupDets
int msgLogSize
int bufferedDetsSize
int storedDetsSize
float MLOGFT_totalLogSize = 0.0
float MLOGFT_totalMessages = 0.0
static double adjustChkptPeriod = 0.0
static double nextCheckpointTime = 0.0
static CkHashtableT
< CkHashtableAdaptorT< CkObjID >
, CkHashtableT
< CkHashtableAdaptorT< CkObjID >
, SNToTicket * > * > 
detTable (1000, 0.3)
int _pingHandlerIdx
char objString [100]
int _checkpointRequestHandlerIdx
int _storeCheckpointHandlerIdx
int _checkpointAckHandlerIdx
int _getCheckpointHandlerIdx
int _recvCheckpointHandlerIdx
int _removeProcessedLogHandlerIdx
int _verifyAckRequestHandlerIdx
int _verifyAckHandlerIdx
int _dummyMigrationHandlerIdx
int _getGlobalStepHandlerIdx
int _recvGlobalStepHandlerIdx
int _updateHomeRequestHandlerIdx
int _updateHomeAckHandlerIdx
int _resendMessagesHandlerIdx
int _sendDetsHandlerIdx
int _sendDetsReplyHandlerIdx
int _receivedTNDataHandlerIdx
int _receivedDetDataHandlerIdx
int _distributedLocationHandlerIdx
int _storeDeterminantsHandlerIdx
int _removeDeterminantsHandlerIdx
int _restartHandlerIdx
int _getRestartCheckpointHandlerIdx
int _recvRestartCheckpointHandlerIdx
int verifyAckTotal
int verifyAckCount
int verifyAckedRequests = 0
RestartRequest * storedRequest
int _falseRestart = 0
int onGoingLoadBalancing = 0
 For testing on clusters we might carry out restarts on a processor without actually starting it: 1 -> false restart, 0 -> restart after an actual crash.
void * centralLb
void(* resumeLbFnPtr )(void *)
int _receiveMlogLocationHandlerIdx
int _receiveMigrationNoticeHandlerIdx
int _receiveMigrationNoticeAckHandlerIdx
int _checkpointBarrierHandlerIdx
int _checkpointBarrierAckHandlerIdx
CkVec< MigrationRecord > migratedNoticeList
CkVec< RetainedMigratedObject * > retainedObjectList
int donotCountMigration = 0
int countLBMigratedAway = 0
int countLBToMigrate = 0
int migrationDoneCalled = 0
int checkpointBarrierCount = 0
int globalResumeCount = 0
CkGroupID globalLBID
int restartDecisionNumber = -1
double lastCompletedAlarm = 0
double lastRestart = 0
int _receiveLocationHandlerIdx
int calledRetryTicketRequest = 0
 This method is used to retry the ticket requests that had been queued up earlier.
CkVec< TProcessedLog > processedTicketLog
double totalSearchRestoredTime = 0
double totalSearchRestoredCount = 0


Function Documentation

const char * idx2str ( const CkArrayIndex &  ind  ) 

Definition at line 28 of file ckmessagelogging.C.

References idx2str().

const char* idx2str ( const ArrayElement *  el  ) 

void getGlobalStep ( CkGroupID  gID  ) 

bool fault_aware ( CkObjID &  recver  ) 

void sendCheckpointData ( int  mode  ) 

void createObjIDList ( void *  data,
ChareMlogData *  mlogData 
)

bool isLocal ( int  destPE  )  [inline]

Determines if the message is local or not.

A message is local if: 1) Both the destination and origin are the same PE.

Definition at line 660 of file ckcausalmlog.C.

References Converse::CkMyPe().

Referenced by generateCommonTicketRequest(), and sendCommonMsg().

bool isTeamLocal ( int  destPE  )  [inline]

Determines if the message is group local or not.

A message is group local if: 1) They belong to the same group in the group-based message logging.

Definition at line 672 of file ckcausalmlog.C.

References Converse::CkMyPe().

Referenced by _resendMessagesHandler(), _ticketHandler(), generateCommonTicketRequest(), sendCommonMsg(), sendMsg(), and sendTicketRequest().

void printLog ( TProcessedLog *  log  ) 

Prints a processed log.

Definition at line 2287 of file ckcausalmlog.C.

References Converse::CkMyPe(), TProcessedLog::recver, CkObjID::toString(), and TProcessedLog::tProcessed.

Referenced by createObjIDList(), and KillOnAllSigs().

void readKillFile (  ) 

CpvDeclare ( Chare *  ,
_currentObj   
)

CpvDeclare ( StoredCheckpoint *  ,
_storedCheckpointData   
)

CpvDeclare ( CkQ< MlogEntry * > *  ,
_delayedLocalMsgs   
)

CpvDeclare ( Queue  ,
_outOfOrderMessageQueue   
)

CpvDeclare ( Queue  ,
_delayedRemoteMessageQueue   
)

CpvDeclare ( char **  ,
_bufferedTicketRequests   
)

CpvDeclare ( int *  ,
_numBufferedTicketRequests   
)

CpvDeclare ( char *  ,
_localDets   
)

Note:
All the determinants generated by a PE are stored in variable _localDets. As soon as a message is sent, then all the determinants are appended to the message, but those determinants are not deleted. We must wait until an ACK comes from the receiver to delete the determinants. In the meantime the same determinants may be appended to other messages and more determinants can be added to _localDets. A simple solution to this problem was to have a primitive array and keep adding determinants at the end. However, to avoid multiple copies of determinants, we will keep a pointer to the first 'valid' determinant in the array. Alternatively, we can keep a pointer to the latest determinant and a number of how many valid determinants there are behind it. We do not remove determinants until a checkpoint is made, since these determinants may have to be added to messages in case of a recovery.

CpvDeclare ( CkDeterminantHashtableT *  ,
_remoteDets   
)

CpvDeclare ( char *  ,
_incarnation   
)

CpvDeclare ( CkVec< LocationID * > *  ,
_emigrantRecObjs   
)

CpvDeclare ( CkVec< CkLocRec_local * > *  ,
_immigrantRecObjs   
)

void setTeamRecovery ( void *  data,
ChareMlogData *  mlogData 
)

Turns on the flag for team recovery that selectively restores particular metadata information.

Definition at line 2272 of file ckcausalmlog.C.

References ChareMlogData::teamRecoveryFlag.

Referenced by _recvRestartCheckpointHandler().

void unsetTeamRecovery ( void *  data,
ChareMlogData *  mlogData 
)

Turns off the flag for team recovery.

Definition at line 2280 of file ckcausalmlog.C.

References ChareMlogData::teamRecoveryFlag.

Referenced by _recvRestartCheckpointHandler().

void _messageLoggingInit (  ) 

Initialize message logging data structures and register handlers.

Definition at line 237 of file ckcausalmlog.C.

Referenced by _initCharm().

void killLocal ( void *  _dummy,
double  curWallTime 
)

void readFaultFile (  ) 

Reads the PE that will be failing throughout the execution and the mean time between failures.

We assume an exponential distribution for the mean-time-between-failures.

Definition at line 401 of file ckcausalmlog.C.

References CcdCallFnAfter(), Converse::CkMyPe(), faultFile, killLocal(), and printf().

void addBufferedDeterminant ( CkObjID  sender,
CkObjID  receiver,
MCount  SN,
MCount  TN 
) [inline]

Adds a determinant to the buffered determinants and checks whether the array of buffered determinants needs to be extended.

Definition at line 432 of file ckcausalmlog.C.

References Converse::CkMyPe(), CmiAlloc(), CmiFree(), Determinant::receiver, Determinant::sender, Determinant::SN, and Determinant::TN.

Referenced by preProcessReceivedMessage().

void sendGroupMsg ( envelope *  env,
int  destPE,
int  _infoIdx 
)

void sendNodeGroupMsg ( envelope *  env,
int  destNode,
int  _infoIdx 
)

void sendArrayMsg ( envelope *  env,
int  destPE,
int  _infoIdx 
)

void sendChareMsg ( envelope *  env,
int  destPE,
int  _infoIdx,
const CkChareID *  pCid 
)

Sends a message to a singleton chare.

Definition at line 556 of file ckcausalmlog.C.

References _ObjectID::chare, Converse::CkMyPe(), CkObjID::data, _ObjectID::id, printf(), sendCommonMsg(), envelope::SN, envelope::TN, CkObjID::toString(), CkObjID::type, TypeArray, and TypeChare.

Referenced by CkSendMsg().

void sendCommonMsg ( CkObjID &  recver,
envelope *  _env,
int  destPE,
int  _infoIdx 
)

void sendMsg ( CkObjID &  sender,
CkObjID &  recver,
int  destPE,
MlogEntry *  entry,
MCount  SN,
MCount  TN,
int  resend 
)

void sendLocalMsg ( MlogEntry *  entry  ) 

Function to send a local message.

It first gets a ticket and then enqueues the message. If we are recovering, then the message is enqueued in a delay queue.

Definition at line 765 of file ckcausalmlog.C.

References MlogEntry::_infoIdx, _skipCldEnqueue(), _startTime, CmiMemoryCheck(), CmiMyPe(), MlogEntry::env, CkObjID::getObject(), printf(), envelope::recver, envelope::sender, envelope::SN, and CkObjID::toString().

Referenced by sendCommonMsg().

void _removeDeterminantsHandler ( char *  buffer  ) 

Removes the determinants after a particular index in the _localDets array.

Definition at line 834 of file ckcausalmlog.C.

References CmiFree(), RemoveDeterminantsHeader::index, and RemoveDeterminantsHeader::phase.

Referenced by _messageLoggingInit().

void _storeDeterminantsHandler ( char *  buffer  ) 

void _ticketRequestHandler ( TicketRequest *  ticketRequest  )  [inline]

If there are any delayed requests, process them first before processing this request.

Definition at line 928 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

bool _getTicket ( envelope *  env,
int *  flag 
) [inline]

Gets a ticket for a recently received message.

Precondition:
env->recver has to be on this processor.
Returns:
Returns true if ticket assignment is successful, otherwise returns false. A false result is due to the fact that we are recovering.

Definition at line 941 of file ckcausalmlog.C.

References Converse::CkMyPe(), CmiMemoryCheck(), CmiWallTimer(), CkObjID::getObject(), ChareMlogData::getTicket(), Chare::mlogData, ChareMlogData::next_ticket(), printf(), envelope::recver, ChareMlogData::restartFlag, envelope::sender, envelope::SN, Ticket::state, Ticket::TN, envelope::TN, CkObjID::toString(), ChareMlogData::tProcessed, and ChareMlogData::verifyTicket().

Referenced by preProcessReceivedMessage().

int preProcessReceivedMessage ( envelope *  env,
Chare **  objPointer,
MlogEntry **  logEntryPointer 
)

Definition at line 1012 of file ckcausalmlog.C.

Referenced by _processHandler().

void postProcessReceivedMessage ( Chare *  obj,
CkObjID &  sender,
MCount  SN,
MlogEntry *  entry 
)

Updates a few variables once a message has been processed.

Definition at line 1123 of file ckcausalmlog.C.

Referenced by _processHandler().

void generalCldEnqueue ( int  destPE,
envelope *  env,
int  _infoIdx 
)

void _pingHandler ( CkPingMsg *  msg  ) 

Definition at line 1164 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

void buildProcessedTicketLog ( void *  data,
ChareMlogData *  mlogData 
)

void clearUpMigratedRetainedLists ( int  PE  ) 

void checkpointAlarm ( void *  _dummy,
double  curWallTime 
)

Definition at line 1179 of file ckcausalmlog.C.

Referenced by checkpointAlarm(), and startMlogCheckpoint().

void _checkpointRequestHandler ( CheckpointRequest *  request  ) 

Definition at line 1196 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

void startMlogCheckpoint ( void *  _dummy,
double  curWallTime 
)

Starts the checkpoint phase after migration.

Definition at line 1203 of file ckcausalmlog.C.

Referenced by _checkpointRequestHandler(), _receiveMigrationNoticeAckHandler(), _updateHomeRequestHandler(), and startLoadBalancingMlog().

void pupArrayElementsSkip ( PUP::er &  p,
CmiBool  create,
MigrationRecord *  listToSkip,
int  listsize 
)

Pups all the array elements in this processor.

Definition at line 1313 of file ckcausalmlog.C.

Referenced by _recvCheckpointHandler(), _recvRestartCheckpointHandler(), and startMlogCheckpoint().

void writeCheckpointToDisk ( int  size,
char *  chkpt 
)

Definition at line 1369 of file ckcausalmlog.C.

References checkpointDirectory, Converse::CkMyPe(), and write().

Referenced by _storeCheckpointHandler().

void _storeCheckpointHandler ( char *  msg  ) 

Definition at line 1386 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

void sendRemoveLogRequests (  ) 

Sends out the messages asking senders to throw away message logs below a certain ticket number.

Note:
The remove log request message looks like |RemoveLogRequest||List of TProcessedLog||Number of Determinants||List of Determinants|

Definition at line 1437 of file ckcausalmlog.C.

References _startTime, Converse::CkMyPe(), Converse::CkNumPes(), clearUpMigratedRetainedLists(), CmiAbort(), CmiAlloc(), CmiFree(), CmiMemoryCheck(), CmiMyPe(), Converse::CmiSyncSend(), CkVec< T >::getVec(), ResendRequest::numberObjects, ResendRequest::PE, CkVec< T >::size(), and traceUserBracketEvent().

Referenced by _checkpointAckHandler(), and _checkpointBarrierAckHandler().

void _checkpointAckHandler ( CheckPointAck *  ackMsg  ) 

Definition at line 1476 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

void populateDeterminantTable ( char *  data  )  [inline]

void removeProcessedLogs ( void *  _data,
ChareMlogData *  mlogData 
)

void _removeProcessedLogHandler ( char *  requestMsg  ) 

Removes messages in the log according to the received ticket numbers.

Definition at line 1588 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

void CkMlogRestart ( const char *  dummy,
CkArgMsg *  dummyMsg 
)

Function for restarting the crashed processor.

It sets the restart flag and contacts the buddy processor to get the latest checkpoint.

Definition at line 1652 of file ckcausalmlog.C.

Referenced by _initCharm(), _parseCommandLineOpts(), CkMlogRestartDouble(), and CkMlogRestartLocal().

void _restartHandler ( RestartRequest *  restartMsg  ) 

Function to restart this processor.

The handler is invoked by a member of its same team in message logging.

Definition at line 1683 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

void _getRestartCheckpointHandler ( RestartRequest *  restartMsg  ) 

Gets the stored checkpoint but calls another function in the sender.

Definition at line 1715 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

void _recvRestartCheckpointHandler ( char *  _restartData  ) 

Receives the checkpoint coming from its buddy.

This is the case of restart for one team member that did not crash.

Definition at line 1754 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

void CkMlogRestartDouble ( void *  ,
double   
)

Definition at line 1866 of file ckcausalmlog.C.

void CkMlogRestartLocal (  ) 

Definition at line 1871 of file ckcausalmlog.C.

void _getCheckpointHandler ( RestartRequest *  restartMsg  ) 

Gets the stored checkpoint for its buddy processor.

Definition at line 1878 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

void _verifyAckRequestHandler ( VerifyAckMsg *  verifyRequest  ) 

Definition at line 1915 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

void _verifyAckHandler ( VerifyAckMsg *  verifyReply  ) 

Definition at line 1943 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

void _recvCheckpointHandler ( char *  _restartData  ) 

Receives the checkpoint data from its buddy, restores the state of all the objects and asks everyone else to update its home.

Definition at line 2036 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

void _updateHomeAckHandler ( RestartRequest *  updateHomeAck  ) 

Receives the updateHome ACKs from all other processors.

Once everybody has replied, it sends a request to resend the logged messages.

Definition at line 2114 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and _recvGlobalStepHandler().

void initializeRestart ( void *  data,
ChareMlogData *  mlogData 
)

Initializes variables and flags for restarting procedure.

Definition at line 2168 of file ckcausalmlog.C.

Referenced by _recvCheckpointHandler(), and _recvRestartCheckpointHandler().

void updateHomePE ( void *  data,
ChareMlogData *  mlogData 
)

void _updateHomeRequestHandler ( RestartRequest *  updateRequest  ) 

Updates the homePe for all chares in this processor.

Definition at line 2201 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

void fillTicketForChare ( void *  data,
ChareMlogData *  mlogData 
)

void printMsg ( envelope *  env,
const char *  par 
)

Prints information about a message.

Definition at line 2295 of file ckcausalmlog.C.

References Converse::CkMyPe(), envelope::recver, envelope::sender, envelope::SN, and CkObjID::toString().

Referenced by processDelayedRemoteMsgQueue(), and resendMessageForChare().

void printDet ( Determinant *  det,
const char *  par 
)

Prints information about a determinant.

Definition at line 2304 of file ckcausalmlog.C.

References Converse::CkMyPe(), Determinant::receiver, Determinant::sender, Determinant::SN, Determinant::TN, and CkObjID::toString().

Referenced by _sendDetsHandler(), and processReceivedDet().

void resendMessageForChare ( void *  data,
ChareMlogData *  mlogData 
)

void _sendDetsHandler ( char *  msg  ) 

void _resendMessagesHandler ( char *  msg  ) 

Resends messages since last checkpoint to the list of objects included in the request.

It also sends stored remote determinants to the particular failed PE.

Definition at line 2472 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), _recvRestartCheckpointHandler(), _sendDetsReplyHandler(), and _updateHomeAckHandler().

MCount maxVec ( CkVec< MCount > *  TNvec  ) 

Returns the maximum ticket from a vector.

Definition at line 2784 of file ckcausalmlog.C.

References max(), and CkVec< T >::size().

Referenced by processReceivedTN().

void sortVec ( CkVec< MCount > *  TNvec  ) 

Definition at line 2793 of file ckcausalmlog.C.

References CkVec< T >::push_back(), CkVec< T >::removeAll(), and CkVec< T >::size().

Referenced by processReceivedTN().

int searchVec ( CkVec< MCount > *  TNVec,
MCount  searchTN 
)

Definition at line 2824 of file ckcausalmlog.C.

References left(), size, and CkVec< T >::size().

Referenced by processReceivedTN().

void processDelayedRemoteMsgQueue (  ) 

Processes the messages in the delayed remote message queue.

Definition at line 2509 of file ckcausalmlog.C.

References Converse::CkMyPe(), CmiMemoryCheck(), CqsDequeue(), CqsEmpty(), CqsEnqueueGeneral(), envelope::getPriobits(), envelope::getPrioPtr(), printf(), and printMsg().

void _sendDetsReplyHandler ( char *  msg  ) 

void _receivedDetDataHandler ( ReceivedDetData *  msg  ) 

Receives a list of determinants coming from the home PE of a migrated object (parallel restart).

Definition at line 2643 of file ckcausalmlog.C.

References CmiFree(), CmiMyPe(), Converse::CmiSyncSendAndFree(), CkObjID::getObject(), CkObjID::guessPE(), Chare::mlogData, ReceivedDetData::numDets, ChareMlogData::objID, printf(), processReceivedDet(), ReceivedDetData::recver, and CkObjID::toString().

Referenced by _messageLoggingInit().

void _receivedTNDataHandler ( ReceivedTNData *  msg  ) 

Receives a list of TNs coming from the home PE of a migrated object (parallel restart).

Definition at line 2661 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

void processReceivedDet ( Chare *  obj,
int  listSize,
Determinant *  listDets 
)

Processes the received list of determinants from a particular PE.

Definition at line 2679 of file ckcausalmlog.C.

References Converse::CkMyPe(), CmiMemoryCheck(), Chare::mlogData, printDet(), Determinant::sender, Determinant::SN, Determinant::TN, and ChareMlogData::verifyTicket().

Referenced by _receivedDetDataHandler(), and _sendDetsReplyHandler().

void processReceivedTN ( Chare *  obj,
int  listSize,
MCount *  listTNs 
)

Processes the received list of tickets from a particular PE.

Definition at line 2696 of file ckcausalmlog.C.

Referenced by _receivedTNDataHandler(), _resendReplyHandler(), and _sendDetsReplyHandler().

void distributeRestartedObjects (  ) 

Distributes objects to accelerate recovery after a failure.

Definition at line 2934 of file ckcausalmlog.C.

Referenced by _sendDetsReplyHandler(), and _updateHomeAckHandler().

void _distributedLocationHandler ( char *  receivedMsg  ) 

Handler to update information about an object just received.

Definition at line 2944 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

void sendDummyMigration ( int  restartPE,
CkGroupID  lbID,
CkGroupID  locMgrID,
CkArrayIndexMax &  idx,
int  locationPE 
)

this method is used to send messages to a restarted processor to tell it that a particular expected object is not going to get to it

Definition at line 2980 of file ckcausalmlog.C.

Referenced by _resendMessagesHandler().

void _dummyMigrationHandler ( DummyMigrationMsg *  msg  ) 

this handler is used to process a dummy migration msg.

it looks up the load balancer and calls migrated for it

Definition at line 3013 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

void forAllCharesDo ( MlogFn  fnPointer,
void *  data 
)

void startLoadBalancingMlog ( void(*)(void *)  _fnPtr,
void *  _centralLb 
)

Load Balancing.

Definition at line 3105 of file ckcausalmlog.C.

Referenced by CentralLB::MigrationDone().

void finishedCheckpointLoadBalancing (  ) 

Definition at line 3118 of file ckcausalmlog.C.

Referenced by _checkpointAckHandler().

void sendMlogLocation ( int  targetPE,
envelope *  env 
)

Definition at line 3130 of file ckcausalmlog.C.

Referenced by CkLocMgr::emigrate().

void _receiveMigrationNoticeHandler ( MigrationNotice *  msg  ) 

Definition at line 3181 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

void _receiveMigrationNoticeAckHandler ( MigrationNoticeAck msg  ) 

Definition at line 3191 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

void _receiveMlogLocationHandler ( void *  buf  ) 

Definition at line 3211 of file ckcausalmlog.C.

Referenced by _messageLoggingInit(), and _resendMessagesHandler().

void resumeFromSyncRestart ( void *  data,
ChareMlogData mlogData 
)

Definition at line 3225 of file ckcausalmlog.C.

void checkAndSendCheckpointBarrierAcks ( CheckpointBarrierMsg msg  )  [inline]

Processor 0 sends a broadcast to every other processor after checkpoint barrier.

Definition at line 3239 of file ckcausalmlog.C.

References Converse::CmiSyncSend().

Referenced by _checkpointBarrierHandler().

void _checkpointBarrierHandler ( CheckpointBarrierMsg msg  ) 

Processor 0 receives a contribution from every other processor after checkpoint.

Definition at line 3251 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

void _checkpointBarrierAckHandler ( CheckpointBarrierMsg msg  ) 

Definition at line 3270 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

void garbageCollectMlogForChare ( void *  data,
ChareMlogData mlogData 
)

Function to remove all messages in the message log of a particular chare.

Definition at line 3289 of file ckcausalmlog.C.

References CkQ< T >::deq(), ChareMlogData::getMlog(), and CkQ< T >::length().

Referenced by garbageCollectMlog().

void garbageCollectMlog (  ) 

Garbage collects the message log and other data structures.

In the case of a synchronized checkpoint, we use an optimization that avoids having the causal message logging protocol communicate all determinants to the rest of the processors.

Definition at line 3308 of file ckcausalmlog.C.

References Converse::CkMyPe(), forAllCharesDo(), garbageCollectMlogForChare(), CkHashtableIterator::hasNext(), CkHashtableIterator::next(), and CkVec< T >::removeAll().

Referenced by initMlogLBStep().

void informLocationHome ( CkGroupID  locMgrID,
CkArrayIndexMax  idx,
int  homePE,
int  currentPE 
)

Method that informs an array element's home processor of its current location. It is a Converse method, used to bypass the Charm++ message logging framework.

Definition at line 3338 of file ckcausalmlog.C.

Referenced by _distributedLocationHandler(), _receiveMigrationNoticeAckHandler(), _resendMessagesHandler(), CkLocMgr::informHome(), pupArrayElementsSkip(), and updateHomePE().

void _receiveLocationHandler ( CurrentLocationMsg data  ) 

Definition at line 3353 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

void _getGlobalStepHandler ( LBStepMsg msg  ) 

Definition at line 3394 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

void _recvGlobalStepHandler ( LBStepMsg msg  ) 

Receives the global step handler from PE 0.

Definition at line 3406 of file ckcausalmlog.C.

Referenced by _messageLoggingInit().

int getCheckPointPE (  ) 

Gets the PE number of the current processor's buddy.

In the team-based approach each processor might checkpoint in the next team, but currently teams are only meant to reduce memory overhead.

Definition at line 3953 of file ckcausalmlog.C.

Referenced by _bufferedLocalMessageCopyHandler(), _receiveMigrationNoticeHandler(), _removeProcessedLogHandler(), _restartHandler(), _updateHomeRequestHandler(), CkMlogRestart(), sendBufferedLocalMessageCopy(), sendLocalMessageCopy(), sendMlogLocation(), and startMlogCheckpoint().

envelope* copyEnvelope ( envelope env  ) 

int isSameDet ( Determinant first,
Determinant second 
) [inline]


Variable Documentation

Definition at line 56 of file ckcausalmlog.C.

Definition at line 59 of file ckcausalmlog.C.

Definition at line 60 of file ckcausalmlog.C.

char* checkpointDirectory = "."

Definition at line 62 of file ckcausalmlog.C.

Referenced by readCheckpointFromDisk(), and writeCheckpointToDisk().

Definition at line 63 of file ckcausalmlog.C.

Definition at line 65 of file ckcausalmlog.C.

Definition at line 65 of file ckcausalmlog.C.

Referenced by sendBufferedLocalMessageCopy().

Definition at line 66 of file ckcausalmlog.C.

Definition at line 67 of file ckcausalmlog.C.

Definition at line 69 of file ckcausalmlog.C.

char* faultFile

Definition at line 77 of file ckcausalmlog.C.

Referenced by readFaultFile().

Definition at line 79 of file ckcausalmlog.C.

Definition at line 80 of file ckcausalmlog.C.

Definition at line 82 of file ckcausalmlog.C.

Definition at line 83 of file ckcausalmlog.C.

Definition at line 84 of file ckcausalmlog.C.

Definition at line 110 of file ckcausalmlog.C.

Definition at line 112 of file ckcausalmlog.C.

Definition at line 114 of file ckcausalmlog.C.

Definition at line 119 of file ckcausalmlog.C.

Definition at line 132 of file ckcausalmlog.C.

Definition at line 133 of file ckcausalmlog.C.

Definition at line 134 of file ckcausalmlog.C.

Definition at line 135 of file ckcausalmlog.C.

Referenced by _messageLoggingExit(), _messageLoggingInit(), and sendMsg().

Definition at line 138 of file ckcausalmlog.C.

Definition at line 139 of file ckcausalmlog.C.

Definition at line 140 of file ckcausalmlog.C.

Definition at line 143 of file ckcausalmlog.C.

Definition at line 144 of file ckcausalmlog.C.

Definition at line 145 of file ckcausalmlog.C.

float MLOGFT_totalLogSize = 0.0

Definition at line 149 of file ckcausalmlog.C.

Referenced by _messageLoggingExit(), sendMsg(), and sendTicketRequest().

float MLOGFT_totalMessages = 0.0

Definition at line 150 of file ckcausalmlog.C.

Referenced by _messageLoggingExit(), sendMsg(), and sendTicketRequest().

double adjustChkptPeriod = 0.0 [static]

Definition at line 153 of file ckcausalmlog.C.

double nextCheckpointTime = 0.0 [static]

Definition at line 154 of file ckcausalmlog.C.

Definition at line 157 of file ckcausalmlog.C.

char objString[100]

Definition at line 159 of file ckcausalmlog.C.

Referenced by buildProcessedTicketLog(), createObjIDList(), and processReceivedTN().

Definition at line 160 of file ckcausalmlog.C.

Definition at line 161 of file ckcausalmlog.C.

Definition at line 162 of file ckcausalmlog.C.

Definition at line 163 of file ckcausalmlog.C.

Definition at line 164 of file ckcausalmlog.C.

Definition at line 165 of file ckcausalmlog.C.

Definition at line 167 of file ckcausalmlog.C.

Definition at line 168 of file ckcausalmlog.C.

Definition at line 169 of file ckcausalmlog.C.

Definition at line 172 of file ckcausalmlog.C.

Definition at line 173 of file ckcausalmlog.C.

Definition at line 175 of file ckcausalmlog.C.

Definition at line 176 of file ckcausalmlog.C.

Definition at line 177 of file ckcausalmlog.C.

Definition at line 178 of file ckcausalmlog.C.

Definition at line 179 of file ckcausalmlog.C.

Definition at line 180 of file ckcausalmlog.C.

Definition at line 181 of file ckcausalmlog.C.

Definition at line 182 of file ckcausalmlog.C.

Definition at line 183 of file ckcausalmlog.C.

Definition at line 184 of file ckcausalmlog.C.

Definition at line 187 of file ckcausalmlog.C.

Definition at line 188 of file ckcausalmlog.C.

Definition at line 189 of file ckcausalmlog.C.

Definition at line 193 of file ckcausalmlog.C.

Definition at line 194 of file ckcausalmlog.C.

Definition at line 196 of file ckcausalmlog.C.

Definition at line 198 of file ckcausalmlog.C.

Definition at line 200 of file ckcausalmlog.C.

For testing on clusters we might carry out restarts on a processor without actually starting it: 1 -> false restart, 0 -> restart after an actual crash.

Definition at line 208 of file ckcausalmlog.C.

void* centralLb

Definition at line 209 of file ckcausalmlog.C.

Referenced by _checkpointBarrierAckHandler(), and startLoadBalancingMlog().

void(* resumeLbFnPtr)(void *)

Referenced by startLoadBalancingMlog().

Definition at line 211 of file ckcausalmlog.C.

Definition at line 212 of file ckcausalmlog.C.

Definition at line 213 of file ckcausalmlog.C.

Definition at line 214 of file ckcausalmlog.C.

Definition at line 215 of file ckcausalmlog.C.

Definition at line 217 of file ckcausalmlog.C.

Definition at line 218 of file ckcausalmlog.C.

Definition at line 220 of file ckcausalmlog.C.

Definition at line 221 of file ckcausalmlog.C.

Definition at line 222 of file ckcausalmlog.C.

Definition at line 223 of file ckcausalmlog.C.

Definition at line 225 of file ckcausalmlog.C.

Definition at line 226 of file ckcausalmlog.C.

Definition at line 228 of file ckcausalmlog.C.

Definition at line 229 of file ckcausalmlog.C.

Definition at line 232 of file ckcausalmlog.C.

This method is used to retry the ticket requests that had been queued up earlier.

Definition at line 1162 of file ckcausalmlog.C.

Definition at line 1175 of file ckcausalmlog.C.

Definition at line 3762 of file ckcausalmlog.C.

Definition at line 3763 of file ckcausalmlog.C.


Generated on Thu May 24 07:56:00 2012 for Charm++ by  doxygen 1.5.5