Go to the source code of this file.
Data Structures | |
struct | s_pathfixlist |
struct | s_ppdef |
struct | ppdeffind |
struct | TopologyRequest |
Typedefs | |
typedef struct s_pathfixlist * | pathfixlist |
typedef struct s_ppdef * | ppdef |
Functions | |
static double | GetClock (void) |
static int | probefile (const char *path) |
static const char * | mylogin (void) |
static pathfixlist | pathfix_append (char *s1, char *s2, pathfixlist l) |
static char * | pathfix (const char *path, pathfixlist fixes) |
static char * | pathextfix (const char *path, pathfixlist fixes, char *ext) |
static int | is_quote (char c) |
static void | zap_newline (char *s) |
static char * | substr (const char *lo, const char *hi) |
static int | subeqs (const char *lo, const char *hi, const char *str) |
static const char * | skipblanks (const char *p) |
static const char * | skipstuff (const char *p) |
static char * | cstring_join (const std::vector< const char * > &vec, const char *separator) |
static const char * | getenv_ssh () |
static char * | getenv_display () |
static char * | getenv_display_no_tamper () |
static ppdeffind | pparam_find (const char *lname) |
static ppdef | pparam_cell (const char *lname) |
static void | pparam_int (int *where, int defValue, const char *arg, const char *doc) |
static void | pparam_flag (int *where, int defValue, const char *arg, const char *doc) |
static void | pparam_real (double *where, double defValue, const char *arg, const char *doc) |
static void | pparam_str (const char **where, const char *defValue, const char *arg, const char *doc) |
static int | pparam_setdef (ppdef def, const char *value) |
static int | pparam_set (char *lname, char *value) |
static const char * | pparam_getdef (ppdef def) |
static void | pparam_printdocs () |
static void | pparam_delarg (int i) |
static int | pparam_countargs (const char **argv) |
static int | pparam_parseopt () |
static int | pparam_parsecmd (char optchr, const char **argv) |
static char ** | dupargv (const char **argv) |
static void | arg_init (int argc, const char **argv) |
Variables | |
static const int | MAX_NUM_RETRIES = 3 |
static int | mynodes_start |
static double | ftTimer |
static double | start_timer |
static unsigned int | server_port |
static char | server_addr [1024] |
static SOCKET | server_fd |
static ppdef | ppdefs |
static int | pparam_pos |
static const char ** | pparam_argv |
static char | pparam_optc = '-' |
static char | pparam_error [100] |
static const char ** | arg_argv |
static int | arg_argc |
static int | arg_requested_pes |
static int | arg_requested_nodes |
static int | arg_requested_numhosts |
static int | arg_timeout |
static int | arg_timelimit |
static int | arg_verbose |
static const char * | arg_nodelist |
static const char * | arg_nodegroup |
static const char * | arg_runscript |
static const char * | arg_charmrunip |
static int | arg_debug |
static int | arg_debug_no_pause |
static int | arg_debug_no_xrdb |
static int | arg_charmdebug |
static const char * | arg_debug_commands |
static int | arg_quiet |
static int | arg_local |
static int | arg_batch_spawn |
static int | arg_scalable_start |
static int | arg_hierarchical_start |
static int | arg_child_charmrun |
static int | arg_help |
static int | arg_ppn |
static int | arg_usehostname |
static char ** | saved_argv |
static int | saved_argc |
static int | arg_realloc_pes |
static int | arg_old_pes |
static int | arg_shrinkexpand |
static int | arg_charmrun_port |
static const char * | arg_shrinkexpand_basedir |
static int | arg_maxssh |
static const char * | arg_shell |
static int | arg_in_xterm |
static const char * | arg_debugger |
static const char * | arg_xterm |
static const char * | arg_display |
static int | arg_ssh_display |
static const char * | arg_mylogin |
static int | arg_mpiexec |
static int | arg_mpiexec_no_n |
static int | arg_no_va_rand |
static const char * | arg_nodeprog_a |
static const char * | arg_nodeprog_r |
static char * | arg_currdir_a |
static char * | arg_currdir_r |
static int | arg_server |
static int | arg_server_port = 0 |
static const char * | arg_server_auth = NULL |
static int | replay_single = 0 |
static int | arg_startpe |
static int | arg_endpe |
static int | arg_singlemaster |
static int | arg_skipmaster |
TopologyRequest | proc_per |
TopologyRequest | onewth_per |
int | auto_provision |
typedef struct s_pathfixlist * pathfixlist |
static double GetClock | ( | void | ) | [static] |
Definition at line 97 of file charmrun.C.
Referenced by arg_init().
static int probefile | ( | const char * | path | ) | [static] |
Definition at line 114 of file charmrun.C.
Referenced by arg_init().
static const char* mylogin | ( | void | ) | [static] |
Definition at line 123 of file charmrun.C.
References len, name, p, and strdup().
Referenced by arg_init().
static pathfixlist pathfix_append | ( | char * | s1, | |
char * | s2, | |||
pathfixlist | l | |||
) | [static] |
Definition at line 168 of file charmrun.C.
References malloc(), s_pathfixlist::next, s_pathfixlist::s1, and s_pathfixlist::s2.
Referenced by arg_init().
static char* pathfix | ( | const char * | path, | |
pathfixlist | fixes | |||
) | [static] |
Definition at line 177 of file charmrun.C.
References PUP::l, len, and strdup().
Referenced by arg_init(), and pathextfix().
static char* pathextfix | ( | const char * | path, | |
pathfixlist | fixes, | |||
char * | ext | |||
) | [static] |
Definition at line 199 of file charmrun.C.
References free(), malloc(), and pathfix().
Referenced by arg_init().
static int is_quote | ( | char | c | ) | [static] |
Definition at line 217 of file charmrun.C.
Referenced by substr().
static void zap_newline | ( | char * | s | ) | [static] |
Definition at line 219 of file charmrun.C.
References p.
Referenced by arg_init().
static char* substr | ( | const char * | lo, | |
const char * | hi | |||
) | [static] |
Definition at line 231 of file charmrun.C.
References is_quote(), len, and malloc().
Referenced by arg_init().
static int subeqs | ( | const char * | lo, | |
const char * | hi, | |||
const char * | str | |||
) | [static] |
Definition at line 244 of file charmrun.C.
References len.
Referenced by arg_init().
static const char* skipblanks | ( | const char * | p | ) | [static] |
Definition at line 255 of file charmrun.C.
Referenced by arg_init().
static const char* skipstuff | ( | const char * | p | ) | [static] |
Definition at line 263 of file charmrun.C.
Referenced by arg_init().
static char* cstring_join | ( | const std::vector< const char * > & | vec, | |
const char * | separator | |||
) | [static] |
Definition at line 284 of file charmrun.C.
References length, malloc(), and p.
Referenced by arg_init().
static const char* getenv_ssh | ( | ) | [static] |
Definition at line 305 of file charmrun.C.
Referenced by arg_init().
static char* getenv_display | ( | ) | [static] |
Definition at line 313 of file charmrun.C.
References p, skt_my_ip(), and skt_print_ip().
static char* getenv_display_no_tamper | ( | ) | [static] |
Definition at line 329 of file charmrun.C.
References p.
Referenced by arg_init().
static ppdeffind pparam_find | ( | const char * | lname | ) | [static] |
Definition at line 381 of file charmrun.C.
References s_ppdef::lname, and s_ppdef::next.
Referenced by pparam_cell(), and pparam_parseopt().
static ppdef pparam_cell | ( | const char * | lname | ) | [static] |
Definition at line 399 of file charmrun.C.
References ppdeffind::def, s_ppdef::initFlag, malloc(), and pparam_find().
Referenced by pparam_flag(), pparam_int(), pparam_real(), pparam_set(), and pparam_str().
Definition at line 415 of file charmrun.C.
References s_ppdef::doc, s_ppdef::i, s_ppdef::lname, pparam_cell(), s_ppdef::type, and s_ppdef::where.
Referenced by arg_init().
Definition at line 425 of file charmrun.C.
References s_ppdef::doc, s_ppdef::f, s_ppdef::lname, pparam_cell(), s_ppdef::type, and s_ppdef::where.
Referenced by arg_init().
static void pparam_real | ( | double * | where, | |
double | defValue, | |||
const char * | arg, | |||
const char * | doc | |||
) | [static] |
Definition at line 435 of file charmrun.C.
References s_ppdef::doc, s_ppdef::lname, pparam_cell(), s_ppdef::r, s_ppdef::type, and s_ppdef::where.
static void pparam_str | ( | const char ** | where, | |
const char * | defValue, | |||
const char * | arg, | |||
const char * | doc | |||
) | [static] |
Definition at line 446 of file charmrun.C.
References s_ppdef::doc, s_ppdef::lname, pparam_cell(), s_ppdef::s, s_ppdef::type, and s_ppdef::where.
Referenced by arg_init().
Definition at line 457 of file charmrun.C.
References calloc(), s_ppdef::f, s_ppdef::i, s_ppdef::initFlag, s_ppdef::lname, p, s_ppdef::r, s_ppdef::s, s_ppdef::type, and s_ppdef::where.
Referenced by pparam_parseopt(), and pparam_set().
static int pparam_set | ( | char * | lname, | |
char * | value | |||
) | [static] |
Definition at line 503 of file charmrun.C.
References pparam_cell(), and pparam_setdef().
static const char* pparam_getdef | ( | ppdef | def | ) | [static] |
Definition at line 509 of file charmrun.C.
References s_ppdef::f, s_ppdef::i, s_ppdef::r, s_ppdef::s, s_ppdef::type, and s_ppdef::where.
Referenced by pparam_printdocs().
static void pparam_printdocs | ( | ) | [static] |
Definition at line 528 of file charmrun.C.
References len, s_ppdef::next, pparam_getdef(), and pparam_optc.
Referenced by arg_init().
static void pparam_delarg | ( | int | i | ) | [static] |
Definition at line 551 of file charmrun.C.
References pparam_argv.
Referenced by pparam_parseopt().
static int pparam_countargs | ( | const char ** | argv | ) | [static] |
Definition at line 557 of file charmrun.C.
References argc.
Referenced by arg_init().
static int pparam_parseopt | ( | ) | [static] |
Definition at line 565 of file charmrun.C.
References name, pparam_argv, pparam_delarg(), pparam_error, pparam_find(), pparam_pos, and pparam_setdef().
Referenced by pparam_parsecmd().
static int pparam_parsecmd | ( | char | optchr, | |
const char ** | argv | |||
) | [static] |
Definition at line 632 of file charmrun.C.
References pparam_argv, pparam_error, pparam_optc, pparam_parseopt(), and pparam_pos.
Referenced by arg_init().
static char** dupargv | ( | const char ** | argv | ) | [static] |
Definition at line 651 of file charmrun.C.
References argc, copy(), len, and malloc().
Referenced by arg_init().
static void arg_init | ( | int | argc, | |
const char ** | argv | |||
) | [static] |
Macro to enable the case in which charmrun stays up even if one of the processors crashes.
This is the only place where charmrun talks back to anyone.
Gets the array of node numbers, IPs, and ports. This is used by the node-programs to talk to one another.
Handles an ACK after a crash. Once it has received all the pending acks, it sends the nodetab table to the crashed node.
Should also send a message to all the other processors telling them that this processor has crashed.
After the crashed processor has been recreated, it connects to charmrun. That data must now be filled into my_process_table and the nodetab_table.
Returns 1 if the connection is opened successfully with the client.
Relaunches a program on the crashed node.
write the startScript file to be sent
add an argument to the argv of the new process so that the restarting processor knows that it is a restarting processor
change the nodetable entry of the crashed processor to connect it to a new one
start the new processor
wait for the reply from the new process
Reconnects a crashed node. It waits for the I-tuple from the just-relaunched program. It also: i) Broadcasts the nodetab table to every other node. ii) Announces the crash to every other node.
update the nodetab entry corresponding to this node, skip the restarted one
Sends a message announcing the crash to every other node. This message will be used to trigger fault tolerance methods.
Definition at line 803 of file charmrun.C.
References _Cmi_numnodes, _exitcode, _skt_invalid_ip, PUP::a, abort(), TopologyRequest::active(), arg_argc, arg_argv, arg_batch_spawn, arg_charmdebug, arg_charmrun_port, arg_charmrunip, arg_child_charmrun, arg_currdir_a, arg_currdir_r, arg_debug, arg_debug_commands, arg_debug_no_pause, arg_debug_no_xrdb, arg_debugger, arg_display, arg_endpe, arg_help, arg_hierarchical_start, arg_in_xterm, arg_local, arg_maxssh, arg_mpiexec, arg_mpiexec_no_n, arg_mylogin, arg_no_va_rand, arg_nodegroup, arg_nodelist, arg_nodeprog_a, arg_nodeprog_r, arg_old_pes, arg_ppn, arg_quiet, arg_realloc_pes, arg_requested_nodes, arg_requested_numhosts, arg_requested_pes, arg_runscript, arg_scalable_start, arg_server, arg_server_auth, arg_server_port, arg_shell, arg_shrinkexpand, arg_singlemaster, arg_skipmaster, arg_ssh_display, arg_startpe, arg_timelimit, arg_timeout, arg_usehostname, arg_verbose, arg_xterm, taskStruct::argLength, assert, CcsImplHeader::attr, CcsSecAttr::auth, auto_provision, PUP::b, buf, c, CcsServer_fd(), CcsServer_new(), CcsServer_recvRequest(), CcsServer_sendReply(), check_stdio_header(), ChMessage_free(), ChMessage_new(), ChMessage_recv(), ChMessage_send(), ChMessageData_recv(), ChMessageHeader_new(), ChMessageHeader_recv(), ChMessageInt(), ChMessageInt_new(), Ck::IO::close(), TopologyRequest::core, count, cstring_join(), taskStruct::cwd, daemon_status2msg(), ChMessage::data, data, ChNodeinfo::dataport, dest, CpuTopoDetails::done, dupargv(), e1, taskStruct::env, envCat(), fd, file, free(), ftruncate(), ftTimer, GetClock(), getenv_display_no_tamper(), getenv_ssh(), group, h, ChMessage::header, TopologyRequest::host, index, ChSingleNodeinfo::info, info, ChNodeinfo::IP, ChMessageHeader::len, ChMessage::len, len, PUP::m, taskStruct::magic, main(), malloc(), MAX_NUM_RETRIES, min(), msg, mylogin(), mynodes_start, n, name, ChInfiAddr::nodeno, ChNodeinfo::nodeno, ChSingleNodeinfo::nodeNo, ChNodeinfo::nPE, npes, ChNodeinfo::nProcessesInPhysNode, ChSingleNodeinfo::num_cores, 
ChSingleNodeinfo::num_pus, ChSingleNodeinfo::num_sockets, Ck::IO::open(), p, param, pathextfix(), pathfix(), pathfix_append(), CcsImplHeader::pe, taskStruct::pgm, pparam_argv, pparam_countargs(), pparam_error, pparam_flag(), pparam_int(), pparam_parsecmd(), pparam_printdocs(), pparam_str(), probefile(), TopologyRequest::pu, r, rank, realloc(), replay_single, reply_abortFn(), CcsImplHeader::replyFd, PUP::s, saved_argc, saved_argv, server_addr, server_fd, server_port, skipblanks(), skipstuff(), skt_accept(), skt_close(), skt_connect(), skt_init(), skt_innode_lookup_ip(), skt_innode_my_ip(), skt_ip_match(), skt_lookup_ip(), skt_print_ip(), skt_recvN(), skt_select1(), skt_sendN(), skt_sendV(), skt_server(), skt_set_abort(), skt_set_idle(), skt_tcp_no_nagle(), sleep(), TopologyRequest::socket, speed(), src, start_timer, status, statusCode, strdup(), subeqs(), substr(), stats::time(), ChMessageHeader::type, type, TopologyRequest::unit(), warned, Ck::IO::write(), write_stdio_duplicate(), and zap_newline().
const int MAX_NUM_RETRIES = 3 [static] |
int mynodes_start [static] |
double ftTimer [static] |
double start_timer [static] |
Definition at line 95 of file charmrun.C.
Referenced by arg_init(), zlib_compress(), and zlib_decompress().
unsigned int server_port [static] |
char server_addr[1024] [static] |
SOCKET server_fd [static] |
Definition at line 368 of file charmrun.C.
int pparam_pos [static] |
const char** pparam_argv [static] |
Definition at line 371 of file charmrun.C.
Referenced by arg_init(), pparam_delarg(), pparam_parsecmd(), and pparam_parseopt().
char pparam_optc = '-' [static] |
char pparam_error[100] [static] |
Definition at line 373 of file charmrun.C.
Referenced by arg_init(), pparam_parsecmd(), and pparam_parseopt().
const char** arg_argv [static] |
int arg_requested_pes [static] |
int arg_requested_nodes [static] |
int arg_requested_numhosts [static] |
int arg_timeout [static] |
int arg_timelimit [static] |
int arg_verbose [static] |
const char* arg_nodelist [static] |
const char* arg_nodegroup [static] |
const char* arg_runscript [static] |
const char* arg_charmrunip [static] |
int arg_debug_no_pause [static] |
int arg_debug_no_xrdb [static] |
int arg_charmdebug [static] |
const char* arg_debug_commands [static] |
int arg_batch_spawn [static] |
int arg_scalable_start [static] |
int arg_hierarchical_start [static] |
int arg_child_charmrun [static] |
int arg_usehostname [static] |
char** saved_argv [static] |
Definition at line 726 of file charmrun.C.
Referenced by arg_init(), CmiGetArgIntDesc(), FORTRAN_AS_C_RETURN(), and FTN_NAME().
int saved_argc [static] |
int arg_realloc_pes [static] |
int arg_old_pes [static] |
int arg_shrinkexpand [static] |
int arg_charmrun_port [static] |
const char* arg_shrinkexpand_basedir [static] |
Definition at line 732 of file charmrun.C.
int arg_maxssh [static] |
const char* arg_shell [static] |
int arg_in_xterm [static] |
const char* arg_debugger [static] |
const char* arg_xterm [static] |
const char* arg_display [static] |
int arg_ssh_display [static] |
const char* arg_mylogin [static] |
int arg_mpiexec [static] |
int arg_mpiexec_no_n [static] |
int arg_no_va_rand [static] |
const char* arg_nodeprog_a [static] |
const char* arg_nodeprog_r [static] |
char* arg_currdir_a [static] |
char* arg_currdir_r [static] |
int arg_server [static] |
int arg_server_port = 0 [static] |
const char* arg_server_auth = NULL [static] |
int replay_single = 0 [static] |
int arg_startpe [static] |
int arg_singlemaster [static] |
int arg_skipmaster [static] |
Definition at line 799 of file charmrun.C.
Definition at line 800 of file charmrun.C.