00001
00005
00006 #include "converse.h"
00007
00008
00009
00010
00011
00012 #include "LBDatabase.h"
00013 #include "LBSimulation.h"
00014 #include "topology.h"
00015
00016 #include "NullLB.h"
00017
00018 CkGroupID _lbdb;
00019
00020 CkpvDeclare(int, numLoadBalancers);
00021 CkpvDeclare(int, hasNullLB);
00022 CkpvDeclare(int, lbdatabaseInited);
00024
00025 CkLBArgs _lb_args;
00026 int _lb_predict=0;
00027 int _lb_predict_delay=10;
00028 int _lb_predict_window=20;
00029
00030
00031 class LBDBRegistry {
00032 friend class LBDBInit;
00033 friend class LBDatabase;
00034 private:
00035
00036 struct LBDBEntry {
00037 const char *name;
00038 LBCreateFn cfn;
00039 LBAllocFn afn;
00040 const char *help;
00041 int shown;
00042 LBDBEntry(): name(0), cfn(0), afn(0), help(0), shown(1) {}
00043 LBDBEntry(int) {}
00044 LBDBEntry(const char *n, LBCreateFn cf, LBAllocFn af,
00045 const char *h, int show=1):
00046 name(n), cfn(cf), afn(af), help(h), shown(show) {};
00047 };
00048 CkVec<LBDBEntry> lbtables;
00049 CkVec<const char *> compile_lbs;
00050 CkVec<const char *> runtime_lbs;
00051 public:
00052 LBDBRegistry() {}
00053 void displayLBs()
00054 {
00055 CmiPrintf("\nAvailable load balancers:\n");
00056 for (int i=0; i<lbtables.length(); i++) {
00057 LBDBEntry &entry = lbtables[i];
00058 if (entry.shown) CmiPrintf("* %s: %s\n", entry.name, entry.help);
00059 }
00060 CmiPrintf("\n");
00061 }
00062 void addEntry(const char *name, LBCreateFn fn, LBAllocFn afn, const char *help, int shown) {
00063 lbtables.push_back(LBDBEntry(name, fn, afn, help, shown));
00064 }
00065 void addCompiletimeBalancer(const char *name) {
00066 compile_lbs.push_back(name);
00067 }
00068 void addRuntimeBalancer(const char *name) {
00069 runtime_lbs.push_back(name);
00070 }
00071 LBCreateFn search(const char *name) {
00072 char *ptr = strpbrk((char *)name, ":,");
00073 int slen = ptr!=NULL?ptr-name:strlen(name);
00074 for (int i=0; i<lbtables.length(); i++)
00075 if (0==strncmp(name, lbtables[i].name, slen)) return lbtables[i].cfn;
00076 return NULL;
00077 }
00078 LBAllocFn getLBAllocFn(const char *name) {
00079 char *ptr = strpbrk((char *)name, ":,");
00080 int slen = ptr-name;
00081 for (int i=0; i<lbtables.length(); i++)
00082 if (0==strncmp(name, lbtables[i].name, slen)) return lbtables[i].afn;
00083 return NULL;
00084 }
00085 };
00086
00087 static LBDBRegistry lbRegistry;
00088
00089 void LBDefaultCreate(const char *lbname)
00090 {
00091 lbRegistry.addCompiletimeBalancer(lbname);
00092 }
00093
00094
00095 void LBRegisterBalancer(const char *name, LBCreateFn fn, LBAllocFn afn, const char *help, int shown)
00096 {
00097 lbRegistry.addEntry(name, fn, afn, help, shown);
00098 }
00099
00100 LBAllocFn getLBAllocFn(char *lbname) {
00101 return lbRegistry.getLBAllocFn(lbname);
00102 }
00103
00104
00105 static void createLoadBalancer(const char *lbname)
00106 {
00107 LBCreateFn fn = lbRegistry.search(lbname);
00108 if (!fn) {
00109 CmiPrintf("Abort: Unknown load balancer: '%s'!\n", lbname);
00110 lbRegistry.displayLBs();
00111 CkAbort("Abort");
00112 }
00113
00114 fn();
00115 }
00116
00117
00118 LBDBInit::LBDBInit(CkArgMsg *m)
00119 {
00120 #if CMK_LBDB_ON
00121 _lbdb = CProxy_LBDatabase::ckNew();
00122
00123
00124 if (lbRegistry.runtime_lbs.size() > 0) {
00125 for (int i=0; i<lbRegistry.runtime_lbs.size(); i++) {
00126 const char *balancer = lbRegistry.runtime_lbs[i];
00127 createLoadBalancer(balancer);
00128 }
00129 }
00130 else if (lbRegistry.compile_lbs.size() > 0) {
00131 for (int i=0; i<lbRegistry.compile_lbs.size(); i++) {
00132 const char* balancer = lbRegistry.compile_lbs[i];
00133 createLoadBalancer(balancer);
00134 }
00135 }
00136 else {
00137
00138
00139
00140
00141 createLoadBalancer("NullLB");
00142 }
00143
00144
00145 if (LBSimulation::doSimulation) {
00146 CmiPrintf("Charm++> Entering Load Balancer Simulation Mode ... \n");
00147 CProxy_LBDatabase(_lbdb).ckLocalBranch()->StartLB();
00148 }
00149 #endif
00150 delete m;
00151 }
00152
00153
00154 void _loadbalancerInit()
00155 {
00156 CkpvInitialize(int, lbdatabaseInited);
00157 CkpvAccess(lbdatabaseInited) = 0;
00158 CkpvInitialize(int, numLoadBalancers);
00159 CkpvAccess(numLoadBalancers) = 0;
00160 CkpvInitialize(int, hasNullLB);
00161 CkpvAccess(hasNullLB) = 0;
00162
00163 char **argv = CkGetArgv();
00164 char *balancer = NULL;
00165 CmiArgGroup("Charm++","Load Balancer");
00166 while (CmiGetArgStringDesc(argv, "+balancer", &balancer, "Use this load balancer")) {
00167 if (CkMyRank() == 0)
00168 lbRegistry.addRuntimeBalancer(balancer);
00169 }
00170
00171
00172
00173 CmiGetArgDoubleDesc(argv,"+LBPeriod", &_lb_args.lbperiod(),"the minimum time period in seconds allowed for two consecutive automatic load balancing");
00174 _lb_args.loop() = CmiGetArgFlagDesc(argv, "+LBLoop", "Use multiple load balancing strategies in loop");
00175
00176
00177
00178 CmiGetArgStringDesc(argv, "+LBTopo", &_lbtopo, "define load balancing topology");
00179
00180 CmiGetArgIntDesc(argv, "+LBNumMoves", &_lb_args.percentMovesAllowed() , "Percentage of chares to be moved (used by RefineKLB) [0-100]");
00181
00182
00183 _lb_predict = CmiGetArgFlagDesc(argv, "+LBPredictor", "Turn on LB future predictor");
00184 CmiGetArgIntDesc(argv, "+LBPredictorDelay", &_lb_predict_delay, "Number of balance steps before learning a model");
00185 CmiGetArgIntDesc(argv, "+LBPredictorWindow", &_lb_predict_window, "Number of steps to use to learn a model");
00186 if (_lb_predict_window < _lb_predict_delay) {
00187 CmiPrintf("LB> [%d] Argument LBPredictorWindow (%d) less than LBPredictorDelay (%d) , fixing\n", CkMyPe(), _lb_predict_window, _lb_predict_delay);
00188 _lb_predict_delay = _lb_predict_window;
00189 }
00190
00191
00192
00193 CmiGetArgIntDesc(argv, "+LBVersion", &_lb_args.lbversion(), "LB database file version number");
00194 CmiGetArgIntDesc(argv, "+LBCentPE", &_lb_args.central_pe(), "CentralLB processor");
00195 int _lb_dump_activated = 0;
00196 if (CmiGetArgIntDesc(argv, "+LBDump", &LBSimulation::dumpStep, "Dump the LB state from this step"))
00197 _lb_dump_activated = 1;
00198 if (_lb_dump_activated && LBSimulation::dumpStep < 0) {
00199 CmiPrintf("LB> Argument LBDump (%d) negative, setting to 0\n",LBSimulation::dumpStep);
00200 LBSimulation::dumpStep = 0;
00201 }
00202 CmiGetArgIntDesc(argv, "+LBDumpSteps", &LBSimulation::dumpStepSize, "Dump the LB state for this amount of steps");
00203 if (LBSimulation::dumpStepSize <= 0) {
00204 CmiPrintf("LB> Argument LBDumpSteps (%d) too small, setting to 1\n",LBSimulation::dumpStepSize);
00205 LBSimulation::dumpStepSize = 1;
00206 }
00207 CmiGetArgStringDesc(argv, "+LBDumpFile", &LBSimulation::dumpFile, "Set the LB state file name");
00208
00209 LBSimulation::doSimulation = CmiGetArgIntDesc(argv, "+LBSim", &LBSimulation::simStep, "Read LB state from LBDumpFile since this step");
00210
00211 if (LBSimulation::doSimulation && LBSimulation::simStep < 0) {
00212 CmiPrintf("LB> Argument LBSim (%d) invalid, should be >= 0\n");
00213 CkExit();
00214 return;
00215 }
00216 CmiGetArgIntDesc(argv, "+LBSimSteps", &LBSimulation::simStepSize, "Read LB state for this number of steps");
00217 if (LBSimulation::simStepSize <= 0) {
00218 CmiPrintf("LB> Argument LBSimSteps (%d) too small, setting to 1\n",LBSimulation::simStepSize);
00219 LBSimulation::simStepSize = 1;
00220 }
00221
00222
00223 LBSimulation::simProcs = 0;
00224 CmiGetArgIntDesc(argv, "+LBSimProcs", &LBSimulation::simProcs, "Number of target processors.");
00225
00226 LBSimulation::showDecisionsOnly =
00227 CmiGetArgFlagDesc(argv, "+LBShowDecisions",
00228 "Write to File: Load Balancing Object to Processor Map decisions during LB Simulation");
00229
00230
00231 _lb_args.syncResume() = CmiGetArgFlagDesc(argv, "+LBSyncResume",
00232 "LB performs a barrier after migration is finished");
00233
00234
00235 if (!CmiGetArgIntDesc(argv, "+LBDebug", &_lb_args.debug(),
00236 "Turn on LB debugging printouts"))
00237 _lb_args.debug() = CmiGetArgFlagDesc(argv, "+LBDebug",
00238 "Turn on LB debugging printouts");
00239
00240
00241 if (!CmiGetArgIntDesc(argv, "+teamSize", &_lb_args.teamSize(),
00242 "Team size"))
00243 _lb_args.teamSize() = 1;
00244
00245
00246 _lb_args.printSummary() = CmiGetArgFlagDesc(argv, "+LBPrintSummary",
00247 "Print load balancing result summary");
00248
00249
00250 _lb_args.ignoreBgLoad() = CmiGetArgFlagDesc(argv, "+LBNoBackground",
00251 "Load balancer ignores the background load.");
00252 #ifdef __BIGSIM__
00253 _lb_args.ignoreBgLoad() = 1;
00254 #endif
00255 _lb_args.migObjOnly() = CmiGetArgFlagDesc(argv, "+LBObjOnly",
00256 "Only load balancing migratable objects, ignoring all others.");
00257 if (_lb_args.migObjOnly()) _lb_args.ignoreBgLoad() = 1;
00258
00259
00260 _lb_args.testPeSpeed() = CmiGetArgFlagDesc(argv, "+LBTestPESpeed",
00261 "Load balancer test all CPUs speed.");
00262 _lb_args.samePeSpeed() = CmiGetArgFlagDesc(argv, "+LBSameCpus",
00263 "Load balancer assumes all CPUs are of same speed.");
00264 if (!_lb_args.testPeSpeed()) _lb_args.samePeSpeed() = 1;
00265
00266 _lb_args.useCpuTime() = CmiGetArgFlagDesc(argv, "+LBUseCpuTime",
00267 "Load balancer uses CPU time instead of wallclock time.");
00268
00269
00270 _lb_args.statsOn() = !CmiGetArgFlagDesc(argv, "+LBOff",
00271 "Turn load balancer instrumentation off");
00272
00273
00274 _lb_args.traceComm() = !CmiGetArgFlagDesc(argv, "+LBCommOff",
00275 "Turn load balancer instrumentation of communication off");
00276
00277
00278 _lb_args.alpha() = PER_MESSAGE_SEND_OVERHEAD_DEFAULT;
00279 _lb_args.beeta() = PER_BYTE_SEND_OVERHEAD_DEFAULT;
00280 CmiGetArgDoubleDesc(argv,"+LBAlpha", &_lb_args.alpha(),
00281 "per message send overhead");
00282 CmiGetArgDoubleDesc(argv,"+LBBeta", &_lb_args.beeta(),
00283 "per byte send overhead");
00284
00285 if (CkMyPe() == 0) {
00286 if (_lb_args.debug()) {
00287 CmiPrintf("CharmLB> Verbose level %d, load balancing period: %g seconds\n", _lb_args.debug(), _lb_args.lbperiod());
00288 }
00289 if (_lb_args.debug() > 1) {
00290 CmiPrintf("CharmLB> Topology %s alpha: %es beta: %es.\n", _lbtopo, _lb_args.alpha(), _lb_args.beeta());
00291 }
00292 if (_lb_args.printSummary())
00293 CmiPrintf("CharmLB> Load balancer print summary of load balancing result.\n");
00294 if (_lb_args.ignoreBgLoad())
00295 CmiPrintf("CharmLB> Load balancer ignores processor background load.\n");
00296 if (_lb_args.samePeSpeed())
00297 CmiPrintf("CharmLB> Load balancer assumes all CPUs are same.\n");
00298 if (_lb_args.useCpuTime())
00299 CmiPrintf("CharmLB> Load balancer uses CPU time instead of wallclock time.\n");
00300 if (LBSimulation::doSimulation)
00301 CmiPrintf("CharmLB> Load balancer running in simulation mode on file '%s' version %d.\n", LBSimulation::dumpFile, _lb_args.lbversion());
00302 if (_lb_args.statsOn()==0)
00303 CkPrintf("CharmLB> Load balancing instrumentation is off.\n");
00304 if (_lb_args.traceComm()==0)
00305 CkPrintf("CharmLB> Load balancing instrumentation for communication is off.\n");
00306 if (_lb_args.migObjOnly())
00307 CkPrintf("LB> Load balancing strategy ignores non-migratable objects.\n");
00308 }
00309 }
00310
00311 int LBDatabase::manualOn = 0;
00312 char *LBDatabase::avail_vector = NULL;
00313 CmiNodeLock avail_vector_lock;
00314
00315 static LBRealType * _expectedLoad = NULL;
00316
00317 void LBDatabase::initnodeFn()
00318 {
00319 int proc;
00320 int num_proc = CkNumPes();
00321 avail_vector= new char[num_proc];
00322 for(proc = 0; proc < num_proc; proc++)
00323 avail_vector[proc] = 1;
00324 avail_vector_lock = CmiCreateLock();
00325
00326 _expectedLoad = new LBRealType[num_proc];
00327 for (proc=0; proc<num_proc; proc++) _expectedLoad[proc]=0.0;
00328 }
00329
00330
00331 void LBDatabase::init(void)
00332 {
00333 myLDHandle = LDCreate();
00334 mystep = 0;
00335 nloadbalancers = 0;
00336 new_ld_balancer = 0;
00337
00338 CkpvAccess(lbdatabaseInited) = 1;
00339 #if CMK_LBDB_ON
00340 if (manualOn) TurnManualLBOn();
00341 #endif
00342 }
00343
00344 LBDatabase::LastLBInfo::LastLBInfo()
00345 {
00346 expectedLoad = _expectedLoad;
00347 }
00348
00349 void LBDatabase::get_avail_vector(char * bitmap) {
00350 CmiAssert(bitmap && avail_vector);
00351 const int num_proc = CkNumPes();
00352 for(int proc = 0; proc < num_proc; proc++){
00353 bitmap[proc] = avail_vector[proc];
00354 }
00355 }
00356
00357
00358
00359
00360 void LBDatabase::set_avail_vector(char * bitmap, int new_ld){
00361 int assigned = 0;
00362 const int num_proc = CkNumPes();
00363 if (new_ld == -2) assigned = 1;
00364 else if (new_ld >= 0) {
00365 CmiAssert(new_ld < num_proc);
00366 new_ld_balancer = new_ld;
00367 assigned = 1;
00368 }
00369 CmiAssert(bitmap && avail_vector);
00370 for(int count = 0; count < num_proc; count++){
00371 avail_vector[count] = bitmap[count];
00372 if((bitmap[count] == 1) && !assigned){
00373 new_ld_balancer = count;
00374 assigned = 1;
00375 }
00376 }
00377 }
00378
00379
00380
00381
00382 int LBDatabase::getLoadbalancerTicket() {
00383 int seq = nloadbalancers;
00384 nloadbalancers ++;
00385 loadbalancers.resize(nloadbalancers);
00386 loadbalancers[seq] = NULL;
00387 return seq;
00388 }
00389
00390 void LBDatabase::addLoadbalancer(BaseLB *lb, int seq) {
00391
00392 if (seq == -1) return;
00393 if (CkMyPe() == 0) {
00394 CmiAssert(seq < nloadbalancers);
00395 if (loadbalancers[seq]) {
00396 CmiPrintf("Duplicate load balancer created at %d\n", seq);
00397 CmiAbort("LBDatabase");
00398 }
00399 }
00400 else
00401 nloadbalancers ++;
00402 loadbalancers.resize(seq+1);
00403 loadbalancers[seq] = lb;
00404 }
00405
00406
00407 void LBDatabase::nextLoadbalancer(int seq) {
00408 if (seq == -1) return;
00409 int next = seq+1;
00410 if (_lb_args.loop()) {
00411 if (next == nloadbalancers) next = 0;
00412 }
00413 else {
00414 if (next == nloadbalancers) next --;
00415 }
00416 if (seq != next) {
00417 loadbalancers[seq]->turnOff();
00418 CmiAssert(loadbalancers[next]);
00419 loadbalancers[next]->turnOn();
00420 }
00421 }
00422
00423
00424
00425
00426 const char *LBDatabase::loadbalancer(int seq) {
00427 if (lbRegistry.runtime_lbs.length()) {
00428 CmiAssert(seq < lbRegistry.runtime_lbs.length());
00429 return lbRegistry.runtime_lbs[seq];
00430 }
00431 else {
00432 CmiAssert(seq < lbRegistry.compile_lbs.length());
00433 return lbRegistry.compile_lbs[seq];
00434 }
00435 }
00436
00437 void LBDatabase::pup(PUP::er& p)
00438 {
00439 IrrGroup::pup(p);
00440
00441 int np;
00442 if (!p.isUnpacking()) np = CkNumPes();
00443 p|np;
00444 CmiAssert(avail_vector);
00445
00446 if (p.isUnpacking() && np > CkNumPes()) {
00447 CmiLock(avail_vector_lock);
00448 delete [] avail_vector;
00449 avail_vector = new char[np];
00450 for (int i=0; i<np; i++) avail_vector[i] = 1;
00451 CmiUnlock(avail_vector_lock);
00452 }
00453 p(avail_vector, np);
00454 p|mystep;
00455 if(p.isUnpacking()) nloadbalancers = 0;
00456 }
00457
00458
00459 void LBDatabase::EstObjLoad(const LDObjHandle &_h, double cputime)
00460 {
00461 #if CMK_LBDB_ON
00462 LBDB *const db = (LBDB*)(_h.omhandle.ldb.handle);
00463 LBObj *const obj = db->LbObj(_h);
00464
00465 CmiAssert(obj != NULL);
00466 obj->setTiming(cputime);
00467 #endif
00468 }
00469
00470
00471
00472
/// Turn manual load balancing on. If no LBDatabase object is available yet,
/// only LBDatabase::manualOn is set so that LBDatabase::init() applies it.
/// Compiled to a no-op unless CMK_LBDB_ON is set.
void TurnManualLBOn()
{
#if CMK_LBDB_ON
  LBDatabase *const localDb = LBDatabase::Object();
  if (localDb == NULL) {
    LBDatabase::manualOn = 1;
    return;
  }
  localDb->TurnManualLBOn();
#endif
}
00485
/// Turn manual load balancing off. If no LBDatabase object is available yet,
/// only LBDatabase::manualOn is cleared.
/// Compiled to a no-op unless CMK_LBDB_ON is set.
void TurnManualLBOff()
{
#if CMK_LBDB_ON
  LBDatabase *const localDb = LBDatabase::Object();
  if (localDb == NULL) {
    LBDatabase::manualOn = 0;
    return;
  }
  localDb->TurnManualLBOff();
#endif
}
00498
/// Turn load-balancing instrumentation on. Before the database has been
/// initialized on this PE, only the statsOn option is set.
/// Compiled to a no-op unless CMK_LBDB_ON is set.
extern "C" void LBTurnInstrumentOn() {
#if CMK_LBDB_ON
  if (!CkpvAccess(lbdatabaseInited)) {
    _lb_args.statsOn() = 1;
    return;
  }
  LBDatabase::Object()->CollectStatsOn();
#endif
}
00507
/// Turn load-balancing instrumentation off. Before the database has been
/// initialized on this PE, only the statsOn option is cleared.
/// Compiled to a no-op unless CMK_LBDB_ON is set.
extern "C" void LBTurnInstrumentOff() {
#if CMK_LBDB_ON
  if (!CkpvAccess(lbdatabaseInited)) {
    _lb_args.statsOn() = 0;
    return;
  }
  LBDatabase::Object()->CollectStatsOff();
#endif
}
/// Call ClearLoads() on the LBDatabase returned by LBDatabase::Object().
/// Compiled to a no-op unless CMK_LBDB_ON is set.
void LBClearLoads()
{
#if CMK_LBDB_ON
  LBDatabase *const localDb = LBDatabase::Object();
  localDb->ClearLoads();
#endif
}
00521
00522 void LBTurnPredictorOn(LBPredictorFunction *model) {
00523 #if CMK_LBDB_ON
00524 LBDatabase::Object()->PredictorOn(model);
00525 #endif
00526 }
00527
00528 void LBTurnPredictorOn(LBPredictorFunction *model, int wind) {
00529 #if CMK_LBDB_ON
00530 LBDatabase::Object()->PredictorOn(model, wind);
00531 #endif
00532 }
00533
/// Call PredictorOff() on the LBDatabase returned by LBDatabase::Object().
/// Compiled to a no-op unless CMK_LBDB_ON is set.
void LBTurnPredictorOff()
{
#if CMK_LBDB_ON
  LBDatabase *const localDb = LBDatabase::Object();
  localDb->PredictorOff();
#endif
}
00539
00540 void LBChangePredictor(LBPredictorFunction *model) {
00541 #if CMK_LBDB_ON
00542 LBDatabase::Object()->ChangePredictor(model);
00543 #endif
00544 }
00545
/// Set the load-balancing period to `second`. Before the database has been
/// initialized on this PE, the value is stored in _lb_args.lbperiod()
/// instead of being passed to the LBDatabase object.
/// Compiled to a no-op unless CMK_LBDB_ON is set.
void LBSetPeriod(double second)
{
#if CMK_LBDB_ON
  if (!CkpvAccess(lbdatabaseInited)) {
    _lb_args.lbperiod() = second;
    return;
  }
  LBDatabase::Object()->SetLBPeriod(second);
#endif
}
00554
00555 #include "LBDatabase.def.h"
00556