00001
00005
00023 #include "charm++.h"
00024 #include "ckgraph.h"
00025 #include "GreedyRefineLB.h"
00026
00027 #include <float.h>
00028 #include <limits.h>
00029 #include <algorithm>
00030 #include <math.h>
00031
00032 extern int quietModeRequested;
00033
00034
00035
00036
00037 #define LOAD_MIG_BAL 1.003
00038
00039 using namespace std;
00040
00041 class GreedyRefineLB::Solution {
00042 public:
00043 Solution() {}
00044 Solution(int pe, double maxLoad, int nmoves) : pe(pe), max_load(maxLoad), migrations(nmoves) {}
00045 int pe;
00046 float max_load;
00047 int migrations;
00048
00049 void pup(PUP::er &p) {
00050 p|pe;
00051 p|max_load;
00052 p|migrations;
00053 }
00054 };
00055
00056
00057 class GreedyRefineLB::PHeap {
00058 public:
00059 PHeap(int numpes) {
00060 Q.reserve(numpes+1);
00061 Q.push_back(NULL);
00062 }
00063
00064 void addProcessors(std::vector<GreedyRefineLB::GProc> &procs, bool bgLoadZero, bool insert=true) {
00065 for (int i=0; i < procs.size(); i++) {
00066 GreedyRefineLB::GProc &p = procs[i];
00067 if (p.available) {
00068 p.load = p.bgload;
00069 if (insert) {
00070 Q.push_back(&p);
00071 p.pos = Q.size()-1;
00072 }
00073 }
00074 }
00075 if (!bgLoadZero) buildMinHeap();
00076 }
00077
00078 inline GreedyRefineLB::GProc *top() const {
00079 CkAssert(Q.size() > 1);
00080 return Q[1];
00081 }
00082
00083 inline void push(GreedyRefineLB::GProc *p) {
00084 Q.push_back(p);
00085 p->pos = Q.size()-1;
00086 siftUp(p->pos);
00087 }
00088
00089 inline GreedyRefineLB::GProc *pop() {
00090 if (Q.size() == 1) return NULL;
00091 GreedyRefineLB::GProc *retval;
00092 if (Q.size() == 2) {
00093 retval = Q[1];
00094 Q.pop_back();
00095 return retval;
00096 }
00097 retval = Q[1];
00098 Q[1] = Q.back();
00099 Q.pop_back();
00100 Q[1]->pos = 1;
00101 siftDown(1);
00102 return retval;
00103 }
00104
00105
00106 void remove(GreedyRefineLB::GProc *p) {
00107 int pos = p->pos;
00108 if ((Q.size() == 2) || (pos == Q.size()-1)) return Q.pop_back();
00109 if (pos == 1) { pop(); return; }
00110 Q[pos] = Q.back();
00111 Q.pop_back();
00112 Q[pos]->pos = pos;
00113 if (Q[pos/2]->load > Q[pos]->load) siftUp(pos);
00114 else siftDown(pos);
00115 }
00116
00117 inline void clear() {
00118 Q.clear();
00119 Q.push_back(NULL);
00120 }
00121
00122 private:
00123
00124 void min_heapify(int i) {
00125 const int left = 2*i;
00126 const int right = 2*i + 1;
00127 int smallest = i;
00128 if ((left < Q.size()) && (Q[left]->load < Q[smallest]->load)) smallest = left;
00129 if ((right < Q.size()) && (Q[right]->load < Q[smallest]->load)) smallest = right;
00130 if (smallest != i) {
00131 swap(i,smallest);
00132 Q[i]->pos = i;
00133 Q[smallest]->pos = smallest;
00134 min_heapify(smallest);
00135 }
00136 }
00137
00138 void inline buildMinHeap() {
00139 for (int i=Q.size()/2; i > 0; i--) min_heapify(i);
00140 }
00141
00142 inline void swap(int pos1, int pos2) {
00143 GreedyRefineLB::GProc *t = Q[pos1];
00144 Q[pos1] = Q[pos2];
00145 Q[pos2] = t;
00146 }
00147
00148 void siftUp(int pos) {
00149 if (pos == 1) return;
00150 int ppos = pos/2;
00151 if (Q[ppos]->load > Q[pos]->load) {
00152 swap(ppos,pos);
00153 Q[ppos]->pos = ppos;
00154 Q[pos]->pos = pos;
00155 siftUp(ppos);
00156 }
00157 }
00158
00159 inline int minChild(int pos) const {
00160 int c1 = pos*2;
00161 int c2 = pos*2 + 1;
00162 if (c1 >= Q.size()) return -1;
00163 if (c2 >= Q.size()) return c1;
00164 if (Q[c1]->load < Q[c2]->load) return c1;
00165 else return c2;
00166 }
00167
00168 void siftDown(int pos) {
00169 int cpos = minChild(pos);
00170 if (cpos == -1) return;
00171 if (Q[pos]->load > Q[cpos]->load) {
00172 swap(pos,cpos);
00173 Q[cpos]->pos = cpos;
00174 Q[pos]->pos = pos;
00175 siftDown(cpos);
00176 }
00177 }
00178
00179 std::vector<GreedyRefineLB::GProc*> Q;
00180 };
00181
00182 CreateLBFunc_Def(GreedyRefineLB, "Greedy refinement-based algorithm")
00183
00184 GreedyRefineLB::GreedyRefineLB(const CkLBOptions &opt): CBase_GreedyRefineLB(opt), migrationTolerance(1.0)
00185 {
00186 lbname = "GreedyRefineLB";
00187 if ((CkMyPe() == 0) && !quietModeRequested)
00188 CkPrintf("CharmLB> GreedyRefineLB created.\n");
00189 if (_lb_args.percentMovesAllowed() < 100) {
00190 migrationTolerance = float(_lb_args.percentMovesAllowed())/100.0;
00191 }
00192 concurrent = true;
00193 }
00194
00195 GreedyRefineLB::GreedyRefineLB(CkMigrateMessage *m): CBase_GreedyRefineLB(m), migrationTolerance(1.0) {
00196 lbname = "GreedyRefineLB";
00197 if (_lb_args.percentMovesAllowed() < 100)
00198 migrationTolerance = float(_lb_args.percentMovesAllowed())/100.0;
00199 concurrent = true;
00200 }
00201
00202
00203
00204
00205 double GreedyRefineLB::greedyLB(const std::vector<GreedyRefineLB::GObj*> &pobjs,
00206 GreedyRefineLB::PHeap &procHeap,
00207 const BaseLB::LDStats *stats) const
00208 {
00209 double max_load = 0;
00210 int nmoves = 0;
00211 for (int i=0; i < pobjs.size(); i++) {
00212 const GreedyRefineLB::GObj *obj = pobjs[i];
00213 GreedyRefineLB::GProc *p = procHeap.pop();
00214
00215 p->load += (obj->load / p->speed);
00216 procHeap.push(p);
00217
00218 if (p->id != obj->oldPE) nmoves++;
00219 if (p->load > max_load) max_load = p->load;
00220 }
00221
00222 if ((CkMyPe() == cur_ld_balancer+1) && (_lb_args.debug() > 1)) {
00223 CkPrintf("[%d] %f : Greedy strategy nmoves=%d, max_load=%f\n", CkMyPe(),
00224 CkWallTimer() - strategyStartTime, nmoves, max_load);
00225 }
00226 return max_load;
00227 }
00228
00229
00230 #if __DEBUG_GREEDY_REFINE_
00231 #include <fstream>
00232 void GreedyRefineLB::dumpObjLoads(std::vector<GreedyRefineLB::GObj> &objs) {
00233 std::ofstream outfile("objloads.txt");
00234 outfile << objs.size() << std::endl;
00235 for (int i=0; i < objs.size(); i++) {
00236 GreedyRefineLB::GObj &obj = objs[i];
00237 if ((i > 0) && (i % 100 == 0)) outfile << obj.load << std::endl;
00238 else outfile << obj.load << " ";
00239 }
00240 outfile.close();
00241 }
00242 void GreedyRefineLB::dumpProcLoads(std::vector<GreedyRefineLB::GProc> &procs) {
00243 std::ofstream outfile("proc_bg_loads.txt");
00244 outfile << procs.size() << std::endl;
00245 for (int i=0; i < procs.size(); i++) {
00246 GreedyRefineLB::GProc &p = procs[i];
00247 if ((i > 0) && (i % 100 == 0)) outfile << p.load << std::endl;
00248 else outfile << p.load << " ";
00249 }
00250 outfile.close();
00251 }
00252 #endif
00253
00254 double GreedyRefineLB::fillData(LDStats *stats,
00255 std::vector<GreedyRefineLB::GObj> &objs,
00256 std::vector<GreedyRefineLB::GObj*> &pobjs,
00257 std::vector<GreedyRefineLB::GProc> &procs,
00258 PHeap &procHeap)
00259 {
00260 const int n_pes = stats->nprocs();
00261 const int n_objs = stats->n_objs;
00262
00263 int unmigratableObjs = 0;
00264 availablePes = 0; totalObjLoad = 0;
00265 double minBGLoad = DBL_MAX; double avgBGLoad = 0; double maxBGLoad = 0;
00266 double minSpeed = DBL_MAX; double maxSpeed = 0; double avgSpeed = 0;
00267 double minOload = DBL_MAX; double maxOload = 0;
00268
00269 for (int pe=0; pe < n_pes; pe++) {
00270 GreedyRefineLB::GProc &p = procs[pe];
00271 p.id = pe;
00272 p.available = stats->procs[pe].available;
00273 p.speed = stats->procs[pe].pe_speed;
00274 if (p.available) {
00275 availablePes++;
00276 p.bgload = stats->procs[pe].bg_walltime;
00277 if (p.bgload > maxBGLoad) maxBGLoad = p.bgload;
00278 if (_lb_args.debug() > 1) {
00279 double &speed = stats->procs[pe].pe_speed;
00280 if (speed < minSpeed) minSpeed = speed;
00281 if (speed > maxSpeed) maxSpeed = speed;
00282 avgSpeed += speed;
00283 }
00284 }
00285 }
00286 if (!availablePes) CkAbort("GreedyRefineLB: No available processors\n");
00287
00288 for (int i=0; i < n_objs; i++) {
00289 LDObjData &oData = stats->objData[i];
00290 GreedyRefineLB::GObj &obj = objs[i];
00291 int pe = stats->from_proc[i];
00292 obj.id = i;
00293 obj.oldPE = pe;
00294 CkAssert(pe >= 0 && pe <= n_pes);
00295 if (pe == n_pes) obj.oldPE = -1;
00296 if (!oData.migratable) {
00297 CkAssert(pe < n_pes);
00298 unmigratableObjs++;
00299 GreedyRefineLB::GProc &p = procs[pe];
00300 if (!p.available)
00301 CkAbort("GreedyRefineLB: nonmigratable object on unavailable processor\n");
00302 p.bgload += oData.wallTime;
00303 if (p.bgload > maxBGLoad) maxBGLoad = p.bgload;
00304 } else {
00305 obj.load = oData.wallTime * stats->procs[pe].pe_speed;
00306 pobjs.push_back(&obj);
00307 totalObjLoad += obj.load;
00308 if (_lb_args.debug() > 1) {
00309 if (obj.load < minOload) minOload = obj.load;
00310 if (obj.load > maxOload) maxOload = obj.load;
00311 }
00312 }
00313 }
00314
00315 procHeap.addProcessors(procs, (maxBGLoad <= 0.001), true);
00316
00317
00318 if ((_lb_args.debug() > 1) && (!concurrent || (CkMyPe() == cur_ld_balancer))) {
00319 for (int pe=0; pe < n_pes; pe++) {
00320 GreedyRefineLB::GProc &p = procs[pe];
00321 if (!p.available) continue;
00322 if (p.bgload < minBGLoad) minBGLoad = p.bgload;
00323 avgBGLoad += p.bgload;
00324 }
00325 CkPrintf("[%d] GreedyRefineLB: num pes=%d, num objs=%d\n", CkMyPe(), n_pes, n_objs);
00326 CkPrintf("[%d] Unavailable processors=%d, Unmigratable objs=%d\n", CkMyPe(), n_pes - availablePes, unmigratableObjs);
00327 CkPrintf("[%d] min_bgload=%f mean_bgload=%f max_bgload=%f\n", CkMyPe(), minBGLoad, (avgBGLoad / availablePes), maxBGLoad);
00328 CkPrintf("[%d] min_oload=%f mean_oload=%f max_oload=%f\n", CkMyPe(), minOload, (totalObjLoad / (n_objs - unmigratableObjs)), maxOload);
00329 CkPrintf("[%d] min_speed=%f mean_speed=%f max_speed=%f\n", CkMyPe(), minSpeed, (avgSpeed / availablePes), maxSpeed);
00330
00331 double maxLoad = 0;
00332 std::vector<double> ploads(n_pes, -1);
00333 for (int i=0; i < n_objs; i++) {
00334 GreedyRefineLB::GObj &o = objs[i];
00335 int pe = o.oldPE;
00336 if (pe < 0) continue;
00337 if (ploads[pe] < 0) ploads[pe] = procs[pe].bgload;
00338 if (stats->objData[i].migratable)
00339 ploads[pe] += o.load;
00340 if (ploads[pe] > maxLoad) maxLoad = ploads[pe];
00341 }
00342 CkPrintf("[%d] maxload with current map=%f\n", CkMyPe(), maxLoad);
00343
00344
00345 }
00346
00347 return maxBGLoad;
00348 }
00349
00350 static const float Avals[] = {1.0, 1.005, 1.01, 1.015, 1.02, 1.03, 1.04, 1.05, 1.06, 1.07, 1.08, 1.16, 1.20, 1.30};
00351 static const float Bvals[] = {1.0, 1.05, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0, 2.1, 2.2, 2.3, FLT_MAX};
00352 #define Avals_len 14
00353 #define Bvals_len 16
00354 #define NUM_SOLUTIONS Avals_len*Bvals_len+1
00355 static void getGreedyRefineParams(int rank, float &A, float &B) {
00356 if (rank == 0) { A = 0; B = -1; return; }
00357 rank--;
00358 int x = rank / Bvals_len;
00359 if (x >= Avals_len) {
00360 A = B = -1;
00361 } else {
00362 A = Avals[x];
00363 B = Bvals[rank % Bvals_len];
00364 }
00365 }
00366
00367 void GreedyRefineLB::sendSolution(double maxLoad, int migrations)
00368 {
00369
00370
00371
00372 GreedyRefineLB::Solution sol(CkMyPe(), maxLoad, migrations);
00373 size_t buf_size = sizeof(GreedyRefineLB::Solution);
00374 void *buffer = malloc(buf_size);
00375 PUP::toMem pd(buffer);
00376 pd|sol;
00377
00378 CkCallback cb(CkIndex_GreedyRefineLB::receiveSolutions((CkReductionMsg*)NULL), thisProxy[cur_ld_balancer]);
00379 contribute(buf_size, buffer, CkReduction::set, cb);
00380
00381 if ((_lb_args.debug() > 1) && (CkMyPe() == cur_ld_balancer)) {
00382 CkPrintf("[%d] %f : Called gather/reduction\n", CkMyPe(), CkWallTimer() - strategyStartTime);
00383 }
00384
00385 free(buffer);
00386 }
00387
00388 void GreedyRefineLB::work(LDStats *stats)
00389 {
00390 strategyStartTime = CkWallTimer();
00391 float A = 1.001, B = FLT_MAX;
00392 if (concurrent) {
00393 getGreedyRefineParams(CkMyPe(), A, B);
00394 if (A < 0) {
00395 sendSolution(-1,-1);
00396 return;
00397 }
00398 }
00399
00400 const int n_pes = stats->nprocs();
00401 totalObjs = stats->n_objs;
00402
00403 std::vector<GreedyRefineLB::GObj> objs(totalObjs);
00404
00405
00406 std::vector<GreedyRefineLB::GObj*> pobjs;
00407 pobjs.reserve(totalObjs);
00408
00409 std::vector<GreedyRefineLB::GProc> procs(n_pes);
00410 PHeap procHeap(n_pes);
00411
00412
00413 double maxLoad = fillData(stats, objs, pobjs, procs, procHeap);
00414
00415
00416
00417 std::sort(pobjs.begin(), pobjs.end(), GreedyRefineLB::ObjLoadGreater());
00418
00419 double M = 0, greedyMaxLoad = 0;
00420 if (B > 0) {
00421
00422 M = greedyLB(pobjs, procHeap, stats);
00423 greedyMaxLoad = M;
00424 procHeap.addProcessors(procs, (maxLoad <= 0.001), false);
00425 }
00426
00427
00428 int nmoves = 0;
00429
00430
00431
00432
00433 M *= A;
00434 if ((_lb_args.debug() > 1) && (CkMyPe() == cur_ld_balancer)) {
00435 CkPrintf("maxLoad=%f totalObjLoad=%f M=%f A=%f B=%f\n", maxLoad, totalObjLoad, M, A, B);
00436 }
00437 for (int i=0; i < pobjs.size(); i++) {
00438 const GreedyRefineLB::GObj *obj = pobjs[i];
00439 GreedyRefineLB::GProc *llp = procHeap.top();
00440 GreedyRefineLB::GProc *prevPe = NULL;
00441 if (obj->oldPE >= 0) prevPe = &(procs[obj->oldPE]);
00442
00443
00444 GreedyRefineLB::GProc *p = llp;
00445 if (prevPe && (prevPe->load <= (llp->load+0.01)*B) && (prevPe->load + obj->load <= M) && (prevPe->available))
00446 p = prevPe;
00447
00448
00449 procHeap.remove(p);
00450 p->load += (obj->load / p->speed);
00451 procHeap.push(p);
00452
00453 if (p->id != obj->oldPE) {
00454 nmoves++;
00455 stats->to_proc[obj->id] = p->id;
00456 }
00457 if (p->load > maxLoad) {
00458 maxLoad = p->load;
00459 if (maxLoad > M) M = maxLoad;
00460 }
00461 }
00462
00463
00464 if (concurrent) {
00465
00466 sendSolution(maxLoad, nmoves);
00467
00468 #if __DEBUG_GREEDY_REFINE_
00469 CkCallback cb(CkReductionTarget(GreedyRefineLB, receiveTotalTime), thisProxy[cur_ld_balancer]);
00470 contribute(sizeof(double), &strategyStartTime, CkReduction::sum_double, cb);
00471 #endif
00472 } else if (_lb_args.debug() > 0) {
00473 double greedyRatio = 1.0;
00474 if (B > 0) greedyRatio = maxLoad / greedyMaxLoad;
00475 double migrationRatio = nmoves/double(pobjs.size());
00476 if ((greedyRatio > 1.03) && (migrationRatio < migrationTolerance)) {
00477 CkPrintf("[%d] GreedyRefine: WARNING - migration ratio is %.3f (within user-specified tolerance).\n"
00478 "but maxload after lb is %f higher than greedy. Consider testing with A=0, B=-1\n",
00479 CkMyPe(), migrationRatio, greedyRatio);
00480 }
00481 CkPrintf("[%d] GreedyRefineLB: after lb, max_load=%.3f, migrations=%d(%.2f%), ratioToGreedy=%.3f\n",
00482 CkMyPe(), maxLoad, nmoves, 100.0*migrationRatio, greedyRatio);
00483 }
00484 }
00485
00486 void GreedyRefineLB::receiveTotalTime(double time)
00487 {
00488 CkPrintf("Avg start time of GreedyRefineLB strategy is %f\n", time / CkNumPes());
00489 }
00490
00491
00492 void GreedyRefineLB::receiveSolutions(CkReductionMsg *msg)
00493 {
00494 std::vector<GreedyRefineLB::Solution> results(NUM_SOLUTIONS);
00495
00496 int migrationsAllowed = totalObjs * migrationTolerance;
00497
00498 bool feasibleSolutions = false;
00499 float lowest_max_load = FLT_MAX;
00500 float lowest_max_load_f = FLT_MAX;
00501 float highest_max_load = 0;
00502 int lowestMigrations = INT_MAX;
00503 const GreedyRefineLB::Solution *bestSol = NULL;
00504
00505
00506
00507 CkReduction::setElement *current = (CkReduction::setElement*)msg->getData();
00508 int numSolutions = 0;
00509 for ( ; current && (numSolutions < NUM_SOLUTIONS); current = current->next()) {
00510 PUP::fromMem pd(¤t->data);
00511 pd|results[numSolutions];
00512 if (results[numSolutions].migrations >= 0) {
00513 const GreedyRefineLB::Solution &r = results[numSolutions++];
00514 if ((r.migrations <= migrationsAllowed) && (r.max_load < lowest_max_load_f)) {
00515 lowest_max_load_f = r.max_load;
00516 feasibleSolutions = true;
00517 }
00518
00519 if ((r.migrations < lowestMigrations) ||
00520 ((r.migrations == lowestMigrations) && (r.max_load < bestSol->max_load))) {
00521 lowestMigrations = r.migrations;
00522 bestSol = &r;
00523 }
00524
00525 if (r.max_load < lowest_max_load) lowest_max_load = r.max_load;
00526 if (r.max_load > highest_max_load) highest_max_load = r.max_load;
00527 }
00528 }
00529 results.resize(numSolutions);
00530 CkAssert(numSolutions > 0);
00531
00532 if (feasibleSolutions) {
00533
00534 int bestMigrations = INT_MAX;
00535 for (int i=0; i < results.size(); i++) {
00536 const GreedyRefineLB::Solution &r = results[i];
00537
00538
00539
00540
00541 if ((r.migrations < bestMigrations && r.max_load <= lowest_max_load_f*LOAD_MIG_BAL) ||
00542 (r.migrations == bestMigrations && r.max_load < bestSol->max_load)) {
00543 bestMigrations = r.migrations;
00544 bestSol = &r;
00545 }
00546 }
00547 }
00548
00549
00550
00551 if (_lb_args.debug() > 1) {
00552 CkPrintf("GreedyRefineLB: Lowest max_load is %f, worst max_load is %f, lowest migrations=%d\n",
00553 lowest_max_load, highest_max_load, lowestMigrations);
00554
00555 CkPrintf("GreedyRefineLB: Got %d solutions at %f\nBest one is from PE %d with max_load=%f, migrations=%d\n",
00556 numSolutions, CkWallTimer(), bestSol->pe, bestSol->max_load, bestSol->migrations);
00557 float A, B;
00558 getGreedyRefineParams(bestSol->pe, A, B);
00559 CkPrintf("Best PE used params A=%f B=%f\n", A, B);
00560 }
00561
00562
00563 thisProxy[bestSol->pe].ApplyDecision();
00564 }
00565
00566 #include "GreedyRefineLB.def.h"
00567