00001
00013
00014 #include "OrbLB.h"
00015
00016
00017
00018 extern int quietModeRequested;
00019
00020 CreateLBFunc_Def(OrbLB, "partition objects based on coordinates")
00021
00022 CkpvExtern(int, _lb_obj_index);
00023
00024 void OrbLB::init()
00025 {
00026 lbname = "OrbLB";
00027 #if CMK_LB_USER_DATA
00028 if (use_udata && CkpvAccess(_lb_obj_index) == -1)
00029 CkpvAccess(_lb_obj_index) = LBRegisterObjUserData(sizeof(CkArrayIndex));
00030 #endif
00031 }
00032
00033
00034 OrbLB::OrbLB(const CkLBOptions &opt, bool userdata): CBase_OrbLB(opt)
00035 {
00036 use_udata = userdata;
00037 init();
00038 if (CkMyPe() == 0 && !quietModeRequested)
00039 CkPrintf("CharmLB> OrbLB created.\n");
00040 }
00041
00042 OrbLB::OrbLB(const CkLBOptions &opt): CBase_OrbLB(opt)
00043 {
00044 use_udata = true;
00045 init();
00046 if (CkMyPe() == 0)
00047 CkPrintf("[%d] OrbLB created\n",CkMyPe());
00048 }
00049
00050 bool OrbLB::QueryBalanceNow(int _step)
00051 {
00052 return true;
00053 }
00054
00055 void OrbLB::rec_divide(int n, Partition &p)
00056 {
00057 int i;
00058 int midpos;
00059 int n1, n2;
00060 double load1, currentload;
00061 int maxdir, count;
00062 Partition p1, p2;
00063
00064 if (_lb_args.debug()>=2) {
00065 CmiPrintf("rec_divide starts: partition n:%d count:%d load:%f (%d %d %d, %d %d %d)\n", n, p.count, p.load, p.origin[0], p.origin[1], p.origin[2], p.corner[0], p.corner[1], p.corner[2]);
00066 }
00067
00068 if (n==1) {
00069 partitions[currentp++] = p;
00070 return;
00071 }
00072
00073
00074
00075
00076 if (_lb_args.debug()>=2) {
00077 CmiPrintf("{\n");
00078 }
00079
00080
00081 n2 = n/2;
00082 n1 = n-n2;
00083
00084
00085 load1 = (1.0*n1/n) * p.load;
00086 if (_lb_args.debug()>=2)
00087 CmiPrintf("goal: n1: %d with load1: %f; n2: %d load2: %f\n", n1, load1, n2, p.load-load1);
00088
00089 p1 = p;
00090 p1.refno = ++refno;
00091 p1.bkpes.resize(0);
00092
00093 p2 = p;
00094 p2.refno = ++refno;
00095 p2.bkpes.resize(0);
00096
00097
00098 int maxSpan=-1;
00099 maxdir = XDIR;
00100 for (i=XDIR; i<=ZDIR; i++) {
00101 int myspan = p.corner[i] - p.origin[i];
00102 if (myspan > maxSpan) {
00103 maxdir = i;
00104 maxSpan = myspan;
00105 }
00106 }
00107
00108
00109 int dir2 = (maxdir+1)%3;
00110 int dir3 = (maxdir+2)%3;
00111
00112 currentload = 0.0;
00113
00114 if (!_lb_args.ignoreBgLoad()) {
00115 CmiAssert(p.bkpes.size() == n);
00116
00117 for (i=0; i<n1; i++) currentload += statsData->procs[p.bkpes[i]].bg_walltime;
00118 }
00119
00120 count = 0;
00121 midpos = p.origin[maxdir];
00122 for (i=0; i<nObjs; i++) {
00123
00124 if (computeLoad[vArray[maxdir][i].id].refno != p.refno) continue;
00125 if (vArray[maxdir][i].v<p.origin[maxdir]) continue;
00126 if (vArray[maxdir][i].v>p.corner[maxdir]) break;
00127
00128 int cid = vArray[maxdir][i].id;
00129
00130 if ( computeLoad[cid].v[dir2] >= p.origin[dir2] &&
00131 computeLoad[cid].v[dir2] <= p.corner[dir2] &&
00132 computeLoad[cid].v[dir3] >= p.origin[dir3] &&
00133 computeLoad[cid].v[dir3] <= p.corner[dir3] ) {
00134
00135 if (currentload <= load1) {
00136 computeLoad[cid].refno = p1.refno;
00137 currentload += computeLoad[cid].load;
00138 count ++;
00139 midpos = computeLoad[cid].v[maxdir];
00140 }
00141 else {
00142 computeLoad[cid].refno = p2.refno;
00143 }
00144 }
00145 }
00146 #ifdef DEBUG
00147
00148 CmiPrintf("DIR:%d %d load:%f\n", maxdir, midpos, currentload);
00149 #endif
00150
00151 p1.corner[maxdir] = midpos;
00152 p2.origin[maxdir] = midpos;
00153
00154 p1.load = currentload;
00155 p1.count = count;
00156 p2.load = p.load - p1.load;
00157 p2.count = p.count - p1.count;
00158
00159
00160 if (!_lb_args.ignoreBgLoad()) {
00161 for (i=0; i<n; i++)
00162 if (i<n1) p1.bkpes.push_back(p.bkpes[i]);
00163 else p2.bkpes.push_back(p.bkpes[i]);
00164 }
00165
00166 if (_lb_args.debug()>=2) {
00167 CmiPrintf("p1: n:%d count:%d load:%f\n", n1, p1.count, p1.load);
00168 CmiPrintf("p2: n:%d count:%d load:%f\n", n2, p2.count, p2.load);
00169 CmiPrintf("}\n");
00170 }
00171
00172 rec_divide(n1, p1);
00173 rec_divide(n2, p2);
00174 }
00175
00176 void OrbLB::setVal(int x, int y, int z)
00177 {
00178 int i;
00179 for (i=0; i<nObjs; i++) {
00180 computeLoad[i].tv = 1000000.0*computeLoad[i].v[x]+
00181 1000.0*computeLoad[i].v[y]+
00182 computeLoad[i].v[z];
00183 }
00184 #if 0
00185 CmiPrintf("original:%d\n", x);
00186 for (i=0; i<numComputes; i++)
00187 CmiPrintf("%d ", computeLoad[i].tv);
00188 CmiPrintf("\n");
00189 #endif
00190 }
00191
00192 int OrbLB::sort_partition(int x, int p, int r)
00193 {
00194 double mid = computeLoad[vArray[x][p].id].tv;
00195 int i= p;
00196 int j= r;
00197 while (1) {
00198 while (computeLoad[vArray[x][j].id].tv > mid && j>i) j--;
00199 while (computeLoad[vArray[x][i].id].tv < mid && i<j) i++;
00200 if (i<j) {
00201 if (computeLoad[vArray[x][i].id].tv == computeLoad[vArray[x][j].id].tv)
00202 {
00203 if (computeLoad[vArray[x][i].id].tv != mid) CmiAbort("my god!\n");
00204 if (i-p < r-j) i++;
00205 else j--;
00206 continue;
00207 }
00208 VecArray tmp = vArray[x][i];
00209 vArray[x][i] = vArray[x][j];
00210 vArray[x][j] = tmp;
00211 }
00212 else
00213 return j;
00214 }
00215 }
00216
00217 void OrbLB::qsort(int x, int p, int r)
00218 {
00219 if (p<r) {
00220 int q = sort_partition(x, p, r);
00221
00222 qsort(x, p, q-1);
00223 qsort(x, q+1, r);
00224 }
00225 }
00226
00227 void OrbLB::quicksort(int x)
00228 {
00229 int y = (x+1)%3;
00230 int z = (x+2)%3;
00231 setVal(x, y, z);
00232 qsort(x, 0, nObjs-1);
00233
00234 #if 0
00235 CmiPrintf("result for :%d\n", x);
00236 for (int i=0; i<nObjs; i++)
00237 CmiPrintf("%d ", computeLoad[vArray[x][i].id].tv);
00238 CmiPrintf("\n");
00239 #endif
00240 }
00241
00242 void OrbLB::mapPartitionsToNodes()
00243 {
00244 int i,j;
00245 #if 1
00246 if (!_lb_args.ignoreBgLoad()) {
00247
00248 for (i=0; i<npartition; i++) partitions[i].node = partitions[i].bkpes[0];
00249 }
00250 else {
00251 int n = 0;
00252 for (i=0; i<P; i++) {
00253 if (!statsData->procs[i].available) continue;
00254 partitions[n++].node = i;
00255 }
00256 }
00257 #else
00258 PatchMap *patchMap = PatchMap::Object();
00259
00260 int **pool = new int *[P];
00261 for (i=0; i<P; i++) pool[i] = new int[P];
00262 for (i=0; i<P; i++) for (j=0; j<P; j++) pool[i][j] = 0;
00263
00264
00265 for (i=0; i<numComputes; i++)
00266 {
00267 for (j=0; j<P; j++)
00268 if (computeLoad[i].refno == partitions[j].refno)
00269 {
00270 int node1 = patchMap->node(computes[i].patch1);
00271 int node2 = patchMap->node(computes[i].patch2);
00272 pool[j][node1]++;
00273 pool[j][node2]++;
00274 }
00275 }
00276 #ifdef DEBUG
00277 for (i=0; i<P; i++) {
00278 for (j=0; j<P; j++) CmiPrintf("%d ", pool[i][j]);
00279 CmiPrintf("\n");
00280 }
00281 #endif
00282 while (1)
00283 {
00284 int index=-1, node=0, eager=-1;
00285 for (j=0; j<npartition; j++) {
00286 if (partitions[j].node != -1) continue;
00287 int wantmost=-1, maxnodes=-1;
00288 for (k=0; k<P; k++) if (pool[j][k] > maxnodes && !partitions[k].mapped) {wantmost=k; maxnodes = pool[j][k];}
00289 if (maxnodes > eager) {
00290 index = j; node = wantmost; eager = maxnodes;
00291 }
00292 }
00293 if (eager == -1) break;
00294 partitions[index].node = node;
00295 partitions[node].mapped = 1;
00296 }
00297
00298 for (i=0; i<P; i++) delete [] pool[i];
00299 delete [] pool;
00300 #endif
00301
00302
00303
00304
00305
00306
00307
00308
00309
00310
00311
00312 if (_lb_args.debug()) {
00313 CmiPrintf("After partitioning: \n");
00314 for (i=0; i<npartition; i++) {
00315 double bgload = 0.0;
00316 if (!_lb_args.ignoreBgLoad())
00317 bgload = statsData->procs[partitions[i].bkpes[0]].bg_walltime;
00318 CmiPrintf("[%d=>%d] (%d,%d,%d) (%d,%d,%d) load:%f count:%d objload:%f\n", i, partitions[i].node, partitions[i].origin[0], partitions[i].origin[1], partitions[i].origin[2], partitions[i].corner[0], partitions[i].corner[1], partitions[i].corner[2], partitions[i].load, partitions[i].count, partitions[i].load-bgload);
00319 }
00320 for (i=npartition; i<P; i++) CmiPrintf("[%d] --------- \n", i);
00321 }
00322
00323 }
00324
00325 void OrbLB::work(LDStats* stats)
00326 {
00327 #if CMK_LBDB_ON
00328 int i,j;
00329
00330 statsData = stats;
00331
00332 P = stats->nprocs();
00333
00334
00335 nObjs = stats->n_migrateobjs;
00336 #ifdef DEBUG
00337 CmiPrintf("ORB: num objects:%d\n", nObjs);
00338 #endif
00339
00340
00341 computeLoad = new ComputeLoad[nObjs];
00342 for (i=XDIR; i<=ZDIR; i++) vArray[i] = new VecArray[nObjs];
00343
00344
00345
00346 int objIdx = 0;
00347 for (i=0; i<stats->n_objs; i++) {
00348 LDObjData &odata = stats->objData[i];
00349 if (odata.migratable == 0) continue;
00350 computeLoad[objIdx].id = objIdx;
00351 #if CMK_LB_USER_DATA
00352 int x, y, z;
00353 if (use_udata) {
00354 CkArrayIndex *idx =
00355 (CkArrayIndex *)odata.getUserData(CkpvAccess(_lb_obj_index));
00356 x = idx->data()[0];
00357 y = idx->data()[1];
00358 z = idx->data()[2];
00359 } else {
00360 CkGroupID locMgrGid;
00361 locMgrGid.idx = odata.omHandle().id.id.idx;
00362 CkLocMgr *localLocMgr = (CkLocMgr *) CkLocalBranch(locMgrGid);
00363 CkArrayIndex arr_idx = localLocMgr->lookupIdx(odata.objID());
00364 x = arr_idx.data()[0];
00365 y = arr_idx.data()[1];
00366 z = arr_idx.data()[2];
00367 }
00368 computeLoad[objIdx].v[XDIR] = x;
00369 computeLoad[objIdx].v[YDIR] = y;
00370 computeLoad[objIdx].v[ZDIR] = z;
00371 #else
00372 CkGroupID locMgrGid;
00373 locMgrGid.idx = odata.omHandle().id.id.idx;
00374 CkLocMgr *localLocMgr = (CkLocMgr *) CkLocalBranch(locMgrGid);
00375 CkArrayIndex arr_idx = localLocMgr->lookupIdx(odata.objID());
00376 computeLoad[objIdx].v[XDIR] = arr_idx.data()[0];
00377 computeLoad[objIdx].v[YDIR] = arr_idx.data()[1];
00378 computeLoad[objIdx].v[ZDIR] = arr_idx.data()[2];
00379 #endif
00380 #if CMK_LB_CPUTIMER
00381 computeLoad[objIdx].load = _lb_args.useCpuTime()?odata.cpuTime:odata.wallTime;
00382 #else
00383 computeLoad[objIdx].load = odata.wallTime;
00384 #endif
00385 computeLoad[objIdx].refno = 0;
00386 computeLoad[objIdx].partition = NULL;
00387 for (int k=XDIR; k<=ZDIR; k++) {
00388 vArray[k][objIdx].id = objIdx;
00389 vArray[k][objIdx].v = computeLoad[objIdx].v[k];
00390 }
00391 #ifdef DEBUG
00392 CmiPrintf("Object %d: %d %d %d load:%f\n", objIdx, computeLoad[objIdx].v[XDIR], computeLoad[objIdx].v[YDIR], computeLoad[objIdx].v[ZDIR], computeLoad[objIdx].load);
00393 #endif
00394 objIdx ++;
00395 }
00396 CmiAssert(nObjs == objIdx);
00397
00398 double t = CkWallTimer();
00399
00400 quicksort(XDIR);
00401 quicksort(YDIR);
00402 quicksort(ZDIR);
00403 #ifdef DEBUG
00404 CmiPrintf("qsort time: %f\n", CkWallTimer() - t);
00405 #endif
00406
00407 npartition = 0;
00408 for (i=0; i<P; i++)
00409 if (stats->procs[i].available == true) npartition++;
00410 partitions = new Partition[npartition];
00411
00412 double totalLoad = 0.0;
00413 int minx, miny, minz, maxx, maxy, maxz;
00414 minx = maxx= computeLoad[0].v[XDIR];
00415 miny = maxy= computeLoad[0].v[YDIR];
00416 minz = maxz= computeLoad[0].v[ZDIR];
00417 for (i=1; i<nObjs; i++) {
00418 totalLoad += computeLoad[i].load;
00419 if (computeLoad[i].v[XDIR] < minx) minx = computeLoad[i].v[XDIR];
00420 else if (computeLoad[i].v[XDIR] > maxx) maxx = computeLoad[i].v[XDIR];
00421 if (computeLoad[i].v[YDIR] < miny) miny = computeLoad[i].v[YDIR];
00422 else if (computeLoad[i].v[YDIR] > maxy) maxy = computeLoad[i].v[YDIR];
00423 if (computeLoad[i].v[ZDIR] < minz) minz = computeLoad[i].v[ZDIR];
00424 else if (computeLoad[i].v[ZDIR] > maxz) maxz = computeLoad[i].v[ZDIR];
00425 }
00426
00427 top_partition.origin[XDIR] = minx;
00428 top_partition.origin[YDIR] = miny;
00429 top_partition.origin[ZDIR] = minz;
00430 top_partition.corner[XDIR] = maxx;
00431 top_partition.corner[YDIR] = maxy;
00432 top_partition.corner[ZDIR] = maxz;
00433
00434 top_partition.refno = 0;
00435 top_partition.load = 0.0;
00436 top_partition.count = nObjs;
00437
00438
00439 if (!_lb_args.ignoreBgLoad()) {
00440 top_partition.bkpes.resize(0);
00441 double total = totalLoad;
00442 for (i=0; i<P; i++) {
00443 if (!stats->procs[i].available) continue;
00444 double bkload = stats->procs[i].bg_walltime;
00445 total += bkload;
00446 }
00447 double averageLoad = total / npartition;
00448 for (i=0; i<P; i++) {
00449 if (!stats->procs[i].available) continue;
00450 double bkload = stats->procs[i].bg_walltime;
00451 if (bkload < averageLoad) top_partition.bkpes.push_back(i);
00452 else CkPrintf("OrbLB Info> PE %d with %f background load will have 0 object.\n", i, bkload);
00453 }
00454 npartition = top_partition.bkpes.size();
00455
00456 for (i=0; i<npartition; i++)
00457 totalLoad += stats->procs[top_partition.bkpes[i]].bg_walltime;
00458 if (_lb_args.debug()>=2) {
00459 CkPrintf("BG load: ");
00460 for (i=0; i<P; i++) CkPrintf(" %f", stats->procs[i].bg_walltime);
00461 CkPrintf("\n");
00462 CkPrintf("Partition BG load: ");
00463 for (i=0; i<npartition; i++) CkPrintf(" %f", stats->procs[top_partition.bkpes[i]].bg_walltime);
00464 CkPrintf("\n");
00465 }
00466 }
00467
00468 top_partition.load = totalLoad;
00469
00470 currentp = 0;
00471 refno = 0;
00472
00473
00474 rec_divide(npartition, top_partition);
00475
00476
00477 mapPartitionsToNodes();
00478
00479
00480 int *num = new int[P];
00481 for (i=0; i<P; i++) num[i] = 0;
00482
00483 for (i=0; i<nObjs; i++)
00484 {
00485 for (j=0; j<npartition; j++)
00486 if (computeLoad[i].refno == partitions[j].refno) {
00487 computeLoad[i].partition = partitions+j;
00488 num[j] ++;
00489 }
00490 CmiAssert(computeLoad[i].partition != NULL);
00491 }
00492
00493 for (i=0; i<npartition; i++)
00494 if (num[i] != partitions[i].count)
00495 CmiAbort("OrbLB: Compute counts don't agree!\n");
00496
00497 delete [] num;
00498
00499
00500 objIdx = 0;
00501 for(int obj=0;obj<stats->n_objs;obj++) {
00502 stats->to_proc[obj] = stats->from_proc[obj];
00503 LDObjData &odata = stats->objData[obj];
00504 if (odata.migratable == 0) { continue; }
00505 int frompe = stats->from_proc[obj];
00506 int tope = computeLoad[objIdx].partition->node;
00507 if (frompe != tope) {
00508 if (_lb_args.debug() >= 3) {
00509 CkPrintf("[%d] Obj %d migrating from %d to %d\n",
00510 CkMyPe(),obj,frompe,tope);
00511 }
00512 stats->to_proc[obj] = tope;
00513 }
00514 objIdx ++;
00515 }
00516
00517
00518 delete [] computeLoad;
00519 for (i=0; i<3; i++) delete [] vArray[i];
00520 delete [] partitions;
00521
00522 if (_lb_args.debug() >= 1)
00523 CkPrintf("OrbLB finished time: %fs\n", CkWallTimer() - t);
00524 #endif
00525 }
00526
00527 #include "OrbLB.def.h"
00528