00001
00013
00014 #include "OrbLB.h"
00015
00016
00017
00018 CreateLBFunc_Def(OrbLB, "partition objects based on coordinates")
00019
00020 OrbLB::OrbLB(const CkLBOptions &opt): CentralLB(opt)
00021 {
00022 lbname = "OrbLB";
00023 if (CkMyPe() == 0)
00024 CkPrintf("[%d] OrbLB created\n",CkMyPe());
00025 }
00026
00027 CmiBool OrbLB::QueryBalanceNow(int _step)
00028 {
00029 return CmiTrue;
00030 }
00031
00032 void OrbLB::rec_divide(int n, Partition &p)
00033 {
00034 int i;
00035 int midpos;
00036 int n1, n2;
00037 double load1, currentload;
00038 int maxdir, count;
00039 Partition p1, p2;
00040
00041 if (_lb_args.debug()>=2) {
00042 CmiPrintf("rec_divide starts: partition n:%d count:%d load:%f (%d %d %d, %d %d %d)\n", n, p.count, p.load, p.origin[0], p.origin[1], p.origin[2], p.corner[0], p.corner[1], p.corner[2]);
00043 }
00044
00045 if (n==1) {
00046 partitions[currentp++] = p;
00047 return;
00048 }
00049
00050
00051
00052
00053 if (_lb_args.debug()>=2) {
00054 CmiPrintf("{\n");
00055 }
00056
00057
00058 n2 = n/2;
00059 n1 = n-n2;
00060
00061
00062 load1 = (1.0*n1/n) * p.load;
00063 if (_lb_args.debug()>=2)
00064 CmiPrintf("goal: n1: %d with load1: %f; n2: %d load2: %f\n", n1, load1, n2, p.load-load1);
00065
00066 p1 = p;
00067 p1.refno = ++refno;
00068 p1.bkpes.resize(0);
00069
00070 p2 = p;
00071 p2.refno = ++refno;
00072 p2.bkpes.resize(0);
00073
00074
00075 int maxSpan=-1;
00076 maxdir = XDIR;
00077 for (i=XDIR; i<=ZDIR; i++) {
00078 int myspan = p.corner[i] - p.origin[i];
00079 if (myspan > maxSpan) {
00080 maxdir = i;
00081 maxSpan = myspan;
00082 }
00083 }
00084
00085
00086 int dir2 = (maxdir+1)%3;
00087 int dir3 = (maxdir+2)%3;
00088
00089 currentload = 0.0;
00090
00091 if (!_lb_args.ignoreBgLoad()) {
00092 CmiAssert(p.bkpes.size() == n);
00093
00094 for (i=0; i<n1; i++) currentload += statsData->procs[p.bkpes[i]].bg_walltime;
00095 }
00096
00097 count = 0;
00098 midpos = p.origin[maxdir];
00099 for (i=0; i<nObjs; i++) {
00100
00101 if (computeLoad[vArray[maxdir][i].id].refno != p.refno) continue;
00102 if (vArray[maxdir][i].v<p.origin[maxdir]) continue;
00103 if (vArray[maxdir][i].v>p.corner[maxdir]) break;
00104
00105 int cid = vArray[maxdir][i].id;
00106
00107 if ( computeLoad[cid].v[dir2] >= p.origin[dir2] &&
00108 computeLoad[cid].v[dir2] <= p.corner[dir2] &&
00109 computeLoad[cid].v[dir3] >= p.origin[dir3] &&
00110 computeLoad[cid].v[dir3] <= p.corner[dir3] ) {
00111
00112 if (currentload <= load1) {
00113 computeLoad[cid].refno = p1.refno;
00114 currentload += computeLoad[cid].load;
00115 count ++;
00116 midpos = computeLoad[cid].v[maxdir];
00117 }
00118 else {
00119 computeLoad[cid].refno = p2.refno;
00120 }
00121 }
00122 }
00123 #ifdef DEBUG
00124
00125 CmiPrintf("DIR:%d %d load:%f\n", maxdir, midpos, currentload);
00126 #endif
00127
00128 p1.corner[maxdir] = midpos;
00129 p2.origin[maxdir] = midpos;
00130
00131 p1.load = currentload;
00132 p1.count = count;
00133 p2.load = p.load - p1.load;
00134 p2.count = p.count - p1.count;
00135
00136
00137 if (!_lb_args.ignoreBgLoad()) {
00138 for (i=0; i<n; i++)
00139 if (i<n1) p1.bkpes.push_back(p.bkpes[i]);
00140 else p2.bkpes.push_back(p.bkpes[i]);
00141 }
00142
00143 if (_lb_args.debug()>=2) {
00144 CmiPrintf("p1: n:%d count:%d load:%f\n", n1, p1.count, p1.load);
00145 CmiPrintf("p2: n:%d count:%d load:%f\n", n2, p2.count, p2.load);
00146 CmiPrintf("}\n");
00147 }
00148
00149 rec_divide(n1, p1);
00150 rec_divide(n2, p2);
00151 }
00152
00153 void OrbLB::setVal(int x, int y, int z)
00154 {
00155 int i;
00156 for (i=0; i<nObjs; i++) {
00157 computeLoad[i].tv = 1000000.0*computeLoad[i].v[x]+
00158 1000.0*computeLoad[i].v[y]+
00159 computeLoad[i].v[z];
00160 }
00161 #if 0
00162 CmiPrintf("original:%d\n", x);
00163 for (i=0; i<numComputes; i++)
00164 CmiPrintf("%d ", computeLoad[i].tv);
00165 CmiPrintf("\n");
00166 #endif
00167 }
00168
00169 int OrbLB::sort_partition(int x, int p, int r)
00170 {
00171 double mid = computeLoad[vArray[x][p].id].tv;
00172 int i= p;
00173 int j= r;
00174 while (1) {
00175 while (computeLoad[vArray[x][j].id].tv > mid && j>i) j--;
00176 while (computeLoad[vArray[x][i].id].tv < mid && i<j) i++;
00177 if (i<j) {
00178 if (computeLoad[vArray[x][i].id].tv == computeLoad[vArray[x][j].id].tv)
00179 {
00180 if (computeLoad[vArray[x][i].id].tv != mid) CmiAbort("my god!\n");
00181 if (i-p < r-j) i++;
00182 else j--;
00183 continue;
00184 }
00185 VecArray tmp = vArray[x][i];
00186 vArray[x][i] = vArray[x][j];
00187 vArray[x][j] = tmp;
00188 }
00189 else
00190 return j;
00191 }
00192 }
00193
00194 void OrbLB::qsort(int x, int p, int r)
00195 {
00196 if (p<r) {
00197 int q = sort_partition(x, p, r);
00198
00199 qsort(x, p, q-1);
00200 qsort(x, q+1, r);
00201 }
00202 }
00203
00204 void OrbLB::quicksort(int x)
00205 {
00206 int y = (x+1)%3;
00207 int z = (x+2)%3;
00208 setVal(x, y, z);
00209 qsort(x, 0, nObjs-1);
00210
00211 #if 0
00212 CmiPrintf("result for :%d\n", x);
00213 for (int i=0; i<nObjs; i++)
00214 CmiPrintf("%d ", computeLoad[vArray[x][i].id].tv);
00215 CmiPrintf("\n");
00216 #endif
00217 }
00218
00219 void OrbLB::mapPartitionsToNodes()
00220 {
00221 int i,j;
00222 #if 1
00223 if (!_lb_args.ignoreBgLoad()) {
00224
00225 for (i=0; i<npartition; i++) partitions[i].node = partitions[i].bkpes[0];
00226 }
00227 else {
00228 int n = 0;
00229 for (i=0; i<P; i++) {
00230 if (!statsData->procs[i].available) continue;
00231 partitions[n++].node = i;
00232 }
00233 }
00234 #else
00235 PatchMap *patchMap = PatchMap::Object();
00236
00237 int **pool = new int *[P];
00238 for (i=0; i<P; i++) pool[i] = new int[P];
00239 for (i=0; i<P; i++) for (j=0; j<P; j++) pool[i][j] = 0;
00240
00241
00242 for (i=0; i<numComputes; i++)
00243 {
00244 for (j=0; j<P; j++)
00245 if (computeLoad[i].refno == partitions[j].refno)
00246 {
00247 int node1 = patchMap->node(computes[i].patch1);
00248 int node2 = patchMap->node(computes[i].patch2);
00249 pool[j][node1]++;
00250 pool[j][node2]++;
00251 }
00252 }
00253 #ifdef DEBUG
00254 for (i=0; i<P; i++) {
00255 for (j=0; j<P; j++) CmiPrintf("%d ", pool[i][j]);
00256 CmiPrintf("\n");
00257 }
00258 #endif
00259 while (1)
00260 {
00261 int index=-1, node=0, eager=-1;
00262 for (j=0; j<npartition; j++) {
00263 if (partitions[j].node != -1) continue;
00264 int wantmost=-1, maxnodes=-1;
00265 for (k=0; k<P; k++) if (pool[j][k] > maxnodes && !partitions[k].mapped) {wantmost=k; maxnodes = pool[j][k];}
00266 if (maxnodes > eager) {
00267 index = j; node = wantmost; eager = maxnodes;
00268 }
00269 }
00270 if (eager == -1) break;
00271 partitions[index].node = node;
00272 partitions[node].mapped = 1;
00273 }
00274
00275 for (i=0; i<P; i++) delete [] pool[i];
00276 delete [] pool;
00277 #endif
00278
00279
00280
00281
00282
00283
00284
00285
00286
00287
00288
00289 if (_lb_args.debug()) {
00290 CmiPrintf("After partitioning: \n");
00291 for (i=0; i<npartition; i++) {
00292 double bgload = 0.0;
00293 if (!_lb_args.ignoreBgLoad())
00294 bgload = statsData->procs[partitions[i].bkpes[0]].bg_walltime;
00295 CmiPrintf("[%d=>%d] (%d,%d,%d) (%d,%d,%d) load:%f count:%d objload:%f\n", i, partitions[i].node, partitions[i].origin[0], partitions[i].origin[1], partitions[i].origin[2], partitions[i].corner[0], partitions[i].corner[1], partitions[i].corner[2], partitions[i].load, partitions[i].count, partitions[i].load-bgload);
00296 }
00297 for (i=npartition; i<P; i++) CmiPrintf("[%d] --------- \n", i);
00298 }
00299
00300 }
00301
00302 void OrbLB::work(LDStats* stats)
00303 {
00304 #if CMK_LBDB_ON
00305 int i,j;
00306
00307 statsData = stats;
00308
00309 P = stats->nprocs();
00310
00311
00312 nObjs = stats->n_migrateobjs;
00313 #ifdef DEBUG
00314 CmiPrintf("ORB: num objects:%d\n", nObjs);
00315 #endif
00316
00317
00318 computeLoad = new ComputeLoad[nObjs];
00319 for (i=XDIR; i<=ZDIR; i++) vArray[i] = new VecArray[nObjs];
00320
00321
00322
00323 int objIdx = 0;
00324 for (i=0; i<stats->n_objs; i++) {
00325 LDObjData &odata = stats->objData[i];
00326 if (odata.migratable == 0) continue;
00327 computeLoad[objIdx].id = objIdx;
00328 computeLoad[objIdx].v[XDIR] = odata.objID().id[0];
00329 computeLoad[objIdx].v[YDIR] = odata.objID().id[1];
00330 computeLoad[objIdx].v[ZDIR] = odata.objID().id[2];
00331 #if CMK_LB_CPUTIMER
00332 computeLoad[objIdx].load = _lb_args.useCpuTime()?odata.cpuTime:odata.wallTime;
00333 #else
00334 computeLoad[objIdx].load = odata.wallTime;
00335 #endif
00336 computeLoad[objIdx].refno = 0;
00337 computeLoad[objIdx].partition = NULL;
00338 for (int k=XDIR; k<=ZDIR; k++) {
00339 vArray[k][objIdx].id = objIdx;
00340 vArray[k][objIdx].v = computeLoad[objIdx].v[k];
00341 }
00342 #ifdef DEBUG
00343 CmiPrintf("Object %d: %d %d %d load:%f\n", objIdx, computeLoad[objIdx].v[XDIR], computeLoad[objIdx].v[YDIR], computeLoad[objIdx].v[ZDIR], computeLoad[objIdx].load);
00344 #endif
00345 objIdx ++;
00346 }
00347 CmiAssert(nObjs == objIdx);
00348
00349 double t = CkWallTimer();
00350
00351 quicksort(XDIR);
00352 quicksort(YDIR);
00353 quicksort(ZDIR);
00354 #ifdef DEBUG
00355 CmiPrintf("qsort time: %f\n", CkWallTimer() - t);
00356 #endif
00357
00358 npartition = 0;
00359 for (i=0; i<P; i++)
00360 if (stats->procs[i].available == CmiTrue) npartition++;
00361 partitions = new Partition[npartition];
00362
00363 double totalLoad = 0.0;
00364 int minx, miny, minz, maxx, maxy, maxz;
00365 minx = maxx= computeLoad[0].v[XDIR];
00366 miny = maxy= computeLoad[0].v[YDIR];
00367 minz = maxz= computeLoad[0].v[ZDIR];
00368 for (i=1; i<nObjs; i++) {
00369 totalLoad += computeLoad[i].load;
00370 if (computeLoad[i].v[XDIR] < minx) minx = computeLoad[i].v[XDIR];
00371 else if (computeLoad[i].v[XDIR] > maxx) maxx = computeLoad[i].v[XDIR];
00372 if (computeLoad[i].v[YDIR] < miny) miny = computeLoad[i].v[YDIR];
00373 else if (computeLoad[i].v[YDIR] > maxy) maxy = computeLoad[i].v[YDIR];
00374 if (computeLoad[i].v[ZDIR] < minz) minz = computeLoad[i].v[ZDIR];
00375 else if (computeLoad[i].v[ZDIR] > maxz) maxz = computeLoad[i].v[ZDIR];
00376 }
00377
00378 top_partition.origin[XDIR] = minx;
00379 top_partition.origin[YDIR] = miny;
00380 top_partition.origin[ZDIR] = minz;
00381 top_partition.corner[XDIR] = maxx;
00382 top_partition.corner[YDIR] = maxy;
00383 top_partition.corner[ZDIR] = maxz;
00384
00385 top_partition.refno = 0;
00386 top_partition.load = 0.0;
00387 top_partition.count = nObjs;
00388
00389
00390 if (!_lb_args.ignoreBgLoad()) {
00391 top_partition.bkpes.resize(0);
00392 double total = totalLoad;
00393 for (i=0; i<P; i++) {
00394 if (!stats->procs[i].available) continue;
00395 double bkload = stats->procs[i].bg_walltime;
00396 total += bkload;
00397 }
00398 double averageLoad = total / npartition;
00399 for (i=0; i<P; i++) {
00400 if (!stats->procs[i].available) continue;
00401 double bkload = stats->procs[i].bg_walltime;
00402 if (bkload < averageLoad) top_partition.bkpes.push_back(i);
00403 else CkPrintf("OrbLB Info> PE %d with %f background load will have 0 object.\n", i, bkload);
00404 }
00405 npartition = top_partition.bkpes.size();
00406
00407 for (i=0; i<npartition; i++)
00408 totalLoad += stats->procs[top_partition.bkpes[i]].bg_walltime;
00409 if (_lb_args.debug()>=2) {
00410 CkPrintf("BG load: ");
00411 for (i=0; i<P; i++) CkPrintf(" %f", stats->procs[i].bg_walltime);
00412 CkPrintf("\n");
00413 CkPrintf("Partition BG load: ");
00414 for (i=0; i<npartition; i++) CkPrintf(" %f", stats->procs[top_partition.bkpes[i]].bg_walltime);
00415 CkPrintf("\n");
00416 }
00417 }
00418
00419 top_partition.load = totalLoad;
00420
00421 currentp = 0;
00422 refno = 0;
00423
00424
00425 rec_divide(npartition, top_partition);
00426
00427
00428 mapPartitionsToNodes();
00429
00430
00431 int *num = new int[P];
00432 for (i=0; i<P; i++) num[i] = 0;
00433
00434 for (i=0; i<nObjs; i++)
00435 {
00436 for (j=0; j<npartition; j++)
00437 if (computeLoad[i].refno == partitions[j].refno) {
00438 computeLoad[i].partition = partitions+j;
00439 num[j] ++;
00440 }
00441 CmiAssert(computeLoad[i].partition != NULL);
00442 }
00443
00444 for (i=0; i<npartition; i++)
00445 if (num[i] != partitions[i].count)
00446 CmiAbort("OrbLB: Compute counts don't agree!\n");
00447
00448 delete [] num;
00449
00450
00451 objIdx = 0;
00452 for(int obj=0;obj<stats->n_objs;obj++) {
00453 stats->to_proc[obj] = stats->from_proc[obj];
00454 LDObjData &odata = stats->objData[obj];
00455 if (odata.migratable == 0) { continue; }
00456 int frompe = stats->from_proc[obj];
00457 int tope = computeLoad[objIdx].partition->node;
00458 if (frompe != tope) {
00459 if (_lb_args.debug() >= 3) {
00460 CkPrintf("[%d] Obj %d migrating from %d to %d\n",
00461 CkMyPe(),obj,frompe,tope);
00462 }
00463 stats->to_proc[obj] = tope;
00464 }
00465 objIdx ++;
00466 }
00467
00468
00469 delete [] computeLoad;
00470 for (i=0; i<3; i++) delete [] vArray[i];
00471 delete [] partitions;
00472
00473 if (_lb_args.debug() >= 1)
00474 CkPrintf("OrbLB finished time: %fs\n", CkWallTimer() - t);
00475 #endif
00476 }
00477
00478 #include "OrbLB.def.h"
00479