00001 #include "partitioning_strategies.h"
00002 #include "hilbert.h"
00003 #include "TopoManager.h"
00004 #include "converse.h"
00005
00006 #ifdef __cplusplus
00007 #include <queue>
00008 #include <vector>
00009 #include <iostream>
00010 #include <sstream>
00011 #include <algorithm>
00012 #include <math.h>
00013
00014 #ifndef __STDC_FORMAT_MACROS
00015 # define __STDC_FORMAT_MACROS
00016 #endif
00017 #ifndef __STDC_LIMIT_MACROS
00018 # define __STDC_LIMIT_MACROS
00019 #endif
00020 #include <inttypes.h>
00021
00022 using namespace std;
00023
00024 #ifndef PARTITION_TOPOLOGY_VERBOSE
00025 #define PARTITION_TOPOLOGY_VERBOSE 0
00026 #endif
00027
00039 void getHilbertList(int * procList)
00040 {
00041 vector<int> hcoords;
00042
00043 int numDims;
00044 int *dims, *pdims;
00045 int *ranks, numranks;
00046
00047 TopoManager_getDimCount(&numDims);
00048
00049 dims = new int[numDims+1];
00050 pdims = new int[numDims+1];
00051
00052 TopoManager_getDims(dims);
00053 ranks = new int[dims[numDims]];
00054
00055
00056 int maxDim = dims[0];
00057 for(int i = 1; i < numDims; i++) {
00058 if(maxDim < dims[i]) maxDim = dims[i];
00059 }
00060
00061 int pow2 = 1;
00062 while(maxDim>pow2)
00063 pow2 *= 2;
00064
00065 int cubeDim = pow2;
00066
00067 for(int i = 1; i < numDims; i++) {
00068 cubeDim *= pow2;
00069 }
00070
00071 int currPos = 0;
00072 for(int i = 0; i < cubeDim; i++) {
00073 hcoords = int_to_Hilbert(i,numDims);
00074
00075 for(int i = 0; i < numDims; i++) {
00076 if(hcoords[i] >= dims[i]) continue;
00077 }
00078
00079
00080 for(int i = 0; i < numDims; i++) {
00081 pdims[i] = hcoords[i];
00082 }
00083 TopoManager_getRanks(&numranks, ranks, pdims);
00084 if(numranks == 0) continue;
00085
00086
00087 for(int j = 0; j < numranks; j++) {
00088 procList[currPos++] = ranks[j];
00089 }
00090 }
00091
00092 CmiAssert(currPos == CmiNumNodes());
00093
00094 delete [] dims;
00095 delete [] pdims;
00096 delete [] ranks;
00097 }
00098
00101 void getPlanarList(int *procList)
00102 {
00103 int numDims;
00104 int *dims, *pdims;
00105 int *ranks, numranks;
00106 int phynodes;
00107
00108 TopoManager_getDimCount(&numDims);
00109
00110 dims = new int[numDims+1];
00111 pdims = new int[numDims+1];
00112
00113 TopoManager_getDims(dims);
00114 ranks = new int[dims[numDims]];
00115
00116 phynodes = 1;
00117 for(int i = 0; i < numDims; i++) {
00118 phynodes *= dims[i];
00119 pdims[i] = 0;
00120 }
00121
00122 int currPos = 0;
00123 for(int i = 0; i < phynodes; i++) {
00124
00125 TopoManager_getRanks(&numranks, ranks, pdims);
00126 for(int j = numDims - 1; j > -1; j--) {
00127 pdims[j] = (pdims[j]+1) % dims[j];
00128 if(pdims[j] != 0) break;
00129 }
00130 if(numranks == 0) continue;
00131
00132 for(int j = 0; j < numranks; j++) {
00133 procList[currPos++] = ranks[j];
00134 }
00135 }
00136
00137 CmiAssert(currPos == CmiNumNodes());
00138
00139 delete [] dims;
00140 delete [] pdims;
00141 delete [] ranks;
00142 }
00143
00144 namespace {
00145
00146
00147 struct TopoManagerWrapper {
00148 TopoManager tmgr;
00149 int a_dim, b_dim, c_dim, d_dim, e_dim;
00150 int a_rot, b_rot, c_rot, d_rot, e_rot;
00151 int a_mod, b_mod, c_mod, d_mod, e_mod;
00152 int fixnode(int node) {
00153 return node;
00154 }
00155 TopoManagerWrapper() {
00156 #if CMK_BLUEGENEQ
00157 int na=tmgr.getDimNA();
00158 int nb=tmgr.getDimNB();
00159 int nc=tmgr.getDimNC();
00160 int nd=tmgr.getDimND();
00161 int ne=tmgr.getDimNE();
00162 #else
00163 int na=tmgr.getDimNX();
00164 int nb=tmgr.getDimNY();
00165 int nc=tmgr.getDimNZ();
00166 int nd=1;
00167 int ne=1;
00168 #endif
00169 std::vector<int> a_flags(na);
00170 std::vector<int> b_flags(nb);
00171 std::vector<int> c_flags(nc);
00172 std::vector<int> d_flags(nd);
00173 std::vector<int> e_flags(ne);
00174 for ( int i=0; i<na; ++i ) { a_flags[i] = 0; }
00175 for ( int i=0; i<nb; ++i ) { b_flags[i] = 0; }
00176 for ( int i=0; i<nc; ++i ) { c_flags[i] = 0; }
00177 for ( int i=0; i<nd; ++i ) { d_flags[i] = 0; }
00178 for ( int i=0; i<ne; ++i ) { e_flags[i] = 0; }
00179 int nnodes = CmiNumNodes();
00180 for ( int node=0; node<nnodes; ++node ) {
00181 int a,b,c,d,e,t;
00182 #if CMK_BLUEGENEQ
00183 tmgr.rankToCoordinates(CmiNodeFirst(fixnode(node)),a,b,c,d,e,t);
00184 #else
00185 tmgr.rankToCoordinates(CmiNodeFirst(fixnode(node)),a,b,c,t);
00186 d=0; e=0;
00187 #endif
00188 if ( a < 0 || a >= na ) CmiAbort("inconsistent torus topology!");
00189 if ( b < 0 || b >= nb ) CmiAbort("inconsistent torus topology!");
00190 if ( c < 0 || c >= nc ) CmiAbort("inconsistent torus topology!");
00191 if ( d < 0 || d >= nd ) CmiAbort("inconsistent torus topology!");
00192 if ( e < 0 || e >= ne ) CmiAbort("inconsistent torus topology!");
00193 a_flags[a] = 1;
00194 b_flags[b] = 1;
00195 c_flags[c] = 1;
00196 d_flags[d] = 1;
00197 e_flags[e] = 1;
00198 }
00199 std::basic_ostringstream<char> iout;
00200 int printTill = na, printCount = 0;
00201 iout << "Charm++> " << "TORUS A SIZE " << na << " USING";
00202
00203 if((nb + nc +nd + ne) == 4) printTill = std::min(na,10);
00204 for ( int i = 0; (i < na) && (printCount <= printTill); ++i ) {
00205 if ( a_flags[i] ) {
00206 if(printCount == printTill) {
00207 iout << "...";
00208 } else {
00209 iout << " " << i;
00210 }
00211 printCount++;
00212 }
00213 }
00214 iout << "\n" ;
00215 iout << "Charm++> " << "TORUS B SIZE " << nb << " USING";
00216 for ( int i=0; i<nb; ++i ) { if ( b_flags[i] ) iout << " " << i; }
00217 iout << "\n" ;
00218 iout << "Charm++> " << "TORUS C SIZE " << nc << " USING";
00219 for ( int i=0; i<nc; ++i ) { if ( c_flags[i] ) iout << " " << i; }
00220 iout << "\n" ;
00221 #if CMK_BLUEGENEQ
00222 iout << "Charm++> " << "TORUS D SIZE " << nd << " USING";
00223 for ( int i=0; i<nd; ++i ) { if ( d_flags[i] ) iout << " " << i; }
00224 iout << "\n" ;
00225 iout << "Charm++> " << "TORUS E SIZE " << ne << " USING";
00226 for ( int i=0; i<ne; ++i ) { if ( e_flags[i] ) iout << " " << i; }
00227 iout << "\n" ;
00228 #endif
00229
00230 a_rot = b_rot = c_rot = d_rot = e_rot = 0;
00231 a_mod = na; b_mod = nb; c_mod = nc; d_mod = nd; e_mod = ne;
00232 #if CMK_BLUEGENEQ
00233 if ( tmgr.absA(na) == 0 )
00234 #else
00235 if ( tmgr.absX(na) == 0 )
00236 #endif
00237 for ( int i=0, gaplen=0, gapstart=0; i<2*na; ++i ) {
00238 if ( a_flags[i%na] ) gapstart = i+1;
00239 else if ( i - gapstart >= gaplen ) {
00240 a_rot = 2*na-i-1; gaplen = i - gapstart;
00241 }
00242 }
00243 #if CMK_BLUEGENEQ
00244 if ( tmgr.absB(nb) == 0 )
00245 #else
00246 if ( tmgr.absY(nb) == 0 )
00247 #endif
00248 for ( int i=0, gaplen=0, gapstart=0; i<2*nb; ++i ) {
00249 if ( b_flags[i%nb] ) gapstart = i+1;
00250 else if ( i - gapstart >= gaplen ) {
00251 b_rot = 2*nb-i-1; gaplen = i - gapstart;
00252 }
00253 }
00254 #if CMK_BLUEGENEQ
00255 if ( tmgr.absC(nc) == 0 )
00256 #else
00257 if ( tmgr.absZ(nc) == 0 )
00258 #endif
00259 for ( int i=0, gaplen=0, gapstart=0; i<2*nc; ++i ) {
00260 if ( c_flags[i%nc] ) gapstart = i+1;
00261 else if ( i - gapstart >= gaplen ) {
00262 c_rot = 2*nc-i-1; gaplen = i - gapstart;
00263 }
00264 }
00265 #if CMK_BLUEGENEQ
00266 if ( tmgr.absD(nd) == 0 )
00267 for ( int i=0, gaplen=0, gapstart=0; i<2*nd; ++i ) {
00268 if ( d_flags[i%nd] ) gapstart = i+1;
00269 else if ( i - gapstart >= gaplen ) {
00270 d_rot = 2*nd-i-1; gaplen = i - gapstart;
00271 }
00272 }
00273 if ( tmgr.absE(ne) == 0 )
00274 for ( int i=0, gaplen=0, gapstart=0; i<2*ne; ++i ) {
00275 if ( e_flags[i%ne] ) gapstart = i+1;
00276 else if ( i - gapstart >= gaplen ) {
00277 e_rot = 2*ne-i-1; gaplen = i - gapstart;
00278 }
00279 }
00280 #endif
00281
00282 int a_min=na, a_max=-1;
00283 int b_min=nb, b_max=-1;
00284 int c_min=nc, c_max=-1;
00285 int d_min=nd, d_max=-1;
00286 int e_min=ne, e_max=-1;
00287 for ( int node=0; node<nnodes; ++node ) {
00288 int a,b,c,d,e,t;
00289 #if CMK_BLUEGENEQ
00290 tmgr.rankToCoordinates(CmiNodeFirst(fixnode(node)),a,b,c,d,e,t);
00291 #else
00292 tmgr.rankToCoordinates(CmiNodeFirst(fixnode(node)),a,b,c,t);
00293 d=0; e=0;
00294 #endif
00295 a = (a+a_rot)%a_mod;
00296 b = (b+b_rot)%b_mod;
00297 c = (c+c_rot)%c_mod;
00298 d = (d+d_rot)%d_mod;
00299 e = (e+e_rot)%e_mod;
00300 if ( a < a_min ) a_min = a;
00301 if ( b < b_min ) b_min = b;
00302 if ( c < c_min ) c_min = c;
00303 if ( d < d_min ) d_min = d;
00304 if ( e < e_min ) e_min = e;
00305 if ( a > a_max ) a_max = a;
00306 if ( b > b_max ) b_max = b;
00307 if ( c > c_max ) c_max = c;
00308 if ( d > d_max ) d_max = d;
00309 if ( e > e_max ) e_max = e;
00310 }
00311 int a_len = a_max - a_min + 1;
00312 int b_len = b_max - b_min + 1;
00313 int c_len = c_max - c_min + 1;
00314 int d_len = d_max - d_min + 1;
00315 int e_len = e_max - e_min + 1;
00316 int lensort[5];
00317 lensort[0] = (a_len << 3) + 4;
00318 lensort[1] = (b_len << 3) + 3;
00319 lensort[2] = (c_len << 3) + 2;
00320 lensort[3] = (d_len << 3) + 1;
00321 lensort[4] = (e_len << 3) + 0;
00322
00323 std::sort(lensort, lensort+5);
00324
00325 for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 4 ) a_dim = 4-i; }
00326 for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 3 ) b_dim = 4-i; }
00327 for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 2 ) c_dim = 4-i; }
00328 for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 1 ) d_dim = 4-i; }
00329 for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 0 ) e_dim = 4-i; }
00330 iout << "Charm++> " << "TORUS MINIMAL MESH SIZE IS " << a_len << " BY " << b_len << " BY " << c_len
00331 #if CMK_BLUEGENEQ
00332 << " BY " << d_len << " BY " << e_len
00333 #endif
00334 << "\n" ;
00335 if ( CmiMyNodeGlobal() == 0 ) printf("%s",iout.str().c_str());
00336
00337 }
00338 void coords(int node, int *crds) {
00339 int a,b,c,d,e,t;
00340 #if CMK_BLUEGENEQ
00341 tmgr.rankToCoordinates(CmiNodeFirst(fixnode(node)),a,b,c,d,e,t);
00342 #else
00343 tmgr.rankToCoordinates(CmiNodeFirst(fixnode(node)),a,b,c,t);
00344 d=0; e=0;
00345 #endif
00346 crds[a_dim] = (a+a_rot)%a_mod;
00347 crds[b_dim] = (b+b_rot)%b_mod;
00348 crds[c_dim] = (c+c_rot)%c_mod;
00349 crds[d_dim] = (d+d_rot)%d_mod;
00350 crds[e_dim] = (e+e_rot)%e_mod;
00351 }
00352 int coord(int node, int dim) {
00353 int crds[5];
00354 coords(node,crds);
00355 return crds[dim];
00356 }
00357 struct node_sortop_topo {
00358 TopoManagerWrapper &tmgr;
00359 const int *sortdims;
00360 node_sortop_topo(TopoManagerWrapper &t, int *d) : tmgr(t), sortdims(d) {}
00361 bool operator() (int node1, int node2) const {
00362 int crds1[5], crds2[5];
00363 tmgr.coords(node1,crds1);
00364 tmgr.coords(node2,crds2);
00365 for ( int i=0; i<5; ++i ) {
00366 int d = sortdims[i];
00367 if ( crds1[d] != crds2[d] ) return ( crds1[d] < crds2[d] );
00368 }
00369
00370
00371
00372 return ( node1 < node2 );
00373 }
00374 };
00375 void sortLongest(int *node_begin, int *node_end) {
00376 if ( node_begin == node_end ) return;
00377 int tmins[5], tmaxs[5], tlens[5], sortdims[5];
00378 coords(*node_begin, tmins);
00379 coords(*node_begin, tmaxs);
00380 for ( int *nodeitr = node_begin; nodeitr != node_end; ++nodeitr ) {
00381 int tvals[5];
00382 coords(*nodeitr, tvals);
00383 for ( int i=0; i<5; ++i ) {
00384 if ( tvals[i] < tmins[i] ) tmins[i] = tvals[i];
00385 if ( tvals[i] > tmaxs[i] ) tmaxs[i] = tvals[i];
00386 }
00387 }
00388 for ( int i=0; i<5; ++i ) {
00389 tlens[i] = ((tmaxs[i] - tmins[i] + 1) << 3) + (4-i);
00390 }
00391 std::sort(tlens, tlens+5);
00392 for ( int i=0; i<5; ++i ) {
00393 sortdims[4-i] = 4 - (tlens[i] & 7);
00394 }
00395 if ( PARTITION_TOPOLOGY_VERBOSE && CmiMyNodeGlobal() == 0 )
00396 printf("sorting %d(%d) %d(%d) %d(%d)\n", sortdims[0], tlens[4]>>3, sortdims[1], tlens[3]>>3, sortdims[2], tlens[2]>>3);
00397 std::sort(node_begin,node_end,node_sortop_topo(*this,sortdims));
00398 int *nodes = node_begin;
00399 int nnodes = node_end - node_begin;
00400 }
00401 };
00402
00403 void recursive_bisect(
00404 int part_begin, int part_end,
00405 int *node_begin, int *node_end,
00406 TopoManagerWrapper &tmgr
00407 ) {
00408
00409 if ( part_end - part_begin == 1 ) {
00410 if ( CmiPartitionSize(part_begin) != node_end - node_begin ) {
00411 CmiAbort("partitioning algorithm size mismatch in recursive_bisect()");
00412 }
00413 tmgr.sortLongest(node_begin, node_end);
00414
00415 if ( PARTITION_TOPOLOGY_VERBOSE && CmiMyNodeGlobal() == 0 ) {
00416 int crds[5];
00417 tmgr.coords(*node_begin, crds);
00418 printf("partitioning node %5d at %5d %5d %5d %5d %5d nodes %5" PRId64 "\n",
00419 *node_begin,
00420 crds[0], crds[1], crds[2], crds[3], crds[4],
00421 (int64_t)(node_end - node_begin));
00422 }
00423 return;
00424 }
00425
00426 if ( PARTITION_TOPOLOGY_VERBOSE && CmiMyNodeGlobal() == 0 )
00427 printf("recursive_bisect %d %d %" PRId64 "\n", part_begin, part_end, (int64_t)(node_end-node_begin));
00428
00429 int nnodes = node_end - node_begin;
00430 int nsplit = (nnodes+1) / 2;
00431 int ncount = 0;
00432 int part_split = part_begin;
00433 for ( int p = part_begin; p < part_end; ++p ) {
00434 int ps = CmiPartitionSize(p);
00435 if ( abs(ncount+ps-nsplit) < abs(ncount-nsplit) ) part_split = p+1;
00436 else break;
00437 ncount += ps;
00438 }
00439 if ( part_split == part_begin || part_split == part_end )
00440 CmiAbort("partitioning algorithm failure in recursive_bisect()");
00441
00442 int *node_split = node_begin + ncount;
00443
00444 tmgr.sortLongest(node_begin, node_end);
00445
00446
00447 recursive_bisect(
00448 part_begin, part_split, node_begin, node_split, tmgr);
00449 recursive_bisect(
00450 part_split, part_end, node_split, node_end, tmgr);
00451 }
00452
00453 }
00454
00457 void getRecursiveBisectionList(int numparts, int *procList)
00458 {
00459 int n = CmiNumNodes();
00460 for(int i = 0; i < n; i++) {
00461 procList[i] = i;
00462 }
00463 if ( numparts < 2 ) return;
00464 TopoManagerWrapper tmgr;
00465 recursive_bisect(
00466 0, numparts, procList, procList + n, tmgr);
00467 if ( PARTITION_TOPOLOGY_VERBOSE && CmiMyNodeGlobal() == 0 ) {
00468 int crds[5];
00469 for ( int i=0,p=0,ip=0; i<n; ++i,++ip ) {
00470 if ( p == numparts ) break;
00471 if ( ip == CmiPartitionSize(p) ) { ++p; ip=0; }
00472 tmgr.coords(procList[i],crds);
00473 printf("procList[%5d] part[%3d] %5d (%2d %2d %2d %2d %2d)\n",
00474 i, p, procList[i],
00475 crds[0], crds[1], crds[2], crds[3], crds[4]);
00476 }
00477 }
00478 }
00479
00480
00481 #endif