00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014 #include <parmetislib.h>
00015
00016 #define ProperSide(c, from, other) \
00017 (((c) == 0 && (from)-(other) < 0) || ((c) == 1 && (from)-(other) > 0))
00018
00019
00020
00021
00022 void Moc_KWayFM(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace, int npasses)
00023 {
00024 int h, i, ii, iii, j, k, c;
00025 int pass, nvtxs, nedges, ncon;
00026 int nmoves, nmoved, nswaps, nzgswaps;
00027
00028 int me, firstvtx, lastvtx, yourlastvtx;
00029 int from, to = -1, oldto, oldcut, mydomain, yourdomain, imbalanced, overweight;
00030 int npes = ctrl->npes, mype = ctrl->mype, nparts = ctrl->nparts;
00031 int nlupd, nsupd, nnbrs, nchanged;
00032 idxtype *xadj, *ladjncy, *adjwgt, *vtxdist;
00033 idxtype *where, *tmp_where, *moved;
00034 floattype *lnpwgts, *gnpwgts, *ognpwgts, *pgnpwgts, *movewgts, *overfill;
00035 idxtype *update, *supdate, *rupdate, *pe_updates;
00036 idxtype *changed, *perm, *pperm, *htable;
00037 idxtype *peind, *recvptr, *sendptr;
00038 KeyValueType *swchanges, *rwchanges;
00039 RInfoType *rinfo, *myrinfo, *tmp_myrinfo, *tmp_rinfo;
00040 EdgeType *tmp_edegrees, *my_edegrees, *your_edegrees;
00041 floattype lbvec[MAXNCON], *nvwgt, *badmaxpwgt, *ubvec, *tpwgts, lbavg, ubavg;
00042 int *nupds_pe;
00043
00044 IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->KWayTmr));
00045
00046
00047
00048
00049 nvtxs = graph->nvtxs;
00050 nedges = graph->nedges;
00051 ncon = graph->ncon;
00052
00053 vtxdist = graph->vtxdist;
00054 xadj = graph->xadj;
00055 ladjncy = graph->adjncy;
00056 adjwgt = graph->adjwgt;
00057
00058 firstvtx = vtxdist[mype];
00059 lastvtx = vtxdist[mype+1];
00060
00061 where = graph->where;
00062 rinfo = graph->rinfo;
00063 lnpwgts = graph->lnpwgts;
00064 gnpwgts = graph->gnpwgts;
00065 ubvec = ctrl->ubvec;
00066 tpwgts = ctrl->tpwgts;
00067
00068 nnbrs = graph->nnbrs;
00069 peind = graph->peind;
00070 recvptr = graph->recvptr;
00071 sendptr = graph->sendptr;
00072
00073 changed = idxmalloc(nvtxs, "KWR: changed");
00074 rwchanges = wspace->pairs;
00075 swchanges = rwchanges + recvptr[nnbrs];
00076
00077
00078
00079
00080 perm = idxmalloc(nvtxs, "KWR: perm");
00081 pperm = idxmalloc(nparts, "KWR: pperm");
00082
00083 update = idxmalloc(nvtxs, "KWR: update");
00084 supdate = wspace->indices;
00085 rupdate = supdate + recvptr[nnbrs];
00086 nupds_pe = imalloc(npes, "KWR: nupds_pe");
00087 htable = idxsmalloc(nvtxs+graph->nrecv, 0, "KWR: lhtable");
00088 badmaxpwgt = fmalloc(nparts*ncon, "badmaxpwgt");
00089
00090 for (i=0; i<nparts; i++) {
00091 for (h=0; h<ncon; h++) {
00092 badmaxpwgt[i*ncon+h] = ubvec[h]*tpwgts[i*ncon+h];
00093 }
00094 }
00095
00096 movewgts = fmalloc(nparts*ncon, "KWR: movewgts");
00097 ognpwgts = fmalloc(nparts*ncon, "KWR: ognpwgts");
00098 pgnpwgts = fmalloc(nparts*ncon, "KWR: pgnpwgts");
00099 overfill = fmalloc(nparts*ncon, "KWR: overfill");
00100 moved = idxmalloc(nvtxs, "KWR: moved");
00101 tmp_where = idxmalloc(nvtxs+graph->nrecv, "KWR: tmp_where");
00102 tmp_rinfo = (RInfoType *)GKmalloc(sizeof(RInfoType)*nvtxs, "KWR: tmp_rinfo");
00103 tmp_edegrees = (EdgeType *)GKmalloc(sizeof(EdgeType)*nedges, "KWR: tmp_edegrees");
00104
00105 idxcopy(nvtxs+graph->nrecv, where, tmp_where);
00106 for (i=0; i<nvtxs; i++) {
00107 tmp_rinfo[i].id = rinfo[i].id;
00108 tmp_rinfo[i].ed = rinfo[i].ed;
00109 tmp_rinfo[i].ndegrees = rinfo[i].ndegrees;
00110 tmp_rinfo[i].degrees = tmp_edegrees+xadj[i];
00111
00112 for (j=0; j<rinfo[i].ndegrees; j++) {
00113 tmp_rinfo[i].degrees[j].edge = rinfo[i].degrees[j].edge;
00114 tmp_rinfo[i].degrees[j].ewgt = rinfo[i].degrees[j].ewgt;
00115 }
00116 }
00117
00118 nswaps = nzgswaps = 0;
00119
00120
00121
00122 for (pass=0; pass<npasses; pass++) {
00123 if (mype == 0)
00124 RandomPermute(nparts, pperm, 1);
00125 MPI_Bcast((void *)pperm, nparts, IDX_DATATYPE, 0, ctrl->comm);
00126 FastRandomPermute(nvtxs, perm, 1);
00127 oldcut = graph->mincut;
00128
00129
00130 Moc_ComputeParallelBalance(ctrl, graph, graph->where, lbvec);
00131 ubavg = savg(ncon, ubvec);
00132 lbavg = savg(ncon, lbvec);
00133 imbalanced = (lbavg > ubavg) ? 1 : 0;
00134
00135 for (c=0; c<2; c++) {
00136 scopy(ncon*nparts, gnpwgts, ognpwgts);
00137 sset(ncon*nparts, 0.0, movewgts);
00138 nmoved = 0;
00139
00140
00141
00142
00143 for (iii=0; iii<nvtxs; iii++) {
00144 i = perm[iii];
00145 from = tmp_where[i];
00146 nvwgt = graph->nvwgt+i*ncon;
00147
00148 for (h=0; h<ncon; h++)
00149 if (fabs(nvwgt[h]-gnpwgts[from*ncon+h]) < SMALLFLOAT)
00150 break;
00151
00152 if (h < ncon) {
00153 continue;
00154 }
00155
00156
00157 if (tmp_rinfo[i].ed >= tmp_rinfo[i].id) {
00158 my_edegrees = tmp_rinfo[i].degrees;
00159
00160 for (k=0; k<tmp_rinfo[i].ndegrees; k++) {
00161 to = my_edegrees[k].edge;
00162 if (ProperSide(c, pperm[from], pperm[to])) {
00163 for (h=0; h<ncon; h++)
00164 if (gnpwgts[to*ncon+h]+nvwgt[h] > badmaxpwgt[to*ncon+h] && nvwgt[h] > 0.0)
00165 break;
00166
00167 if (h == ncon)
00168 break;
00169 }
00170 }
00171 oldto = to;
00172
00173
00174 if (k < tmp_rinfo[i].ndegrees) {
00175 for (j=k+1; j<tmp_rinfo[i].ndegrees; j++) {
00176 to = my_edegrees[j].edge;
00177 if (ProperSide(c, pperm[from], pperm[to])) {
00178 for (h=0; h<ncon; h++)
00179 if (gnpwgts[to*ncon+h]+nvwgt[h] > badmaxpwgt[to*ncon+h] && nvwgt[h] > 0.0)
00180 break;
00181
00182 if (h == ncon) {
00183 if (my_edegrees[j].ewgt > my_edegrees[k].ewgt ||
00184 (my_edegrees[j].ewgt == my_edegrees[k].ewgt &&
00185 IsHBalanceBetterTT(ncon,gnpwgts+oldto*ncon,gnpwgts+to*ncon,nvwgt,ubvec))){
00186 k = j;
00187 oldto = my_edegrees[k].edge;
00188 }
00189 }
00190 }
00191 }
00192 to = oldto;
00193
00194 if (my_edegrees[k].ewgt > tmp_rinfo[i].id ||
00195 (my_edegrees[k].ewgt == tmp_rinfo[i].id &&
00196 (imbalanced || graph->level > 3 || iii % 8 == 0) &&
00197 IsHBalanceBetterFT(ncon,gnpwgts+from*ncon,gnpwgts+to*ncon,nvwgt,ubvec))){
00198
00199
00200
00201
00202 tmp_where[i] = to;
00203 moved[nmoved++] = i;
00204 for (h=0; h<ncon; h++) {
00205 lnpwgts[to*ncon+h] += nvwgt[h];
00206 lnpwgts[from*ncon+h] -= nvwgt[h];
00207 gnpwgts[to*ncon+h] += nvwgt[h];
00208 gnpwgts[from*ncon+h] -= nvwgt[h];
00209 movewgts[to*ncon+h] += nvwgt[h];
00210 movewgts[from*ncon+h] -= nvwgt[h];
00211 }
00212
00213 tmp_rinfo[i].ed += tmp_rinfo[i].id-my_edegrees[k].ewgt;
00214 SWAP(tmp_rinfo[i].id, my_edegrees[k].ewgt, j);
00215 if (my_edegrees[k].ewgt == 0) {
00216 tmp_rinfo[i].ndegrees--;
00217 my_edegrees[k].edge = my_edegrees[tmp_rinfo[i].ndegrees].edge;
00218 my_edegrees[k].ewgt = my_edegrees[tmp_rinfo[i].ndegrees].ewgt;
00219 }
00220 else {
00221 my_edegrees[k].edge = from;
00222 }
00223
00224
00225 for (j=xadj[i]; j<xadj[i+1]; j++) {
00226
00227 if (ladjncy[j] >= nvtxs)
00228 continue;
00229
00230 me = ladjncy[j];
00231 mydomain = tmp_where[me];
00232
00233 myrinfo = tmp_rinfo+me;
00234 your_edegrees = myrinfo->degrees;
00235
00236 if (mydomain == from) {
00237 INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]);
00238 }
00239 else {
00240 if (mydomain == to) {
00241 INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]);
00242 }
00243 }
00244
00245
00246 if (mydomain != from) {
00247 for (k=0; k<myrinfo->ndegrees; k++) {
00248 if (your_edegrees[k].edge == from) {
00249 if (your_edegrees[k].ewgt == adjwgt[j]) {
00250 myrinfo->ndegrees--;
00251 your_edegrees[k].edge = your_edegrees[myrinfo->ndegrees].edge;
00252 your_edegrees[k].ewgt = your_edegrees[myrinfo->ndegrees].ewgt;
00253 }
00254 else {
00255 your_edegrees[k].ewgt -= adjwgt[j];
00256 }
00257 break;
00258 }
00259 }
00260 }
00261
00262
00263 if (mydomain != to) {
00264 for (k=0; k<myrinfo->ndegrees; k++) {
00265 if (your_edegrees[k].edge == to) {
00266 your_edegrees[k].ewgt += adjwgt[j];
00267 break;
00268 }
00269 }
00270 if (k == myrinfo->ndegrees) {
00271 your_edegrees[myrinfo->ndegrees].edge = to;
00272 your_edegrees[myrinfo->ndegrees++].ewgt = adjwgt[j];
00273 }
00274 }
00275 }
00276 }
00277 }
00278 }
00279 }
00280
00281
00282
00283
00284
00285 MPI_Allreduce((void *)lnpwgts, (void *)pgnpwgts, nparts*ncon,
00286 MPI_DOUBLE, MPI_SUM, ctrl->comm);
00287
00288
00289
00290
00291 overweight = 0;
00292 for (j=0; j<nparts; j++) {
00293 for (h=0; h<ncon; h++) {
00294 if (pgnpwgts[j*ncon+h] > ognpwgts[j*ncon+h]) {
00295 overfill[j*ncon+h] =
00296 (pgnpwgts[j*ncon+h]-badmaxpwgt[j*ncon+h]) /
00297 (pgnpwgts[j*ncon+h]-ognpwgts[j*ncon+h]);
00298 }
00299 else {
00300 overfill[j*ncon+h] = 0.0;
00301 }
00302
00303 overfill[j*ncon+h] = amax(overfill[j*ncon+h], 0.0);
00304 overfill[j*ncon+h] *= movewgts[j*ncon+h];
00305
00306 if (overfill[j*ncon+h] > 0.0)
00307 overweight = 1;
00308
00309 ASSERTP(ctrl, ognpwgts[j*ncon+h] <= badmaxpwgt[j*ncon+h] ||
00310 pgnpwgts[j*ncon+h] <= ognpwgts[j*ncon+h],
00311 (ctrl, "%.4f %.4f %.4f\n", ognpwgts[j*ncon+h],
00312 badmaxpwgt[j*ncon+h], pgnpwgts[j*ncon+h]));
00313 }
00314 }
00315
00316
00317
00318
00319 if (overweight == 1) {
00320 for (iii=0; iii<nmoved; iii++) {
00321 i = moved[iii];
00322 oldto = tmp_where[i];
00323 nvwgt = graph->nvwgt+i*ncon;
00324 my_edegrees = tmp_rinfo[i].degrees;
00325
00326 for (k=0; k<tmp_rinfo[i].ndegrees; k++)
00327 if (my_edegrees[k].edge == where[i])
00328 break;
00329
00330 for (h=0; h<ncon; h++)
00331 if (nvwgt[h] > 0.0 && overfill[oldto*ncon+h] > nvwgt[h]/4.0)
00332 break;
00333
00334
00335
00336
00337 if (k != tmp_rinfo[i].ndegrees && h != ncon) {
00338 moved[iii] = -1;
00339 from = oldto;
00340 to = where[i];
00341
00342 for (h=0; h<ncon; h++) {
00343 overfill[oldto*ncon+h] = amax(overfill[oldto*ncon+h]-nvwgt[h], 0.0);
00344 }
00345
00346 tmp_where[i] = to;
00347 tmp_rinfo[i].ed += tmp_rinfo[i].id-my_edegrees[k].ewgt;
00348 SWAP(tmp_rinfo[i].id, my_edegrees[k].ewgt, j);
00349 if (my_edegrees[k].ewgt == 0) {
00350 tmp_rinfo[i].ndegrees--;
00351 my_edegrees[k].edge = my_edegrees[tmp_rinfo[i].ndegrees].edge;
00352 my_edegrees[k].ewgt = my_edegrees[tmp_rinfo[i].ndegrees].ewgt;
00353 }
00354 else {
00355 my_edegrees[k].edge = from;
00356 }
00357
00358 for (h=0; h<ncon; h++) {
00359 lnpwgts[to*ncon+h] += nvwgt[h];
00360 lnpwgts[from*ncon+h] -= nvwgt[h];
00361 }
00362
00363
00364 for (j=xadj[i]; j<xadj[i+1]; j++) {
00365
00366 if (ladjncy[j] >= nvtxs)
00367 continue;
00368
00369 me = ladjncy[j];
00370 mydomain = tmp_where[me];
00371
00372 myrinfo = tmp_rinfo+me;
00373 your_edegrees = myrinfo->degrees;
00374
00375 if (mydomain == from) {
00376 INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]);
00377 }
00378 else {
00379 if (mydomain == to) {
00380 INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]);
00381 }
00382 }
00383
00384
00385 if (mydomain != from) {
00386 for (k=0; k<myrinfo->ndegrees; k++) {
00387 if (your_edegrees[k].edge == from) {
00388 if (your_edegrees[k].ewgt == adjwgt[j]) {
00389 myrinfo->ndegrees--;
00390 your_edegrees[k].edge = your_edegrees[myrinfo->ndegrees].edge;
00391 your_edegrees[k].ewgt = your_edegrees[myrinfo->ndegrees].ewgt;
00392 }
00393 else {
00394 your_edegrees[k].ewgt -= adjwgt[j];
00395 }
00396 break;
00397 }
00398 }
00399 }
00400
00401
00402 if (mydomain != to) {
00403 for (k=0; k<myrinfo->ndegrees; k++) {
00404 if (your_edegrees[k].edge == to) {
00405 your_edegrees[k].ewgt += adjwgt[j];
00406 break;
00407 }
00408 }
00409 if (k == myrinfo->ndegrees) {
00410 your_edegrees[myrinfo->ndegrees].edge = to;
00411 your_edegrees[myrinfo->ndegrees++].ewgt = adjwgt[j];
00412 }
00413 }
00414 }
00415 }
00416 }
00417 }
00418
00419
00420
00421
00422 nlupd = nsupd = nmoves = nchanged = 0;
00423 for (iii=0; iii<nmoved; iii++) {
00424 i = moved[iii];
00425 if (i == -1)
00426 continue;
00427
00428 where[i] = tmp_where[i];
00429
00430
00431 if (htable[i] == 0) {
00432
00433 htable[i] = 1;
00434 update[nlupd++] = i;
00435 }
00436
00437
00438 for (j=xadj[i]; j<xadj[i+1]; j++) {
00439 k = ladjncy[j];
00440 if (htable[k] == 0) {
00441 htable[k] = 1;
00442 if (k<nvtxs)
00443 update[nlupd++] = k;
00444 else
00445 supdate[nsupd++] = k;
00446 }
00447 }
00448 nmoves++;
00449 nswaps++;
00450
00451
00452 for (k=0; k<rinfo[i].ndegrees; k++)
00453 if (rinfo[i].degrees[k].edge == to)
00454 break;
00455 if (rinfo[i].id == rinfo[i].degrees[k].ewgt)
00456 nzgswaps++;
00457
00458 if (graph->pexadj[i+1]-graph->pexadj[i] > 0)
00459 changed[nchanged++] = i;
00460 }
00461
00462
00463 CommChangedInterfaceData(ctrl, graph, nchanged, changed, where,
00464 swchanges, rwchanges, wspace->pv4);
00465
00466
00467 IFSET(ctrl->dbglvl, DBG_RMOVEINFO,
00468 rprintf(ctrl, "\t[%d %d], [%.4f], [%d %d %d]\n",
00469 pass, c, badmaxpwgt[0],
00470 GlobalSESum(ctrl, nmoves),
00471 GlobalSESum(ctrl, nsupd),
00472 GlobalSESum(ctrl, nlupd)));
00473
00474
00475
00476
00477
00478
00479 for (i=0; i<nnbrs; i++) {
00480 MPI_Irecv((void *)(rupdate+sendptr[i]), sendptr[i+1]-sendptr[i], IDX_DATATYPE,
00481 peind[i], 1, ctrl->comm, ctrl->rreq+i);
00482 }
00483
00484
00485 for (i=0; i<nsupd; i++) {
00486 htable[supdate[i]] = 0;
00487 supdate[i] = graph->imap[supdate[i]];
00488 }
00489 iidxsort(nsupd, supdate);
00490
00491 for (j=i=0; i<nnbrs; i++) {
00492 yourlastvtx = vtxdist[peind[i]+1];
00493 for (k=j; k<nsupd && supdate[k] < yourlastvtx; k++);
00494 MPI_Isend((void *)(supdate+j), k-j, IDX_DATATYPE, peind[i], 1, ctrl->comm, ctrl->sreq+i);
00495 j = k;
00496 }
00497
00498
00499 MPI_Waitall(nnbrs, ctrl->rreq, ctrl->statuses);
00500 for (i=0; i<nnbrs; i++)
00501 MPI_Get_count(ctrl->statuses+i, IDX_DATATYPE, nupds_pe+i);
00502 MPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses);
00503
00504
00505
00506
00507
00508 for (i=0; i<nnbrs; i++) {
00509 pe_updates = rupdate+sendptr[i];
00510 for (j=0; j<nupds_pe[i]; j++) {
00511 k = pe_updates[j];
00512 if (htable[k-firstvtx] == 0) {
00513 htable[k-firstvtx] = 1;
00514 update[nlupd++] = k-firstvtx;
00515 }
00516 }
00517 }
00518
00519
00520
00521
00522
00523 for (ii=0; ii<nlupd; ii++) {
00524 i = update[ii];
00525 ASSERT(ctrl, htable[i] == 1);
00526
00527 htable[i] = 0;
00528
00529 mydomain = where[i];
00530 myrinfo = rinfo+i;
00531 tmp_myrinfo = tmp_rinfo+i;
00532 my_edegrees = myrinfo->degrees;
00533 your_edegrees = tmp_myrinfo->degrees;
00534
00535 graph->lmincut -= myrinfo->ed;
00536 myrinfo->ndegrees = 0;
00537 myrinfo->id = 0;
00538 myrinfo->ed = 0;
00539
00540 for (j=xadj[i]; j<xadj[i+1]; j++) {
00541 yourdomain = where[ladjncy[j]];
00542 if (mydomain != yourdomain) {
00543 myrinfo->ed += adjwgt[j];
00544
00545 for (k=0; k<myrinfo->ndegrees; k++) {
00546 if (my_edegrees[k].edge == yourdomain) {
00547 my_edegrees[k].ewgt += adjwgt[j];
00548 your_edegrees[k].ewgt += adjwgt[j];
00549 break;
00550 }
00551 }
00552 if (k == myrinfo->ndegrees) {
00553 my_edegrees[k].edge = yourdomain;
00554 my_edegrees[k].ewgt = adjwgt[j];
00555 your_edegrees[k].edge = yourdomain;
00556 your_edegrees[k].ewgt = adjwgt[j];
00557 myrinfo->ndegrees++;
00558 }
00559 ASSERT(ctrl, myrinfo->ndegrees <= xadj[i+1]-xadj[i]);
00560 ASSERT(ctrl, tmp_myrinfo->ndegrees <= xadj[i+1]-xadj[i]);
00561
00562 }
00563 else {
00564 myrinfo->id += adjwgt[j];
00565 }
00566 }
00567 graph->lmincut += myrinfo->ed;
00568
00569 tmp_myrinfo->id = myrinfo->id;
00570 tmp_myrinfo->ed = myrinfo->ed;
00571 tmp_myrinfo->ndegrees = myrinfo->ndegrees;
00572 }
00573
00574
00575 MPI_Allreduce((void *)lnpwgts, (void *)gnpwgts, nparts*ncon,
00576 MPI_DOUBLE, MPI_SUM, ctrl->comm);
00577 }
00578 graph->mincut = GlobalSESum(ctrl, graph->lmincut)/2;
00579
00580 if (graph->mincut == oldcut)
00581 break;
00582 }
00583
00584
00585
00586
00587
00588
00589
00590
00591 GKfree((void **)&badmaxpwgt, (void **)&update, (void **)&nupds_pe, (void **)&htable, LTERM);
00592 GKfree((void **)&changed, (void **)&pperm, (void **)&perm, (void **)&moved, LTERM);
00593 GKfree((void **)&pgnpwgts, (void **)&ognpwgts, (void **)&overfill, (void **)&movewgts, LTERM);
00594 GKfree((void **)&tmp_where, (void **)&tmp_rinfo, (void **)&tmp_edegrees, LTERM);
00595
00596 IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->KWayTmr));
00597 }
00598
00599