00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #include <parmetislib.h>
00014
00015
00016
00017
00018
00019 void Moc_SerialKWayAdaptRefine(GraphType *graph, int nparts, idxtype *home,
00020 floattype *orgubvec, int npasses)
00021 {
00022 int i, ii, iii, j, k;
00023 int nvtxs, ncon, pass, nmoves, myndegrees;
00024 int from, me, myhome, to, oldcut, gain, tmp;
00025 idxtype *xadj, *adjncy, *adjwgt;
00026 idxtype *where;
00027 EdgeType *mydegrees;
00028 RInfoType *rinfo, *myrinfo;
00029 floattype *npwgts, *nvwgt, *minwgt, *maxwgt, ubvec[MAXNCON];
00030 int gain_is_greater, gain_is_same, fit_in_to, fit_in_from, going_home;
00031 int zero_gain, better_balance_ft, better_balance_tt;
00032 KeyValueType *cand;
00033 int mype;
00034 MPI_Comm_rank(MPI_COMM_WORLD, &mype);
00035
00036 nvtxs = graph->nvtxs;
00037 ncon = graph->ncon;
00038 xadj = graph->xadj;
00039 adjncy = graph->adjncy;
00040 adjwgt = graph->adjwgt;
00041 where = graph->where;
00042 rinfo = graph->rinfo;
00043 npwgts = graph->gnpwgts;
00044
00045
00046 cand = (KeyValueType *)GKmalloc(nvtxs*sizeof(KeyValueType), "cand");
00047 minwgt = fmalloc(nparts*ncon, "minwgt");
00048 maxwgt = fmalloc(nparts*ncon, "maxwgt");
00049
00050 ComputeHKWayLoadImbalance(ncon, nparts, npwgts, ubvec);
00051 for (i=0; i<ncon; i++)
00052 ubvec[i] = amax(ubvec[i], orgubvec[i]);
00053
00054 for (i=0; i<nparts; i++) {
00055 for (j=0; j<ncon; j++) {
00056 maxwgt[i*ncon+j] = ubvec[j]/(floattype)nparts;
00057 minwgt[i*ncon+j] = ubvec[j]*(floattype)nparts;
00058 }
00059 }
00060
00061 for (pass=0; pass<npasses; pass++) {
00062 oldcut = graph->mincut;
00063
00064 for (i=0; i<nvtxs; i++) {
00065 cand[i].key = rinfo[i].id-rinfo[i].ed;
00066 cand[i].val = i;
00067 }
00068 ikeysort(nvtxs, cand);
00069
00070 nmoves = 0;
00071 for (iii=0; iii<nvtxs; iii++) {
00072 i = cand[iii].val;
00073
00074 myrinfo = rinfo+i;
00075
00076 if (myrinfo->ed >= myrinfo->id) {
00077 from = where[i];
00078 myhome = home[i];
00079 nvwgt = graph->nvwgt+i*ncon;
00080
00081 if (myrinfo->id > 0 &&
00082 AreAllHVwgtsBelow(ncon, 1.0, npwgts+from*ncon, -1.0, nvwgt, minwgt+from*ncon))
00083 continue;
00084
00085 mydegrees = myrinfo->degrees;
00086 myndegrees = myrinfo->ndegrees;
00087
00088 for (k=0; k<myndegrees; k++) {
00089 to = mydegrees[k].edge;
00090 gain = mydegrees[k].ewgt - myrinfo->id;
00091 if (gain >= 0 &&
00092 (AreAllHVwgtsBelow(ncon, 1.0, npwgts+to*ncon, 1.0, nvwgt, maxwgt+to*ncon) ||
00093 IsHBalanceBetterFT(ncon,npwgts+from*ncon,npwgts+to*ncon,nvwgt,ubvec))) {
00094 break;
00095 }
00096 }
00097
00098
00099 if (k == myndegrees)
00100 continue;
00101
00102 for (j=k+1; j<myndegrees; j++) {
00103 to = mydegrees[j].edge;
00104 going_home = (myhome == to);
00105 gain_is_same = (mydegrees[j].ewgt == mydegrees[k].ewgt);
00106 gain_is_greater = (mydegrees[j].ewgt > mydegrees[k].ewgt);
00107 fit_in_to = AreAllHVwgtsBelow(ncon,1.0,npwgts+to*ncon,1.0,nvwgt,maxwgt+to*ncon);
00108 better_balance_ft = IsHBalanceBetterFT(ncon,npwgts+from*ncon,
00109 npwgts+to*ncon,nvwgt,ubvec);
00110 better_balance_tt = IsHBalanceBetterTT(ncon,npwgts+mydegrees[k].edge*ncon,
00111 npwgts+to*ncon,nvwgt,ubvec);
00112
00113 if (
00114 (gain_is_greater &&
00115 (fit_in_to ||
00116 better_balance_ft)
00117 )
00118 ||
00119 (gain_is_same &&
00120 (
00121 (fit_in_to &&
00122 going_home)
00123 ||
00124 better_balance_tt
00125 )
00126 )
00127 ) {
00128 k = j;
00129 }
00130 }
00131
00132 to = mydegrees[k].edge;
00133 going_home = (myhome == to);
00134 zero_gain = (mydegrees[k].ewgt == myrinfo->id);
00135
00136 fit_in_from = AreAllHVwgtsBelow(ncon,1.0,npwgts+from*ncon,0.0,npwgts+from*ncon,
00137 maxwgt+from*ncon);
00138 better_balance_ft = IsHBalanceBetterFT(ncon,npwgts+from*ncon,
00139 npwgts+to*ncon,nvwgt,ubvec);
00140
00141 if (zero_gain &&
00142 !going_home &&
00143 !better_balance_ft &&
00144 fit_in_from)
00145 continue;
00146
00147
00148
00149
00150 graph->mincut -= mydegrees[k].ewgt-myrinfo->id;
00151
00152
00153 saxpy2(ncon, 1.0, nvwgt, 1, npwgts+to*ncon, 1);
00154 saxpy2(ncon, -1.0, nvwgt, 1, npwgts+from*ncon, 1);
00155 where[i] = to;
00156 myrinfo->ed += myrinfo->id-mydegrees[k].ewgt;
00157 SWAP(myrinfo->id, mydegrees[k].ewgt, tmp);
00158
00159 if (mydegrees[k].ewgt == 0) {
00160 myrinfo->ndegrees--;
00161 mydegrees[k].edge = mydegrees[myrinfo->ndegrees].edge;
00162 mydegrees[k].ewgt = mydegrees[myrinfo->ndegrees].ewgt;
00163 }
00164 else
00165 mydegrees[k].edge = from;
00166
00167
00168 for (j=xadj[i]; j<xadj[i+1]; j++) {
00169 ii = adjncy[j];
00170 me = where[ii];
00171
00172 myrinfo = rinfo+ii;
00173 mydegrees = myrinfo->degrees;
00174
00175 if (me == from) {
00176 INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]);
00177 }
00178 else {
00179 if (me == to) {
00180 INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]);
00181 }
00182 }
00183
00184
00185 if (me != from) {
00186 for (k=0; k<myrinfo->ndegrees; k++) {
00187 if (mydegrees[k].edge == from) {
00188 if (mydegrees[k].ewgt == adjwgt[j]) {
00189 myrinfo->ndegrees--;
00190 mydegrees[k].edge = mydegrees[myrinfo->ndegrees].edge;
00191 mydegrees[k].ewgt = mydegrees[myrinfo->ndegrees].ewgt;
00192 }
00193 else
00194 mydegrees[k].ewgt -= adjwgt[j];
00195 break;
00196 }
00197 }
00198 }
00199
00200
00201 if (me != to) {
00202 for (k=0; k<myrinfo->ndegrees; k++) {
00203 if (mydegrees[k].edge == to) {
00204 mydegrees[k].ewgt += adjwgt[j];
00205 break;
00206 }
00207 }
00208 if (k == myrinfo->ndegrees) {
00209 mydegrees[myrinfo->ndegrees].edge = to;
00210 mydegrees[myrinfo->ndegrees++].ewgt = adjwgt[j];
00211 }
00212 }
00213
00214 }
00215 nmoves++;
00216 }
00217 }
00218
00219 if (graph->mincut == oldcut)
00220 break;
00221 }
00222
00223 GKfree((void **)&minwgt, (void **)&maxwgt, (void **)&cand, LTERM);
00224
00225 return;
00226 }
00227
00228
00229
00230
00231
00232 void Moc_ComputeSerialPartitionParams(GraphType *graph, int nparts,
00233 EdgeType *degrees)
00234 {
00235 int i, j, k;
00236 int nvtxs, nedges, ncon, mincut, me, other;
00237 idxtype *xadj, *adjncy, *adjwgt, *where;
00238 RInfoType *rinfo, *myrinfo;
00239 EdgeType *mydegrees;
00240 floattype *nvwgt, *npwgts;
00241 int mype;
00242 MPI_Comm_rank(MPI_COMM_WORLD, &mype);
00243
00244
00245 nvtxs = graph->nvtxs;
00246 ncon = graph->ncon;
00247 xadj = graph->xadj;
00248 nvwgt = graph->nvwgt;
00249 adjncy = graph->adjncy;
00250 adjwgt = graph->adjwgt;
00251 where = graph->where;
00252 rinfo = graph->rinfo;
00253
00254 npwgts = sset(ncon*nparts, 0.0, graph->gnpwgts);
00255
00256
00257
00258
00259 nedges = mincut = 0;
00260 for (i=0; i<nvtxs; i++) {
00261 me = where[i];
00262 saxpy2(ncon, 1.0, nvwgt+i*ncon, 1, npwgts+me*ncon, 1);
00263
00264 myrinfo = rinfo+i;
00265 myrinfo->id = myrinfo->ed = myrinfo->ndegrees = 0;
00266 myrinfo->degrees = degrees + nedges;
00267 nedges += xadj[i+1]-xadj[i];
00268
00269 for (j=xadj[i]; j<xadj[i+1]; j++) {
00270 if (me == where[adjncy[j]]) {
00271 myrinfo->id += adjwgt[j];
00272 }
00273 else {
00274 myrinfo->ed += adjwgt[j];
00275 }
00276 }
00277
00278 mincut += myrinfo->ed;
00279
00280
00281 if (myrinfo->ed > 0) {
00282 mydegrees = myrinfo->degrees;
00283
00284 for (j=xadj[i]; j<xadj[i+1]; j++) {
00285 other = where[adjncy[j]];
00286 if (me != other) {
00287 for (k=0; k<myrinfo->ndegrees; k++) {
00288 if (mydegrees[k].edge == other) {
00289 mydegrees[k].ewgt += adjwgt[j];
00290 break;
00291 }
00292 }
00293 if (k == myrinfo->ndegrees) {
00294 mydegrees[myrinfo->ndegrees].edge = other;
00295 mydegrees[myrinfo->ndegrees++].ewgt = adjwgt[j];
00296 }
00297 }
00298 }
00299 }
00300 }
00301
00302 graph->mincut = mincut/2;
00303
00304 return;
00305 }
00306
00307
00308
00309
00310
00311
00312 int AreAllHVwgtsBelow(int ncon, floattype alpha, floattype *vwgt1, floattype beta, floattype *vwgt2, floattype *limit)
00313 {
00314 int i;
00315
00316 for (i=0; i<ncon; i++)
00317 if (alpha*vwgt1[i] + beta*vwgt2[i] > limit[i])
00318 return 0;
00319
00320 return 1;
00321 }
00322
00323
00324
00325
00326
00327
00328 void ComputeHKWayLoadImbalance(int ncon, int nparts, floattype *npwgts, floattype *lbvec)
00329 {
00330 int i, j;
00331 floattype max;
00332
00333 for (i=0; i<ncon; i++) {
00334 max = 0.0;
00335 for (j=0; j<nparts; j++) {
00336 if (npwgts[j*ncon+i] > max)
00337 max = npwgts[j*ncon+i];
00338 }
00339
00340 lbvec[i] = max*nparts;
00341 }
00342 }
00343
00344
00345
00346
00347
00348 void SerialRemap(GraphType *graph, int nparts, idxtype *base, idxtype *scratch,
00349 idxtype *remap, floattype *tpwgts)
00350 {
00351 int i, ii, j, k;
00352 int nvtxs, nmapped, max_mult;
00353 int from, to, current_from, smallcount, bigcount;
00354 KeyValueType *flowto, *bestflow;
00355 KeyKeyValueType *sortvtx;
00356 idxtype *vsize, *htable, *map, *rowmap;
00357
00358 nvtxs = graph->nvtxs;
00359 vsize = graph->vsize;
00360 max_mult = amin(MAX_NPARTS_MULTIPLIER, nparts);
00361
00362 sortvtx = (KeyKeyValueType *)GKmalloc(nvtxs*sizeof(KeyKeyValueType), "sortvtx");
00363 flowto = (KeyValueType *)GKmalloc((nparts*max_mult+nparts)*sizeof(KeyValueType), "flowto");
00364 bestflow = flowto+nparts;
00365 map = htable = idxsmalloc(nparts*2, -1, "htable");
00366 rowmap = map+nparts;
00367
00368 for (i=0; i<nvtxs; i++) {
00369 sortvtx[i].key1 = base[i];
00370 sortvtx[i].key2 = vsize[i];
00371 sortvtx[i].val = i;
00372 }
00373
00374 qsort((void *)sortvtx, (size_t)nvtxs, (size_t)sizeof(KeyKeyValueType), SSMIncKeyCmp);
00375
00376 for (j=0; j<nparts; j++) {
00377 flowto[j].key = 0;
00378 flowto[j].val = j;
00379 }
00380
00381
00382 bigcount = smallcount = current_from = 0;
00383 for (ii=0; ii<nvtxs; ii++) {
00384 i = sortvtx[ii].val;
00385 from = base[i];
00386 to = scratch[i];
00387
00388 if (from > current_from) {
00389
00390 for (j=0; j<smallcount; j++)
00391 htable[flowto[j].val] = -1;
00392 ASSERTS(idxsum(nparts, htable) == -nparts);
00393
00394 ikeysort(smallcount, flowto);
00395
00396 for (j=0; j<amin(smallcount, max_mult); j++, bigcount++) {
00397 bestflow[bigcount].key = flowto[j].key;
00398 bestflow[bigcount].val = current_from*nparts+flowto[j].val;
00399 }
00400
00401 smallcount = 0;
00402 current_from = from;
00403 }
00404
00405 if (htable[to] == -1) {
00406 htable[to] = smallcount;
00407 flowto[smallcount].key = -vsize[i];
00408 flowto[smallcount].val = to;
00409 smallcount++;
00410 }
00411 else {
00412 flowto[htable[to]].key += -vsize[i];
00413 }
00414 }
00415
00416
00417 for (j=0; j<smallcount; j++)
00418 htable[flowto[j].val] = -1;
00419 ASSERTS(idxsum(nparts, htable) == -nparts);
00420
00421 ikeysort(smallcount, flowto);
00422
00423 for (j=0; j<amin(smallcount, max_mult); j++, bigcount++) {
00424 bestflow[bigcount].key = flowto[j].key;
00425 bestflow[bigcount].val = current_from*nparts+flowto[j].val;
00426 }
00427 ikeysort(bigcount, bestflow);
00428
00429 ASSERTS(idxsum(nparts, map) == -nparts);
00430 ASSERTS(idxsum(nparts, rowmap) == -nparts);
00431 nmapped = 0;
00432
00433
00434 for (ii=0; ii<bigcount; ii++) {
00435 i = bestflow[ii].val;
00436 j = i % nparts;
00437 k = i / nparts;
00438
00439 if (map[j] == -1 && rowmap[k] == -1 && SimilarTpwgts(tpwgts, graph->ncon, j, k)) {
00440 map[j] = k;
00441 rowmap[k] = j;
00442 nmapped++;
00443 }
00444
00445 if (nmapped == nparts)
00446 break;
00447 }
00448
00449
00450
00451
00452 if (nmapped < nparts) {
00453 for (j=0; j<nparts && nmapped<nparts; j++) {
00454 if (map[j] == -1) {
00455 for (ii=0; ii<nparts; ii++) {
00456 i = (j+ii) % nparts;
00457 if (rowmap[i] == -1 && SimilarTpwgts(tpwgts, graph->ncon, i, j)) {
00458 map[j] = i;
00459 rowmap[i] = j;
00460 nmapped++;
00461 break;
00462 }
00463 }
00464 }
00465 }
00466 }
00467
00468
00469
00470 if (nmapped < nparts)
00471 for (i=0; i<nparts; i++)
00472 map[i] = i;
00473
00474 for (i=0; i<nvtxs; i++)
00475 remap[i] = map[remap[i]];
00476
00477 GKfree((void **)&sortvtx, (void **)&flowto, (void **)&htable, LTERM);
00478 }
00479
00480
00481
00482
00483
00484 int SSMIncKeyCmp(const void *fptr, const void *sptr)
00485 {
00486 KeyKeyValueType *first, *second;
00487
00488 first = (KeyKeyValueType *)(fptr);
00489 second = (KeyKeyValueType *)(sptr);
00490
00491 if (first->key1 > second->key1)
00492 return 1;
00493
00494 if (first->key1 < second->key1)
00495 return -1;
00496
00497 if (first->key2 < second->key2)
00498 return 1;
00499
00500 if (first->key2 > second->key2)
00501 return -1;
00502
00503 return 0;
00504 }
00505
00506
00507
00508
00509
00510 void Moc_Serial_FM_2WayRefine(GraphType *graph, floattype *tpwgts, int npasses)
00511 {
00512 int i, ii, j, k;
00513 int kwgt, nvtxs, ncon, nbnd, nswaps, from, to, pass, limit, tmp, cnum;
00514 idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind;
00515 idxtype *moved, *swaps, *qnum;
00516 floattype *nvwgt, *npwgts, mindiff[MAXNCON], origbal, minbal, newbal;
00517 FPQueueType parts[MAXNCON][2];
00518 int higain, oldgain, mincut, initcut, newcut, mincutorder;
00519 floattype rtpwgts[MAXNCON*2];
00520 KeyValueType *cand;
00521 int mype;
00522 MPI_Comm_rank(MPI_COMM_WORLD, &mype);
00523
00524 nvtxs = graph->nvtxs;
00525 ncon = graph->ncon;
00526 xadj = graph->xadj;
00527 nvwgt = graph->nvwgt;
00528 adjncy = graph->adjncy;
00529 adjwgt = graph->adjwgt;
00530 where = graph->where;
00531 id = graph->sendind;
00532 ed = graph->recvind;
00533 npwgts = graph->gnpwgts;
00534 bndptr = graph->sendptr;
00535 bndind = graph->recvptr;
00536
00537 moved = idxmalloc(nvtxs, "moved");
00538 swaps = idxmalloc(nvtxs, "swaps");
00539 qnum = idxmalloc(nvtxs, "qnum");
00540 cand = (KeyValueType *)GKmalloc(nvtxs*sizeof(KeyValueType), "cand");
00541
00542 limit = amin(amax(0.01*nvtxs, 25), 150);
00543
00544
00545 for (i=0; i<ncon; i++) {
00546 FPQueueInit(&parts[i][0], nvtxs);
00547 FPQueueInit(&parts[i][1], nvtxs);
00548 }
00549 for (i=0; i<nvtxs; i++)
00550 qnum[i] = samax(ncon, nvwgt+i*ncon);
00551
00552 origbal = Serial_Compute2WayHLoadImbalance(ncon, npwgts, tpwgts);
00553
00554 for (i=0; i<ncon; i++) {
00555 rtpwgts[i] = origbal*tpwgts[i];
00556 rtpwgts[ncon+i] = origbal*tpwgts[ncon+i];
00557 }
00558
00559 idxset(nvtxs, -1, moved);
00560 for (pass=0; pass<npasses; pass++) {
00561 for (i=0; i<ncon; i++) {
00562 FPQueueReset(&parts[i][0]);
00563 FPQueueReset(&parts[i][1]);
00564 }
00565
00566 mincutorder = -1;
00567 newcut = mincut = initcut = graph->mincut;
00568 for (i=0; i<ncon; i++)
00569 mindiff[i] = fabs(tpwgts[i]-npwgts[i]);
00570 minbal = Serial_Compute2WayHLoadImbalance(ncon, npwgts, tpwgts);
00571
00572
00573 nbnd = graph->gnvtxs;
00574
00575 for (i=0; i<nbnd; i++) {
00576 cand[i].key = id[i]-ed[i];
00577 cand[i].val = i;
00578 }
00579 ikeysort(nbnd, cand);
00580
00581 for (ii=0; ii<nbnd; ii++) {
00582 i = bndind[cand[ii].val];
00583 FPQueueInsert(&parts[qnum[i]][where[i]], i, (floattype)(ed[i]-id[i]));
00584 }
00585
00586 for (nswaps=0; nswaps<nvtxs; nswaps++) {
00587 Serial_SelectQueue(ncon, npwgts, rtpwgts, &from, &cnum, parts);
00588 to = (from+1)%2;
00589
00590 if (from == -1 || (higain = FPQueueGetMax(&parts[cnum][from])) == -1)
00591 break;
00592
00593 saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
00594 saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1);
00595
00596 newcut -= (ed[higain]-id[higain]);
00597 newbal = Serial_Compute2WayHLoadImbalance(ncon, npwgts, tpwgts);
00598
00599 if ((newcut < mincut && newbal-origbal <= .00001) ||
00600 (newcut == mincut && (newbal < minbal ||
00601 (newbal == minbal && Serial_BetterBalance(ncon, npwgts, tpwgts, mindiff))))) {
00602 mincut = newcut;
00603 minbal = newbal;
00604 mincutorder = nswaps;
00605 for (i=0; i<ncon; i++)
00606 mindiff[i] = fabs(tpwgts[i]-npwgts[i]);
00607 }
00608 else if (nswaps-mincutorder > limit) {
00609 newcut += (ed[higain]-id[higain]);
00610 saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1);
00611 saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
00612 break;
00613 }
00614
00615 where[higain] = to;
00616 moved[higain] = nswaps;
00617 swaps[nswaps] = higain;
00618
00619
00620
00621
00622 SWAP(id[higain], ed[higain], tmp);
00623 if (ed[higain] == 0 && xadj[higain] < xadj[higain+1])
00624 BNDDelete(nbnd, bndind, bndptr, higain);
00625
00626 for (j=xadj[higain]; j<xadj[higain+1]; j++) {
00627 k = adjncy[j];
00628 oldgain = ed[k]-id[k];
00629
00630 kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
00631 INC_DEC(id[k], ed[k], kwgt);
00632
00633
00634 if (bndptr[k] != -1) {
00635 if (ed[k] == 0) {
00636 BNDDelete(nbnd, bndind, bndptr, k);
00637 if (moved[k] == -1)
00638 FPQueueDelete(&parts[qnum[k]][where[k]], k);
00639 }
00640 else {
00641 if (moved[k] == -1)
00642 FPQueueUpdate(&parts[qnum[k]][where[k]], k, (floattype)oldgain, (floattype)(ed[k]-id[k]));
00643 }
00644 }
00645 else {
00646 if (ed[k] > 0) {
00647 BNDInsert(nbnd, bndind, bndptr, k);
00648 if (moved[k] == -1)
00649 FPQueueInsert(&parts[qnum[k]][where[k]], k, (floattype)(ed[k]-id[k]));
00650 }
00651 }
00652 }
00653 }
00654
00655
00656
00657
00658 for (i=0; i<nswaps; i++)
00659 moved[swaps[i]] = -1;
00660 for (nswaps--; nswaps>mincutorder; nswaps--) {
00661 higain = swaps[nswaps];
00662
00663 to = where[higain] = (where[higain]+1)%2;
00664 SWAP(id[higain], ed[higain], tmp);
00665 if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1])
00666 BNDDelete(nbnd, bndind, bndptr, higain);
00667 else if (ed[higain] > 0 && bndptr[higain] == -1)
00668 BNDInsert(nbnd, bndind, bndptr, higain);
00669
00670 saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
00671 saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+((to+1)%2)*ncon, 1);
00672 for (j=xadj[higain]; j<xadj[higain+1]; j++) {
00673 k = adjncy[j];
00674
00675 kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
00676 INC_DEC(id[k], ed[k], kwgt);
00677
00678 if (bndptr[k] != -1 && ed[k] == 0)
00679 BNDDelete(nbnd, bndind, bndptr, k);
00680 if (bndptr[k] == -1 && ed[k] > 0)
00681 BNDInsert(nbnd, bndind, bndptr, k);
00682 }
00683 }
00684
00685 graph->mincut = mincut;
00686 graph->gnvtxs = nbnd;
00687
00688 if (mincutorder == -1 || mincut == initcut)
00689 break;
00690 }
00691
00692 for (i=0; i<ncon; i++) {
00693 FPQueueFree(&parts[i][0]);
00694 FPQueueFree(&parts[i][1]);
00695 }
00696
00697 GKfree((void **)&cand, (void **)&qnum, (void **)&moved, (void **)&swaps, LTERM);
00698 return;
00699 }
00700
00701
00702
00703
00704
00705 void Serial_SelectQueue(int ncon, floattype *npwgts, floattype *tpwgts, int *from, int *cnum,
00706 FPQueueType queues[MAXNCON][2])
00707 {
00708 int i, part;
00709 floattype maxgain=0.0;
00710 floattype max = -1.0, maxdiff=0.0;
00711 int mype;
00712 MPI_Comm_rank(MPI_COMM_WORLD, &mype);
00713
00714 *from = -1;
00715 *cnum = -1;
00716
00717
00718 for (part=0; part<2; part++) {
00719 for (i=0; i<ncon; i++) {
00720 if (npwgts[part*ncon+i]-tpwgts[part*ncon+i] >= maxdiff) {
00721 maxdiff = npwgts[part*ncon+i]-tpwgts[part*ncon+i];
00722 *from = part;
00723 *cnum = i;
00724 }
00725 }
00726 }
00727
00728 if (*from != -1 && FPQueueGetQSize(&queues[*cnum][*from]) == 0) {
00729
00730 for (i=0; i<ncon; i++) {
00731 if (FPQueueGetQSize(&queues[i][*from]) > 0) {
00732 max = npwgts[(*from)*ncon + i];
00733 *cnum = i;
00734 break;
00735 }
00736 }
00737
00738 for (i++; i<ncon; i++) {
00739 if (npwgts[(*from)*ncon + i] > max && FPQueueGetQSize(&queues[i][*from]) > 0) {
00740 max = npwgts[(*from)*ncon + i];
00741 *cnum = i;
00742 }
00743 }
00744 }
00745
00746
00747
00748 if (maxdiff <= 0.0 || *from == -1) {
00749 maxgain = -100000.0;
00750
00751 for (part=0; part<2; part++) {
00752 for (i=0; i<ncon; i++) {
00753 if (FPQueueGetQSize(&queues[i][part]) > 0 &&
00754 FPQueueSeeMaxGain(&queues[i][part]) > maxgain) {
00755 maxgain = FPQueueSeeMaxGain(&queues[i][part]);
00756 *from = part;
00757 *cnum = i;
00758 }
00759 }
00760 }
00761 }
00762
00763 return;
00764 }
00765
00766
00767
00768
00769
00770 int Serial_BetterBalance(int ncon, floattype *npwgts, floattype *tpwgts, floattype *diff)
00771 {
00772 int i;
00773 floattype ndiff[MAXNCON];
00774
00775 for (i=0; i<ncon; i++)
00776 ndiff[i] = fabs(tpwgts[i]-npwgts[i]);
00777
00778 return snorm2(ncon, ndiff) < snorm2(ncon, diff);
00779 }
00780
00781
00782
00783
00784
00785
00786 floattype Serial_Compute2WayHLoadImbalance(int ncon, floattype *npwgts, floattype *tpwgts)
00787 {
00788 int i;
00789 floattype max=0.0, temp;
00790
00791 for (i=0; i<ncon; i++) {
00792 if (tpwgts[i] == 0.0)
00793 temp = 0.0;
00794 else
00795 temp = fabs(tpwgts[i]-npwgts[i])/tpwgts[i];
00796 max = (max < temp ? temp : max);
00797 }
00798 return 1.0+max;
00799 }
00800
00801
00802
00803
00804
00805
00806 void Moc_Serial_Balance2Way(GraphType *graph, floattype *tpwgts, floattype lbfactor)
00807 {
00808 int i, ii, j, k, kwgt, nvtxs, ncon, nbnd, nswaps, from, to, limit, tmp, cnum;
00809 idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind;
00810 idxtype *moved, *swaps, *qnum;
00811 floattype *nvwgt, *npwgts, mindiff[MAXNCON], origbal, minbal, newbal;
00812 FPQueueType parts[MAXNCON][2];
00813 int higain, oldgain, mincut, newcut, mincutorder;
00814 int qsizes[MAXNCON][2];
00815 KeyValueType *cand;
00816
00817 nvtxs = graph->nvtxs;
00818 ncon = graph->ncon;
00819 xadj = graph->xadj;
00820 nvwgt = graph->nvwgt;
00821 adjncy = graph->adjncy;
00822 adjwgt = graph->adjwgt;
00823 where = graph->where;
00824 id = graph->sendind;
00825 ed = graph->recvind;
00826 npwgts = graph->gnpwgts;
00827 bndptr = graph->sendptr;
00828 bndind = graph->recvptr;
00829
00830 moved = idxmalloc(nvtxs, "moved");
00831 swaps = idxmalloc(nvtxs, "swaps");
00832 qnum = idxmalloc(nvtxs, "qnum");
00833 cand = (KeyValueType *)GKmalloc(nvtxs*sizeof(KeyValueType), "cand");
00834
00835
00836 limit = amin(amax(0.01*nvtxs, 15), 100);
00837
00838
00839 for (i=0; i<ncon; i++) {
00840 FPQueueInit(&parts[i][0], nvtxs);
00841 FPQueueInit(&parts[i][1], nvtxs);
00842 qsizes[i][0] = qsizes[i][1] = 0;
00843 }
00844
00845 for (i=0; i<nvtxs; i++) {
00846 qnum[i] = samax(ncon, nvwgt+i*ncon);
00847 qsizes[qnum[i]][where[i]]++;
00848 }
00849
00850 for (from=0; from<2; from++) {
00851 for (j=0; j<ncon; j++) {
00852 if (qsizes[j][from] == 0) {
00853 for (i=0; i<nvtxs; i++) {
00854 if (where[i] != from)
00855 continue;
00856
00857 k = samax2(ncon, nvwgt+i*ncon);
00858 if (k == j &&
00859 qsizes[qnum[i]][from] > qsizes[j][from] &&
00860 nvwgt[i*ncon+qnum[i]] < 1.3*nvwgt[i*ncon+j]) {
00861 qsizes[qnum[i]][from]--;
00862 qsizes[j][from]++;
00863 qnum[i] = j;
00864 }
00865 }
00866 }
00867 }
00868 }
00869
00870
00871 for (i=0; i<ncon; i++)
00872 mindiff[i] = fabs(tpwgts[i]-npwgts[i]);
00873 minbal = origbal = Serial_Compute2WayHLoadImbalance(ncon, npwgts, tpwgts);
00874 newcut = mincut = graph->mincut;
00875 mincutorder = -1;
00876
00877 idxset(nvtxs, -1, moved);
00878
00879
00880 nbnd = graph->gnvtxs;
00881 for (i=0; i<nvtxs; i++) {
00882 cand[i].key = id[i]-ed[i];
00883 cand[i].val = i;
00884 }
00885 ikeysort(nvtxs, cand);
00886
00887 for (ii=0; ii<nvtxs; ii++) {
00888 i = cand[ii].val;
00889 FPQueueInsert(&parts[qnum[i]][where[i]], i, (floattype)(ed[i]-id[i]));
00890 }
00891
00892 for (nswaps=0; nswaps<nvtxs; nswaps++) {
00893 if (minbal < lbfactor)
00894 break;
00895
00896 Serial_SelectQueue(ncon, npwgts, tpwgts, &from, &cnum, parts);
00897 to = (from+1)%2;
00898
00899 if (from == -1 || (higain = FPQueueGetMax(&parts[cnum][from])) == -1)
00900 break;
00901
00902 saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
00903 saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1);
00904 newcut -= (ed[higain]-id[higain]);
00905 newbal = Serial_Compute2WayHLoadImbalance(ncon, npwgts, tpwgts);
00906
00907 if (newbal < minbal || (newbal == minbal &&
00908 (newcut < mincut || (newcut == mincut &&
00909 Serial_BetterBalance(ncon, npwgts, tpwgts, mindiff))))) {
00910 mincut = newcut;
00911 minbal = newbal;
00912 mincutorder = nswaps;
00913 for (i=0; i<ncon; i++)
00914 mindiff[i] = fabs(tpwgts[i]-npwgts[i]);
00915 }
00916 else if (nswaps-mincutorder > limit) {
00917 newcut += (ed[higain]-id[higain]);
00918 saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1);
00919 saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
00920 break;
00921 }
00922
00923 where[higain] = to;
00924 moved[higain] = nswaps;
00925 swaps[nswaps] = higain;
00926
00927
00928
00929
00930 SWAP(id[higain], ed[higain], tmp);
00931 if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1])
00932 BNDDelete(nbnd, bndind, bndptr, higain);
00933 if (ed[higain] > 0 && bndptr[higain] == -1)
00934 BNDInsert(nbnd, bndind, bndptr, higain);
00935
00936 for (j=xadj[higain]; j<xadj[higain+1]; j++) {
00937 k = adjncy[j];
00938 oldgain = ed[k]-id[k];
00939
00940 kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
00941 INC_DEC(id[k], ed[k], kwgt);
00942
00943
00944 if (moved[k] == -1)
00945 FPQueueUpdate(&parts[qnum[k]][where[k]], k, (floattype)(oldgain), (floattype)(ed[k]-id[k]));
00946
00947
00948 if (ed[k] == 0 && bndptr[k] != -1)
00949 BNDDelete(nbnd, bndind, bndptr, k);
00950 else if (ed[k] > 0 && bndptr[k] == -1)
00951 BNDInsert(nbnd, bndind, bndptr, k);
00952 }
00953 }
00954
00955
00956
00957
00958
00959 for (nswaps--; nswaps>mincutorder; nswaps--) {
00960 higain = swaps[nswaps];
00961
00962 to = where[higain] = (where[higain]+1)%2;
00963 SWAP(id[higain], ed[higain], tmp);
00964 if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1])
00965 BNDDelete(nbnd, bndind, bndptr, higain);
00966 else if (ed[higain] > 0 && bndptr[higain] == -1)
00967 BNDInsert(nbnd, bndind, bndptr, higain);
00968
00969 saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
00970 saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+((to+1)%2)*ncon, 1);
00971 for (j=xadj[higain]; j<xadj[higain+1]; j++) {
00972 k = adjncy[j];
00973
00974 kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
00975 INC_DEC(id[k], ed[k], kwgt);
00976
00977 if (bndptr[k] != -1 && ed[k] == 0)
00978 BNDDelete(nbnd, bndind, bndptr, k);
00979 if (bndptr[k] == -1 && ed[k] > 0)
00980 BNDInsert(nbnd, bndind, bndptr, k);
00981 }
00982 }
00983
00984 graph->mincut = mincut;
00985 graph->gnvtxs = nbnd;
00986
00987
00988 for (i=0; i<ncon; i++) {
00989 FPQueueFree(&parts[i][0]);
00990 FPQueueFree(&parts[i][1]);
00991 }
00992
00993 GKfree((void **)&cand, (void **)&qnum, (void **)&moved, (void **)&swaps, LTERM);
00994 return;
00995 }
00996
00997
00998
00999
01000
01001
01002
01003
01004
01005 void Moc_Serial_Init2WayBalance(GraphType *graph, floattype *tpwgts)
01006 {
01007 int i, ii, j, k;
01008 int kwgt, nvtxs, nbnd, ncon, nswaps, from, to, cnum, tmp;
01009 idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind;
01010 idxtype *qnum;
01011 floattype *nvwgt, *npwgts;
01012 FPQueueType parts[MAXNCON][2];
01013 int higain, oldgain, mincut;
01014 KeyValueType *cand;
01015
01016 nvtxs = graph->nvtxs;
01017 ncon = graph->ncon;
01018 xadj = graph->xadj;
01019 adjncy = graph->adjncy;
01020 nvwgt = graph->nvwgt;
01021 adjwgt = graph->adjwgt;
01022 where = graph->where;
01023 id = graph->sendind;
01024 ed = graph->recvind;
01025 npwgts = graph->gnpwgts;
01026 bndptr = graph->sendptr;
01027 bndind = graph->recvptr;
01028
01029 qnum = idxmalloc(nvtxs, "qnum");
01030 cand = (KeyValueType *)GKmalloc(nvtxs*sizeof(KeyValueType), "cand");
01031
01032
01033 from = 1;
01034 to = (from+1)%2;
01035
01036 for (i=0; i<ncon; i++) {
01037 FPQueueInit(&parts[i][0], nvtxs);
01038 FPQueueInit(&parts[i][1], nvtxs);
01039 }
01040
01041
01042 for (i=0; i<nvtxs; i++)
01043 qnum[i] = samax(ncon, nvwgt+i*ncon);
01044
01045 for (i=0; i<nvtxs; i++) {
01046 cand[i].key = id[i]-ed[i];
01047 cand[i].val = i;
01048 }
01049 ikeysort(nvtxs, cand);
01050
01051
01052 for (ii=0; ii<nvtxs; ii++) {
01053 i = cand[ii].val;
01054 if (where[i] == from) {
01055 if (ed[i] > 0)
01056 FPQueueInsert(&parts[qnum[i]][0], i, (floattype)(ed[i]-id[i]));
01057 else
01058 FPQueueInsert(&parts[qnum[i]][1], i, (floattype)(ed[i]-id[i]));
01059 }
01060 }
01061
01062 mincut = graph->mincut;
01063 nbnd = graph->gnvtxs;
01064 for (nswaps=0; nswaps<nvtxs; nswaps++) {
01065 if (Serial_AreAnyVwgtsBelow(ncon, 1.0, npwgts+from*ncon, 0.0, nvwgt, tpwgts+from*ncon))
01066 break;
01067
01068 if ((cnum = Serial_SelectQueueOneWay(ncon, npwgts, tpwgts, from, parts)) == -1)
01069 break;
01070
01071
01072 if ((higain = FPQueueGetMax(&parts[cnum][0])) == -1)
01073 higain = FPQueueGetMax(&parts[cnum][1]);
01074
01075 mincut -= (ed[higain]-id[higain]);
01076 saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
01077 saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1);
01078
01079 where[higain] = to;
01080
01081
01082
01083
01084 SWAP(id[higain], ed[higain], tmp);
01085 if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1])
01086 BNDDelete(nbnd, bndind, bndptr, higain);
01087 if (ed[higain] > 0 && bndptr[higain] == -1)
01088 BNDInsert(nbnd, bndind, bndptr, higain);
01089
01090 for (j=xadj[higain]; j<xadj[higain+1]; j++) {
01091 k = adjncy[j];
01092 oldgain = ed[k]-id[k];
01093
01094 kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
01095 INC_DEC(id[k], ed[k], kwgt);
01096
01097
01098 if (where[k] == from) {
01099 if (ed[k] > 0 && bndptr[k] == -1) {
01100 FPQueueDelete(&parts[qnum[k]][1], k);
01101 FPQueueInsert(&parts[qnum[k]][0], k, (floattype)(ed[k]-id[k]));
01102 }
01103 else {
01104 FPQueueUpdate(&parts[qnum[k]][0], k, (floattype)(oldgain), (floattype)(ed[k]-id[k]));
01105 }
01106 }
01107
01108
01109 if (ed[k] == 0 && bndptr[k] != -1)
01110 BNDDelete(nbnd, bndind, bndptr, k);
01111 else if (ed[k] > 0 && bndptr[k] == -1)
01112 BNDInsert(nbnd, bndind, bndptr, k);
01113 }
01114 }
01115
01116 graph->mincut = mincut;
01117 graph->gnvtxs = nbnd;
01118
01119 for (i=0; i<ncon; i++) {
01120 FPQueueFree(&parts[i][0]);
01121 FPQueueFree(&parts[i][1]);
01122 }
01123
01124 GKfree((void **)&cand, (void **)&qnum, LTERM);
01125 }
01126
01127
01128
01129
01130
01131
01132 int Serial_SelectQueueOneWay(int ncon, floattype *npwgts, floattype *tpwgts, int from,
01133 FPQueueType queues[MAXNCON][2])
01134 {
01135 int i, cnum=-1;
01136 floattype max=0.0;
01137
01138 for (i=0; i<ncon; i++) {
01139 if (npwgts[from*ncon+i]-tpwgts[from*ncon+i] >= max &&
01140 FPQueueGetQSize(&queues[i][0]) + FPQueueGetQSize(&queues[i][1]) > 0) {
01141 max = npwgts[from*ncon+i]-tpwgts[i];
01142 cnum = i;
01143 }
01144 }
01145
01146 return cnum;
01147 }
01148
01149
01150
01151
01152
01153 void Moc_Serial_Compute2WayPartitionParams(GraphType *graph)
01154 {
01155 int i, j, me, nvtxs, ncon, nbnd, mincut;
01156 idxtype *xadj, *adjncy, *adjwgt;
01157 floattype *nvwgt, *npwgts;
01158 idxtype *id, *ed, *where;
01159 idxtype *bndptr, *bndind;
01160
01161 nvtxs = graph->nvtxs;
01162 ncon = graph->ncon;
01163 xadj = graph->xadj;
01164 nvwgt = graph->nvwgt;
01165 adjncy = graph->adjncy;
01166 adjwgt = graph->adjwgt;
01167 where = graph->where;
01168
01169 npwgts = sset(2*ncon, 0.0, graph->gnpwgts);
01170 id = idxset(nvtxs, 0, graph->sendind);
01171 ed = idxset(nvtxs, 0, graph->recvind);
01172 bndptr = idxset(nvtxs, -1, graph->sendptr);
01173 bndind = graph->recvptr;
01174
01175
01176
01177
01178 nbnd = mincut = 0;
01179 for (i=0; i<nvtxs; i++) {
01180 me = where[i];
01181 saxpy2(ncon, 1.0, nvwgt+i*ncon, 1, npwgts+me*ncon, 1);
01182
01183 for (j=xadj[i]; j<xadj[i+1]; j++) {
01184 if (me == where[adjncy[j]])
01185 id[i] += adjwgt[j];
01186 else
01187 ed[i] += adjwgt[j];
01188 }
01189
01190 if (ed[i] > 0 || xadj[i] == xadj[i+1]) {
01191 mincut += ed[i];
01192 bndptr[i] = nbnd;
01193 bndind[nbnd++] = i;
01194 }
01195 }
01196
01197 graph->mincut = mincut/2;
01198 graph->gnvtxs = nbnd;
01199
01200 }
01201
01202
01203
01204
01205
01206 int Serial_AreAnyVwgtsBelow(int ncon, floattype alpha, floattype *vwgt1, floattype beta, floattype *vwgt2, floattype *limit)
01207 {
01208 int i;
01209
01210 for (i=0; i<ncon; i++)
01211 if (alpha*vwgt1[i] + beta*vwgt2[i] < limit[i])
01212 return 1;
01213
01214 return 0;
01215 }
01216
01217
01218
01219
01220
01221 int ComputeSerialEdgeCut(GraphType *graph)
01222 {
01223 int i, j;
01224 int cut = 0;
01225
01226 for (i=0; i<graph->nvtxs; i++) {
01227 for (j=graph->xadj[i]; j<graph->xadj[i+1]; j++)
01228 if (graph->where[i] != graph->where[graph->adjncy[j]])
01229 cut += graph->adjwgt[j];
01230 }
01231 graph->mincut = cut/2;
01232
01233 return graph->mincut;
01234 }
01235
01236
01237
01238
01239 int ComputeSerialTotalV(GraphType *graph, idxtype *home)
01240 {
01241 int i;
01242 int totalv = 0;
01243
01244 for (i=0; i<graph->nvtxs; i++)
01245 if (graph->where[i] != home[i])
01246 totalv += (graph->vsize == NULL) ? graph->vwgt[i] : graph->vsize[i];
01247
01248 return totalv;
01249 }
01250
01251