00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014 #include <metis.h>
00015
00016
00017
00018
00019
00020 void MocFM_2WayEdgeRefine(CtrlType *ctrl, GraphType *graph, floattype *tpwgts, int npasses)
00021 {
00022 int i, ii, j, k, l, kwgt, nvtxs, ncon, nbnd, nswaps, from, to, pass, me, limit, tmp, cnum;
00023 idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind;
00024 idxtype *moved, *swaps, *perm, *qnum;
00025 floattype *nvwgt, *npwgts, mindiff[MAXNCON], origbal, minbal, newbal;
00026 PQueueType parts[MAXNCON][2];
00027 int higain, oldgain, mincut, initcut, newcut, mincutorder;
00028 floattype rtpwgts[2];
00029
00030 nvtxs = graph->nvtxs;
00031 ncon = graph->ncon;
00032 xadj = graph->xadj;
00033 nvwgt = graph->nvwgt;
00034 adjncy = graph->adjncy;
00035 adjwgt = graph->adjwgt;
00036 where = graph->where;
00037 id = graph->id;
00038 ed = graph->ed;
00039 npwgts = graph->npwgts;
00040 bndptr = graph->bndptr;
00041 bndind = graph->bndind;
00042
00043 moved = idxwspacemalloc(ctrl, nvtxs);
00044 swaps = idxwspacemalloc(ctrl, nvtxs);
00045 perm = idxwspacemalloc(ctrl, nvtxs);
00046 qnum = idxwspacemalloc(ctrl, nvtxs);
00047
00048 limit = amin(amax(0.01*nvtxs, 25), 150);
00049
00050
00051 for (i=0; i<ncon; i++) {
00052 PQueueInit(ctrl, &parts[i][0], nvtxs, PLUS_GAINSPAN+1);
00053 PQueueInit(ctrl, &parts[i][1], nvtxs, PLUS_GAINSPAN+1);
00054 }
00055 for (i=0; i<nvtxs; i++)
00056 qnum[i] = samax(ncon, nvwgt+i*ncon);
00057
00058 origbal = Compute2WayHLoadImbalance(ncon, npwgts, tpwgts);
00059
00060 rtpwgts[0] = origbal*tpwgts[0];
00061 rtpwgts[1] = origbal*tpwgts[1];
00062
00063 if (ctrl->dbglvl&DBG_REFINE) {
00064 printf("Parts: [");
00065 for (l=0; l<ncon; l++)
00066 printf("(%.3f, %.3f) ", npwgts[l], npwgts[ncon+l]);
00067 printf("] T[%.3f %.3f], Nv-Nb[%5d, %5d]. ICut: %6d, LB: %.3f\n", tpwgts[0], tpwgts[1], graph->nvtxs, graph->nbnd, graph->mincut, origbal);
00068 }
00069
00070 idxset(nvtxs, -1, moved);
00071 for (pass=0; pass<npasses; pass++) {
00072 for (i=0; i<ncon; i++) {
00073 PQueueReset(&parts[i][0]);
00074 PQueueReset(&parts[i][1]);
00075 }
00076
00077 mincutorder = -1;
00078 newcut = mincut = initcut = graph->mincut;
00079 for (i=0; i<ncon; i++)
00080 mindiff[i] = fabs(tpwgts[0]-npwgts[i]);
00081 minbal = Compute2WayHLoadImbalance(ncon, npwgts, tpwgts);
00082
00083 ASSERT(ComputeCut(graph, where) == graph->mincut);
00084 ASSERT(CheckBnd(graph));
00085
00086
00087 nbnd = graph->nbnd;
00088 RandomPermute(nbnd, perm, 1);
00089 for (ii=0; ii<nbnd; ii++) {
00090 i = bndind[perm[ii]];
00091 ASSERT(ed[i] > 0 || id[i] == 0);
00092 ASSERT(bndptr[i] != -1);
00093 PQueueInsert(&parts[qnum[i]][where[i]], i, ed[i]-id[i]);
00094 }
00095
00096 for (nswaps=0; nswaps<nvtxs; nswaps++) {
00097 SelectQueue(ncon, npwgts, rtpwgts, &from, &cnum, parts);
00098 to = (from+1)%2;
00099
00100 if (from == -1 || (higain = PQueueGetMax(&parts[cnum][from])) == -1)
00101 break;
00102 ASSERT(bndptr[higain] != -1);
00103
00104 saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
00105 saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1);
00106
00107 newcut -= (ed[higain]-id[higain]);
00108 newbal = Compute2WayHLoadImbalance(ncon, npwgts, tpwgts);
00109
00110 if ((newcut < mincut && newbal-origbal <= .00001) ||
00111 (newcut == mincut && (newbal < minbal ||
00112 (newbal == minbal && BetterBalance(ncon, npwgts, tpwgts, mindiff))))) {
00113 mincut = newcut;
00114 minbal = newbal;
00115 mincutorder = nswaps;
00116 for (i=0; i<ncon; i++)
00117 mindiff[i] = fabs(tpwgts[0]-npwgts[i]);
00118 }
00119 else if (nswaps-mincutorder > limit) {
00120 newcut += (ed[higain]-id[higain]);
00121 saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1);
00122 saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
00123 break;
00124 }
00125
00126 where[higain] = to;
00127 moved[higain] = nswaps;
00128 swaps[nswaps] = higain;
00129
00130 if (ctrl->dbglvl&DBG_MOVEINFO) {
00131 printf("Moved %6d from %d(%d). Gain: %5d, Cut: %5d, NPwgts: ", higain, from, cnum, ed[higain]-id[higain], newcut);
00132 for (l=0; l<ncon; l++)
00133 printf("(%.3f, %.3f) ", npwgts[l], npwgts[ncon+l]);
00134 printf(", %.3f LB: %.3f\n", minbal, newbal);
00135 }
00136
00137
00138
00139
00140
00141 SWAP(id[higain], ed[higain], tmp);
00142 if (ed[higain] == 0 && xadj[higain] < xadj[higain+1])
00143 BNDDelete(nbnd, bndind, bndptr, higain);
00144
00145 for (j=xadj[higain]; j<xadj[higain+1]; j++) {
00146 k = adjncy[j];
00147 oldgain = ed[k]-id[k];
00148
00149 kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
00150 INC_DEC(id[k], ed[k], kwgt);
00151
00152
00153 if (bndptr[k] != -1) {
00154 if (ed[k] == 0) {
00155 BNDDelete(nbnd, bndind, bndptr, k);
00156 if (moved[k] == -1)
00157 PQueueDelete(&parts[qnum[k]][where[k]], k, oldgain);
00158 }
00159 else {
00160 if (moved[k] == -1)
00161 PQueueUpdate(&parts[qnum[k]][where[k]], k, oldgain, ed[k]-id[k]);
00162 }
00163 }
00164 else {
00165 if (ed[k] > 0) {
00166 BNDInsert(nbnd, bndind, bndptr, k);
00167 if (moved[k] == -1)
00168 PQueueInsert(&parts[qnum[k]][where[k]], k, ed[k]-id[k]);
00169 }
00170 }
00171 }
00172
00173 }
00174
00175
00176
00177
00178
00179 for (i=0; i<nswaps; i++)
00180 moved[swaps[i]] = -1;
00181 for (nswaps--; nswaps>mincutorder; nswaps--) {
00182 higain = swaps[nswaps];
00183
00184 to = where[higain] = (where[higain]+1)%2;
00185 SWAP(id[higain], ed[higain], tmp);
00186 if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1])
00187 BNDDelete(nbnd, bndind, bndptr, higain);
00188 else if (ed[higain] > 0 && bndptr[higain] == -1)
00189 BNDInsert(nbnd, bndind, bndptr, higain);
00190
00191 saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
00192 saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+((to+1)%2)*ncon, 1);
00193 for (j=xadj[higain]; j<xadj[higain+1]; j++) {
00194 k = adjncy[j];
00195
00196 kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
00197 INC_DEC(id[k], ed[k], kwgt);
00198
00199 if (bndptr[k] != -1 && ed[k] == 0)
00200 BNDDelete(nbnd, bndind, bndptr, k);
00201 if (bndptr[k] == -1 && ed[k] > 0)
00202 BNDInsert(nbnd, bndind, bndptr, k);
00203 }
00204 }
00205
00206 if (ctrl->dbglvl&DBG_REFINE) {
00207 printf("\tMincut: %6d at %5d, NBND: %6d, NPwgts: [", mincut, mincutorder, nbnd);
00208 for (l=0; l<ncon; l++)
00209 printf("(%.3f, %.3f) ", npwgts[l], npwgts[ncon+l]);
00210 printf("], LB: %.3f\n", Compute2WayHLoadImbalance(ncon, npwgts, tpwgts));
00211 }
00212
00213 graph->mincut = mincut;
00214 graph->nbnd = nbnd;
00215
00216 if (mincutorder == -1 || mincut == initcut)
00217 break;
00218 }
00219
00220 for (i=0; i<ncon; i++) {
00221 PQueueFree(ctrl, &parts[i][0]);
00222 PQueueFree(ctrl, &parts[i][1]);
00223 }
00224
00225 idxwspacefree(ctrl, nvtxs);
00226 idxwspacefree(ctrl, nvtxs);
00227 idxwspacefree(ctrl, nvtxs);
00228 idxwspacefree(ctrl, nvtxs);
00229
00230 }
00231
00232
00233
00234
00235
00236
00237 void SelectQueue(int ncon, floattype *npwgts, floattype *tpwgts, int *from, int *cnum, PQueueType queues[MAXNCON][2])
00238 {
00239 int i, part, maxgain=0;
00240 floattype max, maxdiff=0.0;
00241
00242 *from = -1;
00243 *cnum = -1;
00244
00245
00246 for (part=0; part<2; part++) {
00247 for (i=0; i<ncon; i++) {
00248 if (npwgts[part*ncon+i]-tpwgts[part] >= maxdiff) {
00249 maxdiff = npwgts[part*ncon+i]-tpwgts[part];
00250 *from = part;
00251 *cnum = i;
00252 }
00253 }
00254 }
00255
00256
00257
00258 if (*from != -1 && PQueueGetSize(&queues[*cnum][*from]) == 0) {
00259
00260 for (i=0; i<ncon; i++) {
00261 if (PQueueGetSize(&queues[i][*from]) > 0) {
00262 max = npwgts[(*from)*ncon + i];
00263 *cnum = i;
00264 break;
00265 }
00266 }
00267
00268 for (i++; i<ncon; i++) {
00269 if (npwgts[(*from)*ncon + i] > max && PQueueGetSize(&queues[i][*from]) > 0) {
00270 max = npwgts[(*from)*ncon + i];
00271 *cnum = i;
00272 }
00273 }
00274 }
00275
00276
00277 if (maxdiff <= 0.0 || *from == -1) {
00278 maxgain = -100000;
00279
00280 for (part=0; part<2; part++) {
00281 for (i=0; i<ncon; i++) {
00282 if (PQueueGetSize(&queues[i][part]) > 0 && PQueueGetKey(&queues[i][part]) > maxgain) {
00283 maxgain = PQueueGetKey(&queues[i][part]);
00284 *from = part;
00285 *cnum = i;
00286 }
00287 }
00288 }
00289 }
00290 }
00291
00292
00293
00294
00295
00296
00297
00298
00299
00300 int BetterBalance(int ncon, floattype *npwgts, floattype *tpwgts, floattype *diff)
00301 {
00302 int i;
00303 floattype ndiff[MAXNCON];
00304
00305 for (i=0; i<ncon; i++)
00306 ndiff[i] = fabs(tpwgts[0]-npwgts[i]);
00307
00308 return snorm2(ncon, ndiff) < snorm2(ncon, diff);
00309 }
00310
00311
00312
00313
00314
00315
00316 floattype Compute2WayHLoadImbalance(int ncon, floattype *npwgts, floattype *tpwgts)
00317 {
00318 int i;
00319 floattype max=0.0, temp;
00320
00321 for (i=0; i<ncon; i++) {
00322
00323 temp = fabs(tpwgts[0]-npwgts[i])/tpwgts[0];
00324 max = (max < temp ? temp : max);
00325 }
00326 return 1.0+max;
00327 }
00328
00329
00330
00331
00332
00333
00334 void Compute2WayHLoadImbalanceVec(int ncon, floattype *npwgts, floattype *tpwgts, floattype *lbvec)
00335 {
00336 int i;
00337
00338 for (i=0; i<ncon; i++)
00339 lbvec[i] = 1.0 + fabs(tpwgts[0]-npwgts[i])/tpwgts[0];
00340 }
00341