00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014 #include <metis.h>
00015
00016
00017
00018
00019
00020 void MocFM_2WayEdgeRefine2(CtrlType *ctrl, GraphType *graph, floattype *tpwgts, floattype *orgubvec,
00021 int npasses)
00022 {
00023 int i, ii, j, k, l, kwgt, nvtxs, ncon, nbnd, nswaps, from, to, pass, me, limit, tmp, cnum;
00024 idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind;
00025 idxtype *moved, *swaps, *perm, *qnum;
00026 floattype *nvwgt, *npwgts, origdiff[MAXNCON], origbal[MAXNCON], minbal[MAXNCON];
00027 PQueueType parts[MAXNCON][2];
00028 int higain, oldgain, mincut, initcut, newcut, mincutorder;
00029 floattype *maxwgt, *minwgt, ubvec[MAXNCON], tvec[MAXNCON];
00030
00031 nvtxs = graph->nvtxs;
00032 ncon = graph->ncon;
00033 xadj = graph->xadj;
00034 nvwgt = graph->nvwgt;
00035 adjncy = graph->adjncy;
00036 adjwgt = graph->adjwgt;
00037 where = graph->where;
00038 id = graph->id;
00039 ed = graph->ed;
00040 npwgts = graph->npwgts;
00041 bndptr = graph->bndptr;
00042 bndind = graph->bndind;
00043
00044 moved = idxwspacemalloc(ctrl, nvtxs);
00045 swaps = idxwspacemalloc(ctrl, nvtxs);
00046 perm = idxwspacemalloc(ctrl, nvtxs);
00047 qnum = idxwspacemalloc(ctrl, nvtxs);
00048
00049 limit = amin(amax(0.01*nvtxs, 15), 100);
00050
00051 Compute2WayHLoadImbalanceVec(ncon, npwgts, tpwgts, origbal);
00052 for (i=0; i<ncon; i++) {
00053 origdiff[i] = fabs(tpwgts[0]-npwgts[i]);
00054 ubvec[i] = amax(origbal[i], orgubvec[i]);
00055 }
00056
00057
00058 minwgt = fwspacemalloc(ctrl, 2*ncon);
00059 maxwgt = fwspacemalloc(ctrl, 2*ncon);
00060
00061 for (i=0; i<2; i++) {
00062 for (j=0; j<ncon; j++) {
00063 maxwgt[i*ncon+j] = tpwgts[i]*ubvec[j];
00064 minwgt[i*ncon+j] = tpwgts[i]*(1.0/ubvec[j]);
00065 }
00066 }
00067
00068
00069 for (i=0; i<ncon; i++) {
00070 PQueueInit(ctrl, &parts[i][0], nvtxs, PLUS_GAINSPAN+1);
00071 PQueueInit(ctrl, &parts[i][1], nvtxs, PLUS_GAINSPAN+1);
00072 }
00073 for (i=0; i<nvtxs; i++)
00074 qnum[i] = samax(ncon, nvwgt+i*ncon);
00075
00076
00077 if (ctrl->dbglvl&DBG_REFINE) {
00078 printf("Parts: [");
00079 for (l=0; l<ncon; l++)
00080 printf("(%.3f, %.3f) ", npwgts[l], npwgts[ncon+l]);
00081 printf("] T[%.3f %.3f], Nv-Nb[%5d, %5d]. ICut: %6d, LB: ", tpwgts[0], tpwgts[1],
00082 graph->nvtxs, graph->nbnd, graph->mincut);
00083 for (i=0; i<ncon; i++)
00084 printf("%.3f ", origbal[i]);
00085 printf("\n");
00086 }
00087
00088 idxset(nvtxs, -1, moved);
00089 for (pass=0; pass<npasses; pass++) {
00090 for (i=0; i<ncon; i++) {
00091 PQueueReset(&parts[i][0]);
00092 PQueueReset(&parts[i][1]);
00093 }
00094
00095 mincutorder = -1;
00096 newcut = mincut = initcut = graph->mincut;
00097 Compute2WayHLoadImbalanceVec(ncon, npwgts, tpwgts, minbal);
00098
00099 ASSERT(ComputeCut(graph, where) == graph->mincut);
00100 ASSERT(CheckBnd(graph));
00101
00102
00103 nbnd = graph->nbnd;
00104 RandomPermute(nbnd, perm, 1);
00105 for (ii=0; ii<nbnd; ii++) {
00106 i = bndind[perm[ii]];
00107 ASSERT(ed[i] > 0 || id[i] == 0);
00108 ASSERT(bndptr[i] != -1);
00109 PQueueInsert(&parts[qnum[i]][where[i]], i, ed[i]-id[i]);
00110 }
00111
00112 for (nswaps=0; nswaps<nvtxs; nswaps++) {
00113 SelectQueue2(ncon, npwgts, tpwgts, &from, &cnum, parts, maxwgt);
00114 to = (from+1)%2;
00115
00116 if (from == -1 || (higain = PQueueGetMax(&parts[cnum][from])) == -1)
00117 break;
00118 ASSERT(bndptr[higain] != -1);
00119
00120 newcut -= (ed[higain]-id[higain]);
00121 saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
00122 saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1);
00123
00124 Compute2WayHLoadImbalanceVec(ncon, npwgts, tpwgts, tvec);
00125 if ((newcut < mincut && AreAllBelow(ncon, tvec, ubvec)) ||
00126 (newcut == mincut && IsBetter2wayBalance(ncon, tvec, minbal, ubvec))) {
00127 mincut = newcut;
00128 for (i=0; i<ncon; i++)
00129 minbal[i] = tvec[i];
00130 mincutorder = nswaps;
00131 }
00132 else if (nswaps-mincutorder > limit) {
00133 newcut += (ed[higain]-id[higain]);
00134 saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1);
00135 saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
00136 break;
00137 }
00138
00139 where[higain] = to;
00140 moved[higain] = nswaps;
00141 swaps[nswaps] = higain;
00142
00143 if (ctrl->dbglvl&DBG_MOVEINFO) {
00144 printf("Moved %6d from %d(%d). Gain: %5d, Cut: %5d, NPwgts: ", higain, from, cnum, ed[higain]-id[higain], newcut);
00145 for (l=0; l<ncon; l++)
00146 printf("(%.3f, %.3f) ", npwgts[l], npwgts[ncon+l]);
00147
00148 printf(", LB: ");
00149 for (i=0; i<ncon; i++)
00150 printf("%.3f ", tvec[i]);
00151 if (mincutorder == nswaps)
00152 printf(" *\n");
00153 else
00154 printf("\n");
00155 }
00156
00157
00158
00159
00160
00161 SWAP(id[higain], ed[higain], tmp);
00162 if (ed[higain] == 0 && xadj[higain] < xadj[higain+1])
00163 BNDDelete(nbnd, bndind, bndptr, higain);
00164
00165 for (j=xadj[higain]; j<xadj[higain+1]; j++) {
00166 k = adjncy[j];
00167 oldgain = ed[k]-id[k];
00168
00169 kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
00170 INC_DEC(id[k], ed[k], kwgt);
00171
00172
00173 if (bndptr[k] != -1) {
00174 if (ed[k] == 0) {
00175 BNDDelete(nbnd, bndind, bndptr, k);
00176 if (moved[k] == -1)
00177 PQueueDelete(&parts[qnum[k]][where[k]], k, oldgain);
00178 }
00179 else {
00180 if (moved[k] == -1)
00181 PQueueUpdate(&parts[qnum[k]][where[k]], k, oldgain, ed[k]-id[k]);
00182 }
00183 }
00184 else {
00185 if (ed[k] > 0) {
00186 BNDInsert(nbnd, bndind, bndptr, k);
00187 if (moved[k] == -1)
00188 PQueueInsert(&parts[qnum[k]][where[k]], k, ed[k]-id[k]);
00189 }
00190 }
00191 }
00192
00193 }
00194
00195
00196
00197
00198
00199 for (i=0; i<nswaps; i++)
00200 moved[swaps[i]] = -1;
00201 for (nswaps--; nswaps>mincutorder; nswaps--) {
00202 higain = swaps[nswaps];
00203
00204 to = where[higain] = (where[higain]+1)%2;
00205 SWAP(id[higain], ed[higain], tmp);
00206 if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1])
00207 BNDDelete(nbnd, bndind, bndptr, higain);
00208 else if (ed[higain] > 0 && bndptr[higain] == -1)
00209 BNDInsert(nbnd, bndind, bndptr, higain);
00210
00211 saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
00212 saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+((to+1)%2)*ncon, 1);
00213 for (j=xadj[higain]; j<xadj[higain+1]; j++) {
00214 k = adjncy[j];
00215
00216 kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
00217 INC_DEC(id[k], ed[k], kwgt);
00218
00219 if (bndptr[k] != -1 && ed[k] == 0)
00220 BNDDelete(nbnd, bndind, bndptr, k);
00221 if (bndptr[k] == -1 && ed[k] > 0)
00222 BNDInsert(nbnd, bndind, bndptr, k);
00223 }
00224 }
00225
00226 if (ctrl->dbglvl&DBG_REFINE) {
00227 printf("\tMincut: %6d at %5d, NBND: %6d, NPwgts: [", mincut, mincutorder, nbnd);
00228 for (l=0; l<ncon; l++)
00229 printf("(%.3f, %.3f) ", npwgts[l], npwgts[ncon+l]);
00230 printf("], LB: ");
00231 Compute2WayHLoadImbalanceVec(ncon, npwgts, tpwgts, tvec);
00232 for (i=0; i<ncon; i++)
00233 printf("%.3f ", tvec[i]);
00234 printf("\n");
00235 }
00236
00237 graph->mincut = mincut;
00238 graph->nbnd = nbnd;
00239
00240 if (mincutorder == -1 || mincut == initcut)
00241 break;
00242 }
00243
00244 for (i=0; i<ncon; i++) {
00245 PQueueFree(ctrl, &parts[i][0]);
00246 PQueueFree(ctrl, &parts[i][1]);
00247 }
00248
00249 idxwspacefree(ctrl, nvtxs);
00250 idxwspacefree(ctrl, nvtxs);
00251 idxwspacefree(ctrl, nvtxs);
00252 idxwspacefree(ctrl, nvtxs);
00253 fwspacefree(ctrl, 2*ncon);
00254 fwspacefree(ctrl, 2*ncon);
00255
00256 }
00257
00258
00259
00260
00261
00262
00263 void SelectQueue2(int ncon, floattype *npwgts, floattype *tpwgts, int *from, int *cnum,
00264 PQueueType queues[MAXNCON][2], floattype *maxwgt)
00265 {
00266 int i, j, maxgain=0;
00267 floattype diff, max, maxdiff=0.0;
00268
00269 *from = -1;
00270 *cnum = -1;
00271
00272
00273 for (j=0; j<2; j++) {
00274 for (i=0; i<ncon; i++) {
00275 diff = npwgts[j*ncon+i]-maxwgt[j*ncon+i];
00276 if (diff >= maxdiff) {
00277 maxdiff = diff;
00278 *from = j;
00279 *cnum = i;
00280 }
00281 }
00282 }
00283
00284 if (*from != -1 && PQueueGetSize(&queues[*cnum][*from]) == 0) {
00285
00286 for (i=0; i<ncon; i++) {
00287 if (PQueueGetSize(&queues[i][*from]) > 0) {
00288 max = (npwgts[(*from)*ncon+i] - maxwgt[(*from)*ncon+i]);
00289 *cnum = i;
00290 break;
00291 }
00292 }
00293
00294 for (i++; i<ncon; i++) {
00295 diff = npwgts[(*from)*ncon+i] - maxwgt[(*from)*ncon+i];
00296 if (diff > max && PQueueGetSize(&queues[i][*from]) > 0) {
00297 max = diff;
00298 *cnum = i;
00299 }
00300 }
00301 }
00302
00303
00304 if (maxdiff <= 0.0) {
00305 maxgain = -100000;
00306
00307 for (j=0; j<2; j++) {
00308 for (i=0; i<ncon; i++) {
00309 if (PQueueGetSize(&queues[i][j]) > 0 && PQueueGetKey(&queues[i][j]) > maxgain) {
00310 maxgain = PQueueGetKey(&queues[i][j]);
00311 *from = j;
00312 *cnum = i;
00313 }
00314 }
00315 }
00316
00317
00318 }
00319 }
00320
00321
00322
00323
00324
00325
00326 int IsBetter2wayBalance(int ncon, floattype *newbal, floattype *oldbal, floattype *ubvec)
00327 {
00328 int i, j;
00329 floattype max1=0.0, max2=0.0, sum1=0.0, sum2=0.0, tmp;
00330
00331 for (i=0; i<ncon; i++) {
00332 tmp = (newbal[i]-1)/(ubvec[i]-1);
00333 max1 = (max1 < tmp ? tmp : max1);
00334 sum1 += tmp;
00335
00336 tmp = (oldbal[i]-1)/(ubvec[i]-1);
00337 max2 = (max2 < tmp ? tmp : max2);
00338 sum2 += tmp;
00339 }
00340
00341 if (max1 < max2)
00342 return 1;
00343 else if (max1 > max2)
00344 return 0;
00345 else
00346 return sum1 <= sum2;
00347 }
00348
00349