00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016 #include <metis.h>
00017
00018
00019
00020
00021
00022 void MocBalance2Way2(CtrlType *ctrl, GraphType *graph, floattype *tpwgts, floattype *ubvec)
00023 {
00024 int i;
00025 floattype tvec[MAXNCON];
00026
00027 Compute2WayHLoadImbalanceVec(graph->ncon, graph->npwgts, tpwgts, tvec);
00028 if (!AreAllBelow(graph->ncon, tvec, ubvec))
00029 MocGeneral2WayBalance2(ctrl, graph, tpwgts, ubvec);
00030 }
00031
00032
00033
00034
00035
00036
00037 void MocGeneral2WayBalance2(CtrlType *ctrl, GraphType *graph, floattype *tpwgts, floattype *ubvec)
00038 {
00039 int i, ii, j, k, l, kwgt, nvtxs, ncon, nbnd, nswaps, from, to, pass, me, limit, tmp, cnum;
00040 idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind;
00041 idxtype *moved, *swaps, *perm, *qnum;
00042 floattype *nvwgt, *npwgts, origbal[MAXNCON], minbal[MAXNCON], newbal[MAXNCON];
00043 PQueueType parts[MAXNCON][2];
00044 int higain, oldgain, mincut, newcut, mincutorder;
00045 floattype *maxwgt, *minwgt, tvec[MAXNCON];
00046
00047
00048 nvtxs = graph->nvtxs;
00049 ncon = graph->ncon;
00050 xadj = graph->xadj;
00051 nvwgt = graph->nvwgt;
00052 adjncy = graph->adjncy;
00053 adjwgt = graph->adjwgt;
00054 where = graph->where;
00055 id = graph->id;
00056 ed = graph->ed;
00057 npwgts = graph->npwgts;
00058 bndptr = graph->bndptr;
00059 bndind = graph->bndind;
00060
00061 moved = idxwspacemalloc(ctrl, nvtxs);
00062 swaps = idxwspacemalloc(ctrl, nvtxs);
00063 perm = idxwspacemalloc(ctrl, nvtxs);
00064 qnum = idxwspacemalloc(ctrl, nvtxs);
00065
00066 limit = amin(amax(0.01*nvtxs, 15), 100);
00067
00068
00069 minwgt = fwspacemalloc(ctrl, 2*ncon);
00070 maxwgt = fwspacemalloc(ctrl, 2*ncon);
00071
00072 for (i=0; i<2; i++) {
00073 for (j=0; j<ncon; j++) {
00074 maxwgt[i*ncon+j] = tpwgts[i]*ubvec[j];
00075 minwgt[i*ncon+j] = tpwgts[i]*(1.0/ubvec[j]);
00076 }
00077 }
00078
00079
00080
00081 for (i=0; i<ncon; i++) {
00082 PQueueInit(ctrl, &parts[i][0], nvtxs, PLUS_GAINSPAN+1);
00083 PQueueInit(ctrl, &parts[i][1], nvtxs, PLUS_GAINSPAN+1);
00084 }
00085 for (i=0; i<nvtxs; i++)
00086 qnum[i] = samax(ncon, nvwgt+i*ncon);
00087
00088 Compute2WayHLoadImbalanceVec(ncon, npwgts, tpwgts, origbal);
00089 for (i=0; i<ncon; i++)
00090 minbal[i] = origbal[i];
00091
00092 newcut = mincut = graph->mincut;
00093 mincutorder = -1;
00094
00095 if (ctrl->dbglvl&DBG_REFINE) {
00096 printf("Parts: [");
00097 for (l=0; l<ncon; l++)
00098 printf("(%.3f, %.3f) ", npwgts[l], npwgts[ncon+l]);
00099 printf("] T[%.3f %.3f], Nv-Nb[%5d, %5d]. ICut: %6d, LB: ", tpwgts[0], tpwgts[1],
00100 graph->nvtxs, graph->nbnd, graph->mincut);
00101 for (i=0; i<ncon; i++)
00102 printf("%.3f ", origbal[i]);
00103 printf("[B]\n");
00104 }
00105
00106 idxset(nvtxs, -1, moved);
00107
00108 ASSERT(ComputeCut(graph, where) == graph->mincut);
00109 ASSERT(CheckBnd(graph));
00110
00111
00112 nbnd = graph->nbnd;
00113 RandomPermute(nvtxs, perm, 1);
00114 for (ii=0; ii<nvtxs; ii++) {
00115 i = perm[ii];
00116 PQueueInsert(&parts[qnum[i]][where[i]], i, ed[i]-id[i]);
00117 }
00118
00119
00120 for (nswaps=0; nswaps<nvtxs; nswaps++) {
00121 if (AreAllBelow(ncon, minbal, ubvec))
00122 break;
00123
00124 SelectQueue3(ncon, npwgts, tpwgts, &from, &cnum, parts, maxwgt);
00125 to = (from+1)%2;
00126
00127 if (from == -1 || (higain = PQueueGetMax(&parts[cnum][from])) == -1)
00128 break;
00129
00130 saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
00131 saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1);
00132 newcut -= (ed[higain]-id[higain]);
00133 Compute2WayHLoadImbalanceVec(ncon, npwgts, tpwgts, newbal);
00134
00135 if (IsBetter2wayBalance(ncon, newbal, minbal, ubvec) ||
00136 (IsBetter2wayBalance(ncon, newbal, origbal, ubvec) && newcut < mincut)) {
00137 mincut = newcut;
00138 for (i=0; i<ncon; i++)
00139 minbal[i] = newbal[i];
00140 mincutorder = nswaps;
00141 }
00142 else if (nswaps-mincutorder > limit) {
00143 newcut += (ed[higain]-id[higain]);
00144 saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1);
00145 saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
00146 break;
00147 }
00148
00149 where[higain] = to;
00150 moved[higain] = nswaps;
00151 swaps[nswaps] = higain;
00152
00153 if (ctrl->dbglvl&DBG_MOVEINFO) {
00154 printf("Moved %6d from %d(%d). Gain: %5d, Cut: %5d, NPwgts: ", higain, from, cnum, ed[higain]-id[higain], newcut);
00155 for (i=0; i<ncon; i++)
00156 printf("(%.3f, %.3f) ", npwgts[i], npwgts[ncon+i]);
00157
00158 Compute2WayHLoadImbalanceVec(ncon, npwgts, tpwgts, tvec);
00159 printf(", LB: ");
00160 for (i=0; i<ncon; i++)
00161 printf("%.3f ", tvec[i]);
00162 if (mincutorder == nswaps)
00163 printf(" *\n");
00164 else
00165 printf("\n");
00166 }
00167
00168
00169
00170
00171
00172 SWAP(id[higain], ed[higain], tmp);
00173 if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1])
00174 BNDDelete(nbnd, bndind, bndptr, higain);
00175 if (ed[higain] > 0 && bndptr[higain] == -1)
00176 BNDInsert(nbnd, bndind, bndptr, higain);
00177
00178 for (j=xadj[higain]; j<xadj[higain+1]; j++) {
00179 k = adjncy[j];
00180 oldgain = ed[k]-id[k];
00181
00182 kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
00183 INC_DEC(id[k], ed[k], kwgt);
00184
00185
00186 if (moved[k] == -1)
00187 PQueueUpdate(&parts[qnum[k]][where[k]], k, oldgain, ed[k]-id[k]);
00188
00189
00190 if (ed[k] == 0 && bndptr[k] != -1)
00191 BNDDelete(nbnd, bndind, bndptr, k);
00192 else if (ed[k] > 0 && bndptr[k] == -1)
00193 BNDInsert(nbnd, bndind, bndptr, k);
00194 }
00195
00196 }
00197
00198
00199
00200
00201
00202
00203 for (i=0; i<nswaps; i++)
00204 moved[swaps[i]] = -1;
00205 for (nswaps--; nswaps>mincutorder; nswaps--) {
00206 higain = swaps[nswaps];
00207
00208 to = where[higain] = (where[higain]+1)%2;
00209 SWAP(id[higain], ed[higain], tmp);
00210 if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1])
00211 BNDDelete(nbnd, bndind, bndptr, higain);
00212 else if (ed[higain] > 0 && bndptr[higain] == -1)
00213 BNDInsert(nbnd, bndind, bndptr, higain);
00214
00215 saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
00216 saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+((to+1)%2)*ncon, 1);
00217 for (j=xadj[higain]; j<xadj[higain+1]; j++) {
00218 k = adjncy[j];
00219
00220 kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
00221 INC_DEC(id[k], ed[k], kwgt);
00222
00223 if (bndptr[k] != -1 && ed[k] == 0)
00224 BNDDelete(nbnd, bndind, bndptr, k);
00225 if (bndptr[k] == -1 && ed[k] > 0)
00226 BNDInsert(nbnd, bndind, bndptr, k);
00227 }
00228 }
00229
00230 if (ctrl->dbglvl&DBG_REFINE) {
00231 printf("\tMincut: %6d at %5d, NBND: %6d, NPwgts: [", mincut, mincutorder, nbnd);
00232 for (i=0; i<ncon; i++)
00233 printf("(%.3f, %.3f) ", npwgts[i], npwgts[ncon+i]);
00234 printf("], LB: ");
00235 Compute2WayHLoadImbalanceVec(ncon, npwgts, tpwgts, tvec);
00236 for (i=0; i<ncon; i++)
00237 printf("%.3f ", tvec[i]);
00238 printf("\n");
00239 }
00240
00241 graph->mincut = mincut;
00242 graph->nbnd = nbnd;
00243
00244
00245 for (i=0; i<ncon; i++) {
00246 PQueueFree(ctrl, &parts[i][0]);
00247 PQueueFree(ctrl, &parts[i][1]);
00248 }
00249
00250 idxwspacefree(ctrl, nvtxs);
00251 idxwspacefree(ctrl, nvtxs);
00252 idxwspacefree(ctrl, nvtxs);
00253 idxwspacefree(ctrl, nvtxs);
00254 fwspacefree(ctrl, 2*ncon);
00255 fwspacefree(ctrl, 2*ncon);
00256
00257 }
00258
00259
00260
00261
00262
00263
00264
00265
00266 void SelectQueue3(int ncon, floattype *npwgts, floattype *tpwgts, int *from, int *cnum,
00267 PQueueType queues[MAXNCON][2], floattype *maxwgt)
00268 {
00269 int i, j, maxgain=0;
00270 floattype maxdiff=0.0, diff;
00271
00272 *from = -1;
00273 *cnum = -1;
00274
00275
00276 for (j=0; j<2; j++) {
00277 for (i=0; i<ncon; i++) {
00278 diff = npwgts[j*ncon+i]-maxwgt[j*ncon+i];
00279 if (diff >= maxdiff) {
00280 maxdiff = diff;
00281 *from = j;
00282 *cnum = i;
00283 }
00284 }
00285 }
00286
00287
00288
00289
00290
00291
00292
00293
00294
00295 if (*from != -1 && PQueueGetSize(&queues[*cnum][*from]) == 0) {
00296 for (i=0; i<ncon; i++) {
00297 if (PQueueGetSize(&queues[i][*from]) > 0) {
00298 maxdiff = (npwgts[(*from)*ncon+i] - maxwgt[(*from)*ncon+i]);
00299 *cnum = i;
00300 break;
00301 }
00302 }
00303
00304 for (i++; i<ncon; i++) {
00305 diff = npwgts[(*from)*ncon+i] - maxwgt[(*from)*ncon+i];
00306 if (diff > maxdiff && PQueueGetSize(&queues[i][*from]) > 0) {
00307 maxdiff = diff;
00308 *cnum = i;
00309 }
00310 }
00311 }
00312
00313
00314 if (*from == -1) {
00315 maxgain = -100000;
00316 for (j=0; j<2; j++) {
00317 for (i=0; i<ncon; i++) {
00318 if (PQueueGetSize(&queues[i][j]) > 0 && PQueueGetKey(&queues[i][j]) > maxgain) {
00319 maxgain = PQueueGetKey(&queues[i][0]);
00320 *from = j;
00321 *cnum = i;
00322 }
00323 }
00324 }
00325
00326
00327 }
00328 }