00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014 #include <parmetislib.h>
00015
00016 #define ProperSide(c, from, other) \
00017 (((c) == 0 && (from)-(other) < 0) || ((c) == 1 && (from)-(other) > 0))
00018
00019
00020
00021
00022 void Moc_KWayBalance(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace, int npasses)
00023 {
00024 int h, i, ii, iii, j, k, c;
00025 int pass, nvtxs, nedges, ncon;
00026 int nmoves, nmoved, nswaps;
00027
00028 int me, firstvtx, lastvtx, yourlastvtx;
00029 int from, to = -1, oldto, oldcut, mydomain, yourdomain, imbalanced;
00030 int npes = ctrl->npes, mype = ctrl->mype, nparts = ctrl->nparts;
00031 int nlupd, nsupd, nnbrs, nchanged;
00032 idxtype *xadj, *ladjncy, *adjwgt, *vtxdist;
00033 idxtype *where, *tmp_where, *moved;
00034 floattype *lnpwgts, *gnpwgts;
00035 idxtype *update, *supdate, *rupdate, *pe_updates;
00036 idxtype *changed, *perm, *pperm, *htable;
00037 idxtype *peind, *recvptr, *sendptr;
00038 KeyValueType *swchanges, *rwchanges;
00039 RInfoType *rinfo, *myrinfo, *tmp_myrinfo, *tmp_rinfo;
00040 EdgeType *tmp_edegrees, *my_edegrees, *your_edegrees;
00041 floattype lbvec[MAXNCON], *nvwgt, *badmaxpwgt, *ubvec, *tpwgts, lbavg, ubavg;
00042 int *nupds_pe;
00043
00044
00045 IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->KWayTmr));
00046
00047
00048
00049
00050 nvtxs = graph->nvtxs;
00051 nedges = graph->nedges;
00052 ncon = graph->ncon;
00053
00054 vtxdist = graph->vtxdist;
00055 xadj = graph->xadj;
00056 ladjncy = graph->adjncy;
00057 adjwgt = graph->adjwgt;
00058
00059 firstvtx = vtxdist[mype];
00060 lastvtx = vtxdist[mype+1];
00061
00062 where = graph->where;
00063 rinfo = graph->rinfo;
00064 lnpwgts = graph->lnpwgts;
00065 gnpwgts = graph->gnpwgts;
00066 ubvec = ctrl->ubvec;
00067 tpwgts = ctrl->tpwgts;
00068
00069 nnbrs = graph->nnbrs;
00070 peind = graph->peind;
00071 recvptr = graph->recvptr;
00072 sendptr = graph->sendptr;
00073
00074 changed = idxmalloc(nvtxs, "KWR: changed");
00075 rwchanges = wspace->pairs;
00076 swchanges = rwchanges + recvptr[nnbrs];
00077
00078
00079
00080
00081 perm = idxmalloc(nvtxs, "KWR: perm");
00082 pperm = idxmalloc(nparts, "KWR: pperm");
00083
00084 update = idxmalloc(nvtxs, "KWR: update");
00085 supdate = wspace->indices;
00086 rupdate = supdate + recvptr[nnbrs];
00087 nupds_pe = imalloc(npes, "KWR: nupds_pe");
00088 htable = idxsmalloc(nvtxs+graph->nrecv, 0, "KWR: lhtable");
00089 badmaxpwgt = fmalloc(nparts*ncon, "badmaxpwgt");
00090
00091 for (i=0; i<nparts; i++) {
00092 for (h=0; h<ncon; h++) {
00093 badmaxpwgt[i*ncon+h] = ubvec[h]*tpwgts[i*ncon+h];
00094 }
00095 }
00096
00097 moved = idxmalloc(nvtxs, "KWR: moved");
00098 tmp_where = idxmalloc(nvtxs+graph->nrecv, "KWR: tmp_where");
00099 tmp_rinfo = (RInfoType *)GKmalloc(sizeof(RInfoType)*nvtxs, "KWR: tmp_rinfo");
00100 tmp_edegrees = (EdgeType *)GKmalloc(sizeof(EdgeType)*nedges, "KWR: tmp_edegrees");
00101
00102 idxcopy(nvtxs+graph->nrecv, where, tmp_where);
00103 for (i=0; i<nvtxs; i++) {
00104 tmp_rinfo[i].id = rinfo[i].id;
00105 tmp_rinfo[i].ed = rinfo[i].ed;
00106 tmp_rinfo[i].ndegrees = rinfo[i].ndegrees;
00107 tmp_rinfo[i].degrees = tmp_edegrees+xadj[i];
00108
00109 for (j=0; j<rinfo[i].ndegrees; j++) {
00110 tmp_rinfo[i].degrees[j].edge = rinfo[i].degrees[j].edge;
00111 tmp_rinfo[i].degrees[j].ewgt = rinfo[i].degrees[j].ewgt;
00112 }
00113 }
00114
00115 nswaps = 0;
00116
00117
00118
00119 for (pass=0; pass<npasses; pass++) {
00120 oldcut = graph->mincut;
00121 if (mype == 0)
00122 RandomPermute(nparts, pperm, 1);
00123 MPI_Bcast((void *)pperm, nparts, IDX_DATATYPE, 0, ctrl->comm);
00124 FastRandomPermute(nvtxs, perm, 1);
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147
00148
00149
00150 Moc_ComputeParallelBalance(ctrl, graph, graph->where, lbvec);
00151 ubavg = savg(ncon, ubvec);
00152 lbavg = savg(ncon, lbvec);
00153 imbalanced = (lbavg > ubavg) ? 1 : 0;
00154
00155 for (c=0; c<2; c++) {
00156 nmoved = 0;
00157
00158
00159
00160
00161 for (iii=0; iii<nvtxs; iii++) {
00162 i = perm[iii];
00163 from = tmp_where[i];
00164 nvwgt = graph->nvwgt+i*ncon;
00165
00166 for (h=0; h<ncon; h++)
00167 if (fabs(nvwgt[h]-gnpwgts[from*ncon+h]) < SMALLFLOAT)
00168 break;
00169
00170 if (h < ncon) {
00171 continue;
00172 }
00173
00174
00175 if (tmp_rinfo[i].ed >= tmp_rinfo[i].id) {
00176 my_edegrees = tmp_rinfo[i].degrees;
00177
00178 for (k=0; k<tmp_rinfo[i].ndegrees; k++) {
00179 to = my_edegrees[k].edge;
00180 if (ProperSide(c, pperm[from], pperm[to]) &&
00181 IsHBalanceBetterFT(ncon, gnpwgts+from*ncon, gnpwgts+to*ncon, nvwgt, ubvec)) {
00182 break;
00183 }
00184 }
00185 oldto = to;
00186
00187
00188 if (k < tmp_rinfo[i].ndegrees) {
00189 for (j=k+1; j<tmp_rinfo[i].ndegrees; j++) {
00190 to = my_edegrees[j].edge;
00191 if (ProperSide(c, pperm[from], pperm[to]) &&
00192 IsHBalanceBetterTT(ncon, gnpwgts+oldto*ncon, gnpwgts+to*ncon, nvwgt, ubvec)){
00193 k = j;
00194 oldto = my_edegrees[k].edge;
00195 }
00196 }
00197 to = oldto;
00198
00199 if (iii % npes == 0) {
00200
00201
00202
00203 tmp_where[i] = to;
00204 moved[nmoved++] = i;
00205 for (h=0; h<ncon; h++) {
00206 lnpwgts[to*ncon+h] += nvwgt[h];
00207 lnpwgts[from*ncon+h] -= nvwgt[h];
00208 gnpwgts[to*ncon+h] += nvwgt[h];
00209 gnpwgts[from*ncon+h] -= nvwgt[h];
00210 }
00211
00212 tmp_rinfo[i].ed += tmp_rinfo[i].id-my_edegrees[k].ewgt;
00213 SWAP(tmp_rinfo[i].id, my_edegrees[k].ewgt, j);
00214 if (my_edegrees[k].ewgt == 0) {
00215 tmp_rinfo[i].ndegrees--;
00216 my_edegrees[k].edge = my_edegrees[tmp_rinfo[i].ndegrees].edge;
00217 my_edegrees[k].ewgt = my_edegrees[tmp_rinfo[i].ndegrees].ewgt;
00218 }
00219 else {
00220 my_edegrees[k].edge = from;
00221 }
00222
00223
00224 for (j=xadj[i]; j<xadj[i+1]; j++) {
00225
00226 if (ladjncy[j] >= nvtxs)
00227 continue;
00228
00229 me = ladjncy[j];
00230 mydomain = tmp_where[me];
00231
00232 myrinfo = tmp_rinfo+me;
00233 your_edegrees = myrinfo->degrees;
00234
00235 if (mydomain == from) {
00236 INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]);
00237 }
00238 else {
00239 if (mydomain == to) {
00240 INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]);
00241 }
00242 }
00243
00244
00245 if (mydomain != from) {
00246 for (k=0; k<myrinfo->ndegrees; k++) {
00247 if (your_edegrees[k].edge == from) {
00248 if (your_edegrees[k].ewgt == adjwgt[j]) {
00249 myrinfo->ndegrees--;
00250 your_edegrees[k].edge = your_edegrees[myrinfo->ndegrees].edge;
00251 your_edegrees[k].ewgt = your_edegrees[myrinfo->ndegrees].ewgt;
00252 }
00253 else {
00254 your_edegrees[k].ewgt -= adjwgt[j];
00255 }
00256 break;
00257 }
00258 }
00259 }
00260
00261
00262 if (mydomain != to) {
00263 for (k=0; k<myrinfo->ndegrees; k++) {
00264 if (your_edegrees[k].edge == to) {
00265 your_edegrees[k].ewgt += adjwgt[j];
00266 break;
00267 }
00268 }
00269 if (k == myrinfo->ndegrees) {
00270 your_edegrees[myrinfo->ndegrees].edge = to;
00271 your_edegrees[myrinfo->ndegrees++].ewgt = adjwgt[j];
00272 }
00273 }
00274 }
00275 }
00276 }
00277 }
00278 }
00279
00280
00281
00282
00283 nlupd = nsupd = nmoves = nchanged = 0;
00284 for (iii=0; iii<nmoved; iii++) {
00285 i = moved[iii];
00286 if (i == -1)
00287 continue;
00288
00289 where[i] = tmp_where[i];
00290
00291
00292 if (htable[i] == 0) {
00293
00294 htable[i] = 1;
00295 update[nlupd++] = i;
00296 }
00297
00298
00299 for (j=xadj[i]; j<xadj[i+1]; j++) {
00300 k = ladjncy[j];
00301 if (htable[k] == 0) {
00302 htable[k] = 1;
00303 if (k<nvtxs)
00304 update[nlupd++] = k;
00305 else
00306 supdate[nsupd++] = k;
00307 }
00308 }
00309 nmoves++;
00310 nswaps++;
00311
00312
00313 for (k=0; k<rinfo[i].ndegrees; k++)
00314 if (rinfo[i].degrees[k].edge == to)
00315 break;
00316
00317 if (graph->pexadj[i+1]-graph->pexadj[i] > 0)
00318 changed[nchanged++] = i;
00319 }
00320
00321
00322 CommChangedInterfaceData(ctrl, graph, nchanged, changed, where,
00323 swchanges, rwchanges, wspace->pv4);
00324
00325
00326 IFSET(ctrl->dbglvl, DBG_RMOVEINFO,
00327 rprintf(ctrl, "\t[%d %d], [%.4f], [%d %d %d]\n",
00328 pass, c, badmaxpwgt[0],
00329 GlobalSESum(ctrl, nmoves),
00330 GlobalSESum(ctrl, nsupd),
00331 GlobalSESum(ctrl, nlupd)));
00332
00333
00334
00335
00336
00337
00338 for (i=0; i<nnbrs; i++) {
00339 MPI_Irecv((void *)(rupdate+sendptr[i]), sendptr[i+1]-sendptr[i], IDX_DATATYPE,
00340 peind[i], 1, ctrl->comm, ctrl->rreq+i);
00341 }
00342
00343
00344 for (i=0; i<nsupd; i++) {
00345 htable[supdate[i]] = 0;
00346 supdate[i] = graph->imap[supdate[i]];
00347 }
00348 iidxsort(nsupd, supdate);
00349
00350 for (j=i=0; i<nnbrs; i++) {
00351 yourlastvtx = vtxdist[peind[i]+1];
00352 for (k=j; k<nsupd && supdate[k] < yourlastvtx; k++);
00353 MPI_Isend((void *)(supdate+j), k-j, IDX_DATATYPE, peind[i], 1, ctrl->comm, ctrl->sreq+i);
00354 j = k;
00355 }
00356
00357
00358 MPI_Waitall(nnbrs, ctrl->rreq, ctrl->statuses);
00359 for (i=0; i<nnbrs; i++)
00360 MPI_Get_count(ctrl->statuses+i, IDX_DATATYPE, nupds_pe+i);
00361 MPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses);
00362
00363
00364
00365
00366
00367 for (i=0; i<nnbrs; i++) {
00368 pe_updates = rupdate+sendptr[i];
00369 for (j=0; j<nupds_pe[i]; j++) {
00370 k = pe_updates[j];
00371 if (htable[k-firstvtx] == 0) {
00372 htable[k-firstvtx] = 1;
00373 update[nlupd++] = k-firstvtx;
00374 }
00375 }
00376 }
00377
00378
00379
00380
00381
00382 for (ii=0; ii<nlupd; ii++) {
00383 i = update[ii];
00384 ASSERT(ctrl, htable[i] == 1);
00385
00386 htable[i] = 0;
00387
00388 mydomain = where[i];
00389 myrinfo = rinfo+i;
00390 tmp_myrinfo = tmp_rinfo+i;
00391 my_edegrees = myrinfo->degrees;
00392 your_edegrees = tmp_myrinfo->degrees;
00393
00394 graph->lmincut -= myrinfo->ed;
00395 myrinfo->ndegrees = 0;
00396 myrinfo->id = 0;
00397 myrinfo->ed = 0;
00398
00399 for (j=xadj[i]; j<xadj[i+1]; j++) {
00400 yourdomain = where[ladjncy[j]];
00401 if (mydomain != yourdomain) {
00402 myrinfo->ed += adjwgt[j];
00403
00404 for (k=0; k<myrinfo->ndegrees; k++) {
00405 if (my_edegrees[k].edge == yourdomain) {
00406 my_edegrees[k].ewgt += adjwgt[j];
00407 your_edegrees[k].ewgt += adjwgt[j];
00408 break;
00409 }
00410 }
00411 if (k == myrinfo->ndegrees) {
00412 my_edegrees[k].edge = yourdomain;
00413 my_edegrees[k].ewgt = adjwgt[j];
00414 your_edegrees[k].edge = yourdomain;
00415 your_edegrees[k].ewgt = adjwgt[j];
00416 myrinfo->ndegrees++;
00417 }
00418 ASSERT(ctrl, myrinfo->ndegrees <= xadj[i+1]-xadj[i]);
00419 ASSERT(ctrl, tmp_myrinfo->ndegrees <= xadj[i+1]-xadj[i]);
00420
00421 }
00422 else {
00423 myrinfo->id += adjwgt[j];
00424 }
00425 }
00426 graph->lmincut += myrinfo->ed;
00427
00428 tmp_myrinfo->id = myrinfo->id;
00429 tmp_myrinfo->ed = myrinfo->ed;
00430 tmp_myrinfo->ndegrees = myrinfo->ndegrees;
00431 }
00432
00433
00434 MPI_Allreduce((void *)lnpwgts, (void *)gnpwgts, nparts*ncon,
00435 MPI_DOUBLE, MPI_SUM, ctrl->comm);
00436 }
00437 graph->mincut = GlobalSESum(ctrl, graph->lmincut)/2;
00438
00439 if (graph->mincut == oldcut)
00440 break;
00441 }
00442
00443
00444
00445
00446
00447
00448
00449 GKfree((void **)&badmaxpwgt, (void **)&update, (void **)&nupds_pe, (void **)&htable, LTERM);
00450 GKfree((void **)&changed, (void **)&pperm, (void **)&perm, (void **)&moved, LTERM);
00451 GKfree((void **)&tmp_where, (void **)&tmp_rinfo, (void **)&tmp_edegrees, LTERM);
00452
00453 IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->KWayTmr));
00454 }
00455
00456