00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #include <parmetislib.h>
00014
00015 #define PE -1
00016
00017
00018
00019
00020
00021
00022 int Moc_Diffusion(CtrlType *ctrl, GraphType *graph, idxtype *vtxdist,
00023 idxtype *where, idxtype *home, WorkSpaceType *wspace, int npasses)
00024 {
00025 int h, i, j;
00026 int nvtxs, nedges, ncon, pass, iter, domain, processor;
00027 int nparts, mype, npes, nlinks, me, you, wsize;
00028 int nvisited, nswaps = -1, tnswaps, done, alldone = -1;
00029 idxtype *rowptr, *colind, *diff_where, *sr_where, *ehome, *map, *rmap;
00030 idxtype *pack, *unpack, *match, *proc2sub, *sub2proc;
00031 idxtype *visited, *gvisited;
00032 floattype *transfer, *npwgts, maxdiff, minflow, maxflow;
00033 floattype lbavg, oldlbavg, ubavg, lbvec[MAXNCON];
00034 floattype diff_flows[MAXNCON], sr_flows[MAXNCON];
00035 floattype diff_lbavg, sr_lbavg, diff_cost, sr_cost;
00036 idxtype *rbuffer, *sbuffer;
00037 int *rcount, *rdispl;
00038 floattype *solution, *load, *workspace;
00039 EdgeType *degrees;
00040 MatrixType matrix;
00041 GraphType *egraph;
00042 RInfoType *rinfo;
00043
00044 if (graph->ncon > 3)
00045 return 0;
00046
00047 nvtxs = graph->nvtxs;
00048 nedges = graph->nedges;
00049 ncon = graph->ncon;
00050
00051 nparts = ctrl->nparts;
00052 mype = ctrl->mype;
00053 npes = ctrl->npes;
00054 ubavg = savg(ncon, ctrl->ubvec);
00055
00056
00057
00058
00059 load = fmalloc(nparts*(2+ncon)+nedges*(1+ncon), "load");
00060 solution = load + nparts;
00061 npwgts = graph->gnpwgts = load + 2*nparts;
00062 matrix.values = load + (2+ncon)*nparts;
00063 transfer = matrix.transfer = load + (2+ncon)*nparts + nedges;
00064
00065 proc2sub = idxmalloc(amax(nparts, npes*2), "Mc_Diffusion: proc2sub");
00066 sub2proc = idxmalloc(nparts*3+nedges+1, "Mc_Diffusion: match");
00067 match = sub2proc + nparts;
00068 rowptr = matrix.rowptr = sub2proc + 2*nparts;
00069 colind = matrix.colind = sub2proc + 3*nparts + 1;
00070
00071 rcount = imalloc(2*npes+1, "Mc_Diffusion: rcount");
00072 rdispl = rcount + npes;
00073
00074 pack = idxmalloc(nvtxs*8, "Mc_Diffusion: pack");
00075 unpack = pack + nvtxs;
00076 rbuffer = pack + 2*nvtxs;
00077 sbuffer = pack + 3*nvtxs;
00078 map = pack + 4*nvtxs;
00079 rmap = pack + 5*nvtxs;
00080 diff_where = pack + 6*nvtxs;
00081 ehome = pack + 7*nvtxs;
00082
00083 wsize = amax(sizeof(floattype)*nparts*6, sizeof(idxtype)*(nvtxs+nparts*2+1));
00084 workspace = (floattype *)GKmalloc(wsize, "Moc_Diffusion: workspace");
00085 degrees = GKmalloc(nedges*sizeof(EdgeType), "Mc_Diffusion: degrees");
00086 rinfo = graph->rinfo = GKmalloc(nvtxs*sizeof(RInfoType), "Mc_Diffusion: rinfo");
00087
00088
00089
00090
00091 matrix.nrows = nparts;
00092 SetUpConnectGraph(graph, &matrix, (idxtype *)workspace);
00093 nlinks = (matrix.nnzs-nparts) / 2;
00094
00095 visited = idxmalloc(matrix.nnzs*2, "visited");
00096 gvisited = visited + matrix.nnzs;
00097
00098 for (pass=0; pass<npasses; pass++) {
00099 sset(matrix.nnzs*ncon, 0.0, transfer);
00100 idxset(matrix.nnzs, 0, gvisited);
00101 idxset(matrix.nnzs, 0, visited);
00102 iter = nvisited = 0;
00103
00104
00105
00106
00107 for (h=0; h<ncon; h++) {
00108 sset(nparts, 0.0, solution);
00109 ComputeLoad(graph, nparts, load, ctrl->tpwgts, h);
00110
00111 lbvec[h] = (load[samax(nparts, load)]+1.0/(floattype)nparts) * (floattype)nparts;
00112
00113 ConjGrad2(&matrix, load, solution, 0.001, workspace);
00114 ComputeTransferVector(ncon, &matrix, solution, transfer, h);
00115 }
00116
00117 oldlbavg = savg(ncon, lbvec);
00118 tnswaps = 0;
00119 maxdiff = 0.0;
00120 for (i=0; i<nparts; i++) {
00121 for (j=rowptr[i]; j<rowptr[i+1]; j++) {
00122 minflow = transfer[j*ncon+samin(ncon, transfer+j*ncon)];
00123 maxflow = transfer[j*ncon+samax(ncon, transfer+j*ncon)];
00124 maxdiff = (maxflow - minflow > maxdiff) ? maxflow - minflow : maxdiff;
00125 }
00126 }
00127
00128 while (nvisited < nlinks) {
00129
00130
00131
00132
00133 idxset(amax(nparts, npes*2), UNMATCHED, proc2sub);
00134 CSR_Match_SHEM(&matrix, match, proc2sub, gvisited, ncon);
00135
00136
00137
00138
00139 idxset(nparts, UNMATCHED, sub2proc);
00140 for (i=0; i<npes*2; i++) {
00141 if (proc2sub[i] == UNMATCHED)
00142 break;
00143
00144 sub2proc[proc2sub[i]] = i/2;
00145 }
00146
00147 iset(npes, 0, rcount);
00148 for (i=0; i<nvtxs; i++) {
00149 domain = where[i];
00150 processor = sub2proc[domain];
00151 if (processor != UNMATCHED) {
00152 rcount[processor]++;
00153 }
00154 }
00155
00156 rdispl[0] = 0;
00157 for (i=1; i<npes+1; i++)
00158 rdispl[i] = rdispl[i-1] + rcount[i-1];
00159
00160 idxset(nvtxs, UNMATCHED, unpack);
00161 for (i=0; i<nvtxs; i++) {
00162 domain = where[i];
00163 processor = sub2proc[domain];
00164 if (processor != UNMATCHED) {
00165 unpack[rdispl[processor]++] = i;
00166 }
00167 }
00168
00169 for (i=npes; i>0; i--)
00170 rdispl[i] = rdispl[i-1];
00171 rdispl[0] = 0;
00172
00173 idxset(nvtxs, UNMATCHED, pack);
00174 for (i=0; i<rdispl[npes]; i++) {
00175 ASSERTS(unpack[i] != UNMATCHED);
00176 domain = where[unpack[i]];
00177 processor = sub2proc[domain];
00178 if (processor != UNMATCHED) {
00179 pack[unpack[i]] = i;
00180 }
00181 }
00182
00183
00184
00185
00186 if (proc2sub[mype*2] != UNMATCHED) {
00187 me = proc2sub[mype*2];
00188 you = proc2sub[mype*2+1];
00189 ASSERTS(me != you);
00190
00191 for (j=rowptr[me]; j<rowptr[me+1]; j++) {
00192 if (colind[j] == you) {
00193 visited[j] = 1;
00194 scopy(ncon, transfer+j*ncon, diff_flows);
00195 break;
00196 }
00197 }
00198
00199 for (j=rowptr[you]; j<rowptr[you+1]; j++) {
00200 if (colind[j] == me) {
00201 visited[j] = 1;
00202 for (h=0; h<ncon; h++)
00203 if (transfer[j*ncon+h] > 0.0)
00204 diff_flows[h] = -1.0 * transfer[j*ncon+h];
00205 break;
00206 }
00207 }
00208
00209 nswaps = 1;
00210 scopy(ncon, diff_flows, sr_flows);
00211
00212 idxset(nvtxs, 0, sbuffer);
00213 for (i=0; i<nvtxs; i++)
00214 if (where[i] == me || where[i] == you)
00215 sbuffer[i] = 1;
00216
00217 egraph = ExtractGraph(ctrl, graph, sbuffer, map, rmap);
00218
00219 if (egraph != NULL) {
00220 idxcopy(egraph->nvtxs, egraph->where, diff_where);
00221 for (j=0; j<egraph->nvtxs; j++)
00222 ehome[j] = home[map[j]];
00223
00224 RedoMyLink(ctrl, egraph, ehome, me, you, sr_flows, &sr_cost, &sr_lbavg);
00225
00226 if (ncon <= 4) {
00227 sr_where = egraph->where;
00228 egraph->where = diff_where;
00229
00230 nswaps = BalanceMyLink(ctrl, egraph, ehome, me, you, diff_flows, maxdiff, &diff_cost, &diff_lbavg, 1.0/(floattype)nvtxs);
00231
00232 if ((sr_lbavg < diff_lbavg &&
00233 (diff_lbavg >= ubavg-1.0 || sr_cost == diff_cost)) ||
00234 (sr_lbavg < ubavg-1.0 && sr_cost < diff_cost)) {
00235 for (i=0; i<egraph->nvtxs; i++)
00236 where[map[i]] = sr_where[i];
00237 }
00238 else {
00239 for (i=0; i<egraph->nvtxs; i++)
00240 where[map[i]] = diff_where[i];
00241 }
00242 }
00243 else {
00244 for (i=0; i<egraph->nvtxs; i++)
00245 where[map[i]] = egraph->where[i];
00246 }
00247
00248 GKfree((void **)&egraph->xadj, (void **)&egraph->nvwgt, (void **)&egraph->adjncy, LTERM);
00249 GKfree((void **)&egraph, LTERM);
00250 }
00251
00252
00253
00254
00255 idxset(nvtxs, UNMATCHED, sbuffer);
00256 for (i=0; i<nvtxs; i++) {
00257 domain = where[i];
00258 if (domain == you || domain == me) {
00259 sbuffer[pack[i]] = where[i];
00260 }
00261 }
00262 }
00263
00264
00265
00266
00267 MPI_Allgatherv((void *)&sbuffer[rdispl[mype]], rcount[mype], IDX_DATATYPE, (void *)rbuffer, rcount, rdispl, IDX_DATATYPE, ctrl->comm);
00268
00269
00270
00271
00272
00273 for (i=0; i<rdispl[npes]; i++) {
00274 if (rbuffer[i] != UNMATCHED) {
00275 where[unpack[i]] = rbuffer[i];
00276 }
00277 }
00278
00279
00280
00281
00282
00283 MPI_Allreduce((void *)visited, (void *)gvisited, matrix.nnzs,
00284 IDX_DATATYPE, MPI_MAX, ctrl->comm);
00285 nvisited = idxsum(matrix.nnzs, gvisited)/2;
00286 tnswaps += GlobalSESum(ctrl, nswaps);
00287
00288 if (iter++ == NGD_PASSES)
00289 break;
00290 }
00291
00292
00293
00294
00295 Moc_ComputeSerialPartitionParams(graph, nparts, degrees);
00296 Moc_SerialKWayAdaptRefine(graph, nparts, home, ctrl->ubvec, 10);
00297
00298
00299
00300
00301
00302 for (h=0; h<ncon; h++) {
00303 lbvec[h] = (floattype)(nparts) *
00304 npwgts[samax_strd(nparts,npwgts+h,ncon)*ncon+h];
00305 }
00306 lbavg = savg(ncon, lbvec);
00307
00308 done = 0;
00309 if (
00310 tnswaps == 0 ||
00311 lbavg >= oldlbavg ||
00312 lbavg <= ubavg + 0.035
00313 )
00314 done = 1;
00315
00316 alldone = GlobalSEMax(ctrl, done);
00317 if (alldone == 1)
00318 break;
00319 }
00320
00321
00322
00323
00324
00325
00326
00327
00328
00329
00330
00331
00332
00333
00334
00335
00336
00337
00338
00339
00340
00341
00342
00343
00344
00345
00346
00347
00348
00349
00350
00351
00352
00353 GKfree((void **)&load, (void **)&proc2sub, (void **)&sub2proc, (void **)&rcount, LTERM);
00354 GKfree((void **)&pack, (void **)&workspace, (void **)°rees, (void **)&rinfo, LTERM);
00355 GKfree((void **)&visited, LTERM);
00356 graph->gnpwgts = NULL;
00357 graph->rinfo = NULL;
00358
00359 return 0;
00360 }
00361
00362
00363
00364
00365
00366 GraphType *ExtractGraph(CtrlType *ctrl, GraphType *graph, idxtype *indicator,
00367 idxtype *map, idxtype *rmap)
00368 {
00369 int h, i, j;
00370 int nvtxs, envtxs, enedges, ncon;
00371 int vtx, count;
00372 idxtype *xadj, *vsize, *adjncy, *adjwgt, *where;
00373 idxtype *exadj, *evsize, *eadjncy, *eadjwgt, *ewhere;
00374 floattype *nvwgt, *envwgt;
00375 GraphType *egraph;
00376
00377 nvtxs = graph->nvtxs;
00378 ncon = graph->ncon;
00379 xadj = graph->xadj;
00380 nvwgt = graph->nvwgt;
00381 vsize = graph->vsize;
00382 adjncy = graph->adjncy;
00383 adjwgt = graph->adjwgt;
00384 where = graph->where;
00385
00386 count = 0;
00387 for (i=0; i<nvtxs; i++) {
00388 if (indicator[i] == 1) {
00389 map[count] = i;
00390 rmap[i] = count;
00391 count++;
00392 }
00393 }
00394
00395 if (count == 0) {
00396 return NULL;
00397 }
00398
00399
00400
00401
00402 egraph = CreateGraph();
00403 envtxs = egraph->nvtxs = count;
00404 egraph->ncon = graph->ncon;
00405
00406 exadj = egraph->xadj = idxmalloc(envtxs*3+1, "exadj");
00407 ewhere = egraph->where = exadj + envtxs + 1;
00408 evsize = egraph->vsize = exadj + 2*envtxs + 1;
00409
00410 envwgt = egraph->nvwgt = fmalloc(envtxs*ncon, "envwgt");
00411
00412
00413
00414
00415 idxset(envtxs+1, 0, exadj);
00416 for (i=0; i<envtxs; i++) {
00417 vtx = map[i];
00418
00419 ewhere[i] = where[vtx];
00420 for (h=0; h<ncon; h++)
00421 envwgt[i*ncon+h] = nvwgt[vtx*ncon+h];
00422
00423 if (ctrl->partType == ADAPTIVE_PARTITION || ctrl->partType == REFINE_PARTITION)
00424 evsize[i] = vsize[vtx];
00425
00426 for (j=xadj[vtx]; j<xadj[vtx+1]; j++)
00427 if (indicator[adjncy[j]] == 1)
00428 exadj[i]++;
00429
00430 }
00431 MAKECSR(i, envtxs, exadj);
00432
00433
00434
00435
00436 enedges = egraph->nedges = exadj[envtxs];
00437 eadjncy = egraph->adjncy = idxmalloc(enedges*2, "eadjncy");
00438 eadjwgt = egraph->adjwgt = eadjncy + enedges;
00439
00440 for (i=0; i<envtxs; i++) {
00441 vtx = map[i];
00442 for (j=xadj[vtx]; j<xadj[vtx+1]; j++) {
00443 if (indicator[adjncy[j]] == 1) {
00444 eadjncy[exadj[i]] = rmap[adjncy[j]];
00445 eadjwgt[exadj[i]++] = adjwgt[j];
00446 }
00447 }
00448 }
00449
00450 for (i=envtxs; i>0; i--)
00451 exadj[i] = exadj[i-1];
00452 exadj[0] = 0;
00453
00454 return egraph;
00455 }