00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014 #include <parmetislib.h>
00015
00016
00017
00018
00019 floattype WavefrontDiffusion(CtrlType *ctrl, GraphType *graph, idxtype *home)
00020 {
00021 int ii, i, j, k, l, nvtxs, nedges, nparts;
00022 int from, to, edge, done, nswaps, noswaps, totalv, wsize;
00023 int npasses, first, second, third, mind, maxd;
00024 idxtype *xadj, *adjncy, *adjwgt, *where, *perm;
00025 idxtype *rowptr, *colind, *ed, *psize;
00026 floattype *transfer, *tmpvec;
00027 floattype balance = -1.0, *load, *solution, *workspace;
00028 floattype *nvwgt, *npwgts, flowFactor, cost, ubfactor;
00029 MatrixType matrix;
00030 KeyValueType *cand;
00031 int ndirty, nclean, dptr, clean;
00032
00033 nvtxs = graph->nvtxs;
00034 nedges = graph->nedges;
00035 xadj = graph->xadj;
00036 nvwgt = graph->nvwgt;
00037 adjncy = graph->adjncy;
00038 adjwgt = graph->adjwgt;
00039 where = graph->where;
00040 nparts = ctrl->nparts;
00041 ubfactor = ctrl->ubvec[0];
00042 matrix.nrows = nparts;
00043
00044 flowFactor = 0.35;
00045 flowFactor = (ctrl->mype == 2) ? 0.50 : flowFactor;
00046 flowFactor = (ctrl->mype == 3) ? 0.75 : flowFactor;
00047 flowFactor = (ctrl->mype == 4) ? 1.00 : flowFactor;
00048
00049
00050 solution = fmalloc(4*nparts+2*nedges, "WavefrontDiffusion: solution");
00051 tmpvec = solution + nparts;
00052 npwgts = solution + 2*nparts;
00053 load = solution + 3*nparts;
00054 matrix.values = solution + 4*nparts;
00055 transfer = matrix.transfer = solution + 4*nparts + nedges;
00056
00057 perm = idxmalloc(2*nvtxs+2*nparts+nedges+1, "WavefrontDiffusion: perm");
00058 ed = perm + nvtxs;
00059 psize = perm + 2*nvtxs;
00060 rowptr = matrix.rowptr = perm + 2*nvtxs + nparts;
00061 colind = matrix.colind = perm + 2*nvtxs + 2*nparts + 1;
00062
00063 wsize = amax(sizeof(floattype)*nparts*6, sizeof(idxtype)*(nvtxs+nparts*2+1));
00064 workspace = (floattype *)GKmalloc(wsize, "WavefrontDiffusion: workspace");
00065 cand = (KeyValueType *)GKmalloc(nvtxs*sizeof(KeyValueType), "WavefrontDiffusion: cand");
00066
00067
00068
00069
00070
00071 idxset(nparts, 0, psize);
00072 for (i=0; i<nvtxs; i++)
00073 psize[where[i]]++;
00074
00075 mind = idxamin(nparts, psize);
00076 maxd = idxamax(nparts, psize);
00077 if (psize[mind] == 0) {
00078 for (i=0; i<nvtxs; i++) {
00079 k = (RandomInRange(nvtxs)+i)%nvtxs;
00080 if (where[k] == maxd) {
00081 where[k] = mind;
00082 psize[mind]++;
00083 psize[maxd]--;
00084 break;
00085 }
00086 }
00087 }
00088 idxset(nvtxs, 0, ed);
00089 sset(nparts, 0.0, npwgts);
00090 for (i=0; i<nvtxs; i++) {
00091 npwgts[where[i]] += nvwgt[i];
00092 for (j=xadj[i]; j<xadj[i+1]; j++)
00093 ed[i] += (where[i] != where[adjncy[j]] ? adjwgt[j] : 0);
00094 }
00095
00096 ComputeLoad(graph, nparts, load, ctrl->tpwgts, 0);
00097 done = 0;
00098
00099 npasses = amin(nparts/2, NGD_PASSES);
00100 for (l=0; l<npasses; l++) {
00101
00102 nswaps = 0;
00103
00104
00105
00106
00107 SetUpConnectGraph(graph, &matrix, (idxtype *)workspace);
00108
00109
00110 for(i=0; i<matrix.nrows; i++) {
00111 if (matrix.rowptr[i]+1 == matrix.rowptr[i+1]) {
00112 cost = (floattype)(ctrl->mype);
00113 goto CleanUpAndExit;
00114 }
00115 }
00116
00117 ConjGrad2(&matrix, load, solution, 0.001, workspace);
00118 ComputeTransferVector(1, &matrix, solution, transfer, 0);
00119
00120 GetThreeMax(nparts, load, &first, &second, &third);
00121
00122 if (l%3 == 0) {
00123 FastRandomPermute(nvtxs, perm, 1);
00124 }
00125 else {
00126
00127
00128
00129 ndirty = 0;
00130 for (i=0; i<nvtxs; i++)
00131 if (where[i] != home[i])
00132 ndirty++;
00133
00134 dptr = 0;
00135 for (i=0; i<nvtxs; i++)
00136 if (where[i] != home[i])
00137 perm[dptr++] = i;
00138 else
00139 perm[ndirty++] = i;
00140
00141 ASSERT(ctrl, ndirty == nvtxs);
00142 ndirty = dptr;
00143 nclean = nvtxs-dptr;
00144 FastRandomPermute(ndirty, perm, 0);
00145 FastRandomPermute(nclean, perm+ndirty, 0);
00146 }
00147
00148 if (ctrl->mype == 0) {
00149 for (j=nvtxs, k=0, ii=0; ii<nvtxs; ii++) {
00150 i = perm[ii];
00151 if (ed[i] != 0) {
00152 cand[k].key = -ed[i];
00153 cand[k++].val = i;
00154 }
00155 else {
00156 cand[--j].key = 0;
00157 cand[j].val = i;
00158 }
00159 }
00160 ikeysort(k, cand);
00161 }
00162
00163 for (ii=0; ii<nvtxs/3; ii++) {
00164 i = (ctrl->mype == 0) ? cand[ii].val : perm[ii];
00165 from = where[i];
00166
00167
00168 if (psize[from] == 1)
00169 continue;
00170
00171 clean = (from == home[i]) ? 1 : 0;
00172
00173
00174 if (from != first && from != second && from != third && clean)
00175 continue;
00176
00177
00178 for (j=rowptr[from]+1; j<rowptr[from+1]; j++)
00179 tmpvec[colind[j]] = transfer[j];
00180
00181 for (j=xadj[i]; j<xadj[i+1]; j++) {
00182 to = where[adjncy[j]];
00183 if (from != to) {
00184 if (tmpvec[to] > (flowFactor * nvwgt[i])) {
00185 tmpvec[to] -= nvwgt[i];
00186 INC_DEC(psize[to], psize[from], 1);
00187 INC_DEC(npwgts[to], npwgts[from], nvwgt[i]);
00188 INC_DEC(load[to], load[from], nvwgt[i]);
00189 where[i] = to;
00190 nswaps++;
00191
00192
00193 ed[i] = 0;
00194 for (k=xadj[i]; k<xadj[i+1]; k++) {
00195 edge = adjncy[k];
00196 ed[i] += (to != where[edge] ? adjwgt[k] : 0);
00197
00198 if (where[edge] == from)
00199 ed[edge] += adjwgt[k];
00200 if (where[edge] == to)
00201 ed[edge] -= adjwgt[k];
00202 }
00203 break;
00204 }
00205 }
00206 }
00207
00208
00209 for (j=rowptr[from]+1; j<rowptr[from+1]; j++) {
00210 transfer[j] = tmpvec[colind[j]];
00211 tmpvec[colind[j]] = 0.0;
00212 }
00213 ASSERTS(fabs(ssum(nparts, tmpvec)) < .0001)
00214 }
00215
00216 if (l % 2 == 1) {
00217 balance = npwgts[samax(nparts, npwgts)] * (floattype)nparts;
00218 if (balance < ubfactor + 0.035)
00219 done = 1;
00220
00221 if (GlobalSESum(ctrl, done) > 0)
00222 break;
00223
00224 noswaps = (nswaps > 0) ? 0 : 1;
00225 if (GlobalSESum(ctrl, noswaps) > ctrl->npes/2)
00226 break;
00227
00228 }
00229 }
00230
00231 graph->mincut = ComputeSerialEdgeCut(graph);
00232 totalv = Mc_ComputeSerialTotalV(graph, home);
00233 cost = ctrl->ipc_factor * (floattype)graph->mincut + ctrl->redist_factor * (floattype)totalv;
00234
00235
00236 CleanUpAndExit:
00237 GKfree((void **)&solution, (void **)&perm, (void **)&workspace, (void **)&cand, LTERM);
00238
00239 return cost;
00240 }
00241