00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014 #include <parmetislib.h>
00015 #define PE 0
00016
00017
00018
00019
00020 int BalanceMyLink(CtrlType *ctrl, GraphType *graph, idxtype *home, int me,
00021 int you, floattype *flows, floattype maxdiff, floattype *diff_cost, floattype *diff_lbavg,
00022 floattype avgvwgt)
00023 {
00024 int h, i, ii, j, k;
00025 int nvtxs, ncon;
00026 int nqueues, minval, maxval, higain, vtx, edge, totalv;
00027 int from, to, qnum, index, nchanges, cut, tmp;
00028 int pass, nswaps, nmoves, multiplier;
00029 idxtype *xadj, *vsize, *adjncy, *adjwgt, *where, *ed, *id;
00030 idxtype *hval, *nvpq, *inq, *map, *rmap, *ptr, *myqueue, *changes;
00031 floattype *nvwgt, lbvec[MAXNCON], pwgts[MAXNCON*2], tpwgts[MAXNCON*2], my_wgt[MAXNCON];
00032 floattype newgain, oldgain = 0.0;
00033 floattype lbavg, bestflow, mycost;
00034 floattype ipc_factor, redist_factor, ftmp;
00035 FPQueueType *queues;
00036 int mype;
00037 MPI_Comm_rank(MPI_COMM_WORLD, &mype);
00038
00039 nvtxs = graph->nvtxs;
00040 ncon = graph->ncon;
00041 xadj = graph->xadj;
00042 nvwgt = graph->nvwgt;
00043 vsize = graph->vsize;
00044 adjncy = graph->adjncy;
00045 adjwgt = graph->adjwgt;
00046 where = graph->where;
00047 ipc_factor = ctrl->ipc_factor;
00048 redist_factor = ctrl->redist_factor;
00049
00050 hval = idxmalloc(nvtxs*7, "hval");
00051 id = hval + nvtxs;
00052 ed = hval + nvtxs*2;
00053 map = hval + nvtxs*3;
00054 rmap = hval + nvtxs*4;
00055 myqueue = hval + nvtxs*5;
00056 changes = hval + nvtxs*6;
00057
00058 sset(ncon*2, 0.0, pwgts);
00059 for (h=0; h<ncon; h++) {
00060 tpwgts[h] = -1.0 * flows[h];
00061 tpwgts[ncon+h] = flows[h];
00062 }
00063
00064 for (i=0; i<nvtxs; i++) {
00065 if (where[i] == me) {
00066 for (h=0; h<ncon; h++) {
00067 tpwgts[h] += nvwgt[i*ncon+h];
00068 pwgts[h] += nvwgt[i*ncon+h];
00069 }
00070 }
00071 else {
00072 ASSERTS(where[i] == you);
00073 for (h=0; h<ncon; h++) {
00074 tpwgts[ncon+h] += nvwgt[i*ncon+h];
00075 pwgts[ncon+h] += nvwgt[i*ncon+h];
00076 }
00077 }
00078 }
00079
00080
00081 for (h=0; h<ncon; h++) {
00082 if (tpwgts[h] < 0.0) {
00083 tpwgts[ncon+h] += tpwgts[h];
00084 tpwgts[h] = 0.0;
00085 }
00086
00087 if (tpwgts[ncon+h] < 0.0) {
00088 tpwgts[h] += tpwgts[ncon+h];
00089 tpwgts[ncon+h] = 0.0;
00090 }
00091 }
00092
00093
00094
00095
00096 minval = maxval = 0;
00097 multiplier = 1;
00098 for (i=0; i<ncon; i++) {
00099 multiplier *= (i+1);
00100 maxval += i*multiplier;
00101 minval += (ncon-1-i)*multiplier;
00102 }
00103
00104 nqueues = maxval-minval+1;
00105 nvpq = idxsmalloc(nqueues, 0, "nvpq");
00106 ptr = idxmalloc(nqueues+1, "ptr");
00107 inq = idxmalloc(nqueues*2, "inq");
00108 queues = (FPQueueType *)(GKmalloc(sizeof(FPQueueType)*nqueues*2, "queues"));
00109
00110 for (i=0; i<nvtxs; i++)
00111 hval[i] = Moc_HashVwgts(ncon, nvwgt+i*ncon) - minval;
00112
00113 for (i=0; i<nvtxs; i++)
00114 nvpq[hval[i]]++;
00115
00116 ptr[0] = 0;
00117 for (i=0; i<nqueues; i++)
00118 ptr[i+1] = ptr[i] + nvpq[i];
00119
00120 for (i=0; i<nvtxs; i++) {
00121 map[i] = ptr[hval[i]];
00122 rmap[ptr[hval[i]]++] = i;
00123 }
00124
00125 for (i=nqueues-1; i>0; i--)
00126 ptr[i] = ptr[i-1];
00127 ptr[0] = 0;
00128
00129
00130 for (i=0; i<nqueues; i++)
00131 if (nvpq[i] > 0) {
00132 FPQueueInit(queues+i, nvpq[i]);
00133 FPQueueInit(queues+i+nqueues, nvpq[i]);
00134 }
00135
00136
00137 idxset(nvtxs, 0, id);
00138 idxset(nvtxs, 0, ed);
00139 for (j=0; j<nvtxs; j++)
00140 for (k=xadj[j]; k<xadj[j+1]; k++)
00141 if (where[adjncy[k]] == where[j])
00142 id[j] += adjwgt[k];
00143 else
00144 ed[j] += adjwgt[k];
00145
00146 nswaps = 0;
00147 for (pass=0; pass<N_MOC_BAL_PASSES; pass++) {
00148 idxset(nvtxs, -1, myqueue);
00149 idxset(nqueues*2, 0, inq);
00150
00151
00152 for (j=0; j<nvtxs; j++) {
00153 index = (where[j] == me) ? 0 : nqueues;
00154
00155 newgain = ipc_factor*(floattype)(ed[j]-id[j]);
00156 if (home[j] == me || home[j] == you) {
00157 if (where[j] == home[j])
00158 newgain -= redist_factor*(floattype)vsize[j];
00159 else
00160 newgain += redist_factor*(floattype)vsize[j];
00161 }
00162
00163 FPQueueInsert(queues+hval[j]+index, map[j]-ptr[hval[j]], newgain);
00164 myqueue[j] = (where[j] == me) ? 0 : 1;
00165 inq[hval[j]+index]++;
00166 }
00167
00168
00169 for (j=0, h=0; h<ncon; h++)
00170 if (fabs(flows[h]) > fabs(flows[j])) j = h;
00171 bestflow = fabs(flows[j]);
00172
00173 nchanges = nmoves = 0;
00174 for (ii=0; ii<nvtxs/2; ii++) {
00175 from = -1;
00176 Moc_DynamicSelectQueue(nqueues, ncon, me, you, inq, flows, &from,
00177 &qnum, minval, avgvwgt, maxdiff);
00178
00179
00180 if (from != -1 && qnum == -1) {
00181 from = (from == me) ? you : me;
00182
00183 if (from == me) {
00184 for (j=0; j<ncon; j++)
00185 if (flows[j] > avgvwgt)
00186 break;
00187 }
00188 else {
00189 for (j=0; j<ncon; j++)
00190 if (flows[j] < -1.0*avgvwgt)
00191 break;
00192 }
00193
00194 if (j != ncon)
00195 Moc_DynamicSelectQueue(nqueues, ncon, me, you, inq, flows, &from,
00196 &qnum, minval, avgvwgt, maxdiff);
00197 }
00198
00199 if (qnum == -1)
00200 break;
00201
00202 to = (from == me) ? you : me;
00203 index = (from == me) ? 0 : nqueues;
00204 higain = FPQueueGetMax(queues+qnum+index);
00205 inq[qnum+index]--;
00206 ASSERTS(higain != -1);
00207
00208
00209
00210
00211 vtx = rmap[higain+ptr[qnum]];
00212 myqueue[vtx] = -1;
00213 where[vtx] = to;
00214 nswaps++;
00215 nmoves++;
00216
00217
00218 for (j=0; j<ncon; j++)
00219 flows[j] += (to == me) ? nvwgt[vtx*ncon+j] : -1.0*nvwgt[vtx*ncon+j];
00220
00221
00222 for (j=0, h=0; h<ncon; h++)
00223 if (fabs(flows[h]) > fabs(flows[j])) j = h;
00224 ftmp = fabs(flows[j]);
00225
00226 if (ftmp < bestflow) {
00227 bestflow = ftmp;
00228 nchanges = 0;
00229 }
00230 else {
00231 changes[nchanges++] = vtx;
00232 }
00233
00234 SWAP(id[vtx], ed[vtx], tmp);
00235
00236 for (j=xadj[vtx]; j<xadj[vtx+1]; j++) {
00237 edge = adjncy[j];
00238
00239
00240 if (myqueue[edge] != -1) {
00241 oldgain = ipc_factor*(floattype)(ed[edge]-id[edge]);
00242 if (home[edge] == me || home[edge] == you) {
00243 if (where[edge] == home[edge])
00244 oldgain -= redist_factor*(floattype)vsize[edge];
00245 else
00246 oldgain += redist_factor*(floattype)vsize[edge];
00247 }
00248 }
00249
00250 tmp = (to == where[edge] ? adjwgt[j] : -adjwgt[j]);
00251 INC_DEC(id[edge], ed[edge], tmp);
00252
00253 if (myqueue[edge] != -1) {
00254 newgain = ipc_factor*(floattype)(ed[edge]-id[edge]);
00255 if (home[edge] == me || home[edge] == you) {
00256 if (where[edge] == home[edge])
00257 newgain -= redist_factor*(floattype)vsize[edge];
00258 else
00259 newgain += redist_factor*(floattype)vsize[edge];
00260 }
00261
00262 FPQueueUpdate(queues+hval[edge]+(nqueues*myqueue[edge]),
00263 map[edge]-ptr[hval[edge]], oldgain, newgain);
00264 }
00265 }
00266 }
00267
00268
00269
00270
00271 nswaps -= nchanges;
00272 nmoves -= nchanges;
00273 for (i=0; i<nchanges; i++) {
00274 vtx = changes[i];
00275 from = where[vtx];
00276 where[vtx] = to = (from == me) ? you : me;
00277
00278 SWAP(id[vtx], ed[vtx], tmp);
00279 for (j=xadj[vtx]; j<xadj[vtx+1]; j++) {
00280 edge = adjncy[j];
00281 tmp = (to == where[edge] ? adjwgt[j] : -adjwgt[j]);
00282 INC_DEC(id[edge], ed[edge], tmp);
00283 }
00284 }
00285
00286 for (i=0; i<nqueues; i++) {
00287 if (nvpq[i] > 0) {
00288 FPQueueReset(queues+i);
00289 FPQueueReset(queues+i+nqueues);
00290 }
00291 }
00292
00293 if (nmoves == 0)
00294 break;
00295 }
00296
00297
00298
00299
00300 sset(ncon, 0.0, my_wgt);
00301 for (i=0; i<nvtxs; i++)
00302 if (where[i] == me)
00303 for (h=0; h<ncon; h++)
00304 my_wgt[h] += nvwgt[i*ncon+h];
00305
00306 for (i=0; i<ncon; i++) {
00307 ftmp = (pwgts[i]+pwgts[ncon+i])/2.0;
00308 if (ftmp != 0.0)
00309 lbvec[i] = fabs(my_wgt[i]-tpwgts[i]) / ftmp;
00310 else
00311 lbvec[i] = 0.0;
00312 }
00313 lbavg = savg(ncon, lbvec);
00314 *diff_lbavg = lbavg;
00315
00316
00317
00318
00319 cut = totalv = 0;
00320 for (i=0; i<nvtxs; i++) {
00321 if (where[i] != home[i])
00322 totalv += vsize[i];
00323
00324 for (j=xadj[i]; j<xadj[i+1]; j++)
00325 if (where[adjncy[j]] != where[i])
00326 cut += adjwgt[j];
00327 }
00328 cut /= 2;
00329 mycost = cut*ipc_factor + totalv*redist_factor;
00330 *diff_cost = mycost;
00331
00332
00333 for (i=0; i<nqueues; i++)
00334 if (nvpq[i] > 0) {
00335 FPQueueFree(queues+i);
00336 FPQueueFree(queues+i+nqueues);
00337 }
00338
00339 GKfree((void **)&hval, (void **)&nvpq, (void **)&ptr, (void **)&inq, (void **)&queues, LTERM);
00340 return nswaps;
00341 }
00342