00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016 #include <parmetislib.h>
00017
00018
00019
00020
00021
00022 void ParMETIS_V3_Mesh2Dual(idxtype *elmdist, idxtype *eptr, idxtype *eind,
00023 int *numflag, int *ncommonnodes, idxtype **xadj,
00024 idxtype **adjncy, MPI_Comm *comm)
00025 {
00026 int i, j, jj, k, kk, m;
00027 int npes, mype, pe, count, mask, pass;
00028 int nelms, lnns, my_nns, node;
00029 int firstelm, firstnode, lnode, nrecv, nsend;
00030 int *scounts, *rcounts, *sdispl, *rdispl;
00031 idxtype *nodedist, *nmap, *auxarray;
00032 idxtype *gnptr, *gnind, *nptr, *nind, *myxadj, *myadjncy = NULL;
00033 idxtype *sbuffer, *rbuffer, *htable;
00034 KeyValueType *nodelist, *recvbuffer;
00035 idxtype ind[200], wgt[200];
00036 int gmaxnode, gminnode;
00037 CtrlType ctrl;
00038
00039
00040 SetUpCtrl(&ctrl, -1, 0, *comm);
00041
00042 npes = ctrl.npes;
00043 mype = ctrl.mype;
00044
00045 nelms = elmdist[mype+1]-elmdist[mype];
00046
00047 if (*numflag == 1)
00048 ChangeNumberingMesh2(elmdist, eptr, eind, NULL, NULL, NULL, npes, mype, 1);
00049
00050 mask = (1<<11)-1;
00051
00052
00053
00054
00055 gminnode = GlobalSEMin(&ctrl, eind[idxamin(eptr[nelms], eind)]);
00056 for (i=0; i<eptr[nelms]; i++)
00057 eind[i] -= gminnode;
00058
00059 gmaxnode = GlobalSEMax(&ctrl, eind[idxamax(eptr[nelms], eind)]);
00060
00061
00062
00063
00064
00065 ASSERTS(nelms > 0);
00066
00067
00068 nodedist = idxsmalloc(npes+1, 0, "nodedist");
00069 for (nodedist[0]=0, i=0,j=gmaxnode+1; i<npes; i++) {
00070 k = j/(npes-i);
00071 nodedist[i+1] = nodedist[i]+k;
00072 j -= k;
00073 }
00074 my_nns = nodedist[mype+1]-nodedist[mype];
00075 firstnode = nodedist[mype];
00076
00077 nodelist = (KeyValueType *)GKmalloc(eptr[nelms]*sizeof(KeyValueType), "nodelist");
00078 auxarray = idxmalloc(eptr[nelms], "auxarray");
00079 htable = idxsmalloc(amax(my_nns, mask+1), -1, "htable");
00080 scounts = imalloc(4*npes+2, "scounts");
00081 rcounts = scounts+npes;
00082 sdispl = scounts+2*npes;
00083 rdispl = scounts+3*npes+1;
00084
00085
00086
00087
00088
00089 for (i=0; i<nelms; i++) {
00090 for (j=eptr[i]; j<eptr[i+1]; j++) {
00091 nodelist[j].key = eind[j];
00092 nodelist[j].val = j;
00093 auxarray[j] = i;
00094 }
00095 }
00096 ikeysort(eptr[nelms], nodelist);
00097
00098 for (count=1, i=1; i<eptr[nelms]; i++) {
00099 if (nodelist[i].key > nodelist[i-1].key)
00100 count++;
00101 }
00102
00103 lnns = count;
00104 nmap = idxmalloc(lnns, "nmap");
00105
00106
00107 count = 1;
00108 nmap[0] = nodelist[0].key;
00109 eind[nodelist[0].val] = 0;
00110 nodelist[0].val = auxarray[nodelist[0].val];
00111 for (i=1; i<eptr[nelms]; i++) {
00112 if (nodelist[i].key > nodelist[i-1].key) {
00113 nmap[count] = nodelist[i].key;
00114 count++;
00115 }
00116 eind[nodelist[i].val] = count-1;
00117 nodelist[i].val = auxarray[nodelist[i].val];
00118 }
00119 MPI_Barrier(*comm);
00120
00121
00122
00123
00124 iset(npes, 0, scounts);
00125 for (pe=i=0; i<eptr[nelms]; i++) {
00126 while (nodelist[i].key >= nodedist[pe+1])
00127 pe++;
00128 scounts[pe] += 2;
00129 }
00130 ASSERTS(pe < npes);
00131
00132 MPI_Alltoall((void *)scounts, 1, MPI_INT, (void *)rcounts, 1, MPI_INT, *comm);
00133
00134 icopy(npes, scounts, sdispl);
00135 MAKECSR(i, npes, sdispl);
00136
00137 icopy(npes, rcounts, rdispl);
00138 MAKECSR(i, npes, rdispl);
00139
00140 ASSERTS(sdispl[npes] == eptr[nelms]*2);
00141
00142 nrecv = rdispl[npes]/2;
00143 recvbuffer = (KeyValueType *)GKmalloc(amax(1, nrecv)*sizeof(KeyValueType), "recvbuffer");
00144
00145 MPI_Alltoallv((void *)nodelist, scounts, sdispl, IDX_DATATYPE, (void *)recvbuffer,
00146 rcounts, rdispl, IDX_DATATYPE, *comm);
00147
00148
00149
00150
00151 gnptr = idxsmalloc(my_nns+1, 0, "gnptr");
00152
00153 for (i=0; i<npes; i++) {
00154 for (j=rdispl[i]/2; j<rdispl[i+1]/2; j++) {
00155 lnode = recvbuffer[j].key-firstnode;
00156 ASSERTS(lnode >= 0 && lnode < my_nns)
00157
00158 gnptr[lnode]++;
00159 }
00160 }
00161 MAKECSR(i, my_nns, gnptr);
00162
00163 gnind = idxmalloc(amax(1, gnptr[my_nns]), "gnind");
00164 for (pe=0; pe<npes; pe++) {
00165 firstelm = elmdist[pe];
00166 for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
00167 lnode = recvbuffer[j].key-firstnode;
00168 gnind[gnptr[lnode]++] = recvbuffer[j].val+firstelm;
00169 }
00170 }
00171 SHIFTCSR(i, my_nns, gnptr);
00172
00173
00174
00175
00176
00177 iset(npes, 0, scounts);
00178
00179
00180 for (pe=0; pe<npes; pe++) {
00181 for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
00182 lnode = recvbuffer[j].key-firstnode;
00183 if (htable[lnode] == -1) {
00184 scounts[pe] += gnptr[lnode+1]-gnptr[lnode];
00185 htable[lnode] = 1;
00186 }
00187 }
00188
00189
00190 for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
00191 lnode = recvbuffer[j].key-firstnode;
00192 htable[lnode] = -1;
00193 }
00194 }
00195
00196
00197 MPI_Alltoall((void *)scounts, 1, MPI_INT, (void *)rcounts, 1, MPI_INT, *comm);
00198
00199 icopy(npes, scounts, sdispl);
00200 MAKECSR(i, npes, sdispl);
00201
00202
00203 nsend = sdispl[npes];
00204 sbuffer = (idxtype *)realloc(nodelist, sizeof(idxtype)*amax(1, nsend));
00205
00206 count = 0;
00207 for (pe=0; pe<npes; pe++) {
00208 for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
00209 lnode = recvbuffer[j].key-firstnode;
00210 if (htable[lnode] == -1) {
00211 for (k=gnptr[lnode]; k<gnptr[lnode+1]; k++) {
00212 if (k == gnptr[lnode])
00213 sbuffer[count++] = -1*(gnind[k]+1);
00214 else
00215 sbuffer[count++] = gnind[k];
00216 }
00217 htable[lnode] = 1;
00218 }
00219 }
00220 ASSERTS(count == sdispl[pe+1]);
00221
00222
00223 for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
00224 lnode = recvbuffer[j].key-firstnode;
00225 htable[lnode] = -1;
00226 }
00227 }
00228
00229 icopy(npes, rcounts, rdispl);
00230 MAKECSR(i, npes, rdispl);
00231
00232 nrecv = rdispl[npes];
00233 rbuffer = (idxtype *)realloc(recvbuffer, sizeof(idxtype)*amax(1, nrecv));
00234
00235 MPI_Alltoallv((void *)sbuffer, scounts, sdispl, IDX_DATATYPE, (void *)rbuffer,
00236 rcounts, rdispl, IDX_DATATYPE, *comm);
00237
00238 k = -1;
00239 nptr = idxsmalloc(lnns+1, 0, "nptr");
00240 nind = rbuffer;
00241 for (pe=0; pe<npes; pe++) {
00242 for (j=rdispl[pe]; j<rdispl[pe+1]; j++) {
00243 if (nind[j] < 0) {
00244 k++;
00245 nind[j] = (-1*nind[j])-1;
00246 }
00247 nptr[k]++;
00248 }
00249 }
00250 MAKECSR(i, lnns, nptr);
00251
00252 ASSERTS(k+1 == lnns);
00253 ASSERTS(nptr[lnns] == nrecv)
00254
00255 myxadj = *xadj = idxsmalloc(nelms+1, 0, "xadj");
00256 idxset(mask+1, -1, htable);
00257
00258 firstelm = elmdist[mype];
00259
00260
00261 for (pass=0; pass<2; pass++) {
00262 for (i=0; i<nelms; i++) {
00263 for (count=0, j=eptr[i]; j<eptr[i+1]; j++) {
00264 node = eind[j];
00265
00266 for (k=nptr[node]; k<nptr[node+1]; k++) {
00267 if ((kk=nind[k]) == firstelm+i)
00268 continue;
00269
00270 m = htable[(kk&mask)];
00271
00272 if (m == -1) {
00273 ind[count] = kk;
00274 wgt[count] = 1;
00275 htable[(kk&mask)] = count++;
00276 }
00277 else {
00278 if (ind[m] == kk) {
00279 wgt[m]++;
00280 }
00281 else {
00282 for (jj=0; jj<count; jj++) {
00283 if (ind[jj] == kk) {
00284 wgt[jj]++;
00285 break;
00286 }
00287 }
00288 if (jj == count) {
00289 ind[count] = kk;
00290 wgt[count++] = 1;
00291 }
00292 }
00293 }
00294 }
00295 }
00296
00297 for (j=0; j<count; j++) {
00298 htable[(ind[j]&mask)] = -1;
00299 if (wgt[j] >= *ncommonnodes) {
00300 if (pass == 0)
00301 myxadj[i]++;
00302 else
00303 myadjncy[myxadj[i]++] = ind[j];
00304 }
00305 }
00306 }
00307
00308 if (pass == 0) {
00309 MAKECSR(i, nelms, myxadj);
00310 myadjncy = *adjncy = idxmalloc(myxadj[nelms], "adjncy");
00311 }
00312 else {
00313 SHIFTCSR(i, nelms, myxadj);
00314 }
00315 }
00316
00317
00318
00319
00320 for (i=0; i<eptr[nelms]; i++)
00321 eind[i] = nmap[eind[i]] + gminnode;
00322
00323 if (*numflag == 1)
00324 ChangeNumberingMesh2(elmdist, eptr, eind, myxadj, myadjncy, NULL, npes, mype, 0);
00325
00326
00327 GKfree((void **)&scounts, (void **)&nodedist, (void **)&nmap, (void **)&sbuffer,
00328 (void **)&htable, (void **)&nptr, (void **)&nind, (void **)&gnptr,
00329 (void **)&gnind, (void **)&auxarray, LTERM);
00330
00331 FreeCtrl(&ctrl);
00332
00333 return;
00334 }
00335