00001 #include "import.h"
00002
00003 bool coord_leq(double *a, double* b, int dim);
00004
00005 void ParFUM_desharing(int meshid){
00006 FEM_Mesh *mesh = (FEM_chunk::get("ParFUM_desharing"))->lookup(meshid,"ParFUM_desharing");
00007 mesh->clearSharedNodes();
00008 }
00009
00010
00011 void ParFUM_deghosting(int meshid){
00012 FEM_Mesh *mesh = (FEM_chunk::get("ParFUM_deghosting"))->lookup(meshid,"ParFUM_deghosting");
00013 mesh->clearGhostNodes();
00014 mesh->clearGhostElems();
00015 }
00016
00017
00018
00020 void ParFUM_recreateSharedNodes(int meshid, int dim, MPI_Comm newComm) {
00021
00022 #define CORRECT_COORD_COMPARISON
00023 MPI_Comm comm = newComm;
00024 int rank, nParts;
00025 int send_count=0;
00026 int recv_count=0;
00027 MPI_Comm_rank(comm, &rank);
00028 MPI_Comm_size(comm, &nParts);
00029
00030
00031 #if SUPER_FAST_SPECIFIC_TORUS
00032
00033 #define TORUSY 15
00034 #define TORUSZ 15
00035
00036 CkPrintf("rank %d is manually configuring the IDXL lists to make the shared node generation fast\n");
00037
00038 FEM_Mesh *mesh = (FEM_chunk::get("ParFUM_recreateSharedNodes"))->lookup(meshid,"ParFUM_recreateSharedNodes");
00039 IDXL_Side &shared = mesh->node.shared;
00040
00041 int low = (rank-1+nParts) % nParts;
00042 int high = (rank+1) % nParts;
00043
00044 IDXL_List &list1 = shared.addList(low);
00045 IDXL_List &list2 = shared.addList(high);
00046
00047 int nodesInPlane = TORUSY * TORUSZ;
00048 int numNodes = FEM_Mesh_get_length(meshid,FEM_NODE);
00049
00050
00051 for(int j=0;j<nodesInPlane;j++){
00052 list1.push_back(j);
00053 }
00054
00055
00056 for(int j=0;j<nodesInPlane;j++){
00057 list2.push_back(numNodes - nodesInPlane +j);
00058 }
00059
00060 return;
00061 #else
00062
00063
00064
00065
00066
00067 int *sharedNodeCounts;
00068 int **sharedNodeLists;
00069
00070 sharedNodeCounts = (int *)malloc(nParts*sizeof(int));
00071 sharedNodeLists = (int **)malloc(nParts*sizeof(int *));
00072 for (int i=0; i<nParts; i++) {
00073 sharedNodeLists[i] = NULL;
00074 sharedNodeCounts[i] = 0;
00075 }
00076
00077 int numNodes;
00078 int coord_msg_tag=42, sharedlist_msg_tag=43;
00079 double *nodeCoords;
00080 numNodes = FEM_Mesh_get_length(meshid,FEM_NODE);
00081 nodeCoords = (double *)malloc(dim*numNodes*sizeof(double));
00082
00083 FEM_Mesh_become_get(meshid);
00084
00085 FEM_Mesh_data(meshid,FEM_NODE,FEM_COORD, nodeCoords, 0, numNodes,FEM_DOUBLE, dim);
00086
00087
00088 if (rank==0) CkPrintf("Extracted node data...\n");
00089
00090
00091
00092
00094 int sendUpperBound; if(nParts %2==0){
00095 sendUpperBound = rank + (nParts/2) - (rank%2);
00096 } else {
00097 sendUpperBound = rank + (nParts/2) ;
00098 }
00099
00101 int sendLowerBound;
00102 if(nParts %2==0){
00103 sendLowerBound = rank - (nParts/2) + ((rank+1)%2);
00104 } else {
00105 sendLowerBound = rank - (nParts/2);
00106 }
00107
00108
00109
00110 #ifdef SHARED_NODES_ONLY_NEIGHBOR
00111
00112 sendUpperBound = rank + 1;
00113 sendLowerBound = rank - 1;
00114 #endif
00115
00116 for (int i=rank+1; i<=sendUpperBound; i++) {
00117 MPI_Send(nodeCoords, dim*numNodes, MPI_DOUBLE, i%nParts, coord_msg_tag, comm);
00118 send_count ++;
00119
00120 }
00121
00122
00123
00124
00125 for (int i=sendLowerBound; i<rank; i++) {
00126 std::vector<int> remoteSharedNodes, localSharedNodes;
00127 double *recvNodeCoords;
00128 MPI_Status status;
00129 int source, length;
00130
00131 MPI_Probe(MPI_ANY_SOURCE, coord_msg_tag, comm, &status);
00132 source = status.MPI_SOURCE;
00133 length = status.MPI_LENGTH/sizeof(double);
00134
00135 recv_count ++;
00136
00137 recvNodeCoords = (double *)malloc(length*sizeof(double));
00138 MPI_Recv((void*)recvNodeCoords, length, MPI_DOUBLE, source,
00139 coord_msg_tag, comm, &status);
00140
00141 int recvNodeCount = length/dim;
00142
00143
00144
00145
00146 #ifdef SHARED_NODES_ONLY_NEIGHBOR
00147
00148 int borderNodes = BORDERNODES;
00149
00150
00151
00152
00153 int myBottomLow = 0;
00154 int myBottomHigh = borderNodes;
00155 int myTopLow = numNodes - borderNodes;
00156 int myTopHigh = numNodes-1;
00157
00158 int recvBottomLow = 0;
00159 int recvBottomHigh = borderNodes;
00160 int recvTopLow = recvNodeCount - borderNodes;
00161 int recvTopHigh = recvNodeCount-1;
00162
00163 CkPrintf("[%d] rank=%d myBottomLow=%d myBottomHigh=%d myTopLow=%d myTopHigh=%d recvBottomLow=%d recvBottomHigh=%d recvTopLow=%d recvTopHigh=%d\n", CkMyPe(), rank, myBottomLow, myBottomHigh, myTopLow, myTopHigh, recvBottomLow, recvBottomHigh, recvTopLow, recvTopHigh);
00164
00165
00166 if(myTopLow < 0)
00167 myTopLow = 0;
00168
00169 if(recvTopLow < 0)
00170 recvTopLow = 0;
00171
00172
00173 if(myBottomHigh >= myTopLow)
00174 myTopLow = myTopLow-1;
00175
00176 if(recvBottomHigh >= recvTopLow)
00177 recvTopLow = recvTopLow-1;
00178
00179 for (int j=myBottomLow; j<=myBottomHigh; j++) {
00180 for (int k=recvBottomLow; k<=recvBottomHigh; k++) {
00181 if (coordEqual(&nodeCoords[j*dim], &recvNodeCoords[k*dim], dim)) {
00182 localSharedNodes.push_back(j);
00183 remoteSharedNodes.push_back(k);
00184 break;
00185 }
00186 }
00187 }
00188
00189 for (int j=myTopLow; j<=myBottomHigh; j++) {
00190 for (int k=recvTopLow; k<=recvTopHigh; k++) {
00191 if (coordEqual(&nodeCoords[j*dim], &recvNodeCoords[k*dim], dim)) {
00192 localSharedNodes.push_back(j);
00193 remoteSharedNodes.push_back(k);
00194 break;
00195 }
00196 }
00197 }
00198
00199
00200 for (int j=myTopLow; j<=myTopHigh; j++) {
00201 for (int k=recvBottomLow; k<=recvBottomHigh; k++) {
00202 if (coordEqual(&nodeCoords[j*dim], &recvNodeCoords[k*dim], dim)) {
00203 localSharedNodes.push_back(j);
00204 remoteSharedNodes.push_back(k);
00205 break;
00206 }
00207 }
00208 }
00209
00210 for (int j=myBottomLow; j<=myTopHigh; j++) {
00211 for (int k=recvTopLow; k<=recvTopHigh; k++) {
00212 if (coordEqual(&nodeCoords[j*dim], &recvNodeCoords[k*dim], dim)) {
00213 localSharedNodes.push_back(j);
00214 remoteSharedNodes.push_back(k);
00215 break;
00216 }
00217 }
00218 }
00219
00220 #else
00221
00222
00223
00224 for (int j=0; j<numNodes; j++) {
00225 for (int k=0; k<recvNodeCount; k++) {
00226 if (coordEqual(&nodeCoords[j*dim], &recvNodeCoords[k*dim], dim)) {
00227 localSharedNodes.push_back(j);
00228 remoteSharedNodes.push_back(k);
00229
00230 break;
00231 }
00232 }
00233 }
00234
00235 #endif
00236
00237
00238 int *localSharedNodeList = (int *)malloc(localSharedNodes.size()*sizeof(int));
00239 for (int m=0; m<localSharedNodes.size(); m++) {
00240 localSharedNodeList[m] = localSharedNodes[m];
00241 }
00242 sharedNodeCounts[source] = localSharedNodes.size();
00243 sharedNodeLists[source] = localSharedNodeList;
00244
00245
00246 MPI_Send((int *)&remoteSharedNodes[0], remoteSharedNodes.size(), MPI_INT, source,
00247 sharedlist_msg_tag, comm);
00248 free(recvNodeCoords);
00249 }
00250
00251
00252 for (int i=rank+1; i<=sendUpperBound; i++) {
00253 int *sharedNodes;
00254 MPI_Status status;
00255 int source, length;
00256
00257 MPI_Probe(MPI_ANY_SOURCE, sharedlist_msg_tag, comm, &status);
00258 source = status.MPI_SOURCE;
00259 length = status.MPI_LENGTH/sizeof(int);
00260
00261 sharedNodes = (int *)malloc(length*sizeof(int));
00262 MPI_Recv((void*)sharedNodes, length, MPI_INT, source, sharedlist_msg_tag, comm, &status);
00263
00264 sharedNodeCounts[source] = length;
00265 sharedNodeLists[source] = sharedNodes;
00266
00267 }
00268
00269 if (rank==0) CkPrintf("Received new shared node lists...\n");
00270
00271
00272
00273 FEM_Mesh *mesh = (FEM_chunk::get("ParFUM_recreateSharedNodes"))->lookup(meshid,"ParFUM_recreateSharedNodes");
00274 IDXL_Side &shared = mesh->node.shared;
00275
00276 for(int i=0;i<nParts;i++){
00277 if(i == rank)
00278 continue;
00279 if(sharedNodeCounts[i] != 0){
00280 IDXL_List &list = shared.addList(i);
00281 for(int j=0;j<sharedNodeCounts[i];j++){
00282 list.push_back(sharedNodeLists[i][j]);
00283 }
00284 }
00285 }
00286
00287
00288 MPI_Barrier(MPI_COMM_WORLD);
00289
00290 if (rank==0) CkPrintf("Recreation of shared nodes complete...\n");
00291
00292
00293
00294 #ifdef SHARED_NODES_ONLY_NEIGHBOR
00295 CkAssert(send_count + recv_count == 2);
00296 #else
00297 CkAssert(send_count + recv_count == nParts-1);
00298 #endif
00299
00300
00301 free(nodeCoords);
00302 free(sharedNodeCounts);
00303 for (int i=0; i<nParts; i++) {
00304 if (sharedNodeLists[i])
00305 free(sharedNodeLists[i]);
00306 }
00307 free(sharedNodeLists);
00308
00309 #endif // normal mode, not super fast mesh specific one
00310 }
00311
00312 void ParFUM_createComm(int meshid, int dim, MPI_Comm comm)
00313 {
00314 int rank, comm_size;
00315 ParFUM_desharing(meshid);
00316 ParFUM_deghosting(meshid);
00317
00318 MPI_Comm_rank(comm, &rank);
00319 MPI_Comm_size(comm, &comm_size);
00320 if (rank==0) CkPrintf("Recreating shared nodes...\n");
00321 ParFUM_recreateSharedNodes(meshid, dim, comm);
00322
00323 if (rank==0) CkPrintf("Generating global node numbers...\n");
00324 ParFUM_generateGlobalNodeNumbers(meshid, comm);
00325 FEM_Mesh *mesh = (FEM_chunk::get("ParFUM_recreateSharedNodes"))->lookup(meshid,"ParFUM_recreateSharedNodes");
00326
00327
00328 if (rank==0) CkPrintf("Gathering ghost data...\n");
00329 struct ghostdata *gdata;
00330 if(rank == 0){
00331 gdata = gatherGhosts();
00332 }else{
00333 gdata = new ghostdata;
00334 }
00335 MPI_Bcast_pup(*gdata,0,comm);
00336 if (rank==0) CkPrintf("Making ghosts...\n");
00337 makeGhosts(mesh,comm,0,gdata->numLayers,gdata->layers);
00338
00339 delete gdata;
00340 }
00341
00342 void ParFUM_import_nodes(int meshid, int numNodes, double *nodeCoords, int dim)
00343 {
00344 FEM_Mesh_become_set(meshid);
00345 FEM_Mesh_data(meshid, FEM_NODE, FEM_COORD, nodeCoords, 0, numNodes, FEM_DOUBLE, dim);
00346 FEM_Mesh_become_get(meshid);
00347 }
00348
00349 void ParFUM_import_elems(int meshid, int numElems, int nodesPer, int *conn, int type)
00350 {
00351 FEM_Mesh_become_set(meshid);
00352 FEM_Mesh_data(meshid, FEM_ELEM+type, FEM_CONN, conn, 0, numElems, FEM_INDEX_0,
00353 nodesPer);
00354 FEM_Mesh_become_get(meshid);
00355 }
00356
00357 void qsort(double *nodes, int *ids, int dim, int first, int last);
00358 void sortNodes(double *nodes, double *sorted_nodes, int *sorted_ids, int numNodes, int dim)
00359 {
00360
00361 for(int i=0; i<numNodes; i++) {
00362 sorted_ids[i] = i;
00363 for(int j=0; j<dim; j++) {
00364 sorted_nodes[i*dim+j] = nodes[i*dim+j];
00365 }
00366 }
00367
00368
00369
00370
00371
00372 qsort(sorted_nodes, sorted_ids, dim, 0, numNodes-1);
00373
00374
00375
00376
00377
00378 }
00379
00380 void merge(double *nodes, int *ids, int dim, int first, int mid, int last);
00381 void qsort(double *nodes, int *ids, int dim, int first, int last)
00382 {
00383 if (first>=last) return;
00384 else if (first==last-1) {
00385 if (!coord_leq(&(nodes[first*dim]), &(nodes[last*dim]), dim)) {
00386 int tmpId=ids[first];
00387 ids[first] = ids[last];
00388 ids[last] = tmpId;
00389 double tmpCoord;
00390 for (int i=0; i<dim; i++) {
00391 tmpCoord = nodes[first*dim+i];
00392 nodes[first*dim+i] = nodes[last*dim+i];
00393 nodes[last*dim+i] = tmpCoord;
00394 }
00395 }
00396 }
00397 else {
00398 qsort(nodes, ids, dim, first, first+((last-first)/2));
00399 qsort(nodes, ids, dim, first+((last-first)/2)+1, last);
00400 merge(nodes, ids, dim, first, first+((last-first)/2)+1, last);
00401 }
00402 }
00403
00404 void merge(double *nodes, int *ids, int dim, int first, int mid, int last)
00405 {
00406 int rover1=first, rover2=mid;
00407 double *tmpCoords = (double *)malloc(dim*(last-first+1)*sizeof(double));
00408 int *tmpIds = (int *)malloc((last-first+1)*sizeof(int));
00409 int pos = 0;
00410 while ((rover1<mid) && (rover2<=last)) {
00411 if (!coord_leq(&(nodes[rover1*dim]), &(nodes[rover2*dim]), dim)) {
00412 tmpIds[pos] = ids[rover2];
00413 for (int i=0; i<dim; i++) {
00414 tmpCoords[pos*dim+i] = nodes[rover2*dim+i];
00415 }
00416 rover2++;
00417 }
00418 else if (coord_leq(&(nodes[rover1*dim]), &(nodes[rover2*dim]), dim)) {
00419 tmpIds[pos] = ids[rover1];
00420 for (int i=0; i<dim; i++) {
00421 tmpCoords[pos*dim+i] = nodes[rover1*dim+i];
00422 }
00423 rover1++;
00424 }
00425 else {
00426 CkPrintf("import.C: merge: ERROR: found identical nodes on single partition!\n");
00427 }
00428 pos++;
00429 }
00430
00431 if (rover1 < mid) {
00432 while (rover1 < mid) {
00433 tmpIds[pos] = ids[rover1];
00434 for (int i=0; i<dim; i++) {
00435 tmpCoords[pos*dim+i] = nodes[rover1*dim+i];
00436 }
00437 rover1++;
00438 pos++;
00439 }
00440 }
00441 else if (rover2 <= last) {
00442 while (rover2 <= last) {
00443 tmpIds[pos] = ids[rover2];
00444 for (int i=0; i<dim; i++) {
00445 tmpCoords[pos*dim+i] = nodes[rover2*dim+i];
00446 }
00447 rover2++;
00448 pos++;
00449 }
00450 }
00451
00452 for (int i=first; i<=last; i++) {
00453 ids[i] = tmpIds[i-first];
00454 for (int j=0; j<dim; j++) {
00455 nodes[i*dim+j] = tmpCoords[(i-first)*dim+j];
00456 }
00457 }
00458 free(tmpCoords);
00459 free(tmpIds);
00460 }
00461
00462
00463 bool coord_leq(double *a, double* b, int dim) {
00464 int d=0;
00465 while (d<dim) {
00466 if (a[d] < b[d]) return true;
00467 else if (a[d] > b[d]) return false;
00468 else d++;
00469 }
00470
00471 return true;
00472 }