#include "ParFUM.h"
#include "ParFUM_internals.h"
#include "MsaHashtable.h"

#include <parmetis.h>

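/* Parallel partitioning of a ParFUM mesh.
 *
 * The serial mesh registered as fem_mesh on masterRank is split across all
 * ranks of comm_context: the master publishes the element connectivity
 * through MSA arrays, every rank runs ParMETIS on its slice of the elements,
 * and the per-partition meshes (plus shared-node IDXL lists and ghost layers)
 * are assembled collectively.
 *
 * A minimal calling sketch from MPI-style driver code (the mesh handle
 * `serial_mesh` and the use of MPI_COMM_WORLD are illustrative assumptions,
 * not part of this file):
 *
 *   int rank;
 *   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
 *   // serial_mesh only needs to be a valid mesh handle on the master rank
 *   int my_mesh = FEM_Mesh_Parallel_broadcast(serial_mesh, 0,
 *                                             (FEM_Comm_t)MPI_COMM_WORLD);
 *   // every rank now holds its own partition in my_mesh
 */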
double elemlistaccTime=0;
extern void clearPartition(void);

int FEM_Mesh_Parallel_broadcast(int fem_mesh,int masterRank,FEM_Comm_t comm_context){
  int myRank;
  MPI_Comm_rank((MPI_Comm)comm_context,&myRank);

  int new_mesh;
  if(myRank == masterRank){
    printf("[%d] Memory usage on vp 0 at the beginning of partition %d \n",CkMyPe(),CmiMemoryUsage());
    new_mesh=FEM_master_parallel_part(fem_mesh,masterRank,comm_context);
  }else{
    new_mesh=FEM_slave_parallel_part(fem_mesh,masterRank,comm_context);
  }

  MPI_Barrier((MPI_Comm)comm_context);
  if(myRank == masterRank){
    clearPartition();
  }

  return new_mesh;
}

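/* Master side of the parallel partition: packs the serial mesh's connectivity
 * into MSA arrays, ships one contiguous slab of elements and nodes to every
 * rank, and then takes part in the collective ParMETIS / data-rearrangement /
 * ghost-generation phases together with the slaves. */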
int FEM_master_parallel_part(int fem_mesh,int masterRank,FEM_Comm_t comm_context){
  const char *caller="FEM_Create_connmsa";
  FEMAPI(caller);
  FEM_chunk *c=FEM_chunk::get(caller);
  FEM_Mesh *m=c->lookup(fem_mesh,caller);
  m->setAbsoluteGlobalno();
  int nelem = m->nElems();
  int numChunks;
  MPI_Comm_size((MPI_Comm)comm_context,&numChunks);
  printf("master -> number of elements %d \n",nelem);
  DEBUG(m->print(0));

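  /* Element connectivity in CSR-like form, shared with all ranks:
   * eptrMSA[e] is the offset of element e's node list inside eindMSA
   * (the eind array is sized assuming at most 10 nodes per element). */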
  MSA1DINT eptrMSA(nelem,numChunks);
  MSA1DINT eindMSA(nelem*10,numChunks);

  struct conndata data;
  data.nelem = nelem;
  data.nnode = m->node.size();
  data.arr1 = eptrMSA;
  data.arr2 = eindMSA;
  MPI_Bcast_pup(data,masterRank,(MPI_Comm)comm_context);

  eptrMSA.enroll(numChunks);
  eindMSA.enroll(numChunks);
  MSA1DINT::Write wPtr = eptrMSA.getInitialWrite();
  MSA1DINT::Write wInd = eindMSA.getInitialWrite();
  int indcount=0,ptrcount=0;
  for(int t=0;t<m->elem.size();t++){
    if(m->elem.has(t)){
      FEM_Elem &k=m->elem[t];
      for(int e=0;e<k.size();e++){
        wPtr.set(ptrcount)=indcount;
        ptrcount++;
        for(int n=0;n<k.getNodesPer();n++){
          wInd.set(indcount)=k.getConn(e,n);
          indcount++;
        }
      }
    }
  }
  wPtr.set(ptrcount) = indcount;
  printf("master -> ptrcount %d indcount %d sizeof(MSA1DINT) %d sizeof(MSA1DINTLIST) %d memory %d\n",
         ptrcount,indcount,(int)sizeof(MSA1DINT),(int)sizeof(MSA1DINTLIST),CmiMemoryUsage());

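  /* Split the serial mesh into numChunks contiguous pieces (elements and
   * nodes alike) and send one piece to every rank, including this one. */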
  FEM_Mesh *mesh_array=FEM_break_mesh(m,ptrcount,numChunks);

  sendBrokenMeshes(mesh_array,comm_context);
  delete [] mesh_array;
  FEM_Mesh mypiece;
  MPI_Recv_pup(mypiece,masterRank,MESH_CHUNK_TAG,(MPI_Comm)comm_context);

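  /* Run ParMETIS collectively over the shared connectivity arrays;
   * every rank partitions its own contiguous slice of the elements. */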
  double parStartTime = CkWallTimer();
  MSA1DINT::Read rPtr = wPtr.syncToRead();
  MSA1DINT::Read rInd = wInd.syncToRead();
  printf("starting FEM_call_parmetis \n");
  struct partconndata *partdata = FEM_call_parmetis(data.nelem, rPtr, rInd, comm_context);

  printf("done with parmetis %d FEM_Mesh %d in %.6lf \n",CmiMemoryUsage(),(int)sizeof(FEM_Mesh),CkWallTimer()-parStartTime);

  double dataArrangeStartTime = CkWallTimer();

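  /* Build the node -> partitions map: for every node, accumulate the set of
   * chunks whose elements reference it. */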
  int totalNodes = m->node.size();
  MSA1DINTLIST nodepart(totalNodes,numChunks);
  MPI_Bcast_pup(nodepart,masterRank,(MPI_Comm)comm_context);
  nodepart.enroll(numChunks);
  MSA1DINTLIST::Accum nodepartAcc = nodepart.getInitialAccum();

  FEM_write_nodepart(nodepartAcc,partdata,(MPI_Comm)comm_context);
  printf("Creating mapping of node to partition took %.6lf\n",CkWallTimer()-dataArrangeStartTime);
  dataArrangeStartTime = CkWallTimer();
  MSA1DINTLIST::Read nodepartRead = nodepartAcc.syncToRead();

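  /* Invert the map: for every partition, accumulate the list of nodes it
   * owns or shares (NodeElem records the sharing chunks for each node). */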
  MSA1DNODELIST part2node(numChunks,numChunks);
  MPI_Bcast_pup(part2node,masterRank,(MPI_Comm)comm_context);
  part2node.enroll(numChunks);
  MSA1DNODELIST::Accum part2nodeAcc = part2node.getInitialAccum();

  FEM_write_part2node(nodepartRead, part2nodeAcc, partdata, (MPI_Comm)comm_context);

  MSA1DNODELIST::Read rPart2node = part2nodeAcc.syncToRead();
  NodeList lnodes = rPart2node.get(masterRank);
  lnodes.uniquify();

  printf("Creating mapping of partition to node took %.6lf\n",CkWallTimer()-dataArrangeStartTime);
  printf("Time spent doing +=ElemList %.6lf \n",elemlistaccTime);
  dataArrangeStartTime = CkWallTimer();

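  /* Assemble the actual per-partition meshes: every element and node of the
   * broken-up mesh is accumulated into the FEM_Mesh of its target chunk. */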
  MSA1DFEMMESH part2mesh(numChunks,numChunks);
  MPI_Bcast_pup(part2mesh,masterRank,(MPI_Comm)comm_context);
  part2mesh.enroll(numChunks);
  MSA1DFEMMESH::Accum aPart2mesh = part2mesh.getInitialAccum();

  FEM_write_part2mesh(aPart2mesh, partdata, &data, nodepartRead, numChunks, masterRank, &mypiece);

  MSA1DFEMMESH::Read rPart2mesh = aPart2mesh.syncToRead();
  MeshElem me = rPart2mesh.get(masterRank);

  DEBUG(printf("[%d] Memory usage on vp 0 close to max %d \n",CkMyPe(),CmiMemoryUsage()));

  data.arr1.FreeMem();
  data.arr2.FreeMem();
  nodepart.FreeMem();
  DEBUG(printf("[%d] Memory usage on vp 0 after FreeMem %d \n",CkMyPe(),CmiMemoryUsage()));

  addIDXLists(me.m,lnodes,masterRank);

  part2node.FreeMem();
  DEBUG(printf("[%d] Memory usage on vp 0 after addIDXL %d \n",CkMyPe(),CmiMemoryUsage()));

  DEBUG(printf("[%d] Length of udata vector in master %d \n",masterRank,m->udata.size()));
  MPI_Bcast_pup(m->udata,masterRank,(MPI_Comm)comm_context);
  me.m->udata = m->udata;

  delete partdata;

  printf("[%d] Data Arrangement took %.6lf \n",masterRank,CkWallTimer()-dataArrangeStartTime);

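  /* Collect the requested ghost-layer descriptions on the master, broadcast
   * them, and generate the ghost elements/nodes layer by layer. */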
  struct ghostdata *gdata = gatherGhosts();
  DEBUG(printf("[%d] number of ghost layers %d \n",masterRank,gdata->numLayers));
  MPI_Bcast_pup(*gdata,masterRank,(MPI_Comm)comm_context);

  printf("[%d] Starting to generate number of ghost layers %d \n",masterRank,gdata->numLayers);
  double _startTime = CkWallTimer();
  makeGhosts(me.m,(MPI_Comm)comm_context,masterRank,gdata->numLayers,gdata->layers);
  delete gdata;

  printf("[%d] Ghost generation took %.6lf \n",masterRank,CkWallTimer()-_startTime);

  me.m->becomeGetting();
  FEM_chunk *chunk = FEM_chunk::get("FEM_Mesh_Parallel_broadcast");
  int tempMeshNo = chunk->meshes.put(me.m);
  int new_mesh = FEM_Mesh_copy(tempMeshNo);

  FEM_Mesh *nmesh = c->lookup(new_mesh,"master_parallel_broadcast");
  DEBUG(printf("[%d] Length of udata vector in master new_mesh %d \n",masterRank,nmesh->udata.size()));

  part2mesh.FreeMem();
  printf("[%d] Max Memory usage on vp 0 at end of parallel partition %d \n",CkMyPe(),CmiMaxMemoryUsage());

  return new_mesh;
}

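/* Slave side of the parallel partition: mirrors the master's sequence of
 * collective operations (connectivity broadcast, ParMETIS, the three MSA
 * phases, IDXL setup and ghost generation) for this rank's chunk. */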
int FEM_slave_parallel_part(int fem_mesh,int masterRank,FEM_Comm_t comm_context){
  int myRank;
  MPI_Comm_rank((MPI_Comm)comm_context,&myRank);
  int numChunks;
  MPI_Comm_size((MPI_Comm)comm_context,&numChunks);

  struct conndata data;
  MPI_Bcast_pup(data,masterRank,(MPI_Comm)comm_context);
  data.arr1.enroll(numChunks);
  data.arr2.enroll(numChunks);
  DEBUG(printf("Recv -> %d \n",data.nelem));

  FEM_Mesh mypiece;
  MPI_Recv_pup(mypiece,masterRank,MESH_CHUNK_TAG,(MPI_Comm)comm_context);

  MSA1DINT::Read rPtr = data.arr1.getInitialWrite().syncToRead();
  MSA1DINT::Read rInd = data.arr2.getInitialWrite().syncToRead();
  struct partconndata *partdata = FEM_call_parmetis(data.nelem, rPtr, rInd, comm_context);

  MSA1DINTLIST nodepart;
  MPI_Bcast_pup(nodepart,masterRank,(MPI_Comm)comm_context);
  nodepart.enroll(numChunks);
  MSA1DINTLIST::Accum nodepartAcc = nodepart.getInitialAccum();

  FEM_write_nodepart(nodepartAcc,partdata,(MPI_Comm)comm_context);
  MSA1DINTLIST::Read nodepartRead = nodepartAcc.syncToRead();

  MSA1DNODELIST part2node;
  MPI_Bcast_pup(part2node,masterRank,(MPI_Comm)comm_context);
  part2node.enroll(numChunks);
  MSA1DNODELIST::Accum part2nodeAcc = part2node.getInitialAccum();

  FEM_write_part2node(nodepartRead, part2nodeAcc, partdata, (MPI_Comm)comm_context);

  MSA1DNODELIST::Read part2nodeRead = part2nodeAcc.syncToRead();
  NodeList lnodes = part2nodeRead.get(myRank);
  lnodes.uniquify();

  MSA1DFEMMESH part2mesh;
  MPI_Bcast_pup(part2mesh,masterRank,(MPI_Comm)comm_context);
  part2mesh.enroll(numChunks);
  MSA1DFEMMESH::Accum aPart2mesh = part2mesh.getInitialAccum();
  FEM_write_part2mesh(aPart2mesh, partdata, &data, nodepartRead, numChunks, myRank, &mypiece);

  MSA1DFEMMESH::Read rPart2mesh = aPart2mesh.syncToRead();
  MeshElem me = rPart2mesh.get(myRank);

  data.arr1.FreeMem();
  data.arr2.FreeMem();
  nodepart.FreeMem();

  addIDXLists(me.m,lnodes,myRank);

  MPI_Bcast_pup(me.m->udata,masterRank,(MPI_Comm)comm_context);
  DEBUG(printf("[%d] Length of udata vector %d \n",myRank,me.m->udata.size()));

  delete partdata;

  struct ghostdata *gdata = new ghostdata;
  MPI_Bcast_pup(*gdata,masterRank,(MPI_Comm)comm_context);

  makeGhosts(me.m,(MPI_Comm)comm_context,masterRank,gdata->numLayers,gdata->layers);

  me.m->becomeGetting();
  FEM_chunk *chunk = FEM_chunk::get("FEM_Mesh_Parallel_broadcast");
  int tempMeshNo = chunk->meshes.put(me.m);
  int new_mesh = FEM_Mesh_copy(tempMeshNo);

  part2mesh.FreeMem();
  delete gdata;
  return new_mesh;
}

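/* Partition this rank's slice of the elements with ParMETIS.  Each rank owns
 * elements [elmdist[rank], elmdist[rank+1]) of the global element array and
 * returns the local CSR connectivity plus the partition assignment in a
 * freshly allocated partconndata. */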
struct partconndata * FEM_call_parmetis(int nelem, MSA1DINT::Read &rPtr, MSA1DINT::Read &rInd, FEM_Comm_t comm_context)
{
  int myRank,numChunks;
  MPI_Comm_size((MPI_Comm)comm_context,&numChunks);
  MPI_Comm_rank((MPI_Comm)comm_context,&myRank);

  int *elmdist = new int[numChunks+1];
  DEBUG(printf("[%d] elmdist \n",myRank));
  for(int i=0;i<=numChunks;i++){
    elmdist[i] = (i*nelem)/numChunks;
    DEBUG(printf(" %d ",elmdist[i]));
  }
  DEBUG(printf("\n"));
  int startindex = elmdist[myRank];
  int endindex = elmdist[myRank+1];
  int numindices = endindex - startindex;
  int *eptr = new int[numindices+1];

  int startConn = rPtr.get(startindex);
  int endConn = rPtr.get(endindex);
  int numConn = endConn - startConn;
  int *eind = new int[numConn];
  DEBUG(printf("%d startindex %d endindex %d startConn %d endConn %d \n",myRank,startindex,endindex,startConn,endConn));
  for(int i=startindex;i<endindex;i++){
    int conn1 = rPtr.get(i);
    int conn2 = rPtr.get(i+1);
    eptr[i-startindex] = conn1 - startConn;
    for(int j=conn1;j<conn2;j++){
      eind[j-startConn] = rInd.get(j);
    }
  }
  eptr[numindices] = endConn - startConn;

  int wgtflag=0,numflag=0,ncon=1,ncommonnodes=2,options[5],edgecut=0;
  float ubvec = 1.05;
  float *tpwgts = new float[numChunks];
  int *parts = new int[numindices+1];
  for(int i=0;i<numChunks;i++){
    tpwgts[i]=1/(double)numChunks;
  }
  options[0]=0;
  MPI_Barrier((MPI_Comm)comm_context);
  ParMETIS_V3_PartMeshKway(elmdist,eptr,eind,NULL,&wgtflag,&numflag,&ncon,&ncommonnodes,&numChunks,tpwgts,&ubvec,options,&edgecut,parts,(MPI_Comm *)&comm_context);
  DEBUG(CkPrintf("%d partition ",myRank);)
  for(int i=0;i<numindices;i++){
    DEBUG(CkPrintf(" <%d %d> ",i+startindex,parts[i]));
  }
  DEBUG(CkPrintf("\n"));
  delete [] elmdist;
  delete [] tpwgts;
  struct partconndata *retval = new struct partconndata;
  retval->nelem = numindices;
  retval->eind = eind;
  retval->eptr = eptr;
  retval->part = parts;
  retval->startindex = startindex;
  return retval;
}

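/* For every element this rank partitioned, record the element's target chunk
 * against each node in the element's connectivity (accumulated into the
 * node -> partitions MSA). */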
void FEM_write_nodepart(MSA1DINTLIST::Accum &nodepart,struct partconndata *data,MPI_Comm comm_context){
  for(int i=0;i<data->nelem;i++){
    int start = data->eptr[i];
    int end = data->eptr[i+1];
    for(int j=start;j<end;j++){
      nodepart(data->eind[j]) += data->part[i];
      DEBUG(printf(" write_nodepart %d %d \n",data->eind[j],data->part[i]));
    }
  }
}

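/* Invert the node -> partitions map into a partition -> nodes map.  Each rank
 * handles a contiguous range of nodes; a node referenced by more than one
 * partition is recorded as shared, together with the list of sharing chunks. */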
void FEM_write_part2node(MSA1DINTLIST::Read &nodepart,
                         MSA1DNODELIST::Accum &part2node,
                         struct partconndata *data,
                         MPI_Comm comm_context)
{
  int nodes = nodepart.length();
  int myRank,numChunks;

  MPI_Comm_rank(comm_context,&myRank);
  MPI_Comm_size(comm_context,&numChunks);
  int start = (nodes*myRank)/numChunks;
  int end = (nodes*(myRank+1))/numChunks;
  for(int i=start;i<end;i++){
    IntList t = nodepart.get(i);
    t.uniquify();
    int num=0;
    if(t.vec->size()>1){
      num = t.vec->size();
    }
    NodeElem n(i,num);
    if(num != 0){
      for(int k=0;k<t.vec->size();k++){
        n.shared[k] = (*t.vec)[k];
      }
    }

    for(int j=0;j<t.vec->size();j++){
      UniqElemList<NodeElem> en(n);
      part2node((*t.vec)[j]) += en;
    }
  }
  DEBUG(printf("done write_part2node\n"));
}

void FEM_write_part2elem(MSA1DINTLIST::Accum &part2elem,struct partconndata *data,MPI_Comm comm_context)
{
  for(int i=0;i<data->nelem;i++){
    part2elem(data->part[i]) += data->startindex+i;
  }
}

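/* Cut the serial mesh into numChunks pieces by walking the elements and nodes
 * in order and starting a new piece whenever the running count crosses the
 * next elmdist/nodedist boundary. */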
FEM_Mesh * FEM_break_mesh(FEM_Mesh *m,int numElements,int numChunks){
  FEM_Mesh *mesh_array = new FEM_Mesh[numChunks];
  int *elmdist = new int[numChunks+1];
  int *nodedist = new int[numChunks+1];
  int numNodes = m->node.size();

  for(int i=0;i<=numChunks;i++){
    elmdist[i] = (i*numElements)/numChunks;
    nodedist[i] = (i*numNodes)/numChunks;
  }

  int elcount=0;
  int mcount=0;
  mesh_array[mcount].copyShape(*m);
  for(int t=0;t<m->elem.size();t++){
    if(m->elem.has(t)){
      FEM_Elem &k=m->elem[t];
      for(int e=0;e<k.size();e++){
        mesh_array[mcount].elem[t].push_back(k,e);
        elcount++;
        if(elcount == elmdist[mcount+1]){
          mcount++;
          if(mcount != numChunks){
            mesh_array[mcount].copyShape(*m);
          }
        }
      }
    }
  }
  mcount=0;
  for(int i=0;i<m->node.size();i++){
    if(i == nodedist[mcount+1]){
      mcount++;
    }
    mesh_array[mcount].node.push_back(m->node,i);
  }
  delete [] elmdist;
  delete [] nodedist;
  return mesh_array;
}

void sendBrokenMeshes(FEM_Mesh *mesh_array,FEM_Comm_t comm_context){
  int numChunks;
  MPI_Comm_size((MPI_Comm)comm_context,&numChunks);
  for(int i=0;i<numChunks;i++){
    MPI_Send_pup(mesh_array[i],i,MESH_CHUNK_TAG,(MPI_Comm)comm_context);
  }
}

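/* Distribute this rank's piece of the broken-up mesh: every element goes to
 * the chunk ParMETIS assigned it, and every node goes to each chunk that the
 * node -> partitions map says references it. */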
void FEM_write_part2mesh(MSA1DFEMMESH::Accum &part2mesh,
                         struct partconndata *partdata,
                         struct conndata *data,
                         MSA1DINTLIST::Read &nodepart,
                         int numChunks,
                         int myChunk,
                         FEM_Mesh *m)
{
  int count=0;

  for(int t=0;t<m->elem.size();t++){
    if(m->elem.has(t)){
      const FEM_Elem &k=(m)->elem[t];
      for(int e=0;e<k.size();e++){
        int chunkID = partdata->part[count];
        MeshElem::ElemInfo ei(m, e, t);
        part2mesh(chunkID) += ei;
        count++;
      }
    }
  }

  int startnode = (myChunk * data->nnode)/numChunks;
  for(int i=0;i<m->node.size();i++){
    IntList chunks = nodepart.get(i+startnode);
    chunks.uniquify();
    for(int j=0;j<chunks.vec->size();j++){
      MeshElem::NodeInfo ni(m, i);
      part2mesh((*(chunks.vec))[j]) += ni;
    }
  }
}

void sortNodeList(NodeList &lnodes){
  CkVec<NodeElem> *vec = lnodes.vec;
  vec->quickSort();
}

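/* Set up the shared-node IDXL lists for this chunk's mesh: mark which local
 * nodes are primary, record which other chunks share each node, and finally
 * renumber element connectivity from global to local node numbers. */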
void addIDXLists(FEM_Mesh *m,NodeList &lnodes,int myChunk){
  double startAddIDXL = CkWallTimer();

  sortNodeList(lnodes);
  DEBUG(printf("[%d] Sorting finished %.6lfs after starting addIDXL\n",myChunk,CkWallTimer()-startAddIDXL));

  CkVec<NodeElem> *vec = lnodes.vec;

  CkHashtableT<CkHashtableAdaptorT<int>,int> global2local;
  for(int i=0;i<m->node.size();i++){
    int nodeno = m->node.getGlobalno(i);
    global2local.put(nodeno)=i;
  }

  for(int i=0;i<vec->size();i++){
    int global = (*vec)[i].global;
    int local = global2local.get(global);

    m->node.setPrimary(local,true);

    if(((*vec)[i]).numShared > 0){
      DEBUG(printf("[%d] Global node %d local %d node is shared \n",myChunk,global,local));
      for(int j=0;j<((*vec)[i]).numShared;j++){
        if((*vec)[i].shared[j] != myChunk){
          m->node.shared.addList((*vec)[i].shared[j]).push_back(local);
        }
        if((*vec)[i].shared[j] < myChunk){
          m->node.setPrimary(local,false);
        }
      }
    }
  }
  DEBUG(m->node.shared.print());

  for(int i=0;i<m->elem.size();i++){
    if(m->elem.has(i)){
      FEM_Elem &el=m->elem[i];
      for(int j=0;j<el.size();j++){
        int *conn = el.connFor(j);
        for(int k=0;k<el.getNodesPer();k++){
          int gnode = conn[k];
          int lnode = global2local.get(gnode);
          conn[k] = lnode;
        }
      }
    }
  }
  DEBUG(printf("[%d] Time for addIDXL %.6lfs\n",myChunk,CkWallTimer()-startAddIDXL));
}

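/* Collect the ghost-layer descriptions registered with the current partition
 * on the master, so that they can be broadcast to every rank. */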
struct ghostdata *gatherGhosts(){
  struct ghostdata *res = new struct ghostdata;
  FEM_Partition &part = FEM_curPartition();
  res->numLayers = part.getRegions();
  res->layers = new FEM_Ghost_Layer *[res->numLayers];
  int count=0;

  for(int i=0;i<res->numLayers;i++){
    const FEM_Ghost_Region &region = part.getRegion(i);
    if(region.layer != NULL){
      res->layers[count]=region.layer;
      count++;
    }
  }
  res->numLayers = count;
  DEBUG(printf("gatherGhosts found %d layers \n",res->numLayers));
  printf("gatherGhosts found %d layers \n",res->numLayers);
  return res;
}

double listSearchTime=0;
double sharedSearchTime=0;

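/* Generate all ghost layers for this chunk's mesh.  First count the shared
 * nodes (each shared node counted once across the pair of chunks sharing it),
 * tag every node and element with its owning chunk, build a global -> local
 * node map, and then add the layers one at a time with makeGhost(). */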
void makeGhosts(FEM_Mesh *m, MPI_Comm comm, int masterRank, int numLayers, FEM_Ghost_Layer **layers)
{
  int myChunk;
  int numChunks;
  MPI_Comm_rank((MPI_Comm)comm,&myChunk);
  MPI_Comm_size((MPI_Comm)comm,&numChunks);

  double _startTime=CkWallTimer();

  if(numChunks == 1){
    return;
  }

  FEM_Comm *shared = &m->node.shared;
  int count=0;
  CkHashtableT<CkHashtableAdaptorT<int>,char> countedSharedNode;
  for(int i=0;i<shared->size();i++){
    const FEM_Comm_List &list = shared->getLocalList(i);

    if(list.getDest() > myChunk){
      for(int j=0;j<list.size();j++){
        if(countedSharedNode.get(list[j]) == 0){
          count++;
          countedSharedNode.put(list[j]) = 1;
        }
      }
    }else{
      for(int j=0;j<list.size();j++){
        if(countedSharedNode.get(list[j]) == 1){
          count--;
        }
        countedSharedNode.put(list[j]) = 2;
      }
    }
  }
  int totalShared;
  MPI_Allreduce(&count, &totalShared, 1, MPI_INT, MPI_SUM, comm);

  FEM_IndexAttribute *nchunkattr = (FEM_IndexAttribute *)m->node.lookup(FEM_CHUNK,"makeGhosts");
  int *nchunk = nchunkattr->get().getData();
  for(int i=0;i<nchunkattr->getLength();i++){
    nchunk[i] = myChunk;
  }
  for(int e = 0; e < m->elem.size();e++){
    if(m->elem.has(e)){
      FEM_IndexAttribute *echunkattr = (FEM_IndexAttribute *)m->elem[e].lookup(FEM_CHUNK,"makeGhosts");
      int *echunk = echunkattr->get().getData();
      for(int i=0;i<echunkattr->getLength();i++){
        echunk[i] = myChunk;
      }
    }
  }

  CkHashtableT<CkHashtableAdaptorT<int>,int> global2local;
  for(int i=0;i<m->node.size();i++){
    global2local.put(m->node.getGlobalno(i))=i+1;
  }
  if(myChunk == 0){
    printf("Time to do preprocessing for ghosts %.6lf \n",CkWallTimer()-_startTime);
  }
  for(int i=0;i<numLayers;i++){
    if(myChunk == 0){printf("[%d] Starting ghost layer %d \n",myChunk,i);}
    _startTime = CkWallTimer();
    makeGhost(m,comm,masterRank,totalShared,layers[i],countedSharedNode,global2local);
    if(myChunk == 0){
      printf("[%d] Making ghost layer %d took %.6lf listSearch %.6lf sharedSearchTime %.6lf \n",myChunk,i,CkWallTimer()-_startTime,listSearchTime,sharedSearchTime);
      listSearchTime = 0;
      sharedSearchTime=0;
    }
  }
}

bool listContains(FEM_Comm_List &list,int entry){
  double _startTime=CkWallTimer();
  for(int i=0;i<list.size();i++){
    if(entry == list[i]){
      listSearchTime += (CkWallTimer()-_startTime);
      return true;
    }
  }
  listSearchTime += (CkWallTimer()-_startTime);
  return false;
}

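/* Build one ghost layer.  Element boundary "tuples" (faces/edges made of
 * shared nodes, as described by the layer) are registered in a distributed
 * hashtable keyed by their global node numbers; matching tuples from other
 * chunks identify the elements and nodes that must be exchanged as ghosts
 * through a ghost-mesh MSA. */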
void makeGhost(FEM_Mesh *m,
               MPI_Comm comm,
               int masterRank,
               int totalShared,
               FEM_Ghost_Layer *layer,
               CkHashtableT<CkHashtableAdaptorT<int>,char> &sharedNode,
               CkHashtableT<CkHashtableAdaptorT<int>,int> &global2local)
{
  int myChunk;
  int numChunks;
  MPI_Comm_rank((MPI_Comm)comm,&myChunk);
  MPI_Comm_size((MPI_Comm)comm,&numChunks);

  MsaHashtable *distTab;
  if(myChunk == masterRank){
    distTab = new MsaHashtable(totalShared, numChunks);
  }else{
    distTab = new MsaHashtable;
  }
  MPI_Bcast_pup(*distTab,masterRank,comm);
  distTab->enroll(numChunks);
  DEBUG(printf("[%d] distributed table calling sync \n",myChunk));

  MsaHashtable::Add aDistTab = distTab->getInitialAdd();

  DEBUG(printf("Chunk %d Mesh: *********************************** \n",myChunk));
  DEBUG(printf("**********************************\n"));

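  /* Register candidate tuples: for every local element of a type this layer
   * adds, form each tuple of global node numbers and add it to the
   * distributed table, but only if all of the tuple's nodes are shared with
   * some other chunk.  The tuple is also remembered locally so the matching
   * pass below can find the element it came from. */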
  CkVec<Hashnode::tupledata> tupleVec;
  CkVec<int> indexVec;
  CkVec<int> elementVec;

  for(int i=0;i<m->elem.size();i++){
    if(m->elem.has(i)){
      if(layer->elem[i].add){
        for(int e=0;e<m->elem[i].size();e++){
          const int *conn = m->elem[i].connFor(e);
          for(int t=0;t<layer->elem[i].tuplesPerElem;t++){
            bool possibleGhost=true;
            int globalNodeTuple[Hashnode::tupledata::MAX_TUPLE];
            int nodesPerTuple = layer->nodesPerTuple;
            for(int n = 0;n<layer->nodesPerTuple;n++){
              int nodeindex=(layer->elem[i].elem2tuple)[t*layer->nodesPerTuple+n];
              if(nodeindex == -1){
                nodesPerTuple--;
                globalNodeTuple[n] = -1;
                continue;
              }
              int node=conn[nodeindex];
              globalNodeTuple[n]=m->node.getGlobalno(node);
              if(sharedNode.get(node) == 0){
                possibleGhost = false;
                break;
              }
            }

            if(possibleGhost){
              int index = aDistTab.addTuple(globalNodeTuple,nodesPerTuple,myChunk,m->nElems(i)+e);
              tupleVec.push_back(Hashnode::tupledata(globalNodeTuple));
              indexVec.push_back(index);
              elementVec.push_back(i);
              elementVec.push_back(e);
            }
          }
        }
      }
    }
  }

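  /* Elements that are already ghosts (added by earlier layers) contribute
   * their tuples as well, so that subsequent layers can grow from them. */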
  int ghostcount=0;
  for(int i=0;i<m->elem.size();i++){
    if(m->elem.has(i)){
      if(layer->elem[i].add){
        FEM_Elem *ghostElems = (FEM_Elem *)m->elem[i].getGhost();
        for(int e=0;e<ghostElems->size();e++){
          ghostcount++;
          const int *conn = ghostElems->connFor(e);
          for(int t=0;t<layer->elem[i].tuplesPerElem;t++){
            int globalNodeTuple[Hashnode::tupledata::MAX_TUPLE];
            FEM_Node *ghostNodes = (FEM_Node *)m->node.getGhost();
            int nodesPerTuple = layer->nodesPerTuple;
            for(int n=0;n<layer->nodesPerTuple;n++){
              int nodeindex=(layer->elem[i].elem2tuple)[t*layer->nodesPerTuple+n];
              if(nodeindex == -1){
                nodesPerTuple--;
                globalNodeTuple[n] = -1;
                continue;
              }
              int node=conn[nodeindex];
              if(FEM_Is_ghost_index(node)){
                globalNodeTuple[n]=ghostNodes->getGlobalno(FEM_From_ghost_index(node));
              }else{
                globalNodeTuple[n]=m->node.getGlobalno(node);
              }
            }

            aDistTab.addTuple(globalNodeTuple,nodesPerTuple,myChunk,ghostcount);
          }
        }
      }
    }
  }
  MsaHashtable::Read rDistTab = aDistTab.syncToRead();

  if(myChunk == masterRank){
    DEBUG(rDistTab.print());
  }

  DEBUG(printf("[%d] id %d says Ghost distributed hashtable printed \n",CkMyPe(),myChunk));

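  /* Ghost elements and nodes are exchanged through a ghost-mesh MSA: each
   * rank accumulates the pieces destined for other chunks, then reads back
   * the pieces other chunks accumulated for it. */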
  MSA1DFEMMESH *ghostmeshes;
  if(myChunk == masterRank){
    ghostmeshes = new MSA1DFEMMESH(numChunks,numChunks);
  }else{
    ghostmeshes = new MSA1DFEMMESH;
  }
  MPI_Bcast_pup(*ghostmeshes,masterRank,comm);
  ghostmeshes->enroll(numChunks);
  DEBUG(printf("[%d] id %d says ghostmeshes enroll done \n",CkMyPe(),myChunk));

  MSA1DFEMMESH::Accum aGhostMeshes = ghostmeshes->getInitialAccum();

  DEBUG(printf("[%d] id %d says ghostmeshes sync done \n",CkMyPe(),myChunk));

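  /* Matching pass: look up each locally registered tuple in the distributed
   * table.  Every other chunk that registered the same tuple receives this
   * element (and, if the layer asks for it, its not-already-shared nodes) as
   * ghosts; connectivity is rewritten to global node numbers before sending. */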
  char str[100];
  for(int i=0;i<tupleVec.size();i++){
    const Hashtuple &listTuple = rDistTab.get(indexVec[i]);

    int elType = elementVec[2*i];
    int elNo = elementVec[2*i+1];
    for(int j=0;j<listTuple.vec->size();j++){
      if((*listTuple.vec)[j].equals(tupleVec[i])){
        int destChunk=(*listTuple.vec)[j].chunk;

        if(destChunk != myChunk){
          FEM_Comm &sendGhostSide = m->elem[elType].setGhostSend();
          FEM_Comm_List &sendGhostList = sendGhostSide.addList(destChunk);
          if(listContains(sendGhostList,elNo)){
            continue;
          }

          sendGhostList.push_back(elNo);

          MeshElem::ElemInfo ei(m, elNo, elType);
          aGhostMeshes(destChunk) += ei;
          int globalelem = m->elem[elType].getGlobalno(elNo);
          DEBUG(printf("[%d] For chunk %d ghost element global no %d \n",myChunk,destChunk,globalelem));

          int* conn = m->elem[elType].connFor(elNo);
          for(int k=0;k<m->elem[elType].getNodesPer();k++){
            int lnode = conn[k];

            if(lnode < 0){
              continue;
            }

            if(sharedNode.get(lnode) == 0){
              sharedNode.put(lnode)=3;
            }
            int globalnode = m->node.getGlobalno(lnode);
            conn[k] = globalnode;
            assert(conn[k] >= 0);

            if(layer->addNodes && lnode >= 0){
              if(!sharedWith(lnode,(*listTuple.vec)[j].chunk,m)){
                FEM_Comm &sendNodeGhostSide = m->node.setGhostSend();
                FEM_Comm_List &sendNodeGhostList = sendNodeGhostSide.addList((*listTuple.vec)[j].chunk);
                if(!listContains(sendNodeGhostList,lnode)){
                  MeshElem::NodeInfo ni(m, lnode);
                  aGhostMeshes(destChunk) += ni;
                  DEBUG(printf("[%d] Ghost node (send) global no %d \n",myChunk,globalnode));

                  sendNodeGhostList.push_back(lnode);
                }
              }
            }
          }
        }
      }
    }
  }

  DEBUG(printf("[%d] finished creating ghost mesh \n",myChunk));

  MSA1DFEMMESH::Read rGhostMeshes = aGhostMeshes.syncToRead();

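  /* Read back the ghost mesh other chunks assembled for this rank and merge
   * it in: first the ghost nodes (keeping the global -> local map up to date
   * with negative indices for ghosts), then the ghost elements, translating
   * their connectivity from global numbers to local/ghost indices. */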
  FEM_Mesh *gmesh = rGhostMeshes.get(myChunk).m;
  DEBUG(printf("[%d] my ghost mesh is at %p \n",myChunk,gmesh));

  FEM_Node *gnodes = (FEM_Node *)m->node.getGhost();
  gnodes->copyShape(m->node);
  FEM_IndexAttribute *nodeSrcChunkAttr = (FEM_IndexAttribute *)gmesh->node.lookup(FEM_CHUNK,"makeGhosts");
  int *nodeSrcChunkNo = nodeSrcChunkAttr->get().getData();

  DEBUG(printf("[%d] gmesh->node.size() = %d \n",myChunk,gmesh->node.size()));
  for(int i=0;i<gmesh->node.size();i++){
    int gnum = gmesh->node.getGlobalno(i);
    int lindex = global2local.get(gnum);

    if(lindex == 0){
      int countgnodes = gnodes->push_back(gmesh->node,i);
      global2local.put(gnum) = -(countgnodes+1);
      lindex = -(countgnodes+1);
    }
    DEBUG(CkPrintf("[%d] Ghost node (recv) global node %d lindex %d \n",myChunk,gnum,lindex));

    FEM_Comm &recvNodeGhostSide = m->node.setGhostRecv();
    FEM_Comm_List &recvNodeGhostList = recvNodeGhostSide.addList(nodeSrcChunkNo[i]);
    recvNodeGhostList.push_back(-lindex-1);
  }

  for(int elType = 0;elType < gmesh->elem.size();elType++){
    if(gmesh->elem.has(elType)){
      FEM_IndexAttribute *elemSrcChunkAttr = (FEM_IndexAttribute *)gmesh->elem[elType].lookup(FEM_CHUNK,"makeGhosts");
      int *elemSrcChunkNo = elemSrcChunkAttr->get().getData();

      for(int e=0;e<gmesh->elem[elType].size();e++){
        m->elem[elType].getGhost()->copyShape(gmesh->elem[elType]);
        int lghostelem = m->elem[elType].getGhost()->push_back(gmesh->elem[elType],e);
        int *conn = ((FEM_Elem *)m->elem[elType].getGhost())->connFor(lghostelem);
        for(int n=0;n<gmesh->elem[elType].getNodesPer();n++){
          int gnode = conn[n];
          assert(gnode>=0);
          if(gnode >= 0){
            int lnode = global2local.get(gnode);

            if(lnode == 0){
              if(layer->addNodes){
                CkPrintf("[%d] Unknown global number %d \n",myChunk,gnode);
                CkAbort("Unknown global number for node in ghost element connectivity");
              }
              conn[n] = -1;
            }

            if(lnode > 0){
              conn[n] = lnode - 1;
            }

            if(lnode < 0){
              conn[n] = FEM_To_ghost_index((-lnode-1));
            }
          }else{
            conn[n] = -1;
          }
        }
        FEM_Comm &recvGhostSide = m->elem[elType].setGhostRecv();
        FEM_Comm_List &recvGhostList = recvGhostSide.addList(elemSrcChunkNo[e]);
        recvGhostList.push_back(lghostelem);
      }
    }
  }
  DEBUG(printf("[%d] Send ghost nodes \n",myChunk));
  DEBUG(m->node.getGhostSend().print());
  DEBUG(printf("[%d] Recv ghost nodes \n",myChunk));
  DEBUG(m->node.getGhostRecv().print());

  delete distTab;
  delete ghostmeshes;
  MPI_Barrier(comm);
}

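/* Returns true if local node lnode is already shared with the given chunk,
 * i.e. it appears in the IDXL shared-node list for that chunk. */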
bool sharedWith(int lnode,int chunk,FEM_Mesh *m){
  double _startTime=CkWallTimer();
  int lindex = m->node.shared.findLocalList(chunk);
  if(lindex != -1){
    const IDXL_List &llist = m->node.shared.getLocalList(lindex);
    for(int i=0;i<llist.size();i++){
      if(llist[i] == lnode){
        sharedSearchTime += CkWallTimer()-_startTime;
        return true;
      }
    }
    sharedSearchTime += CkWallTimer()-_startTime;
    return false;
  }else{
    sharedSearchTime += CkWallTimer()-_startTime;
    return false;
  }
}

#include "ParFUM.def.h"