00001
00005
00015 #include "ParFUM.h"
00016 #include "ParFUM_internals.h"
00017 #include "MsaHashtable.h"
00018
00019 #include "../parmetis/parmetis.h"
00020
00021 double elemlistaccTime=0;
00022 extern void clearPartition(void);
00023
00024 int FEM_Mesh_Parallel_broadcast(int fem_mesh,int masterRank,FEM_Comm_t comm_context){
00025 int myRank;
00026 MPI_Comm_rank((MPI_Comm)comm_context,&myRank);
00027
00028 int new_mesh;
00029 if(myRank == masterRank){
00030
00031
00032 printf("[%d] Memory usage on vp 0 at the begining of partition %d \n",CkMyPe(),CmiMemoryUsage());
00033 new_mesh=FEM_master_parallel_part(fem_mesh,masterRank,comm_context);
00034
00035 }else{
00036 new_mesh=FEM_slave_parallel_part(fem_mesh,masterRank,comm_context);
00037 }
00038
00039 MPI_Barrier((MPI_Comm)comm_context);
00040 if(myRank == masterRank){
00041 clearPartition();
00042 }
00043
00044 return new_mesh;
00045 }
00046
00047 int FEM_master_parallel_part(int fem_mesh,int masterRank,FEM_Comm_t comm_context){
00048 const char *caller="FEM_Create_connmsa";
00049 FEMAPI(caller);
00050 FEM_chunk *c=FEM_chunk::get(caller);
00051 FEM_Mesh *m=c->lookup(fem_mesh,caller);
00052 m->setAbsoluteGlobalno();
00053 int nelem = m->nElems();
00054 int numChunks;
00055 MPI_Comm_size((MPI_Comm)comm_context,&numChunks);
00056 printf("master -> number of elements %d \n",nelem);
00057 DEBUG(m->print(0));
00058
00059
00060
00061
00062
00063 MSA1DINT eptrMSA(nelem,numChunks);
00064 MSA1DINT eindMSA(nelem*10,numChunks);
00065
00066
00067
00068
00069 struct conndata data;
00070 data.nelem = nelem;
00071 data.nnode = m->node.size();
00072 data.arr1 = eptrMSA;
00073 data.arr2 = eindMSA;
00074 MPI_Bcast_pup(data,masterRank,(MPI_Comm)comm_context);
00075
00076 eptrMSA.enroll(numChunks);
00077 eindMSA.enroll(numChunks);
00078 MSA1DINT::Write wPtr = eptrMSA.getInitialWrite();
00079 MSA1DINT::Write wInd = eindMSA.getInitialWrite();
00080 int indcount=0,ptrcount=0;
00081 for(int t=0;t<m->elem.size();t++){
00082 if(m->elem.has(t)){
00083 FEM_Elem &k=m->elem[t];
00084 for(int e=0;e<k.size();e++){
00085 wPtr.set(ptrcount)=indcount;
00086 ptrcount++;
00087 for(int n=0;n<k.getNodesPer();n++){
00088 wInd.set(indcount)=k.getConn(e,n);
00089 indcount++;
00090 }
00091 }
00092 }
00093 }
00094 wPtr.set(ptrcount) = indcount;
00095 printf("master -> ptrcount %d indcount %d sizeof(MSA1DINT) %d sizeof(MSA1DINTLIST) %d memory %d\n",ptrcount,indcount,sizeof(MSA1DINT),sizeof(MSA1DINTLIST),CmiMemoryUsage());
00096
00097
00098
00099
00100
00101
00102
00103 FEM_Mesh *mesh_array=FEM_break_mesh(m,ptrcount,numChunks);
00104
00105
00106
00107 sendBrokenMeshes(mesh_array,comm_context);
00108 delete [] mesh_array;
00109 FEM_Mesh mypiece;
00110 MPI_Recv_pup(mypiece,masterRank,MESH_CHUNK_TAG,(MPI_Comm)comm_context);
00111
00112
00113
00114
00115 double parStartTime = CkWallTimer();
00116 MSA1DINT::Read rPtr = wPtr.syncToRead();
00117 MSA1DINT::Read rInd = wInd.syncToRead();
00118 printf("starting FEM_call_parmetis \n");
00119 struct partconndata *partdata = FEM_call_parmetis(data.nelem, rPtr, rInd, comm_context);
00120
00121 printf("done with parmetis %d FEM_Mesh %d in %.6lf \n",CmiMemoryUsage(),sizeof(FEM_Mesh),CkWallTimer()-parStartTime);
00122
00123 double dataArrangeStartTime = CkWallTimer();
00124
00125
00126
00127
00128 int totalNodes = m->node.size();
00129 MSA1DINTLIST nodepart(totalNodes,numChunks);
00130 MPI_Bcast_pup(nodepart,masterRank,(MPI_Comm)comm_context);
00131 nodepart.enroll(numChunks);
00132 MSA1DINTLIST::Accum nodepartAcc = nodepart.getInitialAccum();
00133
00134 FEM_write_nodepart(nodepartAcc,partdata,(MPI_Comm)comm_context);
00135 printf("Creating mapping of node to partition took %.6lf\n",CkWallTimer()-dataArrangeStartTime);
00136 dataArrangeStartTime = CkWallTimer();
00137 MSA1DINTLIST::Read nodepartRead = nodepartAcc.syncToRead();
00138
00139
00140
00141
00142 MSA1DNODELIST part2node(numChunks,numChunks);
00143 MPI_Bcast_pup(part2node,masterRank,(MPI_Comm)comm_context);
00144 part2node.enroll(numChunks);
00145 MSA1DNODELIST::Accum part2nodeAcc = part2node.getInitialAccum();
00146
00147 FEM_write_part2node(nodepartRead, part2nodeAcc, partdata, (MPI_Comm)comm_context);
00148
00149
00150
00151
00152
00153 MSA1DNODELIST::Read rPart2node = part2nodeAcc.syncToRead();
00154 NodeList lnodes = rPart2node.get(masterRank);
00155 lnodes.uniquify();
00156
00157
00158
00159 printf("Creating mapping of partition to node took %.6lf\n",CkWallTimer()-dataArrangeStartTime);
00160 printf("Time spent doing +=ElemList %.6lf \n",elemlistaccTime);
00161 dataArrangeStartTime = CkWallTimer();
00162
00163
00164
00165
00166 MSA1DFEMMESH part2mesh(numChunks,numChunks);
00167 MPI_Bcast_pup(part2mesh,masterRank,(MPI_Comm)comm_context);
00168 part2mesh.enroll(numChunks);
00169 MSA1DFEMMESH::Accum aPart2mesh = part2mesh.getInitialAccum();
00170
00171 FEM_write_part2mesh(aPart2mesh,partdata, &data,nodepartRead,numChunks,masterRank,&mypiece);
00172
00173
00174
00175 MSA1DFEMMESH::Read rPart2mesh = aPart2mesh.syncToRead();
00176 MeshElem me = rPart2mesh.get(masterRank);
00177
00178
00179 DEBUG(printf("[%d] Memory usage on vp 0 close to max %d \n",CkMyPe(),CmiMemoryUsage()));
00180
00181 delete &rPtr;
00182 delete &rInd;
00183 data.arr1.FreeMem();
00184 data.arr2.FreeMem();
00185 nodepart.FreeMem();
00186 DEBUG(printf("[%d] Memory usage on vp 0 after FreeMem %d \n",CkMyPe(),CmiMemoryUsage()));
00187
00188 addIDXLists(me.m,lnodes,masterRank);
00189
00190 part2node.FreeMem();
00191 DEBUG(printf("[%d] Memory usage on vp 0 after addIDXL %d \n",CkMyPe(),CmiMemoryUsage()));
00192
00193
00194
00195
00196 DEBUG(printf("[%d] Length of udata vector in master %d \n",masterRank,m->udata.size()));
00197 MPI_Bcast_pup(m->udata,masterRank,(MPI_Comm)comm_context);
00198 me.m->udata = m->udata;
00199
00200
00201 delete partdata;
00202
00203 printf("[%d] Data Arrangement took %.6lf \n",masterRank,CkWallTimer()-dataArrangeStartTime);
00204
00205
00206
00207
00208 struct ghostdata *gdata = gatherGhosts();
00209 DEBUG(printf("[%d] number of ghost layers %d \n",masterRank,gdata->numLayers));
00210 MPI_Bcast_pup(*gdata,masterRank,(MPI_Comm)comm_context);
00211
00212
00213
00214
00215 printf("[%d] Starting to generate number of ghost layers %d \n",masterRank,gdata->numLayers);
00216 double _startTime = CkWallTimer();
00217 makeGhosts(me.m,(MPI_Comm)comm_context,masterRank,gdata->numLayers,gdata->layers);
00218 delete gdata;
00219
00220 printf("[%d] Ghost generation took %.6lf \n",masterRank,CkWallTimer()-_startTime);
00221
00222 me.m->becomeGetting();
00223 FEM_chunk *chunk = FEM_chunk::get("FEM_Mesh_Parallel_broadcast");
00224 int tempMeshNo = chunk->meshes.put(me.m);
00225 int new_mesh = FEM_Mesh_copy(tempMeshNo);
00226
00227 FEM_Mesh *nmesh = c->lookup(new_mesh,"master_parallel_broadcast");
00228 DEBUG(printf("[%d] Length of udata vector in master new_mesh %d \n",masterRank,nmesh->udata.size()));
00229
00230 part2mesh.FreeMem();
00231 printf("[%d] Max Memory usage on vp 0 at end of parallel partition %d \n",CkMyPe(),CmiMaxMemoryUsage());
00232
00233 return new_mesh;
00234 }
00235
00236 int FEM_slave_parallel_part(int fem_mesh,int masterRank,FEM_Comm_t comm_context){
00237 int myRank;
00238 MPI_Comm_rank((MPI_Comm)comm_context,&myRank);
00239 int numChunks;
00240 MPI_Comm_size((MPI_Comm)comm_context,&numChunks);
00241
00242
00243
00244 struct conndata data;
00245 MPI_Bcast_pup(data,masterRank,(MPI_Comm)comm_context);
00246 data.arr1.enroll(numChunks);
00247 data.arr2.enroll(numChunks);
00248 DEBUG(printf("Recv -> %d \n",data.nelem));
00249
00250
00251
00252
00253
00254 FEM_Mesh mypiece;
00255 MPI_Recv_pup(mypiece,masterRank,MESH_CHUNK_TAG,(MPI_Comm)comm_context);
00256
00257
00258
00259
00260 MSA1DINT::Read rPtr = data.arr1.getInitialWrite().syncToRead();
00261 MSA1DINT::Read rInd = data.arr1.getInitialWrite().syncToRead();
00262 struct partconndata *partdata = FEM_call_parmetis(data.nelem, rPtr, rInd, comm_context);
00263
00264
00265
00266
00267 MSA1DINTLIST nodepart;
00268 MPI_Bcast_pup(nodepart,masterRank,(MPI_Comm)comm_context);
00269 nodepart.enroll(numChunks);
00270 MSA1DINTLIST::Accum nodepartAcc = nodepart.getInitialAccum();
00271
00272 FEM_write_nodepart(nodepartAcc,partdata,(MPI_Comm)comm_context);
00273
00274
00275
00276
00277
00278 MSA1DNODELIST part2node;
00279 MPI_Bcast_pup(part2node,masterRank,(MPI_Comm)comm_context);
00280 part2node.enroll(numChunks);
00281 MSA1DNODELIST::Accum part2nodeAcc = part2node.getInitialAccum();
00282 MSA1DINTLIST::Read nodepartRead = nodepartAcc.syncToRead();
00283
00284
00285 FEM_write_part2node(nodepartRead, part2nodeAcc, partdata, (MPI_Comm)comm_context);
00286
00287
00288
00289
00290 MSA1DNODELIST::Read part2nodeRead = part2nodeAcc.syncToRead();
00291 NodeList lnodes = part2nodeRead.get(myRank);
00292 lnodes.uniquify();
00293
00294
00295
00296
00297
00298 MSA1DFEMMESH part2mesh;
00299 MPI_Bcast_pup(part2mesh,masterRank,(MPI_Comm)comm_context);
00300 part2mesh.enroll(numChunks);
00301 MSA1DFEMMESH::Accum aPart2mesh = part2mesh.getInitialAccum();
00302 FEM_write_part2mesh(aPart2mesh, partdata, &data, nodepartRead,numChunks, myRank, &mypiece);
00303
00304
00305
00306
00307 MSA1DFEMMESH::Read rPart2mesh = aPart2mesh.syncToRead();
00308 MeshElem me = rPart2mesh.get(myRank);
00309
00310
00311
00312 delete &rPtr;
00313 delete &rInd;
00314 data.arr1.FreeMem();
00315 data.arr2.FreeMem();
00316 nodepart.FreeMem();
00317
00318 addIDXLists(me.m,lnodes,myRank);
00319
00320
00321
00322
00323 MPI_Bcast_pup(me.m->udata,masterRank,(MPI_Comm)comm_context);
00324 DEBUG(printf("[%d] Length of udata vector %d \n",myRank,me.m->udata.size()));
00325
00326 delete partdata;
00327
00328 struct ghostdata *gdata = new ghostdata;
00329 MPI_Bcast_pup(*gdata,masterRank,(MPI_Comm)comm_context);
00330
00331
00332
00333
00334
00335 makeGhosts(me.m,(MPI_Comm )comm_context,masterRank,gdata->numLayers,gdata->layers);
00336
00337 me.m->becomeGetting();
00338 FEM_chunk *chunk = FEM_chunk::get("FEM_Mesh_Parallel_broadcast");
00339 int tempMeshNo = chunk->meshes.put(me.m);
00340 int new_mesh = FEM_Mesh_copy(tempMeshNo);
00341
00342
00343 part2mesh.FreeMem();
00344 delete gdata;
00345 return new_mesh;
00346 }
00347
00348
00349
00350
00351
00352
00353 struct partconndata * FEM_call_parmetis(int nelem, MSA1DINT::Read &rPtr, MSA1DINT::Read &rInd, FEM_Comm_t comm_context)
00354 {
00355 int myRank,numChunks;
00356 MPI_Comm_size((MPI_Comm)comm_context,&numChunks);
00357 MPI_Comm_rank((MPI_Comm)comm_context,&myRank);
00358
00359
00360
00361
00362
00363
00364
00365
00366 int *elmdist = new int[numChunks+1];
00367 DEBUG(printf("[%d] elmdist \n",myRank));
00368 for(int i=0;i<=numChunks;i++){
00369 elmdist[i] = (i*nelem)/numChunks;
00370 DEBUG(printf(" %d ",elmdist[i]));
00371 }
00372 DEBUG(printf("\n"));
00373 int startindex = elmdist[myRank];
00374 int endindex = elmdist[myRank+1];
00375 int numindices = endindex - startindex;
00376 int *eptr = new int[numindices+1];
00377
00378
00379
00380
00381 int startConn = rPtr.get(startindex);
00382 int endConn = rPtr.get(endindex);
00383 int numConn = endConn - startConn;
00384 int *eind = new int[numConn];
00385 DEBUG(printf("%d startindex %d endindex %d startConn %d endConn %d \n",myRank,startindex,endindex,startConn,endConn));
00386 for(int i=startindex;i<endindex;i++){
00387 int conn1 = rPtr.get(i);
00388 int conn2 = rPtr.get(i+1);
00389 eptr[i-startindex] = conn1 - startConn;
00390 for(int j=conn1;j<conn2;j++){
00391 eind[j-startConn] = rInd.get(j);
00392 }
00393 }
00394 eptr[numindices] = endConn - startConn;
00395
00396
00397
00398
00399
00400
00401
00402
00403
00404
00405
00406
00407
00408 int wgtflag=0,numflag=0,ncon=1,ncommonnodes=2,options[5],edgecut=0;
00409 double ubvec = 1.05;
00410 double *tpwgts = new double[numChunks];
00411 int *parts = new int[numindices+1];
00412 for(int i=0;i<numChunks;i++){
00413 tpwgts[i]=1/(double)numChunks;
00414 }
00415 options[0]=0;
00416 MPI_Barrier((MPI_Comm)comm_context);
00417 ParMETIS_V3_PartMeshKway (elmdist,eptr,eind,NULL,&wgtflag,&numflag,&ncon,&ncommonnodes,&numChunks,tpwgts,&ubvec,options,&edgecut,parts,(MPI_Comm *)&comm_context);
00418 DEBUG(CkPrintf("%d partition ",myRank);)
00419 for(int i=0;i<numindices;i++){
00420 DEBUG(CkPrintf(" <%d %d> ",i+startindex,parts[i]));
00421 }
00422 DEBUG(CkPrintf("\n"));
00423 delete []elmdist;
00424 delete []tpwgts;
00425 struct partconndata *retval = new struct partconndata;
00426 retval->nelem = numindices;
00427 retval->eind = eind;
00428 retval->eptr = eptr;
00429 retval->part = parts;
00430 retval->startindex = startindex;
00431 return retval;
00432 }
00433
00434
00435
00436
00437
00438 void FEM_write_nodepart(MSA1DINTLIST::Accum &nodepart,struct partconndata *data,MPI_Comm comm_context){
00439 for(int i=0;i<data->nelem;i++){
00440 int start=data->eptr[i];
00441 int end = data->eptr[i+1];
00442 for(int j=start;j<end;j++){
00443 nodepart(data->eind[j]) += data->part[i];
00444 DEBUG(printf(" write_nodepart %d %d \n",data->eind[j],data->part[i]));
00445 }
00446 }
00447 }
00448
00449
00450
00451
00452
00453 void FEM_write_part2node(MSA1DINTLIST::Read &nodepart,
00454 MSA1DNODELIST::Accum &part2node,
00455 struct partconndata *data,
00456 MPI_Comm comm_context)
00457 {
00458 int nodes = nodepart.length();
00459 int myRank,numChunks;
00460
00461
00462
00463
00464 MPI_Comm_rank(comm_context,&myRank);
00465 MPI_Comm_size(comm_context,&numChunks);
00466 int start = (nodes*myRank)/numChunks;
00467 int end = (nodes*(myRank+1))/numChunks;
00468 for(int i=start;i<end;i++){
00469 IntList t = nodepart.get(i);
00470 t.uniquify();
00471 int num=0;
00472 if(t.vec->size()>1){
00473 num = t.vec->size();
00474 }
00475 NodeElem n(i,num);
00476 if(num != 0){
00477 for(int k=0;k<t.vec->size();k++){
00478 n.shared[k] =(*t.vec)[k];
00479 }
00480 }
00481
00482 for(int j=0;j<t.vec->size();j++){
00483 UniqElemList <NodeElem> en(n);
00484 part2node((*t.vec)[j]) += en;
00485 }
00486 }
00487 DEBUG(printf("done write_part2node\n"));
00488 }
00489
00490
00491
00492
00493 void FEM_write_part2elem(MSA1DINTLIST::Accum &part2elem,struct partconndata *data,MPI_Comm comm_context)
00494 {
00495 for(int i=0;i<data->nelem;i++){
00496 part2elem(data->part[i]) += data->startindex+i;
00497 }
00498 }
00499
00500
00501
00502
00503
00504
00505 FEM_Mesh * FEM_break_mesh(FEM_Mesh *m,int numElements,int numChunks){
00506 FEM_Mesh *mesh_array = new FEM_Mesh[numChunks];
00507 int *elmdist = new int[numChunks+1];
00508 int *nodedist = new int[numChunks+1];
00509 int numNodes = m->node.size();
00510
00511 for(int i=0;i<=numChunks;i++){
00512 elmdist[i] = (i*numElements)/numChunks;
00513 nodedist[i] = (i*numNodes)/numChunks;
00514 }
00515
00516 int elcount=0;
00517 int mcount=0;
00518 mesh_array[mcount].copyShape(*m);
00519 for(int t=0;t<m->elem.size();t++){
00520 if(m->elem.has(t)){
00521 FEM_Elem &k=m->elem[t];
00522 for(int e=0;e<k.size();e++){
00523 mesh_array[mcount].elem[t].push_back(k,e);
00524 elcount++;
00525 if(elcount == elmdist[mcount+1]){
00526 mcount++;
00527 if(mcount != numChunks){
00528 mesh_array[mcount].copyShape(*m);
00529 }
00530 }
00531
00532 }
00533 }
00534 }
00535 mcount=0;
00536 for(int i=0;i<m->node.size();i++){
00537 if(i == nodedist[mcount+1]){
00538 mcount++;
00539 }
00540 mesh_array[mcount].node.push_back(m->node,i);
00541 }
00542 delete [] elmdist;
00543 delete [] nodedist;
00544 return mesh_array;
00545 }
00546
00547
00548
00549 void sendBrokenMeshes(FEM_Mesh *mesh_array,FEM_Comm_t comm_context){
00550 int numChunks;
00551 MPI_Comm_size((MPI_Comm)comm_context,&numChunks);
00552 for(int i=0;i<numChunks;i++){
00553 MPI_Send_pup(mesh_array[i],i,MESH_CHUNK_TAG,(MPI_Comm)comm_context);
00554 }
00555 }
00556
00557 void FEM_write_part2mesh(MSA1DFEMMESH::Accum &part2mesh,
00558 struct partconndata *partdata,
00559 struct conndata *data,
00560 MSA1DINTLIST::Read &nodepart,
00561 int numChunks,
00562 int myChunk,
00563 FEM_Mesh *m)
00564 {
00565 int count=0;
00566
00567
00568 for(int t=0;t<m->elem.size();t++){
00569 if(m->elem.has(t)){
00570 const FEM_Elem &k=(m)->elem[t];
00571 for(int e=0;e<k.size();e++){
00572 int chunkID = partdata->part[count];
00573 MeshElem::ElemInfo ei(m, e, t);
00574 part2mesh(chunkID) += ei;
00575 count++;
00576 }
00577 }
00578 }
00579
00580 int startnode=(myChunk * data->nnode)/numChunks;
00581 for(int i=0;i<m->node.size();i++){
00582 IntList chunks = nodepart.get(i+startnode);
00583 chunks.uniquify();
00584 for(int j=0;j<chunks.vec->size();j++){
00585 MeshElem::NodeInfo ni(m, i);
00586 part2mesh((*(chunks.vec))[j]) += ni;
00587 }
00588 }
00589 }
00590
00591 void sortNodeList(NodeList &lnodes){
00592 CkVec<NodeElem> *vec = lnodes.vec;
00593 vec->quickSort();
00594 }
00595
00596
00597 void addIDXLists(FEM_Mesh *m,NodeList &lnodes,int myChunk){
00598 double startAddIDXL = CkWallTimer();
00599
00600
00601
00602 sortNodeList(lnodes);
00603 DEBUG(printf("[%d] Sorting finished %.6lfs after starting addIDXL\n",myChunk,CkWallTimer()-startAddIDXL));
00604
00605 CkVec<NodeElem> *vec = lnodes.vec;
00606
00607
00608
00609 CkHashtableT<CkHashtableAdaptorT<int>,int> global2local;
00610 for(int i=0;i<m->node.size();i++){
00611 int nodeno = m->node.getGlobalno(i);
00612 global2local.put(nodeno)=i;
00613 }
00614
00615
00616
00617
00618
00619
00620
00622 for(int i=0;i<vec->size();i++){
00624 int global = (*vec)[i].global;
00626 int local = global2local.get(global);
00627
00628 m->node.setPrimary(local,true);
00629
00630 if(((*vec)[i]).numShared > 0){
00632
00633 DEBUG(printf("[%d] Global node %d local %d node is shared \n",myChunk,global,local));
00634 for(int j=0;j<((*vec)[i]).numShared;j++){
00635 if((*vec)[i].shared[j] != myChunk){
00636
00637 m->node.shared.addList((*vec)[i].shared[j]).push_back(local);
00638 }
00639 if((*vec)[i].shared[j] < myChunk){
00640 m->node.setPrimary(local,false);
00641 }
00642 }
00643 }
00644 }
00645 DEBUG(m->node.shared.print());
00646
00647
00648
00649
00650
00651 for(int i=0;i<m->elem.size();i++){
00652 if(m->elem.has(i)){
00653 FEM_Elem &el=m->elem[i];
00654 for(int j=0;j<el.size();j++){
00655 int *conn = el.connFor(j);
00656 for(int k=0;k<el.getNodesPer();k++){
00657 int gnode = conn[k];
00658 int lnode = global2local.get(gnode);
00659 conn[k] = lnode;
00660 }
00661 }
00662 }
00663 }
00664 DEBUG(printf("[%d] Time for addIDXL %.6lfs\n",myChunk,CkWallTimer()-startAddIDXL));
00665 }
00666
00667
00668
00669
00670
00671
00672 struct ghostdata *gatherGhosts(){
00673 struct ghostdata *res = new struct ghostdata;
00674 FEM_Partition &part = FEM_curPartition();
00675 res->numLayers = part.getRegions();
00676 res->layers = new FEM_Ghost_Layer *[res->numLayers];
00677 int count=0;
00678
00679 for(int i=0;i<res->numLayers;i++){
00680 const FEM_Ghost_Region ®ion = part.getRegion(i);
00681 if(region.layer != NULL){
00682 res->layers[count]=region.layer;
00683 count++;
00684 }
00685 }
00686 res->numLayers = count;
00687 DEBUG(printf("gatherGhosts found %d layers \n",res->numLayers));
00688 printf("gatherGhosts found %d layers \n",res->numLayers);
00689 return res;
00690 }
00691
00692
00693
00694
00695
00696
00697
00698
00699
00700
00701 double listSearchTime=0;
00702 double sharedSearchTime=0;
00703
00704 void makeGhosts(FEM_Mesh *m, MPI_Comm comm, int masterRank, int numLayers, FEM_Ghost_Layer **layers)
00705 {
00706 int myChunk;
00707 int numChunks;
00708 MPI_Comm_rank((MPI_Comm)comm,&myChunk);
00709 MPI_Comm_size((MPI_Comm)comm,&numChunks);
00710
00711 double _startTime=CkWallTimer();
00712
00713 if(numChunks == 1){
00714 return;
00715 }
00716
00717
00718
00719
00720
00721 FEM_Comm *shared = &m->node.shared;
00722 int count=0;
00723 CkHashtableT<CkHashtableAdaptorT<int>,char> countedSharedNode;
00724 for(int i=0;i<shared->size();i++){
00725 const FEM_Comm_List &list = shared->getLocalList(i);
00726
00727
00728
00729
00730
00731
00732
00733 if(list.getDest() > myChunk){
00734 for(int j=0;j<list.size();j++){
00735 if(countedSharedNode.get(list[j]) == 0){
00736 count++;
00737 countedSharedNode.put(list[j]) = 1;
00738 }
00739 }
00740 }else{
00741 for(int j=0;j<list.size();j++){
00742 if(countedSharedNode.get(list[j]) == 1){
00743 count--;
00744 }
00745 countedSharedNode.put(list[j]) = 2;
00746 }
00747 }
00748 }
00749 int totalShared;
00750 MPI_Allreduce(&count, &totalShared,1, MPI_INT,
00751 MPI_SUM, comm);
00752
00753
00754
00755
00756 FEM_IndexAttribute *nchunkattr = (FEM_IndexAttribute *)m->node.lookup(FEM_CHUNK,"makeGhosts");
00757 int *nchunk = nchunkattr->get().getData();
00758 for(int i=0;i<nchunkattr->getLength();i++){
00759 nchunk[i] = myChunk;
00760 }
00761 for(int e = 0; e < m->elem.size();e++){
00762 if(m->elem.has(e)){
00763 FEM_IndexAttribute *echunkattr = (FEM_IndexAttribute *)m->elem[e].lookup(FEM_CHUNK,"makeGhosts");
00764 int *echunk = echunkattr->get().getData();
00765 for(int i=0;i<echunkattr->getLength();i++){
00766 echunk[i] = myChunk;
00767 }
00768 }
00769 }
00770
00771
00772
00773
00774
00775
00776
00777 CkHashtableT<CkHashtableAdaptorT<int>,int> global2local;
00778 for(int i=0;i<m->node.size();i++){
00779 global2local.put(m->node.getGlobalno(i))=i+1;
00780 }
00781 if(myChunk == 0){
00782 printf("Time to do preprocessing for ghosts %.6lf \n",CkWallTimer()-_startTime);
00783 }
00784 for(int i=0;i<numLayers;i++){
00785 if(myChunk == 0){printf("[%d] Starting ghost layer %d \n",myChunk,i);}
00786 _startTime = CkWallTimer();
00787 makeGhost(m,comm,masterRank,totalShared,layers[i],countedSharedNode,global2local);
00788 if(myChunk == 0){
00789 printf("[%d] Making ghost layer %d took %.6lf listSearch %.6lf sharedSearchTime %.6lf \n",myChunk,i,CkWallTimer()-_startTime,listSearchTime,sharedSearchTime);
00790 listSearchTime = 0;
00791 sharedSearchTime=0;
00792 }
00793 }
00794 }
00795
00796
00797
00798
00799 bool listContains(FEM_Comm_List &list,int entry){
00800 double _startTime=CkWallTimer();
00801 for(int i=0;i<list.size();i++){
00802 if(entry == list[i]){
00803 listSearchTime += (CkWallTimer()-_startTime);
00804 return true;
00805 }
00806 }
00807 listSearchTime += (CkWallTimer()-_startTime);
00808 return false;
00809 }
00810
00811 void makeGhost(FEM_Mesh *m,
00812 MPI_Comm comm,
00813 int masterRank,
00814 int totalShared,
00815 FEM_Ghost_Layer *layer,
00816 CkHashtableT<CkHashtableAdaptorT<int>,char> &sharedNode,
00817 CkHashtableT<CkHashtableAdaptorT<int>,int> &global2local)
00818 {
00819 int myChunk;
00820 int numChunks;
00821 MPI_Comm_rank((MPI_Comm)comm,&myChunk);
00822 MPI_Comm_size((MPI_Comm)comm,&numChunks);
00823
00824
00825
00826
00827
00828 MsaHashtable *distTab;
00829 if(myChunk == masterRank){
00830 distTab = new MsaHashtable(totalShared, numChunks);
00831 }else{
00832 distTab = new MsaHashtable;
00833 }
00834 MPI_Bcast_pup(*distTab,masterRank,comm);
00835 distTab->enroll(numChunks);
00836 DEBUG(printf("[%d] distributed table calling sync \n",myChunk));
00837
00838
00839
00840 MsaHashtable::Add aDistTab = distTab->getInitialAdd();
00841
00842 DEBUG(printf("Chunk %d Mesh: *********************************** \n",myChunk));
00843
00844 DEBUG(printf("**********************************\n"));
00845
00846
00847
00848
00849
00850
00851
00852 CkVec<Hashnode::tupledata> tupleVec;
00853 CkVec<int> indexVec;
00854 CkVec<int> elementVec;
00855
00856 for(int i=0;i<m->elem.size();i++){
00857 if(m->elem.has(i)){
00858
00859 if(layer->elem[i].add){
00860
00861 for(int e=0;e<m->elem[i].size();e++){
00862 const int *conn = m->elem[i].connFor(e);
00863
00864 for(int t=0;t< layer->elem[i].tuplesPerElem;t++){
00865 bool possibleGhost=true;
00866
00867 int globalNodeTuple[Hashnode::tupledata::MAX_TUPLE];
00868
00869
00870
00871
00872
00873 int nodesPerTuple = layer->nodesPerTuple;
00874 for(int n = 0;n<layer->nodesPerTuple;n++){
00875 int nodeindex=(layer->elem[i].elem2tuple)[t*layer->nodesPerTuple+n];
00876
00877
00878
00879
00880 if(nodeindex == -1){
00881 nodesPerTuple--;
00882 globalNodeTuple[n] =-1;
00883 continue;
00884 }
00885 int node=conn[nodeindex];
00886 globalNodeTuple[n]=m->node.getGlobalno(node);
00887 if(sharedNode.get(node) == 0){
00888 possibleGhost = false;
00889 break;
00890 }
00891 }
00892
00893 if(possibleGhost){
00894 int index = aDistTab.addTuple(globalNodeTuple,nodesPerTuple,myChunk,m->nElems(i)+e);
00895 tupleVec.push_back(Hashnode::tupledata(globalNodeTuple));
00896 indexVec.push_back(index);
00897 elementVec.push_back(i);
00898 elementVec.push_back(e);
00899 }
00900 }
00901 }
00902 }
00903 }
00904 }
00905
00906
00907
00908
00909
00910 int ghostcount=0;
00911 for(int i=0;i<m->elem.size();i++){
00912 if(m->elem.has(i)){
00913 if(layer->elem[i].add){
00914 FEM_Elem *ghostElems = (FEM_Elem *)m->elem[i].getGhost();
00915 for(int e=0;e<ghostElems->size();e++){
00916 ghostcount++;
00917 const int *conn = ghostElems->connFor(e);
00918 for(int t=0;t<layer->elem[i].tuplesPerElem;t++){
00919 int globalNodeTuple[Hashnode::tupledata::MAX_TUPLE];
00920 FEM_Node *ghostNodes = (FEM_Node *)m->node.getGhost();
00921 int nodesPerTuple = layer->nodesPerTuple;
00922 for(int n=0;n<layer->nodesPerTuple;n++){
00923 int nodeindex=(layer->elem[i].elem2tuple)[t*layer->nodesPerTuple+n];
00924 if(nodeindex == -1){
00925 nodesPerTuple--;
00926 globalNodeTuple[n] = -1;
00927 continue;
00928 }
00929 int node=conn[nodeindex];
00930 if(FEM_Is_ghost_index(node)){
00931 globalNodeTuple[n]=ghostNodes->getGlobalno(FEM_From_ghost_index(node));
00932 }else{
00933 globalNodeTuple[n]=m->node.getGlobalno(node);
00934 }
00935 }
00936
00937 aDistTab.addTuple(globalNodeTuple,nodesPerTuple,myChunk,ghostcount);
00938 }
00939 }
00940 }
00941 }
00942 }
00943 MsaHashtable::Read rDistTab = aDistTab.syncToRead();
00944
00945
00946
00947 if(myChunk == masterRank){
00948 DEBUG(rDistTab.print());
00949 }
00950
00951 DEBUG(printf("[%d] id %d says Ghost distributed hashtable printed \n",CkMyPe(),myChunk));
00952
00953 MSA1DFEMMESH *ghostmeshes;
00954 if(myChunk == masterRank){
00955 ghostmeshes = new MSA1DFEMMESH(numChunks,numChunks);
00956 }else{
00957 ghostmeshes = new MSA1DFEMMESH;
00958 }
00959 MPI_Bcast_pup(*ghostmeshes,masterRank,comm);
00960 ghostmeshes->enroll(numChunks);
00961 DEBUG(printf("[%d] id %d says ghostmeshes enroll done \n",CkMyPe(),myChunk));
00962
00963 MSA1DFEMMESH::Accum aGhostMeshes = ghostmeshes->getInitialAccum();
00964
00965 DEBUG(printf("[%d] id %d says ghostmeshes sync done \n",CkMyPe(),myChunk));
00966
00967
00968
00969
00970
00971
00972 char str[100];
00973 for(int i=0;i<tupleVec.size();i++){
00974 const Hashtuple &listTuple = rDistTab.get(indexVec[i]);
00975
00976 int elType = elementVec[2*i];
00977 int elNo = elementVec[2*i+1];
00978 for(int j=0;j<listTuple.vec->size();j++){
00979
00980
00981 if((*listTuple.vec)[j].equals(tupleVec[i])){
00982 int destChunk=(*listTuple.vec)[j].chunk;
00983
00984
00985
00986 if(destChunk != myChunk){
00987
00988
00989
00990
00991 FEM_Comm &sendGhostSide = m->elem[elType].setGhostSend();
00992 FEM_Comm_List &sendGhostList = sendGhostSide.addList(destChunk);
00993 if(listContains(sendGhostList,elNo)){
00994
00995
00996
00997
00998 continue;
00999 }
01000
01001
01002 sendGhostList.push_back(elNo);
01003
01004
01005
01006 MeshElem::ElemInfo ei(m, elNo, elType);
01007 aGhostMeshes(destChunk) += ei;
01008 int globalelem = m->elem[elType].getGlobalno(elNo);
01009 DEBUG(printf("[%d] For chunk %d ghost element global no %d \n",myChunk,destChunk,globalelem));
01010
01011
01012
01013
01014
01015 int* conn = m->elem[elType].connFor(elNo);
01016 for(int k=0;k<m->elem[elType].getNodesPer();k++){
01017 int lnode = conn[k];
01018
01019 if(lnode < 0){
01020 continue;
01021 }
01022
01023
01024
01025 if(sharedNode.get(lnode) == 0){
01026 sharedNode.put(lnode)=3;
01027 }
01028 int globalnode = m->node.getGlobalno(lnode);
01029 conn[k] = globalnode;
01030 assert(conn[k] >= 0);
01031
01032
01033
01034
01035 if(layer->addNodes && lnode >= 0){
01036
01037
01038
01039
01040
01041 if(!sharedWith(lnode,(*listTuple.vec)[j].chunk,m)){
01042 FEM_Comm &sendNodeGhostSide = m->node.setGhostSend();
01043 FEM_Comm_List &sendNodeGhostList = sendNodeGhostSide.addList((*listTuple.vec)[j].chunk);
01044 if(!listContains(sendNodeGhostList,lnode)){
01045 MeshElem::NodeInfo ni(m, lnode);
01046 aGhostMeshes(destChunk) += ni;
01047 DEBUG(printf("[%d] Ghost node (send) global no %d \n",myChunk,globalnode));
01048
01049 sendNodeGhostList.push_back(lnode);
01050 }
01051 }
01052 }
01053 }
01054 }
01055 }
01056 }
01057
01058 }
01059
01060
01061 DEBUG(printf("[%d] finished creating ghost mesh \n",myChunk));
01062
01063 MSA1DFEMMESH::Read rGhostMeshes = aGhostMeshes.syncToRead();
01064
01065
01066
01067
01068
01069
01070
01071
01072 FEM_Mesh *gmesh = rGhostMeshes.get(myChunk).m;
01073 DEBUG(printf("[%d] my ghost mesh is at %p \n",myChunk,gmesh));
01074
01075 FEM_Node *gnodes = (FEM_Node *)m->node.getGhost();
01076 gnodes->copyShape(m->node);
01077 FEM_IndexAttribute *nodeSrcChunkAttr = (FEM_IndexAttribute *)gmesh->node.lookup(FEM_CHUNK,"makeGhosts");
01078 int *nodeSrcChunkNo = nodeSrcChunkAttr->get().getData();
01079
01080 DEBUG(printf("[%d] gmesh->node.size() = %d \n",myChunk,gmesh->node.size()));
01081 for(int i=0;i<gmesh->node.size();i++){
01082 int gnum = gmesh->node.getGlobalno(i);
01083 int lindex = global2local.get(gnum);
01084
01085 if(lindex == 0){
01086 int countgnodes = gnodes->push_back(gmesh->node,i);
01087 global2local.put(gnum) = -(countgnodes+1);
01088 lindex = -(countgnodes+1);
01089 }
01090 DEBUG(CkPrintf("[%d] Ghost node (recv) global node %d lindex %d \n",myChunk,gnum,lindex));
01091
01092 FEM_Comm &recvNodeGhostSide = m->node.setGhostRecv();
01093 FEM_Comm_List &recvNodeGhostList = recvNodeGhostSide.addList(nodeSrcChunkNo[i]);
01094 recvNodeGhostList.push_back(-lindex-1);
01095
01096 }
01097
01098
01099
01100
01101
01102
01103
01104
01105 for(int elType = 0;elType < gmesh->elem.size();elType++){
01106 if(gmesh->elem.has(elType)){
01107 FEM_IndexAttribute *elemSrcChunkAttr = (FEM_IndexAttribute *)gmesh->elem[elType].lookup(FEM_CHUNK,"makeGhosts");
01108 int *elemSrcChunkNo = elemSrcChunkAttr->get().getData();
01109
01110 for(int e=0;e<gmesh->elem[elType].size();e++){
01111 m->elem[elType].getGhost()->copyShape(gmesh->elem[elType]);
01112 int lghostelem = m->elem[elType].getGhost()->push_back(gmesh->elem[elType],e);
01113 int *conn = ((FEM_Elem *)m->elem[elType].getGhost())->connFor(lghostelem);
01114 for(int n=0;n<gmesh->elem[elType].getNodesPer();n++){
01115 int gnode = conn[n];
01116 assert(gnode>=0);
01117 if(gnode >= 0){
01118 int lnode = global2local.get(gnode);
01119
01120 if(lnode == 0){
01121 if(layer->addNodes){
01122 CkPrintf("[%d]Unknown global number %d \n",myChunk,gnode);
01123 CkAbort("Unknown global number for node in ghost element connectivity");
01124 }
01125 conn[n] = -1;
01126 }
01127
01128 if(lnode > 0){
01129 conn[n] = lnode -1;
01130 }
01131
01132 if(lnode < 0){
01133 conn[n] = FEM_To_ghost_index((-lnode-1));
01134 }
01135 }else{
01136 conn[n] = -1;
01137 }
01138 }
01139 FEM_Comm &recvGhostSide = m->elem[elType].setGhostRecv();
01140 FEM_Comm_List &recvGhostList = recvGhostSide.addList(elemSrcChunkNo[e]);
01141 recvGhostList.push_back(lghostelem);
01142
01143 }
01144 }
01145 }
01146 DEBUG(printf("[%d] Send ghost nodes \n",myChunk));
01147 DEBUG(m->node.getGhostSend().print());
01148 DEBUG(printf("[%d] Recv ghost nodes \n",myChunk));
01149 DEBUG(m->node.getGhostRecv().print());
01150
01151 delete &rDistTab;
01152 delete &rGhostMeshes;
01153 delete distTab;
01154 delete ghostmeshes;
01155 MPI_Barrier(comm);
01156 }
01157
01158
01159
01160 bool sharedWith(int lnode,int chunk,FEM_Mesh *m){
01161 double _startTime=CkWallTimer();
01162 int lindex = m->node.shared.findLocalList(chunk);
01163 if(lindex != -1){
01164 const IDXL_List & llist = m->node.shared.getLocalList(lindex);
01165 for(int i=0;i<llist.size();i++){
01166 if(llist[i] == lnode){
01167 sharedSearchTime += CkWallTimer()-_startTime;
01168 return true;
01169 }
01170 }
01171 sharedSearchTime += CkWallTimer()-_startTime;
01172 return false;
01173 }else{
01174 sharedSearchTime += CkWallTimer()-_startTime;
01175 return false;
01176 }
01177 }
01178 #include "ParFUM.def.h"