00001
00006 #include "tetmesh.h"
00007 #include "transfer.h"
00008 #include "bbox.h"
00009 #include "paralleltransfer.h"
00010 #include "charm++.h"
00011 #include "GenericElement.h"
00012
00013 #define OSL_com_debug 0
00014 #define COORD_PER_POINT 3
00015 #define POINT_PER_TET 4
00016
00017 class progress_t {
00018 public:
00019 virtual ~progress_t();
00020 virtual void p(const char *where) {}
00021 };
00022
00024 class ConcreteLocalElement : public ConcreteElementNodeData {
00025 const TetMesh &mesh;
00026 const int *conn;
00027 const xfer_t *ptVals;
00028 int valsPerPt;
00029 public:
00030 ConcreteLocalElement(const TetMesh &mesh_,const xfer_t *ptVals_,int valsPerPt_)
00031 :mesh(mesh_), conn(0), ptVals(ptVals_), valsPerPt(valsPerPt_) { }
00032 void set(int tet) {
00033 conn=mesh.getTet(tet);
00034 }
00035
00037 virtual CPoint getNodeLocation(int i) const {
00038 return mesh.getPoint(conn[i]);
00039 }
00040
00042 virtual const double *getNodeData(int i) const {
00043 return &ptVals[conn[i]*valsPerPt];
00044 }
00045 };
00046
00047
00048 class parallelTransfer_c {
00049 collide_t voxels;
00050 int firstDest;
00053 inline bool isDest(int n) {return n>=firstDest;}
00054
00055 MPI_Comm mpi_comm;
00056 int myRank,commSize;
00058 inline bool isLocal(const int *coll) {return coll[1]==myRank;}
00059
00060 int valsPerTet, valsPerPt;
00061 const TetMesh &srcMesh;
00062 const xfer_t *srcTet;
00063 const xfer_t *srcPt;
00064 const TetMesh &destMesh;
00065 xfer_t *destTet;
00066 xfer_t *destPt;
00067 double *destVolumes;
00068
00069 ConcreteLocalElement theLocalElement;
00070
00073 void accumulateCellValues(const xfer_t *sCellVals,int dest,double sharedVolume) {
00074 for (int v=0;v<valsPerTet;v++) {
00075 destTet[dest*valsPerTet+v]+=sharedVolume*sCellVals[v];
00076 }
00077 destVolumes[dest]+=sharedVolume;
00078 }
00079
00082 void transferNodeValues(const ConcreteElementNodeData &srcElement, int dest)
00083 {
00084 GenericElement el(POINT_PER_TET);
00085 for (int dni=0;dni<POINT_PER_TET;dni++) {
00086 int dn=destMesh.getTet(dest)[dni];
00087 CkVector3d dnLoc=destMesh.getPoint(dn);
00088 CVector natc;
00089 if (el.element_contains_point(dnLoc,srcElement,natc))
00090 {
00091 el.interpolate_natural(valsPerPt,srcElement,natc,&destPt[dn*valsPerPt]);
00092 }
00093 }
00094 }
00095 public:
00096 parallelTransfer_c(collide_t voxels_,MPI_Comm mpi_comm_,
00097 int valsPerTet_,int valsPerPt_,
00098 const xfer_t *srcTet_,const xfer_t *srcPt_,const TetMesh &srcMesh_,
00099 xfer_t *destTet_,xfer_t *destPt_,const TetMesh &destMesh_)
00100 :voxels(voxels_), mpi_comm(mpi_comm_),
00101 valsPerTet(valsPerTet_), valsPerPt(valsPerPt_),
00102 srcMesh(srcMesh_),srcTet(srcTet_),srcPt(srcPt_),
00103 destMesh(destMesh_),destTet(destTet_),destPt(destPt_),
00104 theLocalElement(srcMesh, srcPt,valsPerPt)
00105 {
00106 MPI_Comm_rank(mpi_comm,&myRank);
00107 MPI_Comm_size(mpi_comm,&commSize);
00108 destVolumes=new double[destMesh.getTets()];
00109 for (int d=0;d<destMesh.getTets();d++) {
00110 destVolumes[d]=0;
00111 for (int v=0;v<valsPerTet;v++) {
00112 destTet[d*valsPerTet+v]=(xfer_t)0;
00113 }
00114 }
00115 }
00116 ~parallelTransfer_c() {
00117 delete[] destVolumes;
00118 }
00119
00121 void transfer(progress_t &progress);
00122 };
00123
00124 static bbox3d getBox(int t,const TetMesh &mesh) {
00125 bbox3d ret; ret.empty();
00126 const int *conn=mesh.getTet(t);
00127 for (int i=0;i<4;i++)
00128 ret.add(mesh.getPoint(conn[i]));
00129 return ret;
00130 }
00131
00132
00133 inline int bytesToXfer(int nBytes) {
00134 return (nBytes+sizeof(xfer_t)-1)/sizeof(xfer_t);
00135 }
00136
00137
00138 template <class D,class S>
00139 inline void copy(D *dest,const S *src,int n) {
00140 for (int i=0;i<n;i++) dest[i]=(D)src[i];
00141 }
00142
00143
00144 class meshState {
00145 public:
00146 int tetVal, ptVal;
00147 meshState(int t,int p) :tetVal(t), ptVal(p) {}
00148 };
00149
00151 class sendState : public meshState {
00152 public:
00153 const TetMesh &mesh;
00154 const xfer_t *tetVals;
00155 const xfer_t *ptVals;
00156
00157 const xfer_t *tetData(int t) const {return &tetVals[t*tetVal];}
00158 const xfer_t * ptData(int p) const {return & ptVals[p* ptVal];}
00159
00160 sendState(const TetMesh &m,int tv,int pv,const xfer_t *t,const xfer_t *p)
00161 :meshState(tv,pv), mesh(m), tetVals(t), ptVals(p) {}
00162 };
00163
00164
00165 class entityPackList {
00166 int *mark;
00167 int *locals;
00168 void allocate(int nGlobal) {
00169 mark=new int[nGlobal];
00170 for (int i=0;i<nGlobal;i++) mark[i]=-1;
00171 locals=new int[nGlobal];
00172 }
00173 public:
00174 int n;
00175
00176 entityPackList() {n=0; mark=0; locals=0;}
00177 ~entityPackList() {if (mark) {delete[] mark;delete[] locals;}}
00178
00181 bool add(int global,int nGlobal) {
00182 if (mark==NULL) allocate(nGlobal);
00183 if (mark[global]==-1) {
00184 mark[global]=n;
00185 locals[n]=global;
00186 n++;
00187 return true;
00188 }
00189 return false;
00190 }
00192 int getLocal(int global) const {return mark[global];}
00194 int getGlobal(int local) const {return locals[local];}
00195 };
00196
00207 class tetMeshChunk {
00208 xfer_t *buf;
00209
00210
00211 struct header_t {
00212 int nXfer;
00213 int nSend;
00214 int nTet;
00215 int nPt;
00216 };
00217 header_t *header;
00218 inline int headerSize() const
00219 {return bytesToXfer(sizeof(header_t));}
00220
00221 int *sendTets;
00222 inline int sendTetsSize(const meshState &s,int nSend) const
00223 {return bytesToXfer(nSend*sizeof(int));}
00224
00225 xfer_t *tetData;
00226 inline int tetDataRecordSize(const meshState &s) const
00227 {return bytesToXfer(POINT_PER_TET*sizeof(int))+s.tetVal;}
00228 inline int tetDataSize(const meshState &s,int nTet) const
00229 {return nTet*tetDataRecordSize(s); }
00230
00231 xfer_t *ptData;
00232 inline int ptDataRecordSize(const meshState &s) const
00233 {return COORD_PER_POINT+s.ptVal;}
00234 inline int ptDataSize(const meshState &s,int nPt) const
00235 {return nPt*ptDataRecordSize(s); }
00236 public:
00237 tetMeshChunk() {buf=NULL;}
00238 ~tetMeshChunk() { if (buf) delete[] buf;}
00239
00240
00242 void allocate(const meshState &s,int nSend,int nTet,int nPt) {
00243 int msgLen=headerSize()+sendTetsSize(s,nSend)+tetDataSize(s,nTet)+ptDataSize(s,nPt);
00244 buf=new xfer_t[msgLen];
00245 header=(header_t *)buf;
00246 header->nXfer=msgLen;
00247 header->nSend=nSend;
00248 header->nTet=nTet;
00249 header->nPt=nPt;
00250 setupPointers(s,msgLen);
00251 }
00252
00254 int messageSizeXfer(void) const {return header->nXfer;}
00255 double *messageBuf(void) const {return buf;}
00256
00257
00260 void receive(const meshState &s,xfer_t *buf_,int msgLen) {
00261 buf=buf_;
00262 setupPointers(s,msgLen);
00263 }
00264
00265
00267 inline int *getSendTets(void) {return sendTets;}
00268 inline int nSendTets(void) const {return header->nSend;}
00269
00271 inline int nTets(void) const {return header->nTet;}
00273 inline int *getTetConn(const meshState &s,int t) {
00274 return (int *)&tetData[t*tetDataRecordSize(s)];
00275 }
00277 inline xfer_t *getTetData(const meshState &s,int t) {
00278 return &tetData[t*tetDataRecordSize(s)+bytesToXfer(POINT_PER_TET*sizeof(int))];
00279 }
00280
00282 inline int nPts(void) const {return header->nPt;}
00284 inline xfer_t *getPtLoc(const meshState &s,int n) {
00285 return &ptData[n*ptDataRecordSize(s)];
00286 }
00288 inline xfer_t *getPtData(const meshState &s,int n) {
00289 return &ptData[n*ptDataRecordSize(s)+COORD_PER_POINT];
00290 }
00291
00292 private:
00293
00294 void setupPointers(const meshState &s,int msgLen) {
00295 xfer_t *b=buf;
00296 header=(header_t *)b; b+=headerSize();
00297 sendTets=(int *)b; b+=sendTetsSize(s,header->nSend);
00298 tetData=b; b+=tetDataSize(s,header->nTet);
00299 ptData=b; b+=ptDataSize(s,header->nPt);
00300 int nRead=b-buf;
00301 if (nRead!=header->nXfer || nRead!=msgLen)
00302 CkAbort("Tet mesh header length mismatch!");
00303 }
00304 };
00305
00307 class tetSender {
00308 entityPackList packTets,packPts;
00309 int nSend;
00310 int nPut;
00311 tetMeshChunk ck;
00312 MPI_Request sendReq;
00313 public:
00314 tetSender(void) {
00315 nSend=0; nPut=0;
00316 }
00317 ~tetSender() {
00318 }
00319
00321 void countTet(const sendState &s,int t) {
00322 nSend++;
00323 if (packTets.add(t,s.mesh.getTets())) {
00324 const int *conn=s.mesh.getTet(t);
00325 for (int i=0;i<POINT_PER_TET;i++)
00326 packPts.add(conn[i],s.mesh.getPoints());
00327 }
00328 }
00329 int getCount(void) const {return nSend;}
00330
00332 void putTet(const sendState &s,int t) {
00333 if (nPut==0)
00334 {
00335 ck.allocate(s,nSend,packTets.n,packPts.n);
00336 for (int t=0;t<packTets.n;t++)
00337 {
00338 int g=packTets.getGlobal(t);
00339 copy(ck.getTetData(s,t),s.tetData(g),s.tetVal);
00340
00341 const int *gNode=s.mesh.getTet(g);
00342 int *lNode=ck.getTetConn(s,t);
00343 for (int i=0;i<POINT_PER_TET;i++)
00344 lNode[i]=packPts.getLocal(gNode[i]);
00345 }
00346 for (int p=0;p<packPts.n;p++)
00347 {
00348 int g=packPts.getGlobal(p);
00349 copy(ck.getPtData(s,p),s.ptData(g),s.ptVal);
00350 copy(ck.getPtLoc(s,p),(const double *)s.mesh.getPoint(g),COORD_PER_POINT);
00351 }
00352 }
00353 ck.getSendTets()[nPut++]=packTets.getLocal(t);
00354
00355 }
00356
00358 void isend(MPI_Comm comm,int src,int dest) {
00359 if (nSend==0) return;
00360
00361 #if OSL_com_debug
00362 CkPrintf("%d sending %d records to %d\n", src,n,dest);
00363 #endif
00364 MPI_Isend(ck.messageBuf(),ck.messageSizeXfer(),
00365 PARALLELTRANSFER_MPI_DTYPE,
00366 dest,PARALLELTRANSFER_MPI_TAG,comm,&sendReq);
00367 }
00369 void wait(void) {
00370 if (nSend==0) return;
00371 MPI_Status sts;
00372 MPI_Wait(&sendReq,&sts);
00373 }
00374 };
00375
00376
00377
00379 class ConcreteNetworkElement : public ConcreteElementNodeData {
00380 const meshState *s;
00381 tetMeshChunk &ck;
00382 int tet;
00383 const int *tetConn;
00384 public:
00385 ConcreteNetworkElement(tetMeshChunk &ck_)
00386 :s(0), ck(ck_), tet(-1), tetConn(0) {}
00387 void setTet(const meshState &s_,int tet_) {
00388 s=&s_;
00389 tet=tet_;
00390 tetConn=ck.getTetConn(*s,tet);
00391 }
00392
00394 virtual CPoint getNodeLocation(int i) const {
00395 return CPoint(ck.getPtLoc(*s,tetConn[i]));
00396 }
00397
00399 virtual const double *getNodeData(int i) const {
00400 return ck.getPtData(*s,tetConn[i]);
00401 }
00402 };
00403
00405 class tetReceiver {
00406 int nRecv;
00407 tetMeshChunk ck;
00408 int outCount;
00409 ConcreteNetworkElement outElement;
00410 public:
00411 tetReceiver() :outElement(ck) { nRecv=0; }
00412 ~tetReceiver() { }
00413
00414 void count(void) { nRecv++;}
00415 int getCount(void) const {return nRecv;}
00416
00418 void recv(const meshState &s,MPI_Comm comm,int src) {
00419 if (!nRecv) return;
00420
00421
00422 MPI_Status sts;
00423 MPI_Probe(src,PARALLELTRANSFER_MPI_TAG,comm, &sts);
00424 int msgLen; MPI_Get_count(&sts, PARALLELTRANSFER_MPI_DTYPE, &msgLen);
00425
00426
00427 xfer_t *buf=new xfer_t[msgLen];
00428 MPI_Recv(buf,msgLen,PARALLELTRANSFER_MPI_DTYPE,src,PARALLELTRANSFER_MPI_TAG,comm,&sts);
00429 ck.receive(s,buf,msgLen);
00430
00431 outCount=0;
00432 }
00433
00436 ConcreteElementNodeData *getTet(const meshState &s,const xfer_t* &cells) {
00437 int t=ck.getSendTets()[outCount++];
00438 cells=ck.getTetData(s,t);
00439 outElement.setTet(s,t);
00440 return &outElement;
00441 }
00442 };
00443
00445 void parallelTransfer_c::transfer(progress_t &progress) {
00446 int s,d;
00447 int p;
00448 int c;
00449
00450
00451
00452
00453 progress.p("Finding bounding boxes");
00454 firstDest=srcMesh.getTets();
00455 int lastDest=firstDest+destMesh.getTets();
00456 bbox3d *boxes=new bbox3d[lastDest];
00457 int *prio=new int[lastDest];
00458 progress.p("Finding bounding boxes: src");
00459 for (s=0;s<firstDest;s++)
00460 { boxes[s]=getBox(s,srcMesh); prio[s]=1; }
00461 progress.p("Finding bounding boxes: dest");
00462 for (d=firstDest;d<lastDest;d++)
00463 { boxes[d]=getBox(d-firstDest,destMesh); prio[d]=2; }
00464
00465
00466
00467 progress.p("Colliding bounding boxes");
00468 COLLIDE_Boxes_prio(voxels, lastDest,(const double *)boxes,prio);
00469 delete[] boxes; delete[] prio;
00470
00471
00472
00473 progress.p("Extracting collision list");
00474 int nColl=COLLIDE_Count(voxels);
00475 int *coll=new int[3*nColl];
00476 COLLIDE_List(voxels,coll);
00477
00478
00479 progress.p("Finding communication size");
00480 sendState ss(srcMesh,valsPerTet,valsPerPt, srcTet,srcPt);
00481 tetReceiver *recv=new tetReceiver[commSize];
00482 tetSender *send=new tetSender[commSize];
00483 for (c=0;c<nColl;c++) {
00484 const int *cr=&coll[3*c];
00485 if (isLocal(cr)) continue;
00486
00487 if (isDest(cr[0]))
00488 recv[cr[1]].count();
00489 else
00490 send[cr[1]].countTet(ss,cr[0]);
00491 }
00492 #if OSL_com_debug
00493 printf("Rank %d: %d collisions, ",myRank,nColl);
00494 for (p=0;p<commSize;p++)
00495 if (send[p].getCount() || recv[p].getCount())
00496 printf("(%d s%d r%d) ",p,send[p].getCount(),recv[p].getCount());
00497 CkPrintf("\n");
00498 #endif
00499
00500
00501 progress.p("Creating outgoing messages");
00502 for (c=0;c<nColl;c++) {
00503 const int *cr=&coll[3*c];
00504 if ((!isLocal(cr)) && (!isDest(cr[0])))
00505 send[cr[1]].putTet(ss,cr[0]);
00506 }
00507
00508
00509 progress.p("Isend");
00510 for (p=0;p<commSize;p++) send[p].isend(mpi_comm,myRank,p);
00511
00512
00513 progress.p("Recv");
00514 for (p=0;p<commSize;p++) recv[p].recv(ss,mpi_comm,p);
00515
00516
00517 progress.p("Wait");
00518 for (p=0;p<commSize;p++) send[p].wait();
00519 delete[] send;
00520
00521
00522 progress.p("Transferring solution");
00523 for (c=0;c<nColl;c++) {
00524 const int *cr=&coll[3*c];
00525 int dest=-1;
00526 const xfer_t *sCell;
00527 ConcreteElementNodeData *srcElement=NULL;
00528 if (isLocal(cr)) {
00529 int src=cr[0];
00530 dest=cr[2]-firstDest;
00531
00532 if (isDest(src) || dest<0)
00533 CmiAbort("Collision library did not respect local priority");
00534 theLocalElement.set(src);
00535 srcElement=&theLocalElement;
00536 sCell=&srcTet[src*valsPerTet];
00537 }
00538 else if (isDest(cr[0])) {
00539 dest=cr[0]-firstDest;
00540 srcElement=recv[cr[1]].getTet(ss,sCell);
00541 }
00542
00543
00544 if (dest!=-1) {
00545 TetMeshElement destElement(dest,destMesh);
00546 double sharedVolume=getSharedVolumeTets(*srcElement,destElement);
00547 if (sharedVolume>0.0) {
00548 accumulateCellValues(sCell,dest,sharedVolume);
00549 transferNodeValues(*srcElement, dest);
00550 }
00551 }
00552 }
00553 delete[] recv;
00554 delete[] coll;
00555
00556
00557 progress.p("Normalizing transfer");
00558 for (d=0;d<destMesh.getTets();d++) {
00559 double trueVolume=destMesh.getTetVolume(d);
00560 double volErr=fabs(destVolumes[d]-trueVolume);
00561
00562 double accumScale=1.0/destVolumes[d];
00563 double relErr=volErr*accumScale;
00564 if (0 && (fabs(relErr)>1.0e-6 && volErr>1.0e-8)) {
00565 printf("WARNING: ------------- volume mismatch for cell %d -------------\n"
00566 " True volume %g, but total is only %g (err %g)\n",
00567 d,trueVolume,destVolumes[d],volErr);
00568
00569 }
00570
00571
00572
00573 for (int v=0;v<valsPerTet;v++)
00574 destTet[d*valsPerTet+v]*=accumScale;
00575 }
00576 }
00577
00578 class VerboseProgress_t : public progress_t {
00579 MPI_Comm comm;
00580 int myRank;
00581 const char *module;
00582 const char *last;
00583 double start;
00584 void printLast(void) {
00585 double t=MPI_Wtime();
00586 double withoutBarrier=t-start; start=t;
00587 MPI_Barrier(comm);
00588 t=MPI_Wtime();
00589 double barrier=t-start; start=t;
00590 if (myRank==0 && last && ((withoutBarrier>0.1) || (barrier>0.1)))
00591 {
00592 CkPrintf("%s: %s took %.6f s (+%.6f s imbalance)\n",
00593 module,last,withoutBarrier,barrier);
00594 fflush(stdout);
00595 }
00596 }
00597 public:
00598 VerboseProgress_t(MPI_Comm comm_,const char *module_) {
00599 comm=comm_;
00600 MPI_Comm_rank(comm,&myRank);
00601 last=NULL;
00602 module=module_;
00603 }
00604 ~VerboseProgress_t() {
00605 if (last) printLast();
00606 }
00607 virtual void p(const char *where) {
00608 printLast();
00609 last=where;
00610 }
00611 };
00612
00613 progress_t::~progress_t() {}
00614
00615
00616 void ParallelTransfer(collide_t voxels,MPI_Comm mpi_comm,
00617 int valsPerTet,int valsPerPt,
00618 const xfer_t *srcTet,const xfer_t *srcPt,const TetMesh &srcMesh,
00619 xfer_t *destTet,xfer_t *destPt,const TetMesh &destMesh)
00620 {
00621 parallelTransfer_c t(voxels,mpi_comm,valsPerTet,valsPerPt,
00622 srcTet,srcPt,srcMesh, destTet,destPt,destMesh);
00623 VerboseProgress_t p(mpi_comm,"ParallelTransfer");
00624 t.transfer(p);
00625 }
00626