00001
00006 #include "triSurfMesh.h"
00007 #include "prismMesh.h"
00008 #include "transfer.h"
00009 #include "bbox.h"
00010 #include "parallelsurfacetransfer.h"
00011 #include "charm++.h"
00012 #include "GenericElement.h"
00013
00014 #define COORD_PER_POINT 3
00015 #define POINT_PER_PRISM 6
00016 #define POINT_PER_TRIANGLE 3
00017
00018 class surfProgress_t {
00019 public:
00020 virtual ~surfProgress_t();
00021 virtual void p(const char *where) {}
00022 };
00023
00025 class ConcreteLocalElement : public ConcreteElementNodeData {
00026 const PrismMesh &mesh;
00027 const int *conn;
00028 const double *ptVals;
00029 int valsPerPt;
00030 public:
00031 ConcreteLocalElement(const PrismMesh &mesh_,const double *ptVals_,int valsPerPt_)
00032 :mesh(mesh_), conn(0), ptVals(ptVals_), valsPerPt(valsPerPt_) { }
00033 void set(int prism) {
00034 conn=mesh.getPrism(prism);
00035 }
00036
00038 virtual CPoint getNodeLocation(int i) const {
00039 return mesh.getPoint(conn[i]);
00040 }
00041
00043 virtual const double *getNodeData(int i) const {
00044 return &ptVals[conn[i]*valsPerPt];
00045 }
00046 };
00047
00048
00049 class parallelSurfaceTransfer_c {
00050 collide_t voxels;
00051 int firstDest;
00054 inline bool isDest(int n) {return n>=firstDest;}
00055
00056 MPI_Comm mpi_comm;
00057 int myRank,commSize;
00059 inline bool isLocal(const int *coll) {return coll[1]==myRank;}
00060
00061 int valsPerFace, valsPerPt;
00062 const PrismMesh &srcMesh;
00063 const double *srcFaceVals;
00064 const double *srcPtVals;
00065 const TriangleSurfaceMesh &destMesh;
00066 double *destFaceVals;
00067 double *destPtVals;
00068 double *destAreas;
00069
00070 ConcreteLocalElement theLocalElement;
00071
00074 void accumulateCellValues(const double *sCellVals, int dest, double sharedArea) {
00075 for (int v=0;v<valsPerFace;v++) {
00076 destFaceVals[dest*valsPerFace+v]+=sharedArea*sCellVals[v];
00077 }
00078 destAreas[dest]+=sharedArea;
00079 }
00080
00083 void transferNodeValues(const ConcreteElementNodeData &srcElement, int dest)
00084 {
00085 GenericElement el(POINT_PER_PRISM);
00086 for (int dni=0;dni<POINT_PER_TRIANGLE;dni++) {
00087 int dn=destMesh.getTriangle(dest)[dni];
00088 CkVector3d dnLoc=destMesh.getPoint(dn);
00089 CVector natc;
00090 if (el.element_contains_point(dnLoc,srcElement,natc))
00091 {
00092 el.interpolate_natural(valsPerPt,srcElement,natc,&destPtVals[dn*valsPerPt]);
00093 }
00094 }
00095 }
00096 public:
00097 parallelSurfaceTransfer_c(collide_t voxels_, MPI_Comm mpi_comm_, int valsPerFace_,
00098 int valsPerPt_, const double *srcFaceVals_,
00099 const double *srcPtVals_, const PrismMesh &srcMesh_,
00100 double *destFaceVals_, double *destPtVals_,
00101 const TriangleSurfaceMesh &destMesh_)
00102 :voxels(voxels_), mpi_comm(mpi_comm_),
00103 valsPerFace(valsPerFace_), valsPerPt(valsPerPt_),
00104 srcMesh(srcMesh_),srcFaceVals(srcFaceVals_),srcPtVals(srcPtVals_),
00105 destMesh(destMesh_),destFaceVals(destFaceVals_),destPtVals(destPtVals_),
00106 theLocalElement(srcMesh,srcPtVals,valsPerPt)
00107 {
00108 MPI_Comm_rank(mpi_comm,&myRank);
00109 MPI_Comm_size(mpi_comm,&commSize);
00110 destAreas=new double[destMesh.getTriangles()];
00111 for (int d=0;d<destMesh.getTriangles();d++) {
00112 destAreas[d]=0;
00113 for (int v=0;v<valsPerFace;v++) {
00114 destFaceVals[d*valsPerFace+v]=(double)0;
00115 }
00116 }
00117 }
00118 ~parallelSurfaceTransfer_c() {
00119 delete[] destAreas;
00120 }
00121
00123 void transfer(surfProgress_t &surfProgress);
00124 };
00125
00126 static bbox3d getPrismBox(int t,const PrismMesh &mesh) {
00127 bbox3d ret; ret.empty();
00128 const int *conn=mesh.getPrism(t);
00129 for (int i=0;i<POINT_PER_PRISM;i++)
00130 ret.add(mesh.getPoint(conn[i]));
00131 return ret;
00132 }
00133
00134 static bbox3d getTriangleBox(int t,const TriangleSurfaceMesh &mesh) {
00135 bbox3d ret; ret.empty();
00136 const int *conn=mesh.getTriangle(t);
00137 for (int i=0;i<POINT_PER_TRIANGLE;i++)
00138 ret.add(mesh.getPoint(conn[i]));
00139 return ret;
00140 }
00141
00142
00143 inline int bytesToXfer(int nBytes) {
00144 return (nBytes+sizeof(double)-1)/sizeof(double);
00145 }
00146
00147
00148 template <class D,class S>
00149 inline void copy(D *dest,const S *src,int n) {
00150 for (int i=0;i<n;i++) dest[i]=(D)src[i];
00151 }
00152
00153
00154 class meshState {
00155 public:
00156 int faceVal, ptVal;
00157 meshState(int t,int p) :faceVal(t), ptVal(p) {}
00158 };
00159
00161 class sendState : public meshState {
00162 public:
00163 const PrismMesh &mesh;
00164 const double *faceVals;
00165 const double *ptVals;
00166
00167 const double *faceData(int t) const {return &faceVals[t*faceVal];}
00168 const double * ptData(int p) const {return & ptVals[p* ptVal];}
00169
00170 sendState(const PrismMesh &m,int tv,int pv,const double *t,const double *p)
00171 :meshState(tv,pv), mesh(m), faceVals(t), ptVals(p) {}
00172 };
00173
00174
00175 class entityPackList {
00176 int *mark;
00177 int *locals;
00178 void allocate(int nGlobal) {
00179 mark=new int[nGlobal];
00180 for (int i=0;i<nGlobal;i++) mark[i]=-1;
00181 locals=new int[nGlobal];
00182 }
00183 public:
00184 int n;
00185
00186 entityPackList() {n=0; mark=0; locals=0;}
00187 ~entityPackList() {if (mark) {delete[] mark;delete[] locals;}}
00188
00191 bool add(int global,int nGlobal) {
00192 if (mark==NULL) allocate(nGlobal);
00193 if (mark[global]==-1) {
00194 mark[global]=n;
00195 locals[n]=global;
00196 n++;
00197 return true;
00198 }
00199 return false;
00200 }
00202 int getLocal(int global) const {return mark[global];}
00204 int getGlobal(int local) const {return locals[local];}
00205 };
00206
00217 class meshChunk {
00218 double *buf;
00219
00220
00221 struct header_t {
00222 int nXfer;
00223 int nSend;
00224 int nFace;
00225 int nPt;
00226 };
00227 header_t *header;
00228 inline int headerSize() const
00229 {return bytesToXfer(sizeof(header_t));}
00230
00231 int *sendFaces;
00232 inline int sendFacesSize(const meshState &s,int nSend) const
00233 {return bytesToXfer(nSend*sizeof(int));}
00234
00235 double *faceData;
00236 inline int faceDataRecordSize(const meshState &s) const
00237 {return bytesToXfer(POINT_PER_PRISM*sizeof(int))+s.faceVal;}
00238 inline int faceDataSize(const meshState &s,int nFaces) const
00239 {return nFaces*faceDataRecordSize(s); }
00240
00241 double *ptData;
00242 inline int ptDataRecordSize(const meshState &s) const
00243 {return COORD_PER_POINT+s.ptVal;}
00244 inline int ptDataSize(const meshState &s,int nPt) const
00245 {return nPt*ptDataRecordSize(s); }
00246 public:
00247 meshChunk() {buf=NULL;}
00248 ~meshChunk() { if (buf) delete[] buf;}
00249
00250
00252 void allocate(const meshState &s,int nSend,int nFace,int nPt) {
00253 int msgLen=headerSize()+sendFacesSize(s,nSend)+faceDataSize(s,nFace)+ptDataSize(s,nPt);
00254 buf=new double[msgLen];
00255 header=(header_t *)buf;
00256 header->nXfer=msgLen;
00257 header->nSend=nSend;
00258 header->nFace=nFace;
00259 header->nPt=nPt;
00260 setupPointers(s,msgLen);
00261 }
00262
00264 int messageSizeXfer(void) const {return header->nXfer;}
00265 double *messageBuf(void) const {return buf;}
00266
00267
00270 void receive(const meshState &s,double *buf_,int msgLen) {
00271 buf=buf_;
00272 setupPointers(s,msgLen);
00273 }
00274
00275
00277 inline int *getSendFaces(void) {return sendFaces;}
00278 inline int nSendFaces(void) const {return header->nSend;}
00279
00281 inline int nFaces(void) const {return header->nFace;}
00283 inline int *getFaceConn(const meshState &s,int t) {
00284 return (int *)&faceData[t*faceDataRecordSize(s)];
00285 }
00287 inline double *getFaceData(const meshState &s,int t) {
00288 return &faceData[t*faceDataRecordSize(s)+bytesToXfer(POINT_PER_PRISM*sizeof(int))];
00289 }
00290
00292 inline int nPts(void) const {return header->nPt;}
00294 inline double *getPtLoc(const meshState &s,int n) {
00295 return &ptData[n*ptDataRecordSize(s)];
00296 }
00298 inline double *getPtData(const meshState &s,int n) {
00299 return &ptData[n*ptDataRecordSize(s)+COORD_PER_POINT];
00300 }
00301
00302 private:
00303
00304 void setupPointers(const meshState &s,int msgLen) {
00305 double *b=buf;
00306 header=(header_t *)b; b+=headerSize();
00307 sendFaces=(int *)b; b+=sendFacesSize(s,header->nSend);
00308 faceData=b; b+=faceDataSize(s,header->nFace);
00309 ptData=b; b+=ptDataSize(s,header->nPt);
00310 int nRead=b-buf;
00311 if (nRead!=header->nXfer || nRead!=msgLen)
00312 CkAbort("Mesh header length mismatch!");
00313 }
00314 };
00315
00317 class faceSender {
00318 entityPackList packFaces,packPts;
00319 int nSend;
00320 int nPut;
00321 meshChunk ck;
00322 MPI_Request sendReq;
00323 public:
00324 faceSender(void) {
00325 nSend=0; nPut=0;
00326 }
00327 ~faceSender() {
00328 }
00329
00331 void countFace(const sendState &s,int t) {
00332 nSend++;
00333 if (packFaces.add(t,s.mesh.getPrisms())) {
00334 const int *conn=s.mesh.getPrism(t);
00335 for (int i=0;i<POINT_PER_PRISM;i++)
00336 packPts.add(conn[i],s.mesh.getPoints());
00337 }
00338 }
00339 int getCount(void) const {return nSend;}
00340
00342 void putFace(const sendState &s,int t) {
00343 if (nPut==0)
00344 {
00345 ck.allocate(s,nSend,packFaces.n,packPts.n);
00346 for (int t=0;t<packFaces.n;t++)
00347 {
00348 int g=packFaces.getGlobal(t);
00349 copy(ck.getFaceData(s,t),s.faceData(g),s.faceVal);
00350
00351 const int *gNode=s.mesh.getPrism(g);
00352 int *lNode=ck.getFaceConn(s,t);
00353 for (int i=0;i<POINT_PER_PRISM;i++)
00354 lNode[i]=packPts.getLocal(gNode[i]);
00355 }
00356 for (int p=0;p<packPts.n;p++)
00357 {
00358 int g=packPts.getGlobal(p);
00359 copy(ck.getPtData(s,p),s.ptData(g),s.ptVal);
00360 copy(ck.getPtLoc(s,p),(const double *)s.mesh.getPoint(g),COORD_PER_POINT);
00361 }
00362 }
00363 ck.getSendFaces()[nPut++]=packFaces.getLocal(t);
00364
00365 }
00366
00368 void isend(MPI_Comm comm,int src,int dest) {
00369 if (nSend==0) return;
00370
00371 #if OSL_com_debug
00372 CkPrintf("%d sending %d records to %d\n", src,n,dest);
00373 #endif
00374 MPI_Isend(ck.messageBuf(),ck.messageSizeXfer(),
00375 PARALLELTRANSFER_MPI_DTYPE,
00376 dest,PARALLELTRANSFER_MPI_TAG,comm,&sendReq);
00377 }
00379 void wait(void) {
00380 if (nSend==0) return;
00381 MPI_Status sts;
00382 MPI_Wait(&sendReq,&sts);
00383 }
00384 };
00385
00387 class ConcreteNetworkElement : public ConcreteElementNodeData {
00388 const meshState *s;
00389 meshChunk &ck;
00390 int face;
00391 const int *faceConn;
00392 public:
00393 ConcreteNetworkElement(meshChunk &ck_)
00394 :s(0), ck(ck_), face(-1), faceConn(0) {}
00395 void setFace(const meshState &s_,int face_) {
00396 s=&s_;
00397 face=face_;
00398 faceConn=ck.getFaceConn(*s,face);
00399 }
00400
00402 virtual CPoint getNodeLocation(int i) const {
00403 return CPoint(ck.getPtLoc(*s,faceConn[i]));
00404 }
00405
00407 virtual const double *getNodeData(int i) const {
00408 return ck.getPtData(*s,faceConn[i]);
00409 }
00410 };
00411
00413 class faceReceiver {
00414 int nRecv;
00415 meshChunk ck;
00416 int outCount;
00417 ConcreteNetworkElement outElement;
00418 public:
00419 faceReceiver() :outElement(ck) { nRecv=0; }
00420 ~faceReceiver() { }
00421
00422 void count(void) { nRecv++;}
00423 int getCount(void) const {return nRecv;}
00424
00426 void recv(const meshState &s,MPI_Comm comm,int src) {
00427 if (!nRecv) return;
00428
00429
00430 MPI_Status sts;
00431 MPI_Probe(src,PARALLELTRANSFER_MPI_TAG,comm, &sts);
00432 int msgLen; MPI_Get_count(&sts, PARALLELTRANSFER_MPI_DTYPE, &msgLen);
00433
00434
00435 double *buf=new double[msgLen];
00436 MPI_Recv(buf,msgLen,PARALLELTRANSFER_MPI_DTYPE,src,PARALLELTRANSFER_MPI_TAG,comm,&sts);
00437 ck.receive(s,buf,msgLen);
00438
00439 outCount=0;
00440 }
00441
00444 ConcreteElementNodeData *getFace(const meshState &s,const double* &faces) {
00445 int t=ck.getSendFaces()[outCount++];
00446 faces=ck.getFaceData(s,t);
00447 outElement.setFace(s,t);
00448 return &outElement;
00449 }
00450 };
00451
00453 void parallelSurfaceTransfer_c::transfer(surfProgress_t &surfProgress) {
00454 int s,d;
00455 int p;
00456 int c;
00457
00458
00459
00460
00461 surfProgress.p("Finding bounding boxes");
00462 firstDest=srcMesh.getPrisms();
00463 int lastDest=firstDest+destMesh.getTriangles();
00464 bbox3d *boxes=new bbox3d[lastDest];
00465 int *prio=new int[lastDest];
00466 surfProgress.p("Finding bounding boxes: src");
00467 for (s=0;s<firstDest;s++)
00468 { boxes[s]=getPrismBox(s,srcMesh); prio[s]=1; }
00469 surfProgress.p("Finding bounding boxes: dest");
00470 for (d=firstDest;d<lastDest;d++)
00471 { boxes[d]=getTriangleBox(d-firstDest,destMesh); prio[d]=2; }
00472
00473
00474 printf("[%d] Rank %d: BEGIN colliding bounding boxes...\n",CkMyPe(), myRank);
00475 surfProgress.p("Colliding bounding boxes");
00476 COLLIDE_Boxes_prio(voxels, lastDest,(const double *)boxes,prio);
00477 delete[] boxes; delete[] prio;
00478 printf("[%d] Rank %d: DONE colliding bounding boxes...\n",CkMyPe(), myRank);
00479
00480
00481 surfProgress.p("Extracting collision list");
00482 int nColl=COLLIDE_Count(voxels);
00483 int *coll=new int[3*nColl];
00484 COLLIDE_List(voxels,coll);
00485
00486
00487 surfProgress.p("Finding communication size");
00488 sendState ss(srcMesh,valsPerFace,valsPerPt, srcFaceVals,srcPtVals);
00489 faceReceiver *recv=new faceReceiver[commSize];
00490 faceSender *send=new faceSender[commSize];
00491 for (c=0;c<nColl;c++) {
00492 const int *cr=&coll[3*c];
00493 if (isLocal(cr)) continue;
00494
00495 if (isDest(cr[0]))
00496 recv[cr[1]].count();
00497 else
00498 send[cr[1]].countFace(ss,cr[0]);
00499 }
00500 #if OSL_com_debug
00501 printf("Rank %d: %d collisions, ",myRank,nColl);
00502 for (p=0;p<commSize;p++)
00503 if (send[p].getCount() || recv[p].getCount())
00504 printf("(%d s%d r%d) ",p,send[p].getCount(),recv[p].getCount());
00505 CkPrintf("\n");
00506 #endif
00507
00508
00509 surfProgress.p("Creating outgoing messages");
00510 for (c=0;c<nColl;c++) {
00511 const int *cr=&coll[3*c];
00512 if ((!isLocal(cr)) && (!isDest(cr[0])))
00513 send[cr[1]].putFace(ss,cr[0]);
00514 }
00515
00516
00517 surfProgress.p("Isend");
00518 for (p=0;p<commSize;p++) send[p].isend(mpi_comm,myRank,p);
00519
00520
00521 surfProgress.p("Recv");
00522 for (p=0;p<commSize;p++) recv[p].recv(ss,mpi_comm,p);
00523
00524
00525 surfProgress.p("Wait");
00526 for (p=0;p<commSize;p++) send[p].wait();
00527 delete[] send;
00528
00529
00530 surfProgress.p("Transferring solution");
00531 for (c=0;c<nColl;c++) {
00532 const int *cr=&coll[3*c];
00533 int dest=-1;
00534 const double *sFace;
00535 ConcreteElementNodeData *srcElement=NULL;
00536 if (isLocal(cr)) {
00537 int src=cr[0];
00538 dest=cr[2]-firstDest;
00539
00540 if (isDest(src) || dest<0)
00541 CmiAbort("Collision library did not respect local priority");
00542 theLocalElement.set(src);
00543 srcElement=&theLocalElement;
00544 sFace=&srcFaceVals[src*valsPerFace];
00545 }
00546 else if (isDest(cr[0])) {
00547 dest=cr[0]-firstDest;
00548 srcElement=recv[cr[1]].getFace(ss,sFace);
00549 }
00550
00551
00552 if (dest!=-1) {
00553 Triangle3DElement destElement(dest,destMesh);
00554 double sharedArea=getSharedArea(*srcElement,destElement);
00555 if (sharedArea>0.0) {
00556 accumulateCellValues(sFace,dest,sharedArea);
00557 transferNodeValues(*srcElement, dest);
00558 }
00559 }
00560 }
00561 delete[] recv;
00562 delete[] coll;
00563
00564
00565 surfProgress.p("Normalizing transfer");
00566 for (d=0;d<destMesh.getTriangles();d++) {
00567 double trueArea=destMesh.getArea(d);
00568 double areaErr=fabs(destAreas[d]-trueArea);
00569
00570 double accumScale=1.0/destAreas[d];
00571 double relErr=areaErr*accumScale;
00572 if (0 && (fabs(relErr)>1.0e-6 && areaErr>1.0e-8)) {
00573 printf("WARNING: ------------- area mismatch for face %d -------------\n"
00574 " True area %g, but total is only %g (err %g)\n",
00575 d,trueArea,destAreas[d],areaErr);
00576
00577 }
00578
00579
00580
00581 for (int v=0;v<valsPerFace;v++)
00582 destFaceVals[d*valsPerFace+v]*=accumScale;
00583 }
00584 }
00585
00586 class VerboseSurfProgress_t : public surfProgress_t {
00587 MPI_Comm comm;
00588 int myRank;
00589 const char *module;
00590 const char *last;
00591 double start;
00592 void printLast(void) {
00593 double t=MPI_Wtime();
00594 double withoutBarrier=t-start; start=t;
00595 MPI_Barrier(comm);
00596 t=MPI_Wtime();
00597 double barrier=t-start; start=t;
00598 if (myRank==0 && last && ((withoutBarrier>0.1) || (barrier>0.1)))
00599 {
00600 CkPrintf("%s: %s took %.6f s (+%.6f s imbalance)\n",
00601 module,last,withoutBarrier,barrier);
00602 fflush(stdout);
00603 }
00604 }
00605 public:
00606 VerboseSurfProgress_t(MPI_Comm comm_,const char *module_) {
00607 comm=comm_;
00608 MPI_Comm_rank(comm,&myRank);
00609 last=NULL;
00610 module=module_;
00611 }
00612 ~VerboseSurfProgress_t() {
00613 if (last) printLast();
00614 }
00615 virtual void p(const char *where) {
00616 printLast();
00617 last=where;
00618 }
00619 };
00620
00621 surfProgress_t::~surfProgress_t() {}
00622
00623
00624 void ParallelSurfaceTransfer(collide_t voxels, MPI_Comm mpi_comm, int valsPerFace,
00625 int valsPerPt, const double *srcFaceVals,
00626 const double *srcPtVals,
00627 const PrismMesh &srcMesh, double *destFaceVals,
00628 double *destPtVals, const TriangleSurfaceMesh &destMesh)
00629 {
00630 printf("BEGIN ParallelSurfaceTransfer...\n");
00631 parallelSurfaceTransfer_c t(voxels, mpi_comm, valsPerFace, valsPerPt, srcFaceVals,
00632 srcPtVals, srcMesh, destFaceVals, destPtVals, destMesh);
00633 VerboseSurfProgress_t p(mpi_comm,"ParallelSurfaceTransfer");
00634 t.transfer(p);
00635 printf("END ParallelSurfaceTransfer.\n");
00636 }
00637