OpenAtom Version 1.5a
pcCommManager.C
#include "pcCommManager.h"

#include "paircalc/pcMessages.h"
#include "ckPairCalculator.decl.h"
#include "paircalc/InputDataHandler.h"
#include "utility/matrix2file.h"

#include "ckmulticast.h"
#include "ckcomplex.h"

#include <algorithm>

/// Do not use comlib for multicasts within paircalc
#define _PC_COMMLIB_MULTI_ 0
/** @addtogroup GSpaceState
    @{
*/
namespace cp {
    namespace gspace {

PCCommManager::PCCommManager(const CkIndex2D gspaceIdx, const pc::pcConfig &_cfg, const pc::InstanceIDs _pcHandle):
    gspaceIndex(gspaceIdx), pcCfg(_cfg), pcHandle(_pcHandle),
    sectionGettingLeft(0), sectionGettingRight(0),
    existsLproxy(false), existsRproxy(false)
{}


void PCCommManager::makeLeftTree()
{
    #ifdef DEBUG_CP_PAIRCALC_CREATION
        CkPrintf("GSpace[%d,%d] Making symm(%d) PC array section to receive left data\n", gspaceIndex.x, gspaceIndex.y, pcCfg.isSymmetric);
    #endif

    /// Compute the max index along the state dimensions of the PC array
    int maxpcstateindex = (pcCfg.numStates/pcCfg.grainSize - 1) * pcCfg.grainSize;
    /// Find the row index of the PC chare that handles this state
    int s1 = (gspaceIndex.x/pcCfg.grainSize) * pcCfg.grainSize;
    s1 = (s1 > maxpcstateindex) ? maxpcstateindex : s1;
    /// If the PC is a symmetric instance, include only the post-diagonal chares on row s1; else, include all the PC chares on row s1
    int sColMin = (pcCfg.isSymmetric) ? s1 : 0;
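    // Worked example (illustrative numbers, not from the original source): with
    // numStates=128 and grainSize=64, maxpcstateindex = (128/64 - 1)*64 = 64. A
    // GSpace chare for state x=100 maps to row s1 = (100/64)*64 = 64; a symmetric
    // instance then spans only the post-diagonal column s2=64, while an
    // asymmetric instance spans the whole row (s2 = 0 and 64).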

    #ifdef DEBUG_CP_PAIRCALC_COMM
        CkPrintf("GSpace[%d,%d] will send left matrix data to symm(%d) PC chares on: Row %d, Cols %d to %d\n",
                 gspaceIndex.x, gspaceIndex.y, pcCfg.isSymmetric, s1, sColMin, maxpcstateindex);
    #endif

    /// If GSpace to PC comm is point-to-point direct messaging
    if (!pcCfg.isInputMulticast)
    {
        /// simply create a list of PC chare array indices, with chunk=0 (as the comm list is the same for all chunks)
        for (int s2 = sColMin; s2 <= maxpcstateindex; s2 += pcCfg.grainSize)
            listGettingLeft.push_back(CkArrayIndex4D(gspaceIndex.y, s1, s2, 0));
    }
    /// else, if communication is through section multicasts
    else
    {
        /// Allocate one section proxy for each chunk
        sectionGettingLeft = new CProxySection_InputDataHandler<CollatorType, CollatorType>[pcCfg.numChunks];
        /// Build an array section for each chunk
        for (int chunk = 0; chunk < pcCfg.numChunks; chunk++)
        {
            sectionGettingLeft[chunk] = CProxySection_InputDataHandler<CollatorType, CollatorType>::ckNew(pcHandle.handlerAID,
                gspaceIndex.y, gspaceIndex.y, 1,
                s1, s1, 1,
                sColMin, maxpcstateindex, pcCfg.grainSize,
                chunk, chunk, 1);
            /// Delegate the multicast work to an appropriate library
            #ifndef _PAIRCALC_DO_NOT_DELEGATE_
            #ifdef USE_COMLIB
            if (_PC_COMMLIB_MULTI_)
                ComlibAssociateProxy(mcastInstanceCP, sectionGettingLeft[chunk]);
            else
            #endif
            {
                CkMulticastMgr *mcastGrp = CProxy_CkMulticastMgr(pcHandle.mCastMgrGID).ckLocalBranch();
                sectionGettingLeft[chunk].ckSectionDelegate(mcastGrp);
            }
            #endif
        }
    }
    /// PC chares receiving data have been identified and memorized (either as an array section or a vector of IDs)
    existsLproxy = true;
}


void PCCommManager::makeRightTree()
{
    #ifdef DEBUG_CP_PAIRCALC_CREATION
        CkPrintf("GSpace[%d,%d] Making symm(%d) PC array section to receive right data\n", gspaceIndex.x, gspaceIndex.y, pcCfg.isSymmetric);
    #endif

    /// Compute the max index along the state dimensions of the PC array
    int maxpcstateindex = (pcCfg.numStates/pcCfg.grainSize - 1) * pcCfg.grainSize;
    int s2 = (gspaceIndex.x/pcCfg.grainSize) * pcCfg.grainSize;
    s2 = (s2 > maxpcstateindex) ? maxpcstateindex : s2;
    /// If the PC is a symmetric instance, include only the strictly pre-diagonal chares on column s2; else, include all the PC chares on column s2
    int sRowMax = (pcCfg.isSymmetric) ? s2 - pcCfg.grainSize : maxpcstateindex;
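    // Worked example (illustrative numbers, not from the original source): with
    // numStates=128, grainSize=64 and state x=100, s2 = (100/64)*64 = 64; a
    // symmetric instance spans rows 0 .. sRowMax = 64-64 = 0 (strictly
    // pre-diagonal), while an asymmetric instance spans rows 0 and 64. For state
    // x=0, s2=0 gives sRowMax = -64, so the guard below builds no proxies at all.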
    #ifdef DEBUG_CP_PAIRCALC_COMM
        CkPrintf("GSpace[%d,%d] will send right matrix data to symm(%d) PC chares on: Col %d, Rows %d to %d\n",
                 gspaceIndex.x, gspaceIndex.y, pcCfg.isSymmetric, s2, 0, sRowMax);
    #endif

    // Accommodate the boundary case: PC chares at the top left [*,0,0,*] of the array shouldn't receive any right data, so don't build any proxies to them
    if (sRowMax >= 0)
    {
        /// If GSpace to PC comm is point-to-point direct messaging
        if (!pcCfg.isInputMulticast)
        {
            /// simply create a list of PC chare array indices, with chunk=0 (as the comm list is the same for all chunks)
            for (int s1 = 0; s1 <= sRowMax; s1 += pcCfg.grainSize)
                listGettingRight.push_back(CkArrayIndex4D(gspaceIndex.y, s1, s2, 0));
        }
        /// else, if communication is through section multicasts
        else
        {
            /// Allocate one section proxy for each chunk
            sectionGettingRight = new CProxySection_InputDataHandler<CollatorType, CollatorType>[pcCfg.numChunks];
            /// Build an array section for each chunk
            for (int c = 0; c < pcCfg.numChunks; c++)
            {
                sectionGettingRight[c] = CProxySection_InputDataHandler<CollatorType, CollatorType>::ckNew(pcHandle.handlerAID,
                    gspaceIndex.y, gspaceIndex.y, 1,
                    0, sRowMax, pcCfg.grainSize,
                    s2, s2, 1,
                    c, c, 1);
                /// Delegate the multicast work to an appropriate library
                #ifndef _PAIRCALC_DO_NOT_DELEGATE_
                #ifdef USE_COMLIB
                if (_PC_COMMLIB_MULTI_)
                    ComlibAssociateProxy(mcastInstanceCP, sectionGettingRight[c]);
                else
                #endif
                {
                    CkMulticastMgr *mcastGrp = CProxy_CkMulticastMgr(pcHandle.mCastMgrGID).ckLocalBranch();
                    sectionGettingRight[c].ckSectionDelegate(mcastGrp);
                }
                #endif
            }
        }
        /// PC chares receiving data have been identified and memorized (either as an array section or a vector of IDs)
        existsRproxy = true;
    }
}


/** Deposits data as the left matrix block with the InputHandler chare array.
 * For symmetric instances, sends to the post-diagonal row of PCs that corresponds to state gspaceIndex.x (including the chare on the chare array diagonal)
 * For asymmetric instances, sends to the whole row of PCs that corresponds to state gspaceIndex.x
 */
void PCCommManager::sendLeftDataMcast(int numPoints, complex* ptr, bool psiV)
{
    #ifdef PC_USE_RDMA
    /// If RDMA is enabled, we should be here ONLY during PsiV updates
    CkAssert(psiV);
    #ifdef DEBUG_CP_PAIRCALC_RDMA
        CkPrintf("GSpace[%d,%d] Using traditional channels (not RDMA) for psiV left data.\n", gspaceIndex.x, gspaceIndex.y);
    #endif
    #endif

    bool flag_dp = pcCfg.isDoublePackOn;
    /// If a destination array section doesn't exist, build one
    if (!existsLproxy)
    {
        makeLeftTree();
    }
    /// If a left matrix destination section exists, send the data as the left matrix block
    if (existsLproxy)
    {
        int chunksize = numPoints / pcCfg.numChunks;
        int outsize = chunksize;

        for (int chunk = 0; chunk < pcCfg.numChunks; chunk++)
        {
            // last chunk gets remainder
            if ((pcCfg.numChunks > 1) && (chunk == (pcCfg.numChunks - 1)))
                outsize = chunksize + (numPoints % pcCfg.numChunks);
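            // Chunking arithmetic (illustrative numbers, not from the original
            // source): numPoints=1000 over numChunks=3 gives chunksize=333;
            // chunks 0 and 1 carry 333 points each, and the final chunk carries
            // 333 + (1000 % 3) = 334.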
            #ifdef _PAIRCALC_DEBUG_PARANOID_FW_
            if (pcCfg.isSymmetric && gspaceIndex.y == 0)
                dumpMatrix("gspPts", (double *)ptr, 1, numPoints*2, gspaceIndex.y, gspaceIndex.x, 0, chunk, pcCfg.isSymmetric);
            CkPrintf("L [%d,%d,%d,%d,%d] chunk %d chunksize %d outsize %d for numpoint %d offset will be %d %.12g\n", gspaceIndex.y, gspaceIndex.x, gspaceIndex.x, chunk, pcCfg.isSymmetric, chunk, chunksize, outsize, numPoints, chunk*chunksize, ptr[chunk*chunksize].re);
            #endif
            // If sending directly, use the vector of target PC chares
            if (!pcCfg.isInputMulticast)
            {
                CkArrayIndex4D idx;
                for (int elem = 0; elem < listGettingLeft.size(); elem++)
                {
                    paircalcInputMsg *msg = new (outsize, 8*sizeof(int)) paircalcInputMsg(outsize, gspaceIndex.x, true, flag_dp, &(ptr[chunk*chunksize]), psiV, numPoints);
                    *(int*)CkPriorityPtr(msg) = pcCfg.inputMsgPriority;
                    CkSetQueueing(msg, CK_QUEUEING_IFIFO);
                    idx = listGettingLeft[elem];
                    reinterpret_cast<short*>(idx.data())[3] = chunk;
                    #ifdef _NAN_CHECK_
                    for (int i = 0; i < outsize; i++)
                    {
                        CkAssert(finite(msg->points[i].re));
                        CkAssert(finite(msg->points[i].im));
                    }
                    #endif
                    CProxy_InputDataHandler<CollatorType, CollatorType> handlerProxy(pcHandle.handlerAID);
                    handlerProxy(idx).acceptLeftData(msg);
                }
            }
            // else, use a typical multicast to the destination section
            else
            {
                paircalcInputMsg *msg = new (outsize, 8*sizeof(int)) paircalcInputMsg(outsize, gspaceIndex.x, true, flag_dp, &(ptr[chunk*chunksize]), psiV, numPoints);
                *(int*)CkPriorityPtr(msg) = pcCfg.inputMsgPriority;
                CkSetQueueing(msg, CK_QUEUEING_IFIFO);
                #ifdef _PAIRCALC_DEBUG_PARANOID_FW_
                if (pcCfg.isSymmetric && gspaceIndex.y == 0)
                    dumpMatrix("pairmsg", (double *)msg->points, 1, outsize*2, gspaceIndex.y, gspaceIndex.x, 0, chunk, pcCfg.isSymmetric);
                #endif
                #ifdef _NAN_CHECK_
                for (int i = 0; i < outsize; i++)
                {
                    CkAssert(finite(msg->points[i].re));
                    CkAssert(finite(msg->points[i].im));
                }
                #endif
                sectionGettingLeft[chunk].acceptLeftData(msg);
            }
        }
    }
    /// else, if the destination section doesn't exist even after attempting to create one
    else
        CkPrintf("GSpace[%d,%d] No destination symm(%d) PC array section to send left block data!!!\n", gspaceIndex.x, gspaceIndex.y, pcCfg.isSymmetric);
}



/** Deposits data as the right matrix block with the InputHandler chare array.
 * For symmetric instances, sends to the strictly pre-diagonal column of PCs that corresponds to state gspaceIndex.x
 * For asymmetric instances, sends to the whole column of PCs that corresponds to state gspaceIndex.x
 */
void PCCommManager::sendRightDataMcast(int numPoints, complex* ptr, bool psiV)
{
    #ifdef PC_USE_RDMA
    /// If RDMA is enabled, we should be here ONLY during PsiV updates
    CkAssert(psiV);
    #ifdef DEBUG_CP_PAIRCALC_RDMA
        CkPrintf("GSpace[%d,%d] Using traditional channels (not RDMA) for psiV right data.\n", gspaceIndex.x, gspaceIndex.y);
    #endif
    #endif

    bool flag_dp = pcCfg.isDoublePackOn;
    /// If a destination array section doesn't exist, build one
    if (!existsRproxy)
        makeRightTree();
    /// If a right matrix destination section exists, send the data as the right matrix block
    if (existsRproxy)
    {
        #ifdef _DEBUG_PAIRCALC_PARANOID_
        double re;
        double im;
        for (int i = 0; i < numPoints; i++)
        {
            re = ptr[i].re;
            im = ptr[i].im;
            if (fabs(re) > 0.0)
                CkAssert(fabs(re) > 1.0e-300);
            if (fabs(im) > 0.0)
                CkAssert(fabs(im) > 1.0e-300);
        }
        #endif
        for (int chunk = 0; chunk < pcCfg.numChunks; chunk++)
        {
            int chunksize = numPoints / pcCfg.numChunks;
            int outsize = chunksize;
            /// last chunk gets remainder
            if (pcCfg.numChunks > 1 && chunk == pcCfg.numChunks - 1)
                outsize += numPoints % pcCfg.numChunks;
            // If sending directly, use the vector of target PC chares
            if (!pcCfg.isInputMulticast)
            {
                CkArrayIndex4D idx;
                for (int elem = 0; elem < listGettingRight.size(); elem++)
                {
                    idx = listGettingRight[elem];
                    reinterpret_cast<short*>(idx.data())[3] = chunk;
                    paircalcInputMsg *msg = new (outsize, 8*sizeof(int)) paircalcInputMsg(outsize, gspaceIndex.x, false, flag_dp, &(ptr[chunk*chunksize]), psiV, numPoints);
                    CkSetQueueing(msg, CK_QUEUEING_IFIFO);
                    *(int*)CkPriorityPtr(msg) = pcCfg.inputMsgPriority;
                    #ifdef _NAN_CHECK_
                    for (int i = 0; i < outsize; i++)
                    {
                        CkAssert(finite(msg->points[i].re));
                        CkAssert(finite(msg->points[i].im));
                    }
                    #endif
                    CProxy_InputDataHandler<CollatorType, CollatorType> handlerProxy(pcHandle.handlerAID);
                    handlerProxy(idx).acceptRightData(msg);
                }
            }
            else
            {
                paircalcInputMsg *msg = new (outsize, 8*sizeof(int)) paircalcInputMsg(outsize, gspaceIndex.x, false, flag_dp, &(ptr[chunk*chunksize]), psiV, numPoints);
                CkSetQueueing(msg, CK_QUEUEING_IFIFO);
                *(int*)CkPriorityPtr(msg) = pcCfg.inputMsgPriority;
                #ifdef _NAN_CHECK_
                for (int i = 0; i < outsize; i++)
                {
                    CkAssert(finite(msg->points[i].re));
                    CkAssert(finite(msg->points[i].im));
                }
                #endif
                sectionGettingRight[chunk].acceptRightData(msg);
            }
        }
    }
    /// else, if the destination section doesn't exist even after attempting to create one
    else
        CkPrintf("GSpace[%d,%d] No destination symm(%d) PC array section to send right block data!!!\n", gspaceIndex.x, gspaceIndex.y, pcCfg.isSymmetric);
}



void PCCommManager::sendLeftDataRDMA(int numPoints, complex* ptr, bool psiV)
{
    #ifndef PC_USE_RDMA
    CkAbort("GSpace[,] Trying to send data to paircalcs via RDMA when RDMA is not enabled\n");
    #else
    if (!psiV)
    {
        /// Trigger an RDMA send for every rdma handle associated with all the PCs getting my data as left matrix
        for (int i = 0; i < leftDestinationHandles.size(); i++)
            if (leftDestinationHandles[i].handle >= 0)
            {
                #ifdef DEBUG_CP_PAIRCALC_RDMA
                    CkPrintf("GSpace[%d,%d] Sending left data to PC via RDMA.\n", gspaceIndex.x, gspaceIndex.y);
                #endif
                CmiDirect_put(&(leftDestinationHandles[i]));
            }
    }
    /// else, if it is a PsiV update step, send the data via traditional messaging
    else
        sendLeftDataMcast(numPoints, ptr, psiV);
    #endif // PC_USE_RDMA
}



void PCCommManager::sendRightDataRDMA(int numPoints, complex* ptr, bool psiV)
{
    #ifndef PC_USE_RDMA
    CkAbort("GSpace[,] Trying to send data to paircalcs via RDMA when RDMA is not enabled\n");
    #else
    if (!psiV)
    {
        /// Trigger an RDMA send for every rdma handle associated with all the PCs getting my data as right matrix
        for (int i = 0; i < rightDestinationHandles.size(); i++)
            if (rightDestinationHandles[i].handle >= 0)
            {
                #ifdef DEBUG_CP_PAIRCALC_RDMA
                    CkPrintf("GSpace[%d,%d] Sending right data to PC via RDMA.\n", gspaceIndex.x, gspaceIndex.y);
                #endif
                CmiDirect_put(&(rightDestinationHandles[i]));
            }
    }
    /// else, if it is a PsiV update step, send the data via traditional messaging
    else
        sendRightDataMcast(numPoints, ptr, psiV);
    #endif // PC_USE_RDMA
}



void PCCommManager::sendLeftRDMARequest(RDMApair_GSP_PC idTkn, int totalsize, CkCallback cb)
{
    #ifndef PC_USE_RDMA
    CkAbort("GSpace[,] Trying to setup RDMA when RDMA is not enabled\n");
    #else
    #ifdef DEBUG_CP_PAIRCALC_RDMA
        CkPrintf("GSpace[%d,%d] Sending out RDMA setup requests to PCs getting left matrix data from me.\n", idTkn.gspIndex.x, idTkn.gspIndex.y);
    #endif
    /// If the destination PC chares are not known, determine them
    if (!existsLproxy)
        makeLeftTree();
    /// If there exist any destination PC chares
    if (existsLproxy)
    {
        /// Verify
        CkAssert(pcCfg.numChunks > 0);
        /// Compute the size of the chunk of data to be sent out in terms of the number of doubles (as PC treats them) and not complex
        int chunksize = 2 * (totalsize / pcCfg.numChunks);

        /// Send an RDMA setup request to each destination PC
        for (int chunk = 0; chunk < pcCfg.numChunks; chunk++)
        {
            /// The last chunk gets the remainder of the points
            if ((pcCfg.numChunks > 1) && (chunk == pcCfg.numChunks - 1))
                chunksize += 2 * (totalsize % pcCfg.numChunks);
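            // Sizing example (illustrative numbers, not from the original source):
            // totalsize=505 complex points over numChunks=4 gives chunksize =
            // 2*(505/4) = 252 doubles per chunk; the last chunk grows by
            // 2*(505 % 4) = 2 to 254 doubles.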
            /// If the communication is through a direct p2p send
            if (!pcCfg.isInputMulticast)
            {
                CkArrayIndex4D idx;
                for (int elem = 0; elem < listGettingLeft.size(); elem++)
                {
                    idx = listGettingLeft[elem];
                    reinterpret_cast<short*>(idx.data())[3] = chunk;
                    RDMASetupRequestMsg<RDMApair_GSP_PC> *msg = new RDMASetupRequestMsg<RDMApair_GSP_PC>(idTkn, idTkn.gspIndex.x, CkMyPe(), chunksize, cb);
                    CProxy_InputDataHandler<CollatorType, CollatorType> handlerProxy(pcHandle.handlerAID);
                    handlerProxy(idx).setupRDMALeft(msg);
                }
            }
            /// else, if we're multicasting
            else
            {
                RDMASetupRequestMsg<RDMApair_GSP_PC> *msg = new RDMASetupRequestMsg<RDMApair_GSP_PC>(idTkn, idTkn.gspIndex.x, CkMyPe(), chunksize, cb);
                sectionGettingLeft[chunk].setupRDMALeft(msg);
            }
        }
    }
    #endif // PC_USE_RDMA
}



void PCCommManager::sendRightRDMARequest(RDMApair_GSP_PC idTkn, int totalsize, CkCallback cb)
{
    #ifndef PC_USE_RDMA
    CkAbort("GSpace[,] Trying to setup RDMA when RDMA is not enabled\n");
    #else
    #ifdef DEBUG_CP_PAIRCALC_RDMA
        CkPrintf("GSpace[%d,%d] Sending out RDMA setup requests to PCs getting right matrix data from me.\n", idTkn.gspIndex.x, idTkn.gspIndex.y);
    #endif
    /// If the destination PC chares are not known, determine them
    if (!existsRproxy)
        makeRightTree();
    /// If there exist any destination PC chares
    if (existsRproxy)
    {
        /// Verify
        CkAssert(pcCfg.numChunks > 0);
        /// Compute the size of the chunk of data to be sent out in terms of the number of doubles (as PC treats them) and not complex
        int chunksize = 2 * (totalsize / pcCfg.numChunks);

        /// Send an RDMA setup request to each destination PC
        for (int chunk = 0; chunk < pcCfg.numChunks; chunk++)
        {
            /// The last chunk gets the remainder of the points
            if ((pcCfg.numChunks > 1) && (chunk == pcCfg.numChunks - 1))
                chunksize += 2 * (totalsize % pcCfg.numChunks);
            /// If the communication is through a direct p2p send
            if (!pcCfg.isInputMulticast)
            {
                CkArrayIndex4D idx;
                for (int elem = 0; elem < listGettingRight.size(); elem++)
                {
                    idx = listGettingRight[elem];
                    reinterpret_cast<short*>(idx.data())[3] = chunk;
                    RDMASetupRequestMsg<RDMApair_GSP_PC> *msg = new RDMASetupRequestMsg<RDMApair_GSP_PC>(idTkn, idTkn.gspIndex.x, CkMyPe(), chunksize, cb);
                    CProxy_InputDataHandler<CollatorType, CollatorType> handlerProxy(pcHandle.handlerAID);
                    handlerProxy(idx).setupRDMARight(msg);
                }
            }
            /// else, if we're multicasting
            else
            {
                RDMASetupRequestMsg<RDMApair_GSP_PC> *msg = new RDMASetupRequestMsg<RDMApair_GSP_PC>(idTkn, idTkn.gspIndex.x, CkMyPe(), chunksize, cb);
                sectionGettingRight[chunk].setupRDMARight(msg);
            }
        }
    }
    #endif // PC_USE_RDMA
}



/**
 * Send the multicast message to initialize the section tree and set the cookie
 */
void PCCommManager::setResultProxy(CProxySection_PairCalculator *sectProxy, bool lbsync, CkCallback synccb)
{
    int offset = gspaceIndex.x % pcCfg.grainSize;
    int dest = gspaceIndex.x / pcCfg.grainSize * pcCfg.grainSize; // row or column
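    // Mapping example (illustrative numbers, not from the original source): with
    // grainSize=64, the GSpace chare for state x=100 computes dest = (100/64)*64
    // = 64 (the PC row/column holding its result block) and offset = 100 % 64 =
    // 36 (its position within that block).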
    initResultMsg *redMsg = new initResultMsg;
    redMsg->mCastGrpId = pcHandle.mCastMgrGID;
    redMsg->dest = dest;
    redMsg->offset = offset;
    redMsg->lbsync = lbsync;
    redMsg->synccb = synccb;
    sectProxy->initResultSection(redMsg);
}



//! initialize plane and row wise section reduction for lambda->gspace
/**
 * The makeOneResultSection functions all have the same mission: make one
 * section at a time, using only the relevant processors instead of making them
 * all at once. We have each GSpace plane chare initialize its own section.
 * Each section will have S/grainsize members, such that PC(w,*,y,*)
 * contributes to GSP(y,w).
 *
 * The symmetric and asymmetric dynamics cases will additionally have
 * PC(w,x,y!=x,*) contributing to GSP(x,w) to fill out the total
 * S/grainsize contributions in each section.
 *
 * Then return the section proxy.
 */
CProxySection_PairCalculator PCCommManager::makeOneResultSection_asym(int chunk)
{
    CkMulticastMgr *mcastGrp = CProxy_CkMulticastMgr(pcHandle.mCastMgrGID).ckLocalBranch();
    int maxpcstateindex = (pcCfg.numStates/pcCfg.grainSize - 1) * pcCfg.grainSize;
    int s2 = gspaceIndex.x / pcCfg.grainSize * pcCfg.grainSize;
    s2 = (s2 > maxpcstateindex) ? maxpcstateindex : s2;

    CProxySection_PairCalculator sectProxy = CProxySection_PairCalculator::ckNew(pcHandle.pcAID,
        gspaceIndex.y, gspaceIndex.y, 1,
        0, maxpcstateindex, pcCfg.grainSize,
        s2, s2, 1,
        chunk, chunk, 1);
    CkSectionID sid = sectProxy.ckGetSectionID();
    std::random_shuffle(sid._elems, sid._elems + sid._nElems);
    sectProxy.ckSectionDelegate(mcastGrp);
    // initialize proxy
    setResultProxy(&sectProxy, false, CkCallback(CkCallback::ignore));
    return sectProxy;
}
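/* Section-membership sketch (illustrative numbers, not from the original source):
 * for plane w = gspaceIndex.y, numStates=192, grainSize=64 and state x=100,
 * s2 = 64, so makeOneResultSection_asym(chunk) spans PC(w,s1,64,chunk) for
 * s1 in {0, 64, 128} -- numStates/grainSize = 3 members, matching the
 * S/grainsize contributions described above.
 */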

/**
 * initialize plane and column wise section reduction for lambda->gspace
 */
CProxySection_PairCalculator PCCommManager::makeOneResultSection_asym_column(int chunk)
{
    CkMulticastMgr *mcastGrp = CProxy_CkMulticastMgr(pcHandle.mCastMgrGID).ckLocalBranch();
    int s1 = gspaceIndex.x / pcCfg.grainSize * pcCfg.grainSize; /// column
    int maxpcstateindex = (pcCfg.numStates/pcCfg.grainSize - 1) * pcCfg.grainSize;
    s1 = (s1 > maxpcstateindex) ? maxpcstateindex : s1;

    // All non-diagonal elements, so we'll have to build this the tedious, explicit way

    CkArrayIndex4D *elems = new CkArrayIndex4D[pcCfg.numStates/pcCfg.grainSize];
    int ecount = 0;
    for (int s2 = 0; s2 <= maxpcstateindex; s2 += pcCfg.grainSize)
    {
        if (s1 != s2)
        {
            CkArrayIndex4D idx4d(gspaceIndex.y, s1, s2, chunk);
            elems[ecount++] = idx4d;
        }
    }
    std::random_shuffle(elems, elems + ecount);
    CProxySection_PairCalculator sectProxy = CProxySection_PairCalculator::ckNew(pcHandle.pcAID, elems, ecount);
    delete [] elems;
    sectProxy.ckSectionDelegate(mcastGrp);
    setResultProxy(&sectProxy, false, CkCallback(CkCallback::ignore));
    return sectProxy;
}


/**
 * initialize plane and row wise section reduction for psi->gspace
 */
CProxySection_PairCalculator PCCommManager::makeOneResultSection_sym1(int chunk)
{
    CkMulticastMgr *mcastGrp = CProxy_CkMulticastMgr(pcHandle.mCastMgrGID).ckLocalBranch();
    int maxpcstateindex = (pcCfg.numStates/pcCfg.grainSize - 1) * pcCfg.grainSize;
    int s2 = gspaceIndex.x / pcCfg.grainSize * pcCfg.grainSize;
    s2 = (s2 > maxpcstateindex) ? maxpcstateindex : s2;

    int s2range = (s2 == 0) ? 1 : pcCfg.grainSize;
    CProxySection_PairCalculator sectProxy = CProxySection_PairCalculator::ckNew(pcHandle.pcAID,
        gspaceIndex.y, gspaceIndex.y, 1,
        0, s2, s2range,
        s2, s2, 1,
        chunk, chunk, 1);
    CkSectionID sid = sectProxy.ckGetSectionID();
    std::random_shuffle(sid._elems, sid._elems + sid._nElems);
    sectProxy.ckSectionDelegate(mcastGrp);
    setResultProxy(&sectProxy, false, CkCallback(CkCallback::ignore));
    return sectProxy;
}


/**
 * initialize plane and column wise section reduction for psi->gspace
 */
CProxySection_PairCalculator PCCommManager::makeOneResultSection_sym2(int chunk)
{
    CkMulticastMgr *mcastGrp = CProxy_CkMulticastMgr(pcHandle.mCastMgrGID).ckLocalBranch();
    int s1 = gspaceIndex.x / pcCfg.grainSize * pcCfg.grainSize; /// column
    int maxpcstateindex = (pcCfg.numStates/pcCfg.grainSize - 1) * pcCfg.grainSize;
    s1 = (s1 > maxpcstateindex) ? maxpcstateindex : s1;

    int s2start = s1 + pcCfg.grainSize;
    s2start = (s2start > maxpcstateindex) ? maxpcstateindex : s2start;
    int s2range = (s2start == maxpcstateindex) ? 1 : pcCfg.grainSize;
    CkAssert(s2start < pcCfg.numStates);
    CProxySection_PairCalculator sectProxy =
        CProxySection_PairCalculator::ckNew(pcHandle.pcAID,
            gspaceIndex.y, gspaceIndex.y, 1,
            s1, s1, 1,
            s2start, maxpcstateindex, s2range,
            chunk, chunk, 1);

    CkSectionID sid = sectProxy.ckGetSectionID();
    std::random_shuffle(sid._elems, sid._elems + sid._nElems);
    sectProxy.ckSectionDelegate(mcastGrp);
    setResultProxy(&sectProxy, false, CkCallback(CkCallback::ignore));
    return sectProxy;
}
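/* Boundary sketch (illustrative numbers, not from the original source): with
 * numStates=192, grainSize=64 and state x=150, s1 = (150/64)*64 = 128 =
 * maxpcstateindex, so s2start is clamped from 192 down to 128 and s2range
 * becomes 1; the section then holds the single diagonal chare
 * PC(w,128,128,chunk) instead of running past the array bounds.
 */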

    } // end namespace gspace
} // end namespace cp

#include "RDMAMessages.def.h"
/*@}*/