00001
00002
00003
00004
00005 #ifndef _CKRDMA_H_
00006 #define _CKRDMA_H_
00007
00008 #include "envelope.h"
00009
00010
00011
// CK_-prefixed aliases for the CMK_-prefixed buffer mode constants.
// These four are the values used for the regMode fields in this header.
00012 #define CK_BUFFER_REG CMK_BUFFER_REG
00013 #define CK_BUFFER_UNREG CMK_BUFFER_UNREG
00014 #define CK_BUFFER_PREREG CMK_BUFFER_PREREG
00015 #define CK_BUFFER_NOREG CMK_BUFFER_NOREG
00016
// These two are the values used for the deregMode fields in this header.
00017 #define CK_BUFFER_DEREG CMK_BUFFER_DEREG
00018 #define CK_BUFFER_NODEREG CMK_BUFFER_NODEREG
00019
// Fallback sizes (in bytes) for the layerInfo arrays declared in this header.
// Each macro is only defined here if nothing earlier has defined it.
00020 #ifndef CMK_NOCOPY_DIRECT_BYTES
00021
00022 #if defined(_WIN32)
// NOTE(review): presumably 1 on Windows so that layerInfo never becomes a
// zero-length array there -- confirm.
00023 #define CMK_NOCOPY_DIRECT_BYTES 1
00024
00025
00026
00027
00028
00029 #else
00030 #define CMK_NOCOPY_DIRECT_BYTES 0
00031 #endif // end of if defined(_WIN32)
00032
00033 #endif // end of ifndef CMK_NOCOPY_DIRECT_BYTES
00034
00035 #ifndef CMK_COMMON_NOCOPY_DIRECT_BYTES
00036 #define CMK_COMMON_NOCOPY_DIRECT_BYTES 0
00037 #endif
00038
// Ck-prefixed aliases for CmiRdmaAlloc / CmiRdmaFree (defined outside this header).
00039 #define CkRdmaAlloc CmiRdmaAlloc
00040 #define CkRdmaFree CmiRdmaFree
00041
00042
00043
00044
00045
// Transfer mechanism selector; this is the return type of findTransferMode().
// NOTE(review): presumably MEMCPY = same address space, CMA = cross-memory
// attach, RDMA = network transfer -- confirm.
00046 enum class CkNcpyMode : char { MEMCPY, CMA, RDMA };
00047
00048
00049
00050
// Completion status; this is the return type of CkNcpyBuffer::get() and put().
00051 enum class CkNcpyStatus : char { incomplete, complete };
00052
00053
00054
00055
00056
// Mode tag passed to the entry-method API helpers declared in this header
// (e.g. CkRdmaIssueRgets): point-to-point or broadcast, send or receive.
00057 enum class ncpyEmApiMode : char { P2P_SEND, BCAST_SEND, P2P_RECV, BCAST_RECV };
00058
00059
// Pair of registration / de-registration modes. An array of these is passed
// to the six-argument CkRdmaIssueRgets() overload as postStructs.
00060 struct CkNcpyBufferPost {
00061
// Registration mode (one of the CK_BUFFER_*REG values, presumably -- confirm).
00062 unsigned short int regMode;
00063
00064
// De-registration mode (CK_BUFFER_DEREG / CK_BUFFER_NODEREG, presumably -- confirm).
00065 unsigned short int deregMode;
00066 };
00067
00068
00069
00070 class CkNcpyBuffer{
00071
00072 private:
00073
00074
00075 bool isRegistered;
00076
00077
00078 #ifdef __GNUC__
00079 #pragma GCC diagnostic push
00080 #pragma GCC diagnostic ignored "-Wpedantic"
00081 #endif
00082 char layerInfo[CMK_COMMON_NOCOPY_DIRECT_BYTES + CMK_NOCOPY_DIRECT_BYTES];
00083 #ifdef __GNUC__
00084 #pragma GCC diagnostic pop
00085 #endif
00086
00087 #if CMK_ERROR_CHECKING
00088 void checkRegModeIsValid() {
00089 if(regMode < CK_BUFFER_REG || regMode > CK_BUFFER_NOREG) CmiAbort("checkRegModeIsValid: Invalid value for regMode!\n");
00090 }
00091
00092 void checkDeregModeIsValid() {
00093 if(deregMode < CK_BUFFER_DEREG || deregMode > CK_BUFFER_NODEREG) CmiAbort("checkDeregModeIsValid: Invalid value for deregMode!\n");
00094 }
00095 #endif
00096
00097 public:
00098
00099 const void *ptr;
00100
00101
00102 size_t cnt;
00103
00104
00105 CkCallback cb;
00106
00107
00108 int pe;
00109
00110
00111 unsigned short int regMode;
00112
00113
00114 unsigned short int deregMode;
00115
00116
00117 const void *ref;
00118
00119
00120 const void *bcastAckInfo;
00121
00122 CkNcpyBuffer() : isRegistered(false), ptr(NULL), cnt(0), pe(-1), regMode(CK_BUFFER_REG), deregMode(CK_BUFFER_DEREG), ref(NULL), bcastAckInfo(NULL) {}
00123
00124 explicit CkNcpyBuffer(const void *ptr_, size_t cnt_, unsigned short int regMode_=CK_BUFFER_REG, unsigned short int deregMode_=CK_BUFFER_DEREG) {
00125 cb = CkCallback(CkCallback::ignore);
00126 init(ptr_, cnt_, regMode_, deregMode_);
00127 }
00128
00129 explicit CkNcpyBuffer(const void *ptr_, size_t cnt_, CkCallback &cb_, unsigned short int regMode_=CK_BUFFER_REG, unsigned short int deregMode_=CK_BUFFER_DEREG) {
00130 init(ptr_, cnt_, cb_, regMode_, deregMode_);
00131 }
00132
00133 void print() {
00134 CkPrintf("[%d][%d][%d] CkNcpyBuffer print: ptr:%p, size:%d, pe:%d, regMode=%d, deregMode=%d, ref:%p, bcastAckInfo:%p\n", CmiMyPe(), CmiMyNode(), CmiMyRank(), ptr, cnt, pe, regMode, deregMode, ref, bcastAckInfo);
00135 }
00136
00137 void init(const void *ptr_, size_t cnt_, CkCallback &cb_, unsigned short int regMode_=CK_BUFFER_REG, unsigned short int deregMode_=CK_BUFFER_DEREG) {
00138 cb = cb_;
00139 init(ptr_, cnt_, regMode_, deregMode_);
00140 }
00141
00142 void init(const void *ptr_, size_t cnt_, unsigned short int regMode_=CK_BUFFER_REG, unsigned short int deregMode_=CK_BUFFER_DEREG) {
00143 ptr = ptr_;
00144 cnt = cnt_;
00145 pe = CkMyPe();
00146 regMode = regMode_;
00147 deregMode = deregMode_;
00148
00149 isRegistered = false;
00150
00151 #if CMK_ERROR_CHECKING
00152
00153 checkRegModeIsValid();
00154
00155
00156 checkDeregModeIsValid();
00157 #endif
00158
00159
00160 if(cnt > 0)
00161 registerMem();
00162 }
00163
00164 void setRef(const void *ref_) {
00165 ref = ref_;
00166 }
00167
00168 const void *getRef() {
00169 return ref;
00170 }
00171
00172
00173 void registerMem()
00174 {
00175
00176 CkAssert(CkNodeOf(pe) == CkMyNode());
00177
00178
00179 if(regMode != CK_BUFFER_NOREG) {
00180
00181 CmiSetRdmaCommonInfo(&layerInfo[0], ptr, cnt);
00182
00183
00184
00185 #if CMK_REG_REQUIRED
00186 if(regMode == CK_BUFFER_REG || regMode == CK_BUFFER_PREREG)
00187 #endif
00188 {
00189 CmiSetRdmaBufferInfo(layerInfo + CmiGetRdmaCommonInfoSize(), ptr, cnt, regMode);
00190 isRegistered = true;
00191 }
00192 }
00193 }
00194
00195 void setMode(unsigned short int regMode_) { regMode = regMode_; }
00196
00197 void memcpyGet(CkNcpyBuffer &source);
00198 void memcpyPut(CkNcpyBuffer &destination);
00199
00200 #if CMK_USE_CMA
00201 void cmaGet(CkNcpyBuffer &source);
00202 void cmaPut(CkNcpyBuffer &destination);
00203 #endif
00204
00205 void rdmaGet(CkNcpyBuffer &source);
00206 void rdmaPut(CkNcpyBuffer &destination);
00207
00208 CkNcpyStatus get(CkNcpyBuffer &source);
00209 CkNcpyStatus put(CkNcpyBuffer &destination);
00210
00211
00212 void deregisterMem() {
00213
00214 CkAssert(CkNodeOf(pe) == CkMyNode());
00215
00216 if(isRegistered == false)
00217 return;
00218
00219 #if CMK_REG_REQUIRED
00220 if(regMode != CK_BUFFER_NOREG) {
00221 CmiDeregisterMem(ptr, layerInfo + CmiGetRdmaCommonInfoSize(), pe, regMode);
00222 isRegistered = false;
00223 }
00224 #endif
00225 }
00226
00227 void pup(PUP::er &p) {
00228 p((char *)&ptr, sizeof(ptr));
00229 p((char *)&ref, sizeof(ref));
00230 p((char *)&bcastAckInfo, sizeof(bcastAckInfo));
00231 p|cnt;
00232 p|cb;
00233 p|pe;
00234 p|regMode;
00235 p|deregMode;
00236 p|isRegistered;
00237 PUParray(p, layerInfo, CMK_COMMON_NOCOPY_DIRECT_BYTES + CMK_NOCOPY_DIRECT_BYTES);
00238 }
00239
00240 friend void CkRdmaDirectAckHandler(void *ack);
00241
00242 friend void CkRdmaEMBcastAckHandler(void *ack);
00243
00244 friend void constructSourceBufferObject(NcpyOperationInfo *info, CkNcpyBuffer &src);
00245 friend void constructDestinationBufferObject(NcpyOperationInfo *info, CkNcpyBuffer &dest);
00246
00247 friend envelope* CkRdmaIssueRgets(envelope *env, ncpyEmApiMode emMode, void *forwardMsg);
00248 friend void CkRdmaIssueRgets(envelope *env, ncpyEmApiMode emMode, void *forwardMsg, int numops, void **arrPtrs, CkNcpyBufferPost *postStructs);
00249
00250 friend void readonlyGet(CkNcpyBuffer &src, CkNcpyBuffer &dest, void *refPtr);
00251 friend void readonlyCreateOnSource(CkNcpyBuffer &src);
00252
00253
00254 friend void performEmApiNcpyTransfer(CkNcpyBuffer &source, CkNcpyBuffer &dest, int opIndex, int child_count, char *ref, int extraSize, CkNcpyMode ncpyMode, ncpyEmApiMode emMode);
00255
00256 friend void performEmApiRget(CkNcpyBuffer &source, CkNcpyBuffer &dest, int opIndex, char *ref, int extraSize, ncpyEmApiMode emMode);
00257
00258 friend void performEmApiCmaTransfer(CkNcpyBuffer &source, CkNcpyBuffer &dest, int child_count, ncpyEmApiMode emMode);
00259
00260 friend void deregisterMemFromMsg(envelope *env, bool isRecv);
00261 };
00262
00263
00264
// Ack handler taking an opaque ack pointer; declared a friend of CkNcpyBuffer.
// NOTE(review): presumably invoked on completion of a direct get/put -- confirm.
00265 void CkRdmaDirectAckHandler(void *ack);
00266
00267
00268
00269
// NOTE(review): presumably invokes the callback pointed to by cb on `pe`,
// handing it `buff` -- confirm against the definition.
00270 void invokeCallback(void *cb, int pe, CkNcpyBuffer &buff);
00271
00272
00273
00274
// Returns the CkNcpyMode (MEMCPY, CMA or RDMA) for the given source and destination PEs.
00275 CkNcpyMode findTransferMode(int srcPe, int destPe);
00276
// NOTE(review): presumably invoke the source-side / destination-side callback
// recorded in `info` -- confirm.
00277 void invokeSourceCallback(NcpyOperationInfo *info);
00278
00279 void invokeDestinationCallback(NcpyOperationInfo *info);
00280
00281
// NOTE(review): presumably queues `msg` for delivery to destPe -- confirm.
00282 void enqueueNcpyMessage(int destPe, void *msg);
00283
00284
00285 static inline CkNcpyBuffer CkSendBuffer(const void *ptr_, CkCallback &cb_, unsigned short int regMode_=CK_BUFFER_REG, unsigned short int deregMode_=CK_BUFFER_DEREG) {
00286 return CkNcpyBuffer(ptr_, 0, cb_, regMode_, deregMode_);
00287 }
00288
00289 static inline CkNcpyBuffer CkSendBuffer(const void *ptr_, unsigned short int regMode_=CK_BUFFER_REG, unsigned short int deregMode_=CK_BUFFER_DEREG) {
00290 return CkNcpyBuffer(ptr_, 0, regMode_, deregMode_);
00291 }
00292
00293 #if CMK_ONESIDED_IMPL
00294
00295
00296
00297
00298
00299
00300
00301
00302
00303
00304
// Bookkeeping record passed to processBcastSendEmApiCompletion() and
// processBcastRecvEmApiCompletion().
// NOTE(review): field meanings below are inferred from names only -- confirm.
00305 struct NcpyEmInfo{
00306 int numOps;      // presumably total number of nocopy operations
00307 int counter;     // presumably number of operations completed so far
00308 int pe;
00309 ncpyEmApiMode mode;
00310 void *msg;
00311 void *forwardMsg;
00312 };
00313
00314
00315
00316
00317
// Pairs an NcpyOperationInfo with an integer index.
// NOTE(review): presumably the index of the buffer within its message -- confirm.
00318 struct NcpyEmBufferInfo{
00319 int index;
00320 NcpyOperationInfo ncpyOpInfo;
00321 };
00322
00323
00324
00325
00326
00327
// Two overloads, both friends of CkNcpyBuffer. The first returns an envelope
// pointer and defaults forwardMsg to NULL; the second additionally takes the
// number of operations, an array of pointers and an array of CkNcpyBufferPost.
// NOTE(review): presumably these issue the gets for the buffers described in
// `env` -- confirm against the definitions.
00328 envelope* CkRdmaIssueRgets(envelope *env, ncpyEmApiMode emMode, void *forwardMsg = NULL);
00329
00330 void CkRdmaIssueRgets(envelope *env, ncpyEmApiMode emMode, void *forwardMsg, int numops, void **arrPtrs, CkNcpyBufferPost *postStructs);
00331
// Completion handlers taking the NcpyOperationInfo of the finished operation.
00332 void handleEntryMethodApiCompletion(NcpyOperationInfo *info);
00333
00334 void handleReverseEntryMethodApiCompletion(NcpyOperationInfo *info);
00335
00336
// NOTE(review): presumably pack / unpack the buffer pointers stored inside
// the message buffer msgBuf -- confirm.
00337 void CkPackRdmaPtrs(char *msgBuf);
00338
00339
00340 void CkUnpackRdmaPtrs(char *msgBuf);
00341
00342
00343
// Writes its results through the numops and bufsize reference parameters.
00344 void getRdmaNumopsAndBufsize(envelope *env, int &numops, int &bufsize);
00345
00346
// Ack handlers; the ack / msg arguments are opaque pointers.
00347 void CkRdmaEMAckHandler(int destPe, void *ack);
00348
00349 void CkRdmaEMBcastPostAckHandler(void *msg);
00350
// Peer-count bookkeeping record with three opaque payload fields.
// numPeers is a std::atomic<int> in SMP builds (CMK_SMP) and a plain int
// otherwise; the accessors below hide that difference.
// incNumPeers()/decNumPeers() return the value held BEFORE the update.
struct NcpyBcastRecvPeerAckInfo{
#if CMK_SMP
  std::atomic<int> numPeers;
#else
  int numPeers;
#endif
  void *bcastAckInfo;
  void *msg;
  int peerParentPe;
#if CMK_SMP
  // Returns the current peer count.
  int getNumPeers() const {
    return numPeers.load(std::memory_order_acquire);
  }
  // Overwrites the peer count with r.
  void setNumPeers(int r) {
    numPeers.store(r, std::memory_order_release);
  }
  // Adds one peer; returns the previous count.
  int incNumPeers() {
    return numPeers.fetch_add(1, std::memory_order_release);
  }
  // Removes one peer; returns the previous count.
  // acq_rel (not just release) so that a caller acting on the returned value,
  // e.g. on seeing the last peer leave, also observes the writes that the
  // other decrementing threads made before their own decrement.
  int decNumPeers() {
    return numPeers.fetch_sub(1, std::memory_order_acq_rel);
  }
#else
  // Non-SMP build: plain, unsynchronised equivalents of the accessors above.
  int getNumPeers() const { return numPeers; }
  void setNumPeers(int r) { numPeers = r; }
  int incNumPeers() { return numPeers++; }
  int decNumPeers() { return numPeers--; }
#endif

};
00381
00382
00383
00384
// Common base of NcpyBcastRootAckInfo and NcpyBcastInterimAckInfo.
// NOTE(review): field meanings below are inferred from names only -- confirm.
00385 struct NcpyBcastAckInfo{
00386 int numChildren; // presumably number of child nodes expected to ack
00387 int counter;     // presumably acks received so far
00388 bool isRoot;
00389 int pe;
00390 int numops;
00391 };
00392
// NcpyBcastAckInfo followed by a trailing array of CkNcpyBuffer objects.
// src is declared with length 0 (a compiler extension), so storage for its
// elements has to be allocated past the end of the struct by whoever creates it.
00393 struct NcpyBcastRootAckInfo : public NcpyBcastAckInfo {
00394 CkNcpyBuffer src[0];
00395 };
00396
// NcpyBcastAckInfo extended with a message pointer, two flags, a pointer to
// the parent's ack info and an originating PE.
// NOTE(review): presumably used on intermediate nodes of the broadcast tree
// (see allocateInterimNodeAckObj) -- confirm.
00397 struct NcpyBcastInterimAckInfo : public NcpyBcastAckInfo {
00398 void *msg;
00399
00400
00401 bool isRecv;
00402 bool isArray;
00403 void *parentBcastAckInfo;
00404 int origPe;
00405
00406 };
00407
00408
// Broadcast helpers operating on message envelopes; defined outside this header.
00409 void CkRdmaPrepareBcastMsg(envelope *env);
00410
// Overload taking a typed NcpyBcastInterimAckInfo pointer (a second overload
// taking two envelopes and a void* is declared further below).
00411 void CkReplaceSourcePtrsInBcastMsg(envelope *env, NcpyBcastInterimAckInfo *bcastAckInfo, int origPe);
00412
00413
// Returns a pointer and writes the source PE through srcPe.
00414 const void *getParentBcastAckInfo(void *msg, int &srcPe);
00415
00416
// Returns a NcpyBcastInterimAckInfo pointer.
// NOTE(review): presumably newly allocated; ownership is not visible here -- confirm.
00417 NcpyBcastInterimAckInfo *allocateInterimNodeAckObj(envelope *myEnv, envelope *myChildEnv, int pe);
00418
00419 void forwardMessageToChildNodes(envelope *myChildrenMsg, UChar msgType);
00420
00421 void forwardMessageToPeerNodes(envelope *myMsg, UChar msgType);
00422
// Broadcast completion handlers taking the NcpyOperationInfo of the finished operation.
00423 void handleBcastEntryMethodApiCompletion(NcpyOperationInfo *info);
00424
00425 void handleBcastReverseEntryMethodApiCompletion(NcpyOperationInfo *info);
00426
// Declared a friend of CkNcpyBuffer.
// NOTE(review): presumably de-registers the buffers referenced by env -- confirm.
00427 void deregisterMemFromMsg(envelope *env, bool isRecv);
00428
00429 void handleMsgUsingCMAPostCompletionForSendBcast(envelope *copyenv, envelope *env, CkNcpyBuffer &source);
00430
00431 void processBcastSendEmApiCompletion(NcpyEmInfo *ncpyEmInfo, int destPe);
00432
00433
// Overload taking the previous envelope and an untyped ack-info pointer.
00434 void CkReplaceSourcePtrsInBcastMsg(envelope *prevEnv, envelope *env, void *bcastAckInfo, int origPe);
00435
00436 void processBcastRecvEmApiCompletion(NcpyEmInfo *ncpyEmInfo, int destPe);
00437
00438
// Ack handler taking an opaque ack pointer; declared a friend of CkNcpyBuffer.
00439 void CkRdmaEMBcastAckHandler(void *ack);
00440
// NOTE(review): presumably run after the receive-side broadcast transfer
// completes on a child / interim node respectively -- confirm.
00441 void handleMsgOnChildPostCompletionForRecvBcast(envelope *env);
00442
00443 void handleMsgOnInterimPostCompletionForRecvBcast(envelope *env, NcpyBcastInterimAckInfo *bcastAckInfo, int pe);
00444
00445
00446
00447
00448
00449
// Extern declaration of the int variable _numPendingRORdmaTransfers via the CkpvExtern macro.
// NOTE(review): presumably counts readonly transfers still in flight -- confirm.
00450 CkpvExtern(int, _numPendingRORdmaTransfers);
00451
// Per-buffer record: buffer address, registration mode, PE and an opaque
// layerInfo byte array sized like the one in CkNcpyBuffer.
// Used as the element type of NcpyROBcastAckInfo::buffAckInfo.
00452 struct NcpyROBcastBuffAckInfo {
00453 const void *ptr;
00454
00455 int regMode;
00456
00457 int pe;
00458
00459
// The array may have zero length, hence the -Wpedantic suppression.
00460 #ifdef __GNUC__
00461 #pragma GCC diagnostic push
00462 #pragma GCC diagnostic ignored "-Wpedantic"
00463 #endif
00464 char layerInfo[CMK_COMMON_NOCOPY_DIRECT_BYTES + CMK_NOCOPY_DIRECT_BYTES];
00465 #ifdef __GNUC__
00466 #pragma GCC diagnostic pop
00467 #endif
00468 };
00469
// Ack bookkeeping header followed by a trailing array of per-buffer records.
// buffAckInfo is declared with length 0 (a compiler extension), so storage for
// its elements has to be allocated past the end of the struct by the creator.
// NOTE(review): presumably numops is the number of trailing elements -- confirm.
00470 struct NcpyROBcastAckInfo {
00471 int numChildren;
00472 int counter;
00473 bool isRoot;
00474 int numops;
00475 NcpyROBcastBuffAckInfo buffAckInfo[0];
00476 };
00477
// Helpers for readonly-variable transfers; defined outside this header.
00478 void readonlyUpdateNumops();
00479
00480 void readonlyAllocateOnSource();
00481
// readonlyCreateOnSource and readonlyGet are friends of CkNcpyBuffer.
00482 void readonlyCreateOnSource(CkNcpyBuffer &src);
00483
00484 void readonlyGet(CkNcpyBuffer &src, CkNcpyBuffer &dest, void *refPtr);
00485
00486 void readonlyGetCompleted(NcpyOperationInfo *ncpyOpInfo);
00487
// Only declared in SMP builds.
00488 #if CMK_SMP
00489 void updatePeerCounterAndPush(envelope *env);
00490 #endif
00491
// Returns a CkArray pointer obtained from the envelope.
00492 CkArray* getArrayMgrFromMsg(envelope *env);
00493
00494 void sendAckMsgToParent(envelope *env);
00495
00496 void sendRecvDoneMsgToPeers(envelope *env, CkArray *mgr);
00497
00498 #endif
00499
00500 #endif