00001 #ifndef _ARMCI_IMPL_H
00002 #define _ARMCI_IMPL_H
00003
00004 #include <vector>
00005 using std::vector;
00006
00007 #include <stdlib.h>
00008 #include <math.h>
00009
00010 #include "tcharmc.h"
00011 #include "tcharm.h"
00012
00013
00014 typedef void* pointer;
00015 PUPbytes(pointer)
00016
00017 #include "armci.decl.h"
00018 #include "armci.h"
00019
00020
// Operation codes (Armci_Hdl::op defaults to ARMCI_INVALID).
//
// Numerically, every ARMCI_B* code equals the matching base code with
// BLOCKING_MASK set, and every ARMCI_I* code equals the matching base code
// with IMPLICIT_MASK set.
#define BLOCKING_MASK 0x4
#define IMPLICIT_MASK 0x8 // anything that is implicit is non-blocking

#define ARMCI_INVALID 0x0

// Base operations.
#define ARMCI_GET     0x1
#define ARMCI_PUT     0x2
#define ARMCI_ACC     0x3

// Base operation | BLOCKING_MASK.
#define ARMCI_BGET    0x5
#define ARMCI_BPUT    0x6
#define ARMCI_BACC    0x7

// Base operation | IMPLICIT_MASK.
#define ARMCI_IGET    0x9 // implicit get
#define ARMCI_IPUT    0xa // implicit put
00034
00035 class Armci_Hdl {
00036 public:
00037 int op;
00038 int proc;
00039 int nbytes;
00040 int acked;
00041 int wait;
00042 pointer src;
00043 pointer dst;
00044
00045 Armci_Hdl() : op(ARMCI_INVALID), proc(-1), nbytes(0), acked(0), wait(0), src(NULL), dst(NULL)
00046 { }
00047 Armci_Hdl(int o, int p, int n, pointer s, pointer d):
00048 op(o), proc(p), nbytes(n), acked(0), wait(0), src(s), dst(d) { }
00049 void pup(PUP::er &p){
00050 p|op; p|proc; p|nbytes; p|acked; p|wait; p|src; p|dst;
00051 }
00052 };
00053
00054
00055
00056 class Armci_Note{
00057 public:
00058 int proc;
00059 int waited;
00060 int notified;
00061 Armci_Note() : proc(-1), waited(0), notified(0) { }
00062 Armci_Note(int p, int w, int n) : proc(p), waited(w), notified(n) { }
00063 void pup(PUP::er &p){ p|proc; p|waited; p|notified; }
00064 };
00065
00066
00067 typedef struct peAddr {
00068 int pe;
00069 pointer ptr;
00070 } addressPair;
00071
00072 extern CkArrayID armciVPAid;
00073
00074 #define ARMCI_TCHARM_SEMAID 0x00A53C10
00075
00076
// Dense 0-based identifiers for the reduction operators.  They are presumably
// the `op` index used by _ARMCI_REGISTER_REDUCTION -- confirm at call sites.
// _ARMCI_NUM_REDN_OPS must stay equal to the number of identifiers below.
#define _ARMCI_REDN_OP_SUM      0
#define _ARMCI_REDN_OP_PRODUCT  1
#define _ARMCI_REDN_OP_MIN      2
#define _ARMCI_REDN_OP_MAX      3
#define _ARMCI_REDN_OP_ABSMIN   4
#define _ARMCI_REDN_OP_ABSMAX   5
#define _ARMCI_NUM_REDN_OPS     6
00084
00085
00086
00087
00088
00089
/*
 * Expands to a reducer function
 *     CkReductionMsg *name(int nMsg, CkReductionMsg **msg)
 * that treats each message payload as an array of `dataType`.
 *
 * The payload of msg[0] is the accumulator (`ret`) and is updated in place.
 * For every further message m and every element index i, `reductionWork` is
 * executed with `ret`, `value` (the payload of msg[m]) and `i` in scope.
 * The element count is taken from msg[0] only.  The accumulated array is
 * finally copied into a message created by CkReductionMsg::buildNew.
 *
 * `reductionWork` is pasted verbatim as statements, so it must not contain
 * a comma outside parentheses.
 */
#define _ARMCI_TEMPLATE_REDUCTION(name,dataType,reductionWork) \
CkReductionMsg *name(int nMsg,CkReductionMsg **msg) \
{ \
  dataType *ret=(dataType *)(msg[0]->getData()); \
  int nElem=msg[0]->getLength()/sizeof(dataType); \
  for (int m=1;m<nMsg;m++) { \
    dataType *value=(dataType *)(msg[m]->getData()); \
    for (int i=0;i<nElem;i++) { \
      reductionWork \
    } \
  } \
  return CkReductionMsg::buildNew(nElem*sizeof(dataType),ret); \
}
00104
/*
 * Stamps out one reducer per element type from a single `reductionWork`
 * snippet.  For opName == foo this defines foo_int, foo_long, foo_longlong,
 * foo_float and foo_double via _ARMCI_TEMPLATE_REDUCTION.
 */
#define _ARMCI_GENERATE_POLYMORPHIC_REDUCTION(opName,reductionWork)      \
  _ARMCI_TEMPLATE_REDUCTION(opName##_int,      int,       reductionWork) \
  _ARMCI_TEMPLATE_REDUCTION(opName##_long,     long,      reductionWork) \
  _ARMCI_TEMPLATE_REDUCTION(opName##_longlong, long long, reductionWork) \
  _ARMCI_TEMPLATE_REDUCTION(opName##_float,    float,     reductionWork) \
  _ARMCI_TEMPLATE_REDUCTION(opName##_double,   double,    reductionWork)
00111
// Windows builds only: map llabs() onto abs().
#ifdef _WIN32
#  define llabs(x) abs(x)
#endif
00115
// Fallback used when the build configuration reports no native fabsf()
// (CMK_HAS_FABSF is zero or undefined).  Returns x for strictly positive x
// and -x for everything else.
#if ! CMK_HAS_FABSF
inline float fabsf(float x)
{
  if (x>0.0) {
    return x;
  }
  return -x;
}
#endif
00122
00123
00124
00125
00126
/*
 * Defines the absmax_* and absmin_* reducers for int, long, long long, float
 * and double on top of _ARMCI_TEMPLATE_REDUCTION.
 *
 * Each combining step leaves ret[i] holding max(|ret[i]|, |value[i]|) for
 * absmax, or min(|ret[i]|, |value[i]|) for absmin.  The accumulator is
 * normalised to its absolute value before the comparison, and the absolute
 * value (not the signed input) is stored.  Without that, a negative
 * accumulator compares as "small" and the result depends on the order in
 * which the messages arrive.
 *
 * The long long variants use llabs() so 64-bit values are not truncated on
 * platforms where long is 32 bits wide.
 *
 * NOTE: the work snippet only runs when a second message is combined, so a
 * reducer invoked with a single message passes its values through unchanged.
 */
#define _ARMCI_GENERATE_ABS_REDUCTION() \
_ARMCI_TEMPLATE_REDUCTION(absmax_int,int, \
  ret[i]=abs(ret[i]); if (ret[i]<abs(value[i])) ret[i]=abs(value[i]);) \
_ARMCI_TEMPLATE_REDUCTION(absmax_long,long, \
  ret[i]=labs(ret[i]); if (ret[i]<labs(value[i])) ret[i]=labs(value[i]);) \
_ARMCI_TEMPLATE_REDUCTION(absmax_longlong,long long, \
  ret[i]=llabs(ret[i]); if (ret[i]<llabs(value[i])) ret[i]=llabs(value[i]);) \
_ARMCI_TEMPLATE_REDUCTION(absmax_float,float, \
  ret[i]=fabsf(ret[i]); if (ret[i]<fabsf(value[i])) ret[i]=fabsf(value[i]);) \
_ARMCI_TEMPLATE_REDUCTION(absmax_double,double, \
  ret[i]=fabs(ret[i]); if (ret[i]<fabs(value[i])) ret[i]=fabs(value[i]);) \
_ARMCI_TEMPLATE_REDUCTION(absmin_int,int, \
  ret[i]=abs(ret[i]); if (ret[i]>abs(value[i])) ret[i]=abs(value[i]);) \
_ARMCI_TEMPLATE_REDUCTION(absmin_long,long, \
  ret[i]=labs(ret[i]); if (ret[i]>labs(value[i])) ret[i]=labs(value[i]);) \
_ARMCI_TEMPLATE_REDUCTION(absmin_longlong,long long, \
  ret[i]=llabs(ret[i]); if (ret[i]>llabs(value[i])) ret[i]=llabs(value[i]);) \
_ARMCI_TEMPLATE_REDUCTION(absmin_float,float, \
  ret[i]=fabsf(ret[i]); if (ret[i]>fabsf(value[i])) ret[i]=fabsf(value[i]);) \
_ARMCI_TEMPLATE_REDUCTION(absmin_double,double, \
  ret[i]=fabs(ret[i]); if (ret[i]>fabs(value[i])) ret[i]=fabs(value[i]);)
00138
/*
 * Registers `fnName` through CkReduction::addReducer and stores the returned
 * value in _armciRednLookupTable[op][dataType].  The expansion already ends
 * in a semicolon, so invocations are written without one.
 */
#define _ARMCI_REGISTER_REDUCTION(fnName,dataType,op) \
  _armciRednLookupTable[op][dataType] = CkReduction::addReducer(fnName);
00142
00143
00144
00145
/*
 * Registers the five typed reducers opName_int, opName_long, opName_longlong,
 * opName_float and opName_double under reduction slot `op`, one per
 * ARMCI_* data-type index.
 */
#define _ARMCI_REGISTER_POLYMORPHIC_REDUCTION(opName, op)                 \
  _ARMCI_REGISTER_REDUCTION(opName##_int,      ARMCI_INT,       op)       \
  _ARMCI_REGISTER_REDUCTION(opName##_long,     ARMCI_LONG,      op)       \
  _ARMCI_REGISTER_REDUCTION(opName##_longlong, ARMCI_LONG_LONG, op)       \
  _ARMCI_REGISTER_REDUCTION(opName##_float,    ARMCI_FLOAT,     op)       \
  _ARMCI_REGISTER_REDUCTION(opName##_double,   ARMCI_DOUBLE,    op)
00152
00153 class ArmciMsg : public CMessage_ArmciMsg {
00154 public:
00155 pointer dst;
00156 int nbytes;
00157 int src_proc;
00158 int hdl;
00159 char *data;
00160
00161 ArmciMsg(void) { data = NULL; }
00162 ArmciMsg(pointer d, int n, int s, int h) :
00163 dst(d), nbytes(n), src_proc(s), hdl(h) { }
00164 static ArmciMsg* pup(PUP::er &p, ArmciMsg *m){
00165 pointer d=NULL;
00166 int n=0, s=0, h=0;
00167 if(p.isPacking() || p.isSizing()){
00168 d = m->dst;
00169 n = m->nbytes;
00170 s = m->src_proc;
00171 h = m->hdl;
00172 }
00173 p|d; p|n; p|s; p|h;
00174 if(p.isUnpacking()){
00175 m = new (n, 0) ArmciMsg(d,n,s,h);
00176 }
00177 p(m->data,n);
00178 if(p.isDeleting()){
00179 delete m;
00180 m = NULL;
00181 }
00182 return m;
00183 }
00184 };
00185
00186 class ArmciStridedMsg : public CMessage_ArmciStridedMsg {
00187 public:
00188 pointer dst;
00189 int stride_levels;
00190 int nbytes;
00191 int src_proc;
00192 int hdl;
00193 int *dst_stride_ar;
00194 int *count;
00195 char *data;
00196
00197 ArmciStridedMsg(void) { dst_stride_ar = NULL; count = NULL; data = NULL; }
00198 ArmciStridedMsg(pointer d, int l, int n, int s, int h) :
00199 dst(d), stride_levels(l), nbytes(n), src_proc(s), hdl(h) { }
00200 static ArmciStridedMsg* pup(PUP::er &p, ArmciStridedMsg *m){
00201 pointer d=NULL;
00202 int l=0, n=0, s=0, h=0;
00203 if(p.isPacking() || p.isSizing()){
00204 d = m->dst;
00205 l = m->stride_levels;
00206 n = m->nbytes;
00207 s = m->src_proc;
00208 h = m->hdl;
00209 }
00210 p|d; p|l; p|n; p|s; p|h;
00211 if(p.isUnpacking()){
00212 m = new (l,l+1,n, 0) ArmciStridedMsg(d,l,n,s,h);
00213 }
00214 p((char *)(m->dst_stride_ar),sizeof(int)*l);
00215 p((char *)(m->count),sizeof(int)*(l+1));
00216 p(m->data,n);
00217 if(p.isDeleting()){
00218 delete m;
00219 m = NULL;
00220 }
00221 return m;
00222 }
00223 };
00224
00225
00226
00227
00228 class ArmciVirtualProcessor : public TCharmClient1D {
00229 CmiIsomallocBlockList *memBlock;
00230 CProxy_ArmciVirtualProcessor thisProxy;
00231 AddressMsg *addressReply;
00232 CkPupPtrVec<Armci_Hdl> hdlList;
00233 CkPupPtrVec<Armci_Note> noteList;
00234
00235 void *collectiveTmpBufferPtr;
00236 protected:
00237 virtual void setupThreadPrivate(CthThread forThread);
00238 public:
00239 ArmciVirtualProcessor(const CProxy_TCharm &_thr_proxy);
00240 ArmciVirtualProcessor(CkMigrateMessage *m);
00241 ~ArmciVirtualProcessor();
00242
00243 pointer BlockMalloc(int bytes) {
00244 return (void *)CmiIsomallocBlockListMalloc(memBlock, bytes);
00245 }
00246 void getAddresses(AddressMsg *msg);
00247
00248 void put(pointer src, pointer dst, int bytes, int dst_proc);
00249 void putData(pointer dst, int nbytes, char *data, int src_proc, int hdl);
00250 void putData(ArmciMsg* msg);
00251 void putAck(int hdl);
00252 int nbput(pointer src, pointer dst, int bytes, int dst_proc);
00253 void nbput_implicit(pointer src, pointer dst, int bytes, int dst_proc);
00254 void wait(int hdl);
00255 int test(int hdl);
00256 void waitmulti(vector<int> procs);
00257 void waitproc(int proc);
00258 void waitall();
00259 void fence(int proc);
00260 void allfence();
00261 void barrier();
00262
00263 void get(pointer src, pointer dst, int bytes, int src_proc);
00264 int nbget(pointer src, pointer dst, int bytes, int dst_proc);
00265 void nbget_implicit(pointer src, pointer dst, int bytes, int dst_proc);
00266 void requestFromGet(pointer src, pointer dst, int nbytes, int dst_proc, int hdl);
00267 void putDataFromGet(pointer dst, int nbytes, char *data, int hdl);
00268 void putDataFromGet(ArmciMsg* msg);
00269
00270 void puts(pointer src_ptr, int src_stride_ar[],
00271 pointer dst_ptr, int dst_stride_ar[],
00272 int count[], int stride_levels, int dst_proc);
00273 int nbputs(pointer src_ptr, int src_stride_ar[],
00274 pointer dst_ptr, int dst_stride_ar[],
00275 int count[], int stride_levels, int dst_proc);
00276 void nbputs_implicit(pointer src_ptr, int src_stride_ar[],
00277 pointer dst_ptr, int dst_stride_ar[],
00278 int count[], int stride_levels, int dst_proc);
00279 void putsData(pointer dst_ptr, int dst_stride_ar[],
00280 int count[], int stride_levels,
00281 int nbytes, char *data, int src_proc, int hdl);
00282 void putsData(ArmciStridedMsg *m);
00283
00284 void gets(pointer src_ptr, int src_stride_ar[],
00285 pointer dst_ptr, int dst_stride_ar[],
00286 int count[], int stride_levels, int src_proc);
00287 int nbgets(pointer src_ptr, int src_stride_ar[],
00288 pointer dst_ptr, int dst_stride_ar[],
00289 int count[], int stride_levels, int src_proc);
00290 void nbgets_implicit(pointer src_ptr, int src_stride_ar[],
00291 pointer dst_ptr, int dst_stride_ar[],
00292 int count[], int stride_levels, int src_proc);
00293 void requestFromGets(pointer src_ptr, int src_stride_ar[],
00294 pointer dst_ptr, int dst_stride_ar[],
00295 int count[], int stride_levels, int dst_proc, int hdl);
00296 void putDataFromGets(pointer dst_ptr, int dst_stride_ar[],
00297 int count[], int stride_levels,
00298 int nbytes, char *data, int hdl);
00299 void putDataFromGets(ArmciStridedMsg *m);
00300
00301 void notify(int proc);
00302 void sendNote(int proc);
00303 void notify_wait(int proc);
00304
00305
00306 void requestAddresses(pointer ptr, pointer ptr_arr[], int bytes);
00307 void stridedCopy(void *base, void *buffer_ptr,
00308 int *stride, int *count,
00309 int dim_id, bool flatten);
00310 virtual void pup(PUP::er &p);
00311
00312
00313 void msgBcast(void *buffer, int len, int root);
00314 void recvMsgBcast(int len, char *buffer, int root);
00315 void msgGop(void *x, int n, char *op, int type);
00316
00317 void mallocClient(CkReductionMsg *msg);
00318 void resumeThread(void);
00319 void startCheckpoint(const char* dname);
00320 void checkpoint(int len, const char* dname);
00321 };
00322
00323 class AddressMsg : public CMessage_AddressMsg {
00324 public:
00325 pointer *addresses;
00326 friend class CMessage_AddressMsg;
00327 };
00328
00329
00330
00331 CtvExtern(ArmciVirtualProcessor *, _armci_ptr);
00332
00333
#if CMK_TRACE_ENABLED

// Names of the ARMCI entry points, compiled in only when tracing is
// enabled.  The order matters if consumers address entries by index
// (presumably the case for trace registration -- confirm at the use site),
// so append new names at the end.
static const char *funclist[] = {
  "ARMCI_Init",
  "ARMCI_Finalize",
  "ARMCI_Error",
  "ARMCI_Cleanup",
  "ARMCI_Procs",
  "ARMCI_Myid",
  "ARMCI_GetV",
  "ARMCI_NbGetV",
  "ARMCI_PutV",
  "ARMCI_NbPutV",
  "ARMCI_AccV",
  "ARMCI_NbAccV",
  "ARMCI_Put",
  "ARMCI_NbPut",
  "ARMCI_Get",
  "ARMCI_NbGet",
  "ARMCI_Acc",
  "ARMCI_NbAcc",
  "ARMCI_PutS",
  "ARMCI_NbPutS",
  "ARMCI_GetS",
  "ARMCI_NbGetS",
  "ARMCI_AccS",
  "ARMCI_NbAccS",
  "ARMCI_PutValueLong",
  "ARMCI_PutValueInt",
  "ARMCI_PutValueFloat",
  "ARMCI_PutValueDouble",
  "ARMCI_NbPutValueLong",
  "ARMCI_NbPutValueInt",
  "ARMCI_NbPutValueFloat",
  "ARMCI_NbPutValueDouble",
  "ARMCI_GetValueLong",
  "ARMCI_GetValueInt",
  "ARMCI_GetValueFloat",
  "ARMCI_GetValueDouble",
  "ARMCI_NbGetValueLong",
  "ARMCI_NbGetValueInt",
  "ARMCI_NbGetValueFloat",
  "ARMCI_NbGetValueDouble",
  "ARMCI_Wait",
  "ARMCI_WaitProc",
  "ARMCI_WaitAll",
  "ARMCI_Test",
  "ARMCI_Barrier",
  "ARMCI_Fence",
  "ARMCI_AllFence",
  "ARMCI_Malloc",
  "ARMCI_Free",
  "ARMCI_Malloc_local",
  "ARMCI_Free_local",
  "ARMCI_SET_AGGREGATE_HANDLE",
  "ARMCI_UNSET_AGGREGATE_HANDLE",
  "ARMCI_Rmw",
  "ARMCI_Create_mutexes",
  "ARMCI_Destroy_mutexes",
  "ARMCI_Lock",
  "ARMCI_Unlock",
  "armci_notify",
  "armci_notify_wait",
  "ARMCI_Migrate",
  "ARMCI_Async_Migrate",
  "ARMCI_Checkpoint",
  "ARMCI_MemCheckpoint",
  "armci_msg_brdcst",
  "armci_msg_bcast",
  "armci_msg_gop2",
  "armci_msg_igop",
  "armci_msg_lgop",
  "armci_msg_fgop",
  "armci_msg_dgop",
  "armci_msg_barrier",
  "armci_msg_reduce",
  "armci_domain_nprocs",
  "armci_domain_count",
  "armci_domain_id",
  "armci_domain_glob_proc_id",
  "armci_domain_my_id"
};
#endif // CMK_TRACE_ENABLED
00364
00365 #endif // _ARMCI_IMPL_H