00001 #ifndef AMPI_PRINT_MSG_SIZES
00002 #define AMPI_PRINT_MSG_SIZES 0 // Record and print comm routines used & message sizes
00003 #endif
00004
00005 #define AMPIMSGLOG 0
00006 #define AMPI_PRINT_IDLE 0
00007
00008 #include "ampiimpl.h"
00009 #include "tcharm.h"
00010
00011 #if CMK_BIGSIM_CHARM
00012 #include "bigsim_logs.h"
00013 #endif
00014
00015 #if CMK_TRACE_ENABLED
00016 #include "register.h"
00017 #endif
00018
00019
00020
00021 #if AMPI_ERRHANDLER_RETURN
00022 #define AMPI_ERRHANDLER MPI_ERRORS_RETURN
00023 #else
00024 #define AMPI_ERRHANDLER MPI_ERRORS_ARE_FATAL
00025 #endif
00026
00027
00028 #define MSG_ORDER_DEBUG(x) //x
00029
00030 #define USER_CALL_DEBUG(x) // ckout<<"vp "<<TCHARM_Element()<<": "<<x<<endl;
00031 #define STARTUP_DEBUG(x) //ckout<<"ampi[pe "<<CkMyPe()<<"] "<< x <<endl;
00032 #define FUNCCALL_DEBUG(x) //x
00033
00034
00035 extern const char * const CmiCommitID;
00036
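// Convenience accessor for the calling rank's datatype table, owned by its ampiParent.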
00037 static CkDDT *getDDT() noexcept {
00038 return &getAmpiParent()->myDDT;
00039 }
00040
00041
00042 #if AMPI_ERROR_CHECKING
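// Abort with a message of the form "<routine> failed with error code <error string>" when the
// error handler is MPI_ERRORS_ARE_FATAL; otherwise just hand the error code back to the caller.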
00043 int ampiErrhandler(const char* func, int errcode) noexcept {
00044 if (AMPI_ERRHANDLER == MPI_ERRORS_ARE_FATAL && errcode != MPI_SUCCESS) {
00045
00046
00047
00048 int funclen = strlen(func);
00049 const char* filler = " failed with error code ";
00050 int fillerlen = strlen(filler);
00051 int errstrlen;
00052 char errstr[MPI_MAX_ERROR_STRING];
00053 MPI_Error_string(errcode, errstr, &errstrlen);
00054 vector<char> str(funclen + fillerlen + errstrlen + 1); // +1 for the terminating NUL
00055 strcpy(str.data(), func);
00056 strcat(str.data(), filler);
00057 strcat(str.data(), errstr);
00058 CkAbort(str.data());
00059 }
00060 return errcode;
00061 }
00062 #endif
00063
00064 #if AMPI_PRINT_MSG_SIZES
00065 #if !AMPI_ERROR_CHECKING
00066 #error "AMPI_PRINT_MSG_SIZES requires AMPI error checking to be enabled!"
00067 #endif
00068 #include <string>
00069 #include <sstream>
00070 #include "ckliststring.h"
00071 CkpvDeclare(CkListString, msgSizesRanks);
00072
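// True if this rank was listed in the +msgSizesRanks runtime option.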
00073 bool ampiParent::isRankRecordingMsgSizes() noexcept {
00074 return (!CkpvAccess(msgSizesRanks).isEmpty() && CkpvAccess(msgSizesRanks).includes(thisIndex));
00075 }
00076
00077 void ampiParent::recordMsgSize(const char* func, int msgSize) noexcept {
00078 if (isRankRecordingMsgSizes()) {
00079 msgSizes[func][msgSize]++;
00080 }
00081 }
00082
00083 typedef std::unordered_map<std::string, std::map<int, int> >::iterator outer_itr_t;
00084 typedef std::map<int, int>::iterator inner_itr_t;
00085
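// Print, for each MPI routine this rank recorded, a histogram of message sizes as (count: size B) pairs.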
00086 void ampiParent::printMsgSizes() noexcept {
00087 if (isRankRecordingMsgSizes()) {
00088
00089
00090 std::stringstream ss;
00091 ss << std::endl << "Rank " << thisIndex << ":" << std::endl;
00092 for (outer_itr_t i = msgSizes.begin(); i != msgSizes.end(); ++i) {
00093 ss << i->first << ": [ ";
00094 for (inner_itr_t j = i->second.begin(); j != i->second.end(); ++j) {
00095 ss << "(" << j->second << ": " << j->first << " B) ";
00096 }
00097 ss << "]" << std::endl;
00098 }
00099 CkPrintf("%s", ss.str().c_str());
00100 }
00101 }
00102 #endif //AMPI_PRINT_MSG_SIZES
00103
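// Argument-validation helpers: each returns MPI_SUCCESS or reports the corresponding MPI error class via ampiErrhandler().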
00104 inline int checkCommunicator(const char* func, MPI_Comm comm) noexcept {
00105 if (comm == MPI_COMM_NULL)
00106 return ampiErrhandler(func, MPI_ERR_COMM);
00107 return MPI_SUCCESS;
00108 }
00109
00110 inline int checkCount(const char* func, int count) noexcept {
00111 if (count < 0)
00112 return ampiErrhandler(func, MPI_ERR_COUNT);
00113 return MPI_SUCCESS;
00114 }
00115
00116 inline int checkData(const char* func, MPI_Datatype data) noexcept {
00117 if (data == MPI_DATATYPE_NULL)
00118 return ampiErrhandler(func, MPI_ERR_TYPE);
00119 return MPI_SUCCESS;
00120 }
00121
00122 inline int checkTag(const char* func, int tag) noexcept {
00123 if (tag != MPI_ANY_TAG && (tag < 0 || tag > MPI_TAG_UB_VALUE))
00124 return ampiErrhandler(func, MPI_ERR_TAG);
00125 return MPI_SUCCESS;
00126 }
00127
00128 inline int checkRank(const char* func, int rank, MPI_Comm comm) noexcept {
00129 int size = (comm == MPI_COMM_NULL) ? 0 : getAmpiInstance(comm)->getSize();
00130 if (((rank >= 0) && (rank < size)) ||
00131 (rank == MPI_ANY_SOURCE) ||
00132 (rank == MPI_PROC_NULL) ||
00133 (rank == MPI_ROOT))
00134 return MPI_SUCCESS;
00135 return ampiErrhandler(func, MPI_ERR_RANK);
00136 }
00137
00138 inline int checkBuf(const char* func, const void *buf, int count) noexcept {
00139 if ((count != 0 && buf == NULL) || buf == MPI_IN_PLACE)
00140 return ampiErrhandler(func, MPI_ERR_BUFFER);
00141 return MPI_SUCCESS;
00142 }
00143
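// Central argument checker used by the API entry points: each if* flag selects which of the
// helpers above to apply, and the first failure is returned.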
00144 int errorCheck(const char* func, MPI_Comm comm, bool ifComm, int count,
00145 bool ifCount, MPI_Datatype data, bool ifData, int tag,
00146 bool ifTag, int rank, bool ifRank, const void *buf1,
00147 bool ifBuf1, const void *buf2=nullptr, bool ifBuf2=false) noexcept {
00148 int ret;
00149 if (ifComm) {
00150 ret = checkCommunicator(func, comm);
00151 if (ret != MPI_SUCCESS)
00152 return ampiErrhandler(func, ret);
00153 }
00154 if (ifCount) {
00155 ret = checkCount(func, count);
00156 if (ret != MPI_SUCCESS)
00157 return ampiErrhandler(func, ret);
00158 }
00159 if (ifData) {
00160 ret = checkData(func, data);
00161 if (ret != MPI_SUCCESS)
00162 return ampiErrhandler(func, ret);
00163 }
00164 if (ifTag) {
00165 ret = checkTag(func, tag);
00166 if (ret != MPI_SUCCESS)
00167 return ampiErrhandler(func, ret);
00168 }
00169 if (ifRank) {
00170 ret = checkRank(func, rank, comm);
00171 if (ret != MPI_SUCCESS)
00172 return ampiErrhandler(func, ret);
00173 }
00174 if (ifBuf1 && ifData) {
00175 ret = checkBuf(func, buf1, count*getDDT()->getSize(data));
00176 if (ret != MPI_SUCCESS)
00177 return ampiErrhandler(func, ret);
00178 }
00179 if (ifBuf2 && ifData) {
00180 ret = checkBuf(func, buf2, count*getDDT()->getSize(data));
00181 if (ret != MPI_SUCCESS)
00182 return ampiErrhandler(func, ret);
00183 }
00184 #if AMPI_PRINT_MSG_SIZES
00185 getAmpiParent()->recordMsgSize(func, getDDT()->getSize(data) * count);
00186 #endif
00187 return MPI_SUCCESS;
00188 }
00189
00190
00191 static mpi_comm_worlds mpi_worlds;
00192
00193 int _mpi_nworlds;
00194 int MPI_COMM_UNIVERSE[MPI_MAX_COMM_WORLDS];
00195
00196 class AmpiComplex {
00197 public:
00198 float re, im;
00199 void operator+=(const AmpiComplex &a) noexcept {
00200 re+=a.re;
00201 im+=a.im;
00202 }
00203 void operator*=(const AmpiComplex &a) noexcept {
00204 float nu_re=re*a.re-im*a.im;
00205 im=re*a.im+im*a.re;
00206 re=nu_re;
00207 }
00208 int operator>(const AmpiComplex &a) noexcept {
00209 CkAbort("AMPI> Cannot compare complex numbers with MPI_MAX\n");
00210 return 0;
00211 }
00212 int operator<(const AmpiComplex &a) noexcept {
00213 CkAbort("AMPI> Cannot compare complex numbers with MPI_MIN\n");
00214 return 0;
00215 }
00216 };
00217
00218 class AmpiDoubleComplex {
00219 public:
00220 double re, im;
00221 void operator+=(const AmpiDoubleComplex &a) noexcept {
00222 re+=a.re;
00223 im+=a.im;
00224 }
00225 void operator*=(const AmpiDoubleComplex &a) noexcept {
00226 double nu_re=re*a.re-im*a.im;
00227 im=re*a.im+im*a.re;
00228 re=nu_re;
00229 }
00230 int operator>(const AmpiDoubleComplex &a) noexcept {
00231 CkAbort("AMPI> Cannot compare double complex numbers with MPI_MAX\n");
00232 return 0;
00233 }
00234 int operator<(const AmpiDoubleComplex &a) noexcept {
00235 CkAbort("AMPI> Cannot compare double complex numbers with MPI_MIN\n");
00236 return 0;
00237 }
00238 };
00239
00240 class AmpiLongDoubleComplex {
00241 public:
00242 long double re, im;
00243 void operator+=(const AmpiLongDoubleComplex &a) noexcept {
00244 re+=a.re;
00245 im+=a.im;
00246 }
00247 void operator*=(const AmpiLongDoubleComplex &a) noexcept {
00248 long double nu_re=re*a.re-im*a.im;
00249 im=re*a.im+im*a.re;
00250 re=nu_re;
00251 }
00252 int operator>(const AmpiLongDoubleComplex &a) noexcept {
00253 CkAbort("AMPI> Cannot compare long double complex numbers with MPI_MAX\n");
00254 return 0;
00255 }
00256 int operator<(const AmpiLongDoubleComplex &a) noexcept {
00257 CkAbort("AMPI> Cannot compare long double complex numbers with MPI_MIN\n");
00258 return 0;
00259 }
00260 };
00261
00262 typedef struct { float val; int idx; } FloatInt;
00263 typedef struct { double val; int idx; } DoubleInt;
00264 typedef struct { long val; int idx; } LongInt;
00265 typedef struct { int val; int idx; } IntInt;
00266 typedef struct { short val; int idx; } ShortInt;
00267 typedef struct { long double val; int idx; } LongdoubleInt;
00268 typedef struct { float val; float idx; } FloatFloat;
00269 typedef struct { double val; double idx; } DoubleDouble;
00270
00271
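// Expands to a per-datatype loop over *len elements; the caller must #define MPI_OP_IMPL(type)
// to the per-element operation before using any of these switches, and #undef it afterwards.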
00272 #define MPI_OP_SWITCH(OPNAME) \
00273 int i; \
00274 switch (*datatype) { \
00275 case MPI_CHAR: for(i=0;i<(*len);i++) { MPI_OP_IMPL(char); } break; \
00276 case MPI_SHORT: for(i=0;i<(*len);i++) { MPI_OP_IMPL(signed short int); } break; \
00277 case MPI_INT: for(i=0;i<(*len);i++) { MPI_OP_IMPL(signed int); } break; \
00278 case MPI_LONG: for(i=0;i<(*len);i++) { MPI_OP_IMPL(signed long); } break; \
00279 case MPI_UNSIGNED_CHAR: for(i=0;i<(*len);i++) { MPI_OP_IMPL(unsigned char); } break; \
00280 case MPI_UNSIGNED_SHORT: for(i=0;i<(*len);i++) { MPI_OP_IMPL(unsigned short); } break; \
00281 case MPI_UNSIGNED: for(i=0;i<(*len);i++) { MPI_OP_IMPL(unsigned int); } break; \
00282 case MPI_UNSIGNED_LONG: for(i=0;i<(*len);i++) { MPI_OP_IMPL(unsigned long); } break; \
00283 case MPI_FLOAT: for(i=0;i<(*len);i++) { MPI_OP_IMPL(float); } break; \
00284 case MPI_DOUBLE: for(i=0;i<(*len);i++) { MPI_OP_IMPL(double); } break; \
00285 case MPI_COMPLEX: for(i=0;i<(*len);i++) { MPI_OP_IMPL(AmpiComplex); } break; \
00286 case MPI_DOUBLE_COMPLEX: for(i=0;i<(*len);i++) { MPI_OP_IMPL(AmpiDoubleComplex); } break; \
00287 case MPI_LONG_LONG_INT: for(i=0;i<(*len);i++) { MPI_OP_IMPL(signed long long); } break; \
00288 case MPI_SIGNED_CHAR: for(i=0;i<(*len);i++) { MPI_OP_IMPL(signed char); } break; \
00289 case MPI_UNSIGNED_LONG_LONG: for(i=0;i<(*len);i++) { MPI_OP_IMPL(unsigned long long); } break; \
00290 case MPI_WCHAR: for(i=0;i<(*len);i++) { MPI_OP_IMPL(wchar_t); } break; \
00291 case MPI_INT8_T: for(i=0;i<(*len);i++) { MPI_OP_IMPL(int8_t); } break; \
00292 case MPI_INT16_T: for(i=0;i<(*len);i++) { MPI_OP_IMPL(int16_t); } break; \
00293 case MPI_INT32_T: for(i=0;i<(*len);i++) { MPI_OP_IMPL(int32_t); } break; \
00294 case MPI_INT64_T: for(i=0;i<(*len);i++) { MPI_OP_IMPL(int64_t); } break; \
00295 case MPI_UINT8_T: for(i=0;i<(*len);i++) { MPI_OP_IMPL(uint8_t); } break; \
00296 case MPI_UINT16_T: for(i=0;i<(*len);i++) { MPI_OP_IMPL(uint16_t); } break; \
00297 case MPI_UINT32_T: for(i=0;i<(*len);i++) { MPI_OP_IMPL(uint32_t); } break; \
00298 case MPI_UINT64_T: for(i=0;i<(*len);i++) { MPI_OP_IMPL(uint64_t); } break; \
00299 case MPI_FLOAT_COMPLEX: for(i=0;i<(*len);i++) { MPI_OP_IMPL(AmpiComplex); } break; \
00300 case MPI_LONG_DOUBLE_COMPLEX: for(i=0;i<(*len);i++) { MPI_OP_IMPL(AmpiLongDoubleComplex); } break; \
00301 case MPI_AINT: for(i=0;i<(*len);i++) { MPI_OP_IMPL(MPI_Aint); } break; \
00302 default: \
00303 ckerr << "Type " << *datatype << " with Op "#OPNAME" not supported." << endl; \
00304 CkAbort("Unsupported MPI datatype for MPI Op"); \
00305 };\
00306
00307
00308 #define MPI_LOGICAL_OP_SWITCH(OPNAME) \
00309 int i; \
00310 switch (*datatype) { \
00311 case MPI_SHORT: for(i=0;i<(*len);i++) { MPI_OP_IMPL(signed short int); } break; \
00312 case MPI_INT: for(i=0;i<(*len);i++) { MPI_OP_IMPL(signed int); } break; \
00313 case MPI_LONG: for(i=0;i<(*len);i++) { MPI_OP_IMPL(signed long); } break; \
00314 case MPI_UNSIGNED_CHAR: for(i=0;i<(*len);i++) { MPI_OP_IMPL(unsigned char); } break; \
00315 case MPI_UNSIGNED_SHORT: for(i=0;i<(*len);i++) { MPI_OP_IMPL(unsigned short); } break; \
00316 case MPI_UNSIGNED: for(i=0;i<(*len);i++) { MPI_OP_IMPL(unsigned int); } break; \
00317 case MPI_UNSIGNED_LONG: for(i=0;i<(*len);i++) { MPI_OP_IMPL(unsigned long); } break; \
00318 case MPI_LONG_LONG_INT: for(i=0;i<(*len);i++) { MPI_OP_IMPL(signed long long); } break; \
00319 case MPI_SIGNED_CHAR: for(i=0;i<(*len);i++) { MPI_OP_IMPL(signed char); } break; \
00320 case MPI_UNSIGNED_LONG_LONG: for(i=0;i<(*len);i++) { MPI_OP_IMPL(unsigned long long); } break; \
00321 case MPI_INT8_T: for(i=0;i<(*len);i++) { MPI_OP_IMPL(int8_t); } break; \
00322 case MPI_INT16_T: for(i=0;i<(*len);i++) { MPI_OP_IMPL(int16_t); } break; \
00323 case MPI_INT32_T: for(i=0;i<(*len);i++) { MPI_OP_IMPL(int32_t); } break; \
00324 case MPI_INT64_T: for(i=0;i<(*len);i++) { MPI_OP_IMPL(int64_t); } break; \
00325 case MPI_UINT8_T: for(i=0;i<(*len);i++) { MPI_OP_IMPL(uint8_t); } break; \
00326 case MPI_UINT16_T: for(i=0;i<(*len);i++) { MPI_OP_IMPL(uint16_t); } break; \
00327 case MPI_UINT32_T: for(i=0;i<(*len);i++) { MPI_OP_IMPL(uint32_t); } break; \
00328 case MPI_UINT64_T: for(i=0;i<(*len);i++) { MPI_OP_IMPL(uint64_t); } break; \
00329 case MPI_LOGICAL: for(i=0;i<(*len);i++) { MPI_OP_IMPL(int); } break; \
00330 case MPI_C_BOOL: for(i=0;i<(*len);i++) { MPI_OP_IMPL(bool); } break; \
00331 case MPI_AINT: for(i=0;i<(*len);i++) { MPI_OP_IMPL(MPI_Aint); } break; \
00332 default: \
00333 ckerr << "Type " << *datatype << " with Op "#OPNAME" not supported." << endl; \
00334 CkAbort("Unsupported MPI datatype for MPI Op"); \
00335 };\
00336
00337
00338 #define MPI_BITWISE_OP_SWITCH(OPNAME) \
00339 int i; \
00340 switch (*datatype) { \
00341 case MPI_SHORT: for(i=0;i<(*len);i++) { MPI_OP_IMPL(signed short int); } break; \
00342 case MPI_INT: for(i=0;i<(*len);i++) { MPI_OP_IMPL(signed int); } break; \
00343 case MPI_LONG: for(i=0;i<(*len);i++) { MPI_OP_IMPL(signed long); } break; \
00344 case MPI_UNSIGNED_CHAR: for(i=0;i<(*len);i++) { MPI_OP_IMPL(unsigned char); } break; \
00345 case MPI_UNSIGNED_SHORT: for(i=0;i<(*len);i++) { MPI_OP_IMPL(unsigned short); } break; \
00346 case MPI_UNSIGNED: for(i=0;i<(*len);i++) { MPI_OP_IMPL(unsigned int); } break; \
00347 case MPI_UNSIGNED_LONG: for(i=0;i<(*len);i++) { MPI_OP_IMPL(unsigned long); } break; \
00348 case MPI_LONG_LONG_INT: for(i=0;i<(*len);i++) { MPI_OP_IMPL(signed long long); } break; \
00349 case MPI_SIGNED_CHAR: for(i=0;i<(*len);i++) { MPI_OP_IMPL(signed char); } break; \
00350 case MPI_UNSIGNED_LONG_LONG: for(i=0;i<(*len);i++) { MPI_OP_IMPL(unsigned long long); } break; \
00351 case MPI_INT8_T: for(i=0;i<(*len);i++) { MPI_OP_IMPL(int8_t); } break; \
00352 case MPI_INT16_T: for(i=0;i<(*len);i++) { MPI_OP_IMPL(int16_t); } break; \
00353 case MPI_INT32_T: for(i=0;i<(*len);i++) { MPI_OP_IMPL(int32_t); } break; \
00354 case MPI_INT64_T: for(i=0;i<(*len);i++) { MPI_OP_IMPL(int64_t); } break; \
00355 case MPI_UINT8_T: for(i=0;i<(*len);i++) { MPI_OP_IMPL(uint8_t); } break; \
00356 case MPI_UINT16_T: for(i=0;i<(*len);i++) { MPI_OP_IMPL(uint16_t); } break; \
00357 case MPI_UINT32_T: for(i=0;i<(*len);i++) { MPI_OP_IMPL(uint32_t); } break; \
00358 case MPI_UINT64_T: for(i=0;i<(*len);i++) { MPI_OP_IMPL(uint64_t); } break; \
00359 case MPI_BYTE: for(i=0;i<(*len);i++) { MPI_OP_IMPL(char); } break; \
00360 case MPI_AINT: for(i=0;i<(*len);i++) { MPI_OP_IMPL(MPI_Aint); } break; \
00361 default: \
00362 ckerr << "Type " << *datatype << " with Op "#OPNAME" not supported." << endl; \
00363 CkAbort("Unsupported MPI datatype for MPI Op"); \
00364 };\
00365
00366 void MPI_MAX_USER_FN( void *invec, void *inoutvec, int *len, MPI_Datatype *datatype){
00367 #define MPI_OP_IMPL(type) \
00368 if(((type *)invec)[i] > ((type *)inoutvec)[i]) ((type *)inoutvec)[i] = ((type *)invec)[i];
00369 MPI_OP_SWITCH(MPI_MAX)
00370 #undef MPI_OP_IMPL
00371 }
00372
00373 void MPI_MIN_USER_FN( void *invec, void *inoutvec, int *len, MPI_Datatype *datatype){
00374 #define MPI_OP_IMPL(type) \
00375 if(((type *)invec)[i] < ((type *)inoutvec)[i]) ((type *)inoutvec)[i] = ((type *)invec)[i];
00376 MPI_OP_SWITCH(MPI_MIN)
00377 #undef MPI_OP_IMPL
00378 }
00379
00380 void MPI_SUM_USER_FN( void *invec, void *inoutvec, int *len, MPI_Datatype *datatype){
00381 #define MPI_OP_IMPL(type) \
00382 ((type *)inoutvec)[i] += ((type *)invec)[i];
00383 MPI_OP_SWITCH(MPI_SUM)
00384 #undef MPI_OP_IMPL
00385 }
00386
00387 void MPI_PROD_USER_FN( void *invec, void *inoutvec, int *len, MPI_Datatype *datatype){
00388 #define MPI_OP_IMPL(type) \
00389 ((type *)inoutvec)[i] *= ((type *)invec)[i];
00390 MPI_OP_SWITCH(MPI_PROD)
00391 #undef MPI_OP_IMPL
00392 }
00393
00394 void MPI_REPLACE_USER_FN( void *invec, void *inoutvec, int *len, MPI_Datatype *datatype){
00395 #define MPI_OP_IMPL(type) \
00396 ((type *)inoutvec)[i] = ((type *)invec)[i];
00397 MPI_OP_SWITCH(MPI_REPLACE)
00398 #undef MPI_OP_IMPL
00399 }
00400
00401 void MPI_NO_OP_USER_FN( void *invec, void *inoutvec, int *len, MPI_Datatype *datatype){
00402
00403 }
00404
00405 void MPI_LAND_USER_FN( void *invec, void *inoutvec, int *len, MPI_Datatype *datatype){
00406 #define MPI_OP_IMPL(type) \
00407 ((type *)inoutvec)[i] = ((type *)inoutvec)[i] && ((type *)invec)[i];
00408 MPI_LOGICAL_OP_SWITCH(MPI_LAND)
00409 #undef MPI_OP_IMPL
00410 }
00411
00412 void MPI_BAND_USER_FN( void *invec, void *inoutvec, int *len, MPI_Datatype *datatype){
00413 #define MPI_OP_IMPL(type) \
00414 ((type *)inoutvec)[i] = ((type *)inoutvec)[i] & ((type *)invec)[i];
00415 MPI_BITWISE_OP_SWITCH(MPI_BAND)
00416 #undef MPI_OP_IMPL
00417 }
00418
00419 void MPI_LOR_USER_FN( void *invec, void *inoutvec, int *len, MPI_Datatype *datatype){
00420 #define MPI_OP_IMPL(type) \
00421 ((type *)inoutvec)[i] = ((type *)inoutvec)[i] || ((type *)invec)[i];
00422 MPI_LOGICAL_OP_SWITCH(MPI_LOR)
00423 #undef MPI_OP_IMPL
00424 }
00425
00426 void MPI_BOR_USER_FN( void *invec, void *inoutvec, int *len, MPI_Datatype *datatype){
00427 #define MPI_OP_IMPL(type) \
00428 ((type *)inoutvec)[i] = ((type *)inoutvec)[i] | ((type *)invec)[i];
00429 MPI_BITWISE_OP_SWITCH(MPI_BOR)
00430 #undef MPI_OP_IMPL
00431 }
00432
00433 void MPI_LXOR_USER_FN( void *invec, void *inoutvec, int *len, MPI_Datatype *datatype){
00434 #define MPI_OP_IMPL(type) \
00435 ((type *)inoutvec)[i] = (((type *)inoutvec)[i]&&(!((type *)invec)[i]))||(!(((type *)inoutvec)[i])&&((type *)invec)[i]);
00436 MPI_LOGICAL_OP_SWITCH(MPI_LXOR)
00437 #undef MPI_OP_IMPL
00438 }
00439
00440 void MPI_BXOR_USER_FN( void *invec, void *inoutvec, int *len, MPI_Datatype *datatype){
00441 #define MPI_OP_IMPL(type) \
00442 ((type *)inoutvec)[i] = ((type *)inoutvec)[i] ^ ((type *)invec)[i];
00443 MPI_BITWISE_OP_SWITCH(MPI_BXOR)
00444 #undef MPI_OP_IMPL
00445 }
00446
00447 #ifndef MIN
00448 #define MIN(a,b) (((a) < (b)) ? (a) : (b))
00449 #endif
00450
00451 void MPI_MAXLOC_USER_FN( void *invec, void *inoutvec, int *len, MPI_Datatype *datatype){
00452 int i;
00453
00454 switch (*datatype) {
00455 case MPI_FLOAT_INT:
00456 for(i=0;i<(*len);i++){
00457 if(((FloatInt *)invec)[i].val > ((FloatInt *)inoutvec)[i].val)
00458 ((FloatInt *)inoutvec)[i] = ((FloatInt *)invec)[i];
00459 else if(((FloatInt *)invec)[i].val == ((FloatInt *)inoutvec)[i].val)
00460 ((FloatInt *)inoutvec)[i].idx = MIN(((FloatInt *)inoutvec)[i].idx, ((FloatInt *)invec)[i].idx);
00461 }
00462 break;
00463 case MPI_DOUBLE_INT:
00464 for(i=0;i<(*len);i++){
00465 if(((DoubleInt *)invec)[i].val > ((DoubleInt *)inoutvec)[i].val)
00466 ((DoubleInt *)inoutvec)[i] = ((DoubleInt *)invec)[i];
00467 else if(((DoubleInt *)invec)[i].val == ((DoubleInt *)inoutvec)[i].val)
00468 ((DoubleInt *)inoutvec)[i].idx = MIN(((DoubleInt *)inoutvec)[i].idx, ((DoubleInt *)invec)[i].idx);
00469 }
00470 break;
00471 case MPI_LONG_INT:
00472 for(i=0;i<(*len);i++){
00473 if(((LongInt *)invec)[i].val > ((LongInt *)inoutvec)[i].val)
00474 ((LongInt *)inoutvec)[i] = ((LongInt *)invec)[i];
00475 else if(((LongInt *)invec)[i].val == ((LongInt *)inoutvec)[i].val)
00476 ((LongInt *)inoutvec)[i].idx = MIN(((LongInt *)inoutvec)[i].idx, ((LongInt *)invec)[i].idx);
00477 }
00478 break;
00479 case MPI_2INT:
00480 for(i=0;i<(*len);i++){
00481 if(((IntInt *)invec)[i].val > ((IntInt *)inoutvec)[i].val)
00482 ((IntInt *)inoutvec)[i] = ((IntInt *)invec)[i];
00483 else if(((IntInt *)invec)[i].val == ((IntInt *)inoutvec)[i].val)
00484 ((IntInt *)inoutvec)[i].idx = MIN(((IntInt *)inoutvec)[i].idx, ((IntInt *)invec)[i].idx);
00485 }
00486 break;
00487 case MPI_SHORT_INT:
00488 for(i=0;i<(*len);i++){
00489 if(((ShortInt *)invec)[i].val > ((ShortInt *)inoutvec)[i].val)
00490 ((ShortInt *)inoutvec)[i] = ((ShortInt *)invec)[i];
00491 else if(((ShortInt *)invec)[i].val == ((ShortInt *)inoutvec)[i].val)
00492 ((ShortInt *)inoutvec)[i].idx = MIN(((ShortInt *)inoutvec)[i].idx, ((ShortInt *)invec)[i].idx);
00493 }
00494 break;
00495 case MPI_LONG_DOUBLE_INT:
00496 for(i=0;i<(*len);i++){
00497 if(((LongdoubleInt *)invec)[i].val > ((LongdoubleInt *)inoutvec)[i].val)
00498 ((LongdoubleInt *)inoutvec)[i] = ((LongdoubleInt *)invec)[i];
00499 else if(((LongdoubleInt *)invec)[i].val == ((LongdoubleInt *)inoutvec)[i].val)
00500 ((LongdoubleInt *)inoutvec)[i].idx = MIN(((LongdoubleInt *)inoutvec)[i].idx, ((LongdoubleInt *)invec)[i].idx);
00501 }
00502 break;
00503 case MPI_2FLOAT:
00504 for(i=0;i<(*len);i++){
00505 if(((FloatFloat *)invec)[i].val > ((FloatFloat *)inoutvec)[i].val)
00506 ((FloatFloat *)inoutvec)[i] = ((FloatFloat *)invec)[i];
00507 else if(((FloatFloat *)invec)[i].val == ((FloatFloat *)inoutvec)[i].val)
00508 ((FloatFloat *)inoutvec)[i].idx = MIN(((FloatFloat *)inoutvec)[i].idx, ((FloatFloat *)invec)[i].idx);
00509 }
00510 break;
00511 case MPI_2DOUBLE:
00512 for(i=0;i<(*len);i++){
00513 if(((DoubleDouble *)invec)[i].val > ((DoubleDouble *)inoutvec)[i].val)
00514 ((DoubleDouble *)inoutvec)[i] = ((DoubleDouble *)invec)[i];
00515 else if(((DoubleDouble *)invec)[i].val == ((DoubleDouble *)inoutvec)[i].val)
00516 ((DoubleDouble *)inoutvec)[i].idx = MIN(((DoubleDouble *)inoutvec)[i].idx, ((DoubleDouble *)invec)[i].idx);
00517 }
00518 break;
00519 default:
00520 ckerr << "Type " << *datatype << " with Op MPI_MAXLOC not supported." << endl;
00521 CkAbort("Unsupported MPI datatype for MPI_MAXLOC");
00522 }
00523 }
00524
00525 void MPI_MINLOC_USER_FN( void *invec, void *inoutvec, int *len, MPI_Datatype *datatype){
00526 int i;
00527 switch (*datatype) {
00528 case MPI_FLOAT_INT:
00529 for(i=0;i<(*len);i++){
00530 if(((FloatInt *)invec)[i].val < ((FloatInt *)inoutvec)[i].val)
00531 ((FloatInt *)inoutvec)[i] = ((FloatInt *)invec)[i];
00532 else if(((FloatInt *)invec)[i].val == ((FloatInt *)inoutvec)[i].val)
00533 ((FloatInt *)inoutvec)[i].idx = MIN(((FloatInt *)inoutvec)[i].idx, ((FloatInt *)invec)[i].idx);
00534 }
00535 break;
00536 case MPI_DOUBLE_INT:
00537 for(i=0;i<(*len);i++){
00538 if(((DoubleInt *)invec)[i].val < ((DoubleInt *)inoutvec)[i].val)
00539 ((DoubleInt *)inoutvec)[i] = ((DoubleInt *)invec)[i];
00540 else if(((DoubleInt *)invec)[i].val == ((DoubleInt *)inoutvec)[i].val)
00541 ((DoubleInt *)inoutvec)[i].idx = MIN(((DoubleInt *)inoutvec)[i].idx, ((DoubleInt *)invec)[i].idx);
00542 }
00543 break;
00544 case MPI_LONG_INT:
00545 for(i=0;i<(*len);i++){
00546 if(((LongInt *)invec)[i].val < ((LongInt *)inoutvec)[i].val)
00547 ((LongInt *)inoutvec)[i] = ((LongInt *)invec)[i];
00548 else if(((LongInt *)invec)[i].val == ((LongInt *)inoutvec)[i].val)
00549 ((LongInt *)inoutvec)[i].idx = MIN(((LongInt *)inoutvec)[i].idx, ((LongInt *)invec)[i].idx);
00550 }
00551 break;
00552 case MPI_2INT:
00553 for(i=0;i<(*len);i++){
00554 if(((IntInt *)invec)[i].val < ((IntInt *)inoutvec)[i].val)
00555 ((IntInt *)inoutvec)[i] = ((IntInt *)invec)[i];
00556 else if(((IntInt *)invec)[i].val == ((IntInt *)inoutvec)[i].val)
00557 ((IntInt *)inoutvec)[i].idx = MIN(((IntInt *)inoutvec)[i].idx, ((IntInt *)invec)[i].idx);
00558 }
00559 break;
00560 case MPI_SHORT_INT:
00561 for(i=0;i<(*len);i++){
00562 if(((ShortInt *)invec)[i].val < ((ShortInt *)inoutvec)[i].val)
00563 ((ShortInt *)inoutvec)[i] = ((ShortInt *)invec)[i];
00564 else if(((ShortInt *)invec)[i].val == ((ShortInt *)inoutvec)[i].val)
00565 ((ShortInt *)inoutvec)[i].idx = MIN(((ShortInt *)inoutvec)[i].idx, ((ShortInt *)invec)[i].idx);
00566 }
00567 break;
00568 case MPI_LONG_DOUBLE_INT:
00569 for(i=0;i<(*len);i++){
00570 if(((LongdoubleInt *)invec)[i].val < ((LongdoubleInt *)inoutvec)[i].val)
00571 ((LongdoubleInt *)inoutvec)[i] = ((LongdoubleInt *)invec)[i];
00572 else if(((LongdoubleInt *)invec)[i].val == ((LongdoubleInt *)inoutvec)[i].val)
00573 ((LongdoubleInt *)inoutvec)[i].idx = MIN(((LongdoubleInt *)inoutvec)[i].idx, ((LongdoubleInt *)invec)[i].idx);
00574 }
00575 break;
00576 case MPI_2FLOAT:
00577 for(i=0;i<(*len);i++){
00578 if(((FloatFloat *)invec)[i].val < ((FloatFloat *)inoutvec)[i].val)
00579 ((FloatFloat *)inoutvec)[i] = ((FloatFloat *)invec)[i];
00580 else if(((FloatFloat *)invec)[i].val == ((FloatFloat *)inoutvec)[i].val)
00581 ((FloatFloat *)inoutvec)[i].idx = MIN(((FloatFloat *)inoutvec)[i].idx, ((FloatFloat *)invec)[i].idx);
00582 }
00583 break;
00584 case MPI_2DOUBLE:
00585 for(i=0;i<(*len);i++){
00586 if(((DoubleDouble *)invec)[i].val < ((DoubleDouble *)inoutvec)[i].val)
00587 ((DoubleDouble *)inoutvec)[i] = ((DoubleDouble *)invec)[i];
00588 else if(((DoubleDouble *)invec)[i].val == ((DoubleDouble *)inoutvec)[i].val)
00589 ((DoubleDouble *)inoutvec)[i].idx = MIN(((DoubleDouble *)inoutvec)[i].idx, ((DoubleDouble *)invec)[i].idx);
00590 }
00591 break;
00592 default:
00593 ckerr << "Type " << *datatype << " with Op MPI_MINLOC not supported." << endl;
00594 CkAbort("Unsupported MPI datatype for MPI_MINLOC");
00595 }
00596 }
00597
00598
00599
00600
00601
00602
00603
00604
00605
00606
00607
00608
00609
00610
00611
00612
00613
00614
00615 CkReduction::reducerType AmpiReducer;
00616
00617
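// Charm++ reducer for AMPI's MPI_Op-based reductions: the AmpiOpHeader at the front of the first
// contribution names the user function, datatype, and length; the remaining contributions are
// folded into the result buffer with that function.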
00618 CkReductionMsg *AmpiReducerFunc(int nMsg, CkReductionMsg **msgs) noexcept {
00619 AmpiOpHeader *hdr = (AmpiOpHeader *)msgs[0]->getData();
00620 MPI_Datatype dtype;
00621 int szhdr, szdata, len;
00622 MPI_User_function* func;
00623 func = hdr->func;
00624 dtype = hdr->dtype;
00625 szdata = hdr->szdata;
00626 len = hdr->len;
00627 szhdr = sizeof(AmpiOpHeader);
00628
00629 CkReductionMsg *retmsg = CkReductionMsg::buildNew(szhdr+szdata,NULL,AmpiReducer,msgs[0]);
00630 void *retPtr = (char *)retmsg->getData() + szhdr;
00631 for(int i=1;i<nMsg;i++){
00632 (*func)((void *)((char *)msgs[i]->getData()+szhdr),retPtr,&len,&dtype);
00633 }
00634 return retmsg;
00635 }
00636
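// Map a (datatype, op) pair onto a built-in Charm++ reducer where one exists. The fixed-width
// integer cases intentionally fall through to the matching C type when their sizes agree;
// anything unsupported returns CkReduction::invalid so callers can fall back to the generic
// AmpiReducer path.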
00637 static CkReduction::reducerType getBuiltinReducerType(MPI_Datatype type, MPI_Op op) noexcept
00638 {
00639 switch (type) {
00640 case MPI_INT32_T:
00641 if (getDDT()->getSize(MPI_INT32_T) != getDDT()->getSize(MPI_INT)) break;
00642
00643 case MPI_INT:
00644 switch (op) {
00645 case MPI_MAX: return CkReduction::max_int;
00646 case MPI_MIN: return CkReduction::min_int;
00647 case MPI_SUM: return CkReduction::sum_int;
00648 case MPI_PROD: return CkReduction::product_int;
00649 case MPI_LAND: return CkReduction::logical_and_int;
00650 case MPI_LOR: return CkReduction::logical_or_int;
00651 case MPI_LXOR: return CkReduction::logical_xor_int;
00652 case MPI_BAND: return CkReduction::bitvec_and_int;
00653 case MPI_BOR: return CkReduction::bitvec_or_int;
00654 case MPI_BXOR: return CkReduction::bitvec_xor_int;
00655 default: break;
00656 }
00657 case MPI_FLOAT:
00658 switch (op) {
00659 case MPI_MAX: return CkReduction::max_float;
00660 case MPI_MIN: return CkReduction::min_float;
00661 case MPI_SUM: return CkReduction::sum_float;
00662 case MPI_PROD: return CkReduction::product_float;
00663 default: break;
00664 }
00665 case MPI_DOUBLE:
00666 switch (op) {
00667 case MPI_MAX: return CkReduction::max_double;
00668 case MPI_MIN: return CkReduction::min_double;
00669 case MPI_SUM: return CkReduction::sum_double;
00670 case MPI_PROD: return CkReduction::product_double;
00671 default: break;
00672 }
00673 case MPI_INT8_T:
00674 if (getDDT()->getSize(MPI_INT8_T) != getDDT()->getSize(MPI_CHAR)) break;
00675
00676 case MPI_CHAR:
00677 switch (op) {
00678 case MPI_MAX: return CkReduction::max_char;
00679 case MPI_MIN: return CkReduction::min_char;
00680 case MPI_SUM: return CkReduction::sum_char;
00681 case MPI_PROD: return CkReduction::product_char;
00682 default: break;
00683 }
00684 case MPI_INT16_T:
00685 if (getDDT()->getSize(MPI_INT16_T) != getDDT()->getSize(MPI_SHORT)) break;
00686
00687 case MPI_SHORT:
00688 switch (op) {
00689 case MPI_MAX: return CkReduction::max_short;
00690 case MPI_MIN: return CkReduction::min_short;
00691 case MPI_SUM: return CkReduction::sum_short;
00692 case MPI_PROD: return CkReduction::product_short;
00693 default: break;
00694 }
00695 case MPI_LONG:
00696 switch (op) {
00697 case MPI_MAX: return CkReduction::max_long;
00698 case MPI_MIN: return CkReduction::min_long;
00699 case MPI_SUM: return CkReduction::sum_long;
00700 case MPI_PROD: return CkReduction::product_long;
00701 default: break;
00702 }
00703 case MPI_INT64_T:
00704 if (getDDT()->getSize(MPI_INT64_T) != getDDT()->getSize(MPI_LONG_LONG)) break;
00705
00706 case MPI_LONG_LONG:
00707 switch (op) {
00708 case MPI_MAX: return CkReduction::max_long_long;
00709 case MPI_MIN: return CkReduction::min_long_long;
00710 case MPI_SUM: return CkReduction::sum_long_long;
00711 case MPI_PROD: return CkReduction::product_long_long;
00712 default: break;
00713 }
00714 case MPI_UINT8_T:
00715 if (getDDT()->getSize(MPI_UINT8_T) != getDDT()->getSize(MPI_UNSIGNED_CHAR)) break;
00716
00717 case MPI_UNSIGNED_CHAR:
00718 switch (op) {
00719 case MPI_MAX: return CkReduction::max_uchar;
00720 case MPI_MIN: return CkReduction::min_uchar;
00721 case MPI_SUM: return CkReduction::sum_uchar;
00722 case MPI_PROD: return CkReduction::product_uchar;
00723 default: break;
00724 }
00725 case MPI_UINT16_T:
00726 if (getDDT()->getSize(MPI_UINT16_T) != getDDT()->getSize(MPI_UNSIGNED_SHORT)) break;
00727
00728 case MPI_UNSIGNED_SHORT:
00729 switch (op) {
00730 case MPI_MAX: return CkReduction::max_ushort;
00731 case MPI_MIN: return CkReduction::min_ushort;
00732 case MPI_SUM: return CkReduction::sum_ushort;
00733 case MPI_PROD: return CkReduction::product_ushort;
00734 default: break;
00735 }
00736 case MPI_UINT32_T:
00737 if (getDDT()->getSize(MPI_UINT32_T) != getDDT()->getSize(MPI_UNSIGNED)) break;
00738
00739 case MPI_UNSIGNED:
00740 switch (op) {
00741 case MPI_MAX: return CkReduction::max_uint;
00742 case MPI_MIN: return CkReduction::min_uint;
00743 case MPI_SUM: return CkReduction::sum_uint;
00744 case MPI_PROD: return CkReduction::product_uint;
00745 default: break;
00746 }
00747 case MPI_UNSIGNED_LONG:
00748 switch (op) {
00749 case MPI_MAX: return CkReduction::max_ulong;
00750 case MPI_MIN: return CkReduction::min_ulong;
00751 case MPI_SUM: return CkReduction::sum_ulong;
00752 case MPI_PROD: return CkReduction::product_ulong;
00753 default: break;
00754 }
00755 case MPI_UINT64_T:
00756 if (getDDT()->getSize(MPI_UINT64_T) != getDDT()->getSize(MPI_UNSIGNED_LONG_LONG)) break;
00757
00758 case MPI_UNSIGNED_LONG_LONG:
00759 switch (op) {
00760 case MPI_MAX: return CkReduction::max_ulong_long;
00761 case MPI_MIN: return CkReduction::min_ulong_long;
00762 case MPI_SUM: return CkReduction::sum_ulong_long;
00763 case MPI_PROD: return CkReduction::product_ulong_long;
00764 default: break;
00765 }
00766 case MPI_C_BOOL:
00767 switch (op) {
00768 case MPI_LAND: return CkReduction::logical_and_bool;
00769 case MPI_LOR: return CkReduction::logical_or_bool;
00770 case MPI_LXOR: return CkReduction::logical_xor_bool;
00771 default: break;
00772 }
00773 case MPI_LOGICAL:
00774 switch (op) {
00775 case MPI_LAND: return CkReduction::logical_and_int;
00776 case MPI_LOR: return CkReduction::logical_or_int;
00777 case MPI_LXOR: return CkReduction::logical_xor_int;
00778 default: break;
00779 }
00780 case MPI_BYTE:
00781 switch (op) {
00782 case MPI_BAND: return CkReduction::bitvec_and_bool;
00783 case MPI_BOR: return CkReduction::bitvec_or_bool;
00784 case MPI_BXOR: return CkReduction::bitvec_xor_bool;
00785 default: break;
00786 }
00787 default:
00788 break;
00789 }
00790 return CkReduction::invalid;
00791 }
00792
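// Per-PE storage for the values behind the predefined attributes (MPI_TAG_UB, MPI_WIN_BASE, etc.).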
00793 class Builtin_kvs{
00794 public:
00795 int tag_ub,host,io,wtime_is_global,appnum,lastusedcode,universe_size;
00796 int win_disp_unit,win_create_flavor,win_model;
00797 int ampi_tmp;
00798 void* win_base;
00799 MPI_Aint win_size;
00800 Builtin_kvs() noexcept {
00801 tag_ub = MPI_TAG_UB_VALUE;
00802 host = MPI_PROC_NULL;
00803 io = 0;
00804 wtime_is_global = 0;
00805 appnum = 0;
00806 lastusedcode = MPI_ERR_LASTCODE;
00807 universe_size = 0;
00808 win_base = NULL;
00809 win_size = 0;
00810 win_disp_unit = 0;
00811 win_create_flavor = MPI_WIN_FLAVOR_CREATE;
00812 win_model = MPI_WIN_SEPARATE;
00813 ampi_tmp = 0;
00814 }
00815 };
00816
00817
00818 int _ampi_fallback_setup_count = -1;
00819 CLINKAGE void AMPI_Setup(void);
00820 FLINKAGE void FTN_NAME(AMPI_SETUP,ampi_setup)(void);
00821
00822 FLINKAGE void FTN_NAME(MPI_MAIN,mpi_main)(void);
00823
00824
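// Fallback main: invoke the C++, C, and Fortran entry points in turn; only the one the
// application actually defines does any work, the others are expected to be no-op stubs.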
00825 CLINKAGE
00826 void AMPI_Fallback_Main(int argc,char **argv)
00827 {
00828 AMPI_Main_cpp();
00829 AMPI_Main_cpp(argc,argv);
00830 AMPI_Main_c(argc,argv);
00831 FTN_NAME(MPI_MAIN,mpi_main)();
00832 }
00833
00834 void ampiCreateMain(MPI_MainFn mainFn, const char *name,int nameLen);
00835
00836
00837
00838 CLINKAGE
00839 void AMPI_Setup_Switch(void) {
00840 _ampi_fallback_setup_count=0;
00841 FTN_NAME(AMPI_SETUP,ampi_setup)();
00842 AMPI_Setup();
00843 if (_ampi_fallback_setup_count==2)
00844 {
00845 ampiCreateMain(AMPI_Fallback_Main,"default",strlen("default"));
00846 }
00847 }
00848
00849 int AMPI_RDMA_THRESHOLD = AMPI_RDMA_THRESHOLD_DEFAULT;
00850 int AMPI_SMP_RDMA_THRESHOLD = AMPI_SMP_RDMA_THRESHOLD_DEFAULT;
00851 static bool nodeinit_has_been_called=false;
00852 CtvDeclare(ampiParent*, ampiPtr);
00853 CtvDeclare(bool, ampiInitDone);
00854 CtvDeclare(void*,stackBottom);
00855 CtvDeclare(bool, ampiFinalized);
00856 CkpvDeclare(Builtin_kvs, bikvs);
00857 CkpvDeclare(int, ampiThreadLevel);
00858 CkpvDeclare(AmpiMsgPool, msgPool);
00859
00860 CLINKAGE
00861 long ampiCurrentStackUsage(void){
00862 int localVariable;
00863
00864 unsigned long p1 = (unsigned long)(uintptr_t)((void*)&localVariable);
00865 unsigned long p2 = (unsigned long)(uintptr_t)(CtvAccess(stackBottom));
00866
00867 if(p1 > p2)
00868 return p1 - p2;
00869 else
00870 return p2 - p1;
00871 }
00872
00873 FLINKAGE
00874 void FTN_NAME(AMPICURRENTSTACKUSAGE, ampicurrentstackusage)(void){
00875 long usage = ampiCurrentStackUsage();
00876 CkPrintf("[%d] Stack usage is currently %ld\n", CkMyPe(), usage);
00877 }
00878
00879 CLINKAGE
00880 void AMPI_threadstart(void *data);
00881 static int AMPI_threadstart_idx = -1;
00882
00883 #if CMK_TRACE_ENABLED
00884 CsvExtern(funcmap*, tcharm_funcmap);
00885 #endif
00886
00887
00888 static const std::array<const CkDDT_DataType *, AMPI_MAX_PREDEFINED_TYPE+1> ampiPredefinedTypes = CkDDT::createPredefinedTypes();
00889
00890 static constexpr std::array<MPI_User_function*, AMPI_MAX_PREDEFINED_OP+1> ampiPredefinedOps = {{
00891 MPI_MAX_USER_FN,
00892 MPI_MIN_USER_FN,
00893 MPI_SUM_USER_FN,
00894 MPI_PROD_USER_FN,
00895 MPI_LAND_USER_FN,
00896 MPI_BAND_USER_FN,
00897 MPI_LOR_USER_FN,
00898 MPI_BOR_USER_FN,
00899 MPI_LXOR_USER_FN,
00900 MPI_BXOR_USER_FN,
00901 MPI_MAXLOC_USER_FN,
00902 MPI_MINLOC_USER_FN,
00903 MPI_REPLACE_USER_FN,
00904 MPI_NO_OP_USER_FN
00905 }};
00906
00907 #if defined _WIN32
00908 # ifndef WIN32_LEAN_AND_MEAN
00909 # define WIN32_LEAN_AND_MEAN
00910 # endif
00911 # ifndef NOMINMAX
00912 # define NOMINMAX
00913 # endif
00914 # include <windows.h>
00915 #elif defined __APPLE__
00916 # include <unistd.h>
00917 # include <libproc.h>
00918 #elif CMK_HAS_REALPATH || CMK_HAS_READLINK
00919 # ifndef _GNU_SOURCE
00920 # define _GNU_SOURCE
00921 # endif
00922 # ifndef __USE_GNU
00923 # define __USE_GNU
00924 # endif
00925 # include <unistd.h>
00926 #endif
00927
00928 char * ampi_binary_path;
00929
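// Resolve the full path of the running executable using the platform mechanism selected above;
// on failure, ampi_binary_path is left NULL.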
00930 static void getAmpiBinaryPath() noexcept
00931 {
00932 #if defined _WIN32
00933 ssize_t bufsize = MAX_PATH;
00934 DWORD n;
00935 do
00936 {
00937 ampi_binary_path = (char *)realloc(ampi_binary_path, bufsize);
00938 SetLastError(0);
00939 n = GetModuleFileName(NULL, ampi_binary_path, bufsize);
00940 bufsize *= 2;
00941 }
00942 while (n == bufsize/2 || GetLastError() == ERROR_INSUFFICIENT_BUFFER); // bufsize was already doubled; compare against the size actually passed
00943
00944 if (n == 0)
00945 {
00946 CkError("ERROR> GetModuleFileName(): %d\n", (int)GetLastError());
00947 free(ampi_binary_path);
00948 ampi_binary_path = nullptr;
00949 }
00950 #elif defined __APPLE__
00951 ampi_binary_path = (char *)malloc(PROC_PIDPATHINFO_MAXSIZE);
00952 pid_t pid = getpid();
00953 int n = proc_pidpath(pid, ampi_binary_path, PROC_PIDPATHINFO_MAXSIZE);
00954
00955 if (n == 0)
00956 {
00957 CkError("ERROR> proc_pidpath(): %s\n", strerror(errno));
00958 free(ampi_binary_path);
00959 ampi_binary_path = nullptr;
00960 }
00961 #elif CMK_HAS_REALPATH
00962 ampi_binary_path = realpath("/proc/self/exe", nullptr);
00963 if (ampi_binary_path == nullptr)
00964 CkError("ERROR> realpath(): %s\n", strerror(errno));
00965 #elif CMK_HAS_READLINK
00966 ssize_t bufsize = 256;
00967 ssize_t n;
00968 do
00969 {
00970 ampi_binary_path = (char *)realloc(ampi_binary_path, bufsize);
00971 n = readlink("/proc/self/exe", ampi_binary_path, bufsize-1);
00972 bufsize *= 2;
00973 }
00974 while (n == bufsize/2 - 1); // bufsize was already doubled; a completely filled buffer means the link target may have been truncated
00975
00976 if (n == -1)
00977 {
00978 CkError("ERROR> readlink(): %s\n", strerror(errno));
00979 free(ampi_binary_path);
00980 ampi_binary_path = nullptr;
00981 }
00982 else
00983 {
00984 ampi_binary_path[n] = '\0';
00985 }
00986 #else
00987 CkAbort("Could not get path to current binary!");
00988 #endif
00989 }
00990
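// One-time per-process initialization: record the binary path, set up tracing names, register
// the AMPI reducer and thread-start function, fill in MPI_COMM_UNIVERSE, and honor the
// AMPI_RDMA_THRESHOLD / AMPI_SMP_RDMA_THRESHOLD environment variables.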
00991 static void ampiNodeInit() noexcept
00992 {
00993 getAmpiBinaryPath();
00994
00995 #if CMK_TRACE_ENABLED
00996 TCharm::nodeInit();
00997 int funclength = sizeof(funclist)/sizeof(char*);
00998 for (int i=0; i<funclength; i++) {
00999 int event_id = traceRegisterUserEvent(funclist[i], -1);
01000 CsvAccess(tcharm_funcmap)->insert(std::pair<std::string, int>(funclist[i], event_id));
01001 }
01002
01003
01004
01005 for (int i=0; i<_chareTable.size(); i++){
01006 if (strcmp(_chareTable[i]->name, "dummy_thread_chare") == 0)
01007 _chareTable[i]->name = "AMPI";
01008 }
01009 for (int i=0; i<_entryTable.size(); i++){
01010 if (strcmp(_entryTable[i]->name, "dummy_thread_ep") == 0)
01011 _entryTable[i]->setName("rank");
01012 }
01013 #endif
01014
01015 _mpi_nworlds=0;
01016 for(int i=0;i<MPI_MAX_COMM_WORLDS; i++)
01017 {
01018 MPI_COMM_UNIVERSE[i] = MPI_COMM_WORLD+1+i;
01019 }
01020 TCHARM_Set_fallback_setup(AMPI_Setup_Switch);
01021
01022
01023 char *value;
01024 bool rdmaSet = false;
01025 if ((value = getenv("AMPI_RDMA_THRESHOLD"))) {
01026 AMPI_RDMA_THRESHOLD = atoi(value);
01027 rdmaSet = true;
01028 }
01029 if ((value = getenv("AMPI_SMP_RDMA_THRESHOLD"))) {
01030 AMPI_SMP_RDMA_THRESHOLD = atoi(value);
01031 rdmaSet = true;
01032 }
01033 if (rdmaSet && CkMyNode() == 0) {
01034 #if AMPI_RDMA_IMPL
01035 CkPrintf("AMPI> RDMA threshold is %d Bytes and SMP RDMA threshold is %d Bytes.\n", AMPI_RDMA_THRESHOLD, AMPI_SMP_RDMA_THRESHOLD);
01036 #else
01037 CkPrintf("Warning: AMPI RDMA threshold ignored since AMPI RDMA is disabled.\n");
01038 #endif
01039 }
01040
01041 AmpiReducer = CkReduction::addReducer(AmpiReducerFunc, true, "AmpiReducerFunc");
01042
01043 CkAssert(AMPI_threadstart_idx == -1);
01044 AMPI_threadstart_idx = TCHARM_Register_thread_function(AMPI_threadstart);
01045
01046 nodeinit_has_been_called=true;
01047
01048
01049 _isAnytimeMigration = false;
01050 _isStaticInsertion = true;
01051 }
01052
01053 #if AMPI_PRINT_IDLE
01054 static double totalidle=0.0, startT=0.0;
01055 static int beginHandle, endHandle;
01056 static void BeginIdle(void *dummy,double curWallTime) noexcept
01057 {
01058 startT = curWallTime;
01059 }
01060 static void EndIdle(void *dummy,double curWallTime) noexcept
01061 {
01062 totalidle += curWallTime - startT;
01063 }
01064 #endif
01065
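// Per-PE initialization of thread-private (Ctv) and PE-private (Ckpv) state, the message pool,
// and the optional message-logging and message-size-recording command-line options.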
01066 static void ampiProcInit() noexcept {
01067 CtvInitialize(ampiParent*, ampiPtr);
01068 CtvInitialize(bool,ampiInitDone);
01069 CtvInitialize(bool,ampiFinalized);
01070 CtvInitialize(void*,stackBottom);
01071
01072 CkpvInitialize(int, ampiThreadLevel);
01073 CkpvAccess(ampiThreadLevel) = MPI_THREAD_SINGLE;
01074
01075 CkpvInitialize(Builtin_kvs, bikvs);
01076 CkpvAccess(bikvs) = Builtin_kvs();
01077
01078 CkpvInitialize(AmpiMsgPool, msgPool);
01079 CkpvAccess(msgPool) = AmpiMsgPool(AMPI_MSG_POOL_SIZE, AMPI_POOLED_MSG_SIZE);
01080
01081 #if AMPIMSGLOG
01082 char **argv=CkGetArgv();
01083 msgLogWrite = CmiGetArgFlag(argv, "+msgLogWrite");
01084 if (CmiGetArgIntDesc(argv,"+msgLogRead", &msgLogRank, "Re-play message processing order for AMPI")) {
01085 msgLogRead = 1;
01086 }
01087 char *procs = NULL;
01088 if (CmiGetArgStringDesc(argv, "+msgLogRanks", &procs, "A list of AMPI processors to record , e.g. 0,10,20-30")) {
01089 msgLogRanks.set(procs);
01090 }
01091 CmiGetArgString(argv, "+msgLogFilename", &msgLogFilename);
01092 if (CkMyPe() == 0) {
01093 if (msgLogWrite) CkPrintf("Writing AMPI messages of rank %s to log: %s\n", procs?procs:"", msgLogFilename);
01094 if (msgLogRead) CkPrintf("Reading AMPI messages of rank %s from log: %s\n", procs?procs:"", msgLogFilename);
01095 }
01096 #endif
01097
01098 #if AMPI_PRINT_MSG_SIZES
01099
01100
01101 char *ranks = NULL;
01102 CkpvInitialize(CkListString, msgSizesRanks);
01103 if (CmiGetArgStringDesc(CkGetArgv(), "+msgSizesRanks", &ranks,
01104 "A list of AMPI ranks to record and print message sizes on, e.g. 0,10,20-30")) {
01105 CkpvAccess(msgSizesRanks).set(ranks);
01106 }
01107 #endif
01108 }
01109
01110 #if AMPIMSGLOG
01111 static inline int record_msglog(int rank) noexcept {
01112 return msgLogRanks.includes(rank);
01113 }
01114 #endif
01115
01116 PUPfunctionpointer(MPI_MainFn)
01117
01118 class MPI_threadstart_t {
01119 public:
01120 MPI_MainFn fn;
01121 MPI_threadstart_t() noexcept {}
01122 MPI_threadstart_t(MPI_MainFn fn_) noexcept :fn(fn_) {}
01123 void start() {
01124 char **argv=CmiCopyArgs(CkGetArgv());
01125 int argc=CkGetArgc();
01126
01127
01128
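// Remember an address near the base of this thread's stack so ampiCurrentStackUsage() can
// estimate usage later.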
01129 CtvAccess(stackBottom) = &argv;
01130
01131 #if !CMK_NO_BUILD_SHARED
01132
01133
01134
01135 if (_ampi_fallback_setup_count != -1 && _ampi_fallback_setup_count != 2 && CkMyPe() == 0) {
01136 CkAbort("AMPI> The application provided a custom AMPI_Setup() method, "
01137 "but AMPI is built with shared library support. This is an unsupported "
01138 "configuration. Please recompile charm++/AMPI without `-build-shared` or "
01139 "remove the AMPI_Setup() function from your application.\n");
01140 }
01141 AMPI_Fallback_Main(argc,argv);
01142 #else
01143 (fn)(argc,argv);
01144 #endif
01145 }
01146 void pup(PUP::er &p) noexcept {
01147 p|fn;
01148 }
01149 };
01150 PUPmarshall(MPI_threadstart_t)
01151
01152 CLINKAGE
01153 void AMPI_threadstart(void *data)
01154 {
01155 STARTUP_DEBUG("MPI_threadstart")
01156 MPI_threadstart_t t;
01157 pupFromBuf(data,t);
01158 #if CMK_TRACE_IN_CHARM
01159 if(CpvAccess(traceOn)) CthTraceResume(CthSelf());
01160 #endif
01161 t.start();
01162 }
01163
01164 void ampiCreateMain(MPI_MainFn mainFn, const char *name,int nameLen)
01165 {
01166 STARTUP_DEBUG("ampiCreateMain")
01167 int _nchunks=TCHARM_Get_num_chunks();
01168
01169 MPI_threadstart_t s(mainFn);
01170 memBuf b; pupIntoBuf(b,s);
01171 TCHARM_Create_data(_nchunks,AMPI_threadstart_idx,
01172 b.getData(), b.getSize());
01173 }
01174
01175
01176 #define AMPI_TCHARM_SEMAID 0x00A34100
01177 #define AMPI_BARRIER_SEMAID 0x00A34200
01178
01179 static CProxy_ampiWorlds ampiWorldsGroup;
01180
01181
01182 static void createCommSelf() noexcept {
01183 STARTUP_DEBUG("ampiInit> creating MPI_COMM_SELF")
01184 MPI_Comm selfComm;
01185 MPI_Group worldGroup, selfGroup;
01186 int ranks[1] = { getAmpiInstance(MPI_COMM_WORLD)->getRank() };
01187
01188 MPI_Comm_group(MPI_COMM_WORLD, &worldGroup);
01189 MPI_Group_incl(worldGroup, 1, ranks, &selfGroup);
01190 MPI_Comm_create(MPI_COMM_WORLD, selfGroup, &selfComm);
01191 MPI_Comm_set_name(selfComm, "MPI_COMM_SELF");
01192
01193 CkAssert(selfComm == MPI_COMM_SELF);
01194 STARTUP_DEBUG("ampiInit> created MPI_COMM_SELF")
01195 }
01196
01197
01198
01199
01200
01201
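// Called by every rank's thread during startup. Rank 0 of each world creates the ampiParent and
// ampi arrays; all ranks then synchronize, publish MPI_UNIVERSE_SIZE, wait for the global
// initialization reduction to complete, and finally create MPI_COMM_SELF.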
01202 static ampi *ampiInit(char **argv) noexcept
01203 {
01204 FUNCCALL_DEBUG(CkPrintf("Calling from proc %d for tcharm element %d\n", CkMyPe(), TCHARM_Element());)
01205 if (CtvAccess(ampiInitDone)) return NULL;
01206 STARTUP_DEBUG("ampiInit> begin")
01207
01208 MPI_Comm new_world;
01209 int _nchunks;
01210 CkArrayOptions opts;
01211 CProxy_ampiParent parent;
01212 if (TCHARM_Element()==0)
01213 {
01214 STARTUP_DEBUG("ampiInit> creating arrays")
01215
01216
01217
01218 if(_mpi_nworlds == MPI_MAX_COMM_WORLDS)
01219 {
01220 CkAbort("AMPI> Number of registered comm_worlds exceeded limit.\n");
01221 }
01222 int new_idx=_mpi_nworlds;
01223 new_world=MPI_COMM_WORLD+new_idx;
01224
01225
01226 CkArrayID threads;
01227 opts=TCHARM_Attach_start(&threads,&_nchunks);
01228 opts.setSectionAutoDelegate(false);
01229 CkArrayCreatedMsg *m;
01230 CProxy_ampiParent::ckNew(new_world, threads, _nchunks, opts, CkCallbackResumeThread((void*&)m));
01231 parent = CProxy_ampiParent(m->aid);
01232 delete m;
01233 STARTUP_DEBUG("ampiInit> array size "<<_nchunks);
01234 }
01235 int *barrier = (int *)TCharm::get()->semaGet(AMPI_BARRIER_SEMAID);
01236
01237 FUNCCALL_DEBUG(CkPrintf("After BARRIER: sema size %d from tcharm's ele %d\n", TCharm::get()->sema.size(), TCHARM_Element());)
01238
01239 if (TCHARM_Element()==0)
01240 {
01241
01242 CkArrayID empty;
01243
01244 ampiCommStruct worldComm(new_world,empty,_nchunks);
01245 CProxy_ampi arr;
01246 CkArrayCreatedMsg *m;
01247 CProxy_ampi::ckNew(parent, worldComm, opts, CkCallbackResumeThread((void*&)m));
01248 arr = CProxy_ampi(m->aid);
01249 delete m;
01250
01251
01252
01253 ampiCommStruct newComm(new_world,arr,_nchunks);
01254 if (ampiWorldsGroup.ckGetGroupID().isZero())
01255 ampiWorldsGroup=CProxy_ampiWorlds::ckNew(newComm);
01256 else
01257 ampiWorldsGroup.add(newComm);
01258 STARTUP_DEBUG("ampiInit> arrays created")
01259 }
01260
01261
01262 ampi *ptr=(ampi *)TCharm::get()->semaGet(AMPI_TCHARM_SEMAID);
01263 CtvAccess(ampiInitDone)=true;
01264 CtvAccess(ampiFinalized)=false;
01265 STARTUP_DEBUG("ampiInit> complete")
01266 #if CMK_BIGSIM_CHARM
01267
01268 TRACE_BG_ADD_TAG("AMPI_START");
01269 #endif
01270
01271 ampiParent* pptr = getAmpiParent();
01272 vector<int>& keyvals = pptr->getKeyvals(MPI_COMM_WORLD);
01273 pptr->setAttr(MPI_COMM_WORLD, keyvals, MPI_UNIVERSE_SIZE, &_nchunks);
01274 ptr->setCommName("MPI_COMM_WORLD");
01275
01276 pptr->ampiInitCallDone = 0;
01277
01278 CProxy_ampi cbproxy = ptr->getProxy();
01279 CkCallback cb(CkReductionTarget(ampi, allInitDone), cbproxy[0]);
01280 ptr->contribute(cb);
01281
01282 ampiParent *thisParent = getAmpiParent();
01283 while(thisParent->ampiInitCallDone!=1){
01284 thisParent->getTCharmThread()->stop();
01285
01286
01287
01288
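// The thread may have migrated while suspended, so refresh the parent pointer before re-checking.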
01289 thisParent = getAmpiParent();
01290 }
01291
01292 createCommSelf();
01293
01294 #if CMK_BIGSIM_CHARM
01295 BgSetStartOutOfCore();
01296 #endif
01297
01298 return ptr;
01299 }
01300
01302 class ampiWorlds : public CBase_ampiWorlds {
01303 public:
01304 ampiWorlds(const ampiCommStruct &nextWorld) noexcept {
01305 ampiWorldsGroup=thisgroup;
01306 add(nextWorld);
01307 }
01308 ampiWorlds(CkMigrateMessage *m) noexcept : CBase_ampiWorlds(m) {}
01309 void pup(PUP::er &p) noexcept { }
01310 void add(const ampiCommStruct &nextWorld) noexcept {
01311 int new_idx=nextWorld.getComm()-(MPI_COMM_WORLD);
01312 mpi_worlds[new_idx]=nextWorld;
01313 if (_mpi_nworlds<=new_idx) _mpi_nworlds=new_idx+1;
01314 STARTUP_DEBUG("ampiInit> listed MPI_COMM_UNIVERSE "<<new_idx)
01315 }
01316 };
01317
01318
01319 ampiParent::ampiParent(MPI_Comm worldNo_,CProxy_TCharm threads_,int nRanks_) noexcept
01320 : threads(threads_), ampiReqs(64, &reqPool), myDDT(ampiPredefinedTypes),
01321 worldNo(worldNo_), predefinedOps(ampiPredefinedOps), isTmpRProxySet(false)
01322 {
01323 int barrier = 0x1234;
01324 STARTUP_DEBUG("ampiParent> starting up")
01325 thread=NULL;
01326 worldPtr=NULL;
01327 userAboutToMigrateFn=NULL;
01328 userJustMigratedFn=NULL;
01329 prepareCtv();
01330
01331
01332 groups.push_back(new groupStruct);
01333
01334 init();
01335
01336
01337 defineInfoEnv(nRanks_);
01338
01339 defineInfoMigration();
01340
01341 thread->semaPut(AMPI_BARRIER_SEMAID,&barrier);
01342
01343 #if CMK_FAULT_EVAC
01344 AsyncEvacuate(false);
01345 #endif
01346 }
01347
01348 ampiParent::ampiParent(CkMigrateMessage *msg) noexcept
01349 : CBase_ampiParent(msg), myDDT(ampiPredefinedTypes), predefinedOps(ampiPredefinedOps)
01350 {
01351 thread=NULL;
01352 worldPtr=NULL;
01353
01354 init();
01355
01356 #if CMK_FAULT_EVAC
01357 AsyncEvacuate(false);
01358 #endif
01359 }
01360
01361 PUPfunctionpointer(MPI_MigrateFn)
01362
01363 void ampiParent::pup(PUP::er &p) noexcept {
01364 p|threads;
01365 p|worldNo;
01366 p|myDDT;
01367 p|splitComm;
01368 p|groupComm;
01369 p|cartComm;
01370 p|graphComm;
01371 p|distGraphComm;
01372 p|interComm;
01373 p|intraComm;
01374
01375 p|groups;
01376 p|winStructList;
01377 p|infos;
01378 p|userOps;
01379
01380 p|reqPool;
01381 ampiReqs.pup(p, &reqPool);
01382
01383 p|kvlist;
01384 p|isTmpRProxySet;
01385 p|tmpRProxy;
01386
01387 p|userAboutToMigrateFn;
01388 p|userJustMigratedFn;
01389
01390 p|ampiInitCallDone;
01391 p|resumeOnRecv;
01392 p|resumeOnColl;
01393 p|numBlockedReqs;
01394 p|bsendBufferSize;
01395 p((char *)&bsendBuffer, sizeof(void *));
01396
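// blockingReq is polymorphic: pup a type tag, re-create the concrete request class when
// unpacking, then pup the object itself.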
01397
01398 AmpiReqType reqType;
01399 if (!p.isUnpacking()) {
01400 if (blockingReq) {
01401 reqType = blockingReq->getType();
01402 } else {
01403 reqType = AMPI_INVALID_REQ;
01404 }
01405 }
01406 p|reqType;
01407 if (reqType != AMPI_INVALID_REQ) {
01408 if (p.isUnpacking()) {
01409 switch (reqType) {
01410 case AMPI_I_REQ:
01411 blockingReq = new IReq;
01412 break;
01413 case AMPI_REDN_REQ:
01414 blockingReq = new RednReq;
01415 break;
01416 case AMPI_GATHER_REQ:
01417 blockingReq = new GatherReq;
01418 break;
01419 case AMPI_GATHERV_REQ:
01420 blockingReq = new GathervReq;
01421 break;
01422 case AMPI_SEND_REQ:
01423 blockingReq = new SendReq;
01424 break;
01425 case AMPI_SSEND_REQ:
01426 blockingReq = new SsendReq;
01427 break;
01428 case AMPI_ATA_REQ:
01429 blockingReq = new ATAReq;
01430 break;
01431 case AMPI_G_REQ:
01432 blockingReq = new GReq;
01433 break;
01434 #if CMK_CUDA
01435 case AMPI_GPU_REQ:
01436 CkAbort("AMPI> error trying to PUP a non-migratable GPU request!");
01437 break;
01438 #endif
01439 case AMPI_INVALID_REQ:
01440 CkAbort("AMPI> error trying to PUP an invalid request!");
01441 break;
01442 }
01443 }
01444 blockingReq->pup(p);
01445 } else {
01446 blockingReq = NULL;
01447 }
01448 if (p.isDeleting()) {
01449 delete blockingReq; blockingReq = NULL;
01450 }
01451
01452 #if AMPI_PRINT_MSG_SIZES
01453 p|msgSizes;
01454 #endif
01455 }
01456
01457 void ampiParent::prepareCtv() noexcept {
01458 thread=threads[thisIndex].ckLocal();
01459 if (thread==NULL) CkAbort("AMPIParent cannot find its thread!\n");
01460 CtvAccessOther(thread->getThread(),ampiPtr) = this;
01461 STARTUP_DEBUG("ampiParent> found TCharm")
01462 }
01463
01464 void ampiParent::init() noexcept{
01465 resumeOnRecv = false;
01466 resumeOnColl = false;
01467 numBlockedReqs = 0;
01468 bsendBufferSize = 0;
01469 bsendBuffer = NULL;
01470 blockingReq = NULL;
01471 #if AMPIMSGLOG
01472 if(msgLogWrite && record_msglog(thisIndex)){
01473 char fname[128];
01474 sprintf(fname, "%s.%d", msgLogFilename,thisIndex);
01475 #if CMK_USE_ZLIB && 0
01476 fMsgLog = gzopen(fname,"wb");
01477 toPUPer = new PUP::tozDisk(fMsgLog);
01478 #else
01479 fMsgLog = fopen(fname,"wb");
01480 CkAssert(fMsgLog != NULL);
01481 toPUPer = new PUP::toDisk(fMsgLog);
01482 #endif
01483 }else if(msgLogRead){
01484 char fname[128];
01485 sprintf(fname, "%s.%d", msgLogFilename,msgLogRank);
01486 #if CMK_USE_ZLIB && 0
01487 fMsgLog = gzopen(fname,"rb");
01488 fromPUPer = new PUP::fromzDisk(fMsgLog);
01489 #else
01490 fMsgLog = fopen(fname,"rb");
01491 CkAssert(fMsgLog != NULL);
01492 fromPUPer = new PUP::fromDisk(fMsgLog);
01493 #endif
01494 CkPrintf("AMPI> opened message log file: %s for replay\n", fname);
01495 }
01496 #endif
01497 }
01498
01499 void ampiParent::finalize() noexcept {
01500 #if AMPIMSGLOG
01501 if(msgLogWrite && record_msglog(thisIndex)){
01502 delete toPUPer;
01503 #if CMK_USE_ZLIB && 0
01504 gzclose(fMsgLog);
01505 #else
01506 fclose(fMsgLog);
01507 #endif
01508 }else if(msgLogRead){
01509 delete fromPUPer;
01510 #if CMK_USE_ZLIB && 0
01511 gzclose(fMsgLog);
01512 #else
01513 fclose(fMsgLog);
01514 #endif
01515 }
01516 #endif
01517 }
01518
01519 void ampiParent::setUserAboutToMigrateFn(MPI_MigrateFn f) noexcept {
01520 userAboutToMigrateFn = f;
01521 }
01522
01523 void ampiParent::setUserJustMigratedFn(MPI_MigrateFn f) noexcept {
01524 userJustMigratedFn = f;
01525 }
01526
01527 void ampiParent::ckAboutToMigrate() noexcept {
01528 if (userAboutToMigrateFn) {
01529 (*userAboutToMigrateFn)();
01530 }
01531 }
01532
01533 void ampiParent::ckJustMigrated() noexcept {
01534 ArrayElement1D::ckJustMigrated();
01535 prepareCtv();
01536 if (userJustMigratedFn) {
01537 (*userJustMigratedFn)();
01538 }
01539 }
01540
01541 void ampiParent::ckJustRestored() noexcept {
01542 FUNCCALL_DEBUG(CkPrintf("Call just restored from ampiParent[%d] with ampiInitCallDone %d\n", thisIndex, ampiInitCallDone);)
01543 ArrayElement1D::ckJustRestored();
01544 prepareCtv();
01545 }
01546
01547 ampiParent::~ampiParent() noexcept {
01548 STARTUP_DEBUG("ampiParent> destructor called");
01549 finalize();
01550 }
01551
01552 const ampiCommStruct& ampiParent::getWorldStruct() const noexcept {
01553 return worldPtr->getCommStruct();
01554 }
01555
01556
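// Called by each ampi (per-communicator) array element to attach itself to this ampiParent.
// When registering for migration, the posted-receive queue's stored request indices are
// converted back into AmpiRequest pointers; otherwise the new communicator is recorded
// according to its kind (world, split, group, cart, graph, dist graph, inter, or intra).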
01557 TCharm *ampiParent::registerAmpi(ampi *ptr,ampiCommStruct s,bool forMigration) noexcept
01558 {
01559 if (thread==NULL) prepareCtv();
01560
01561 if (s.getComm()>=MPI_COMM_WORLD)
01562 {
01563
01564
01565 if (worldPtr!=NULL) CkAbort("One ampiParent has two MPI_COMM_WORLDs");
01566 worldPtr=ptr;
01567 }
01568
01569 if (forMigration) {
01570 AmmEntry<AmpiRequest *> *e = ptr->postedReqs.first;
01571 while (e) {
01572
01573 MPI_Request reqIdx = (MPI_Request)(intptr_t)e->msg;
01574 CkAssert(reqIdx != MPI_REQUEST_NULL);
01575 AmpiRequest* req = ampiReqs[reqIdx];
01576 CkAssert(req);
01577 e->msg = req;
01578 e = e->next;
01579 }
01580 }
01581 else {
01582 MPI_Comm comm = s.getComm();
01583 STARTUP_DEBUG("ampiParent> registering new communicator "<<comm)
01584 if (comm>=MPI_COMM_WORLD) {
01585
01586 thread->semaPut(AMPI_TCHARM_SEMAID, ptr);
01587 } else if (isSplit(comm)) {
01588 splitChildRegister(s);
01589 } else if (isGroup(comm)) {
01590 groupChildRegister(s);
01591 } else if (isCart(comm)) {
01592 cartChildRegister(s);
01593 } else if (isGraph(comm)) {
01594 graphChildRegister(s);
01595 } else if (isDistGraph(comm)) {
01596 distGraphChildRegister(s);
01597 } else if (isInter(comm)) {
01598 interChildRegister(s);
01599 } else if (isIntra(comm)) {
01600 intraChildRegister(s);
01601 }else
01602 CkAbort("ampiParent received child with bad communicator");
01603 }
01604
01605 return thread;
01606 }
01607
01608
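// Checkpointing: rank 0 installs a reduction client that triggers the checkpoint once every
// rank has contributed and suspended its thread; ResumeThread() restarts them afterwards.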
01609 class ckptClientStruct {
01610 public:
01611 const char *dname;
01612 ampiParent *ampiPtr;
01613 ckptClientStruct(const char *s, ampiParent *a) noexcept : dname(s), ampiPtr(a) {}
01614 };
01615
01616 static void checkpointClient(void *param,void *msg) noexcept
01617 {
01618 ckptClientStruct *client = (ckptClientStruct*)param;
01619 const char *dname = client->dname;
01620 ampiParent *ampiPtr = client->ampiPtr;
01621 ampiPtr->Checkpoint(strlen(dname), dname);
01622 delete client;
01623 }
01624
01625 void ampiParent::startCheckpoint(const char* dname) noexcept {
01626 if (thisIndex==0) {
01627 ckptClientStruct *clientData = new ckptClientStruct(dname, this);
01628 CkCallback *cb = new CkCallback(checkpointClient, clientData);
01629 thisProxy.ckSetReductionClient(cb);
01630 }
01631 contribute();
01632
01633 thread->stop();
01634
01635 #if CMK_BIGSIM_CHARM
01636 TRACE_BG_ADD_TAG("CHECKPOINT_RESUME");
01637 #endif
01638 }
01639
01640 void ampiParent::Checkpoint(int len, const char* dname) noexcept {
01641 if (len == 0) {
01642
01643 CkCallback cb(CkIndex_ampiParent::ResumeThread(),thisArrayID);
01644 CkStartMemCheckpoint(cb);
01645 }
01646 else {
01647 char dirname[256];
if (len >= (int)sizeof(dirname)) len = sizeof(dirname)-1; // clamp to avoid overflowing dirname
01648 strncpy(dirname,dname,len);
01649 dirname[len]='\0';
01650 CkCallback cb(CkIndex_ampiParent::ResumeThread(),thisArrayID);
01651 CkStartCheckpoint(dirname,cb);
01652 }
01653 }
01654
01655 void ampiParent::ResumeThread() noexcept {
01656 thread->resume();
01657 }
01658
01659 int ampiParent::createKeyval(MPI_Comm_copy_attr_function *copy_fn, MPI_Comm_delete_attr_function *delete_fn,
01660 int *keyval, void* extra_state) noexcept {
01661 KeyvalNode* newnode = new KeyvalNode(copy_fn, delete_fn, extra_state);
01662 int idx = kvlist.size();
01663 kvlist.resize(idx+1);
01664 kvlist[idx] = newnode;
01665 *keyval = idx;
01666 return 0;
01667 }
01668
01669 int ampiParent::setUserKeyval(int context, int keyval, void *attribute_val) noexcept {
01670 #if AMPI_ERROR_CHECKING
01671 if (keyval < 0 || keyval >= kvlist.size() || kvlist[keyval] == NULL) {
01672 return MPI_ERR_KEYVAL;
01673 }
01674 #endif
01675 KeyvalNode &kv = *kvlist[keyval];
01676 if (kv.hasVal()) {
01677 int ret = (*kv.delete_fn)(context, keyval, kv.val, kv.extra_state);
01678 if (ret != MPI_SUCCESS) {
01679 return ret;
01680 }
01681 }
01682 kvlist[keyval]->setVal(attribute_val);
01683 return MPI_SUCCESS;
01684 }
01685
01686 int ampiParent::setAttr(int context, vector<int>& keyvals, int keyval, void* attribute_val) noexcept {
01687 if (kv_set_builtin(keyval, attribute_val)) {
01688 return MPI_SUCCESS;
01689 }
01690 keyvals.push_back(keyval);
01691 kvlist[keyval]->incRefCount();
01692 return setUserKeyval(context, keyval, attribute_val);
01693 }
01694
01695 bool ampiParent::kv_set_builtin(int keyval, void* attribute_val) noexcept {
01696 switch(keyval) {
01697 case MPI_TAG_UB: return false;
01698 case MPI_HOST: return false;
01699 case MPI_IO: return false;
01700 case MPI_WTIME_IS_GLOBAL: return false;
01701 case MPI_APPNUM: return false;
01702 case MPI_LASTUSEDCODE: return false;
01703 case MPI_UNIVERSE_SIZE: (CkpvAccess(bikvs).universe_size) = *((int*)attribute_val); return true;
01704 case MPI_WIN_BASE: (CkpvAccess(bikvs).win_base) = attribute_val; return true;
01705 case MPI_WIN_SIZE: (CkpvAccess(bikvs).win_size) = *((MPI_Aint*)attribute_val); return true;
01706 case MPI_WIN_DISP_UNIT: (CkpvAccess(bikvs).win_disp_unit) = *((int*)attribute_val); return true;
01707 case MPI_WIN_CREATE_FLAVOR: (CkpvAccess(bikvs).win_create_flavor) = *((int*)attribute_val); return true;
01708 case MPI_WIN_MODEL: (CkpvAccess(bikvs).win_model) = *((int*)attribute_val); return true;
01709 case AMPI_MY_WTH: return false;
01710 case AMPI_NUM_WTHS: return false;
01711 case AMPI_MY_PROCESS: return false;
01712 case AMPI_NUM_PROCESSES: return false;
01713 default: return false;
01714 };
01715 }
01716
01717 bool ampiParent::kv_get_builtin(int keyval) noexcept {
01718 switch(keyval) {
01719 case MPI_TAG_UB: kv_builtin_storage = &(CkpvAccess(bikvs).tag_ub); return true;
01720 case MPI_HOST: kv_builtin_storage = &(CkpvAccess(bikvs).host); return true;
01721 case MPI_IO: kv_builtin_storage = &(CkpvAccess(bikvs).io); return true;
01722 case MPI_WTIME_IS_GLOBAL: kv_builtin_storage = &(CkpvAccess(bikvs).wtime_is_global); return true;
01723 case MPI_APPNUM: kv_builtin_storage = &(CkpvAccess(bikvs).appnum); return true;
01724 case MPI_LASTUSEDCODE: kv_builtin_storage = &(CkpvAccess(bikvs).lastusedcode); return true;
01725 case MPI_UNIVERSE_SIZE: kv_builtin_storage = &(CkpvAccess(bikvs).universe_size); return true;
01726 case MPI_WIN_BASE: win_base_storage = &(CkpvAccess(bikvs).win_base); return true;
01727 case MPI_WIN_SIZE: win_size_storage = &(CkpvAccess(bikvs).win_size); return true;
01728 case MPI_WIN_DISP_UNIT: kv_builtin_storage = &(CkpvAccess(bikvs).win_disp_unit); return true;
01729 case MPI_WIN_CREATE_FLAVOR: kv_builtin_storage = &(CkpvAccess(bikvs).win_create_flavor); return true;
01730 case MPI_WIN_MODEL: kv_builtin_storage = &(CkpvAccess(bikvs).win_model); return true;
01731 default: return false;
01732 };
01733 }
01734
01735 bool ampiParent::getBuiltinKeyval(int keyval, void *attribute_val) noexcept {
01736 if (kv_get_builtin(keyval)){
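    // Builtin keyvals are stored as ints, except MPI_WIN_BASE (a pointer) and MPI_WIN_SIZE (an MPI_Aint), which use their own storage.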
01737
01738
01739
01740
01741 if (keyval == MPI_WIN_BASE)
01742 *((void**)attribute_val) = *win_base_storage;
01743 else if (keyval == MPI_WIN_SIZE)
01744 *(MPI_Aint**)attribute_val = win_size_storage;
01745 else
01746 *(int **)attribute_val = kv_builtin_storage;
01747 return true;
01748 } else {
01749 switch(keyval) {
01750 case AMPI_MY_WTH: *(int *)attribute_val = CkMyPe(); return true;
01751 case AMPI_NUM_WTHS: *(int *)attribute_val = CkNumPes(); return true;
01752 case AMPI_MY_PROCESS: *(int *)attribute_val = CkMyNode(); return true;
01753 case AMPI_NUM_PROCESSES: *(int *)attribute_val = CkNumNodes(); return true;
01754 }
01755 }
01756 return false;
01757 }
01758
01759
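// Copy user attributes from old_comm to new_comm by invoking each keyval's copy_fn; an attribute is attached to new_comm only if the callback sets flag.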
01760 int ampiParent::dupUserKeyvals(MPI_Comm old_comm, MPI_Comm new_comm) noexcept {
01761 ampiCommStruct &old_cs = *(ampiCommStruct *)&comm2CommStruct(old_comm);
01762 for (int i=0; i<old_cs.getKeyvals().size(); i++) {
01763 int keyval = old_cs.getKeyvals()[i];
01764 void *val_out;
01765 int flag = 0;
01766 bool isValid = (keyval != MPI_KEYVAL_INVALID && kvlist[keyval] != NULL);
01767 if (isValid) {
01768
01769 KeyvalNode& kv = *kvlist[keyval];
01770 int ret = (*kv.copy_fn)(old_comm, keyval, kv.extra_state, kv.val, &val_out, &flag);
01771 if (ret != MPI_SUCCESS) {
01772 return ret;
01773 }
01774 if (flag == 1) {
01775
01776 ampiCommStruct &cs = *(ampiCommStruct *)&comm2CommStruct(new_comm);
01777 cs.getKeyvals().push_back(keyval);
01778 kv.incRefCount();
01779 }
01780 }
01781 }
01782 return MPI_SUCCESS;
01783 }
01784
01785 int ampiParent::freeUserKeyval(int context, vector<int>& keyvals, int* keyval) noexcept {
01786 if (*keyval < 0 || *keyval >= kvlist.size() || kvlist[*keyval] == NULL) {
01787 return MPI_SUCCESS;
01788 }
01789
01790 KeyvalNode& kv = *kvlist[*keyval];
01791 int ret = (*kv.delete_fn)(context, *keyval, kv.val, kv.extra_state);
01792 if (ret != MPI_SUCCESS) {
01793 return ret;
01794 }
01795
01796 kv.clearVal();
01797 for (int i=0; i<keyvals.size(); i++) {
01798 if (keyvals[i] == *keyval) {
01799 keyvals[i] = MPI_KEYVAL_INVALID;
01800 }
01801 }
01802 if (!keyvals.empty()) {
01803 while (!keyvals.empty() && keyvals.back() == MPI_KEYVAL_INVALID) keyvals.pop_back();
01804 }
01805
01806 if (kv.decRefCount() == 0) {
01807 delete kvlist[*keyval];
01808 kvlist[*keyval] = NULL;
01809 }
01810 *keyval = MPI_KEYVAL_INVALID;
01811 return MPI_SUCCESS;
01812 }
01813
01814 int ampiParent::freeUserKeyvals(int context, vector<int>& keyvals) noexcept {
01815 for (int i=0; i<keyvals.size(); i++) {
01816 int keyval = keyvals[i];
01817
01818 KeyvalNode& kv = *kvlist[keyval];
01819 int ret = (*kv.delete_fn)(context, keyval, kv.val, kv.extra_state);
01820 if (ret != MPI_SUCCESS) {
01821 return ret;
01822 }
01823 kv.clearVal();
01824 keyvals[i] = MPI_KEYVAL_INVALID;
01825
01826 if (kv.decRefCount() == 0) {
01827 delete kvlist[keyval];
01828 kvlist[keyval] = NULL;
01829 }
01830 }
01831 keyvals.clear();
01832 return MPI_SUCCESS;
01833 }
01834
01835 bool ampiParent::getUserKeyval(MPI_Comm comm, vector<int>& keyvals, int keyval, void *attribute_val, int *flag) noexcept {
01836 if (keyval < 0 || keyval >= kvlist.size() || kvlist[keyval] == NULL) {
01837 *flag = 0;
01838 return false;
01839 }
01840 else {
01841 for (int i=0; i<keyvals.size(); i++) {
01842 int kv = keyvals[i];
01843 if (keyval == kv) {
01844 *(void **)attribute_val = kvlist[keyval]->getVal();
01845 *flag = 1;
01846 return true;
01847 }
01848 }
01849 *flag = 0;
01850 return false;
01851 }
01852 }
01853
01854 int ampiParent::getAttr(int context, vector<int>& keyvals, int keyval, void *attribute_val, int *flag) noexcept {
01855 if (keyval == MPI_KEYVAL_INVALID) {
01856 *flag = 0;
01857 return MPI_ERR_KEYVAL;
01858 }
01859 else if (getBuiltinKeyval(keyval, attribute_val)) {
01860 *flag = 1;
01861 return MPI_SUCCESS;
01862 }
01863 else if (getUserKeyval(context, keyvals, keyval, attribute_val, flag)) {
01864 *flag = 1;
01865 return MPI_SUCCESS;
01866 }
01867 else {
01868 *flag = 0;
01869 return MPI_SUCCESS;
01870 }
01871 }
01872
01873 int ampiParent::deleteAttr(int context, vector<int>& keyvals, int keyval) noexcept {
01874 return freeUserKeyval(context, keyvals, &keyval);
01875 }
01876
01877
01878
01879
01880
01881
01882
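// Explicit instantiations of the Amm message-matching template for the point-to-point and collective pool sizes.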
01883 template class Amm<AmpiMsg *, AMPI_AMM_PT2PT_POOL_SIZE>;
01884 template class Amm<AmpiRequest *, AMPI_AMM_PT2PT_POOL_SIZE>;
01885
01886
01887 template class Amm<AmpiMsg *, AMPI_AMM_COLL_POOL_SIZE>;
01888 template class Amm<AmpiRequest *, AMPI_AMM_COLL_POOL_SIZE>;
01889
01890
01891 template<typename T, size_t N>
01892 void Amm<T, N>::freeAll() noexcept
01893 {
01894 AmmEntry<T>* cur = first;
01895 while (cur) {
01896 AmmEntry<T>* toDel = cur;
01897 cur = cur->next;
01898 deleteEntry(toDel);
01899 }
01900 }
01901
01902
01903 template<typename T, size_t N>
01904 void Amm<T, N>::flushMsgs() noexcept
01905 {
01906 T msg = get(MPI_ANY_TAG, MPI_ANY_SOURCE);
01907 while (msg) {
01908 delete msg;
01909 msg = get(MPI_ANY_TAG, MPI_ANY_SOURCE);
01910 }
01911 }
01912
01913 template<typename T, size_t N>
01914 void Amm<T, N>::put(T msg) noexcept
01915 {
01916 AmmEntry<T>* e = newEntry(msg);
01917 *lasth = e;
01918 lasth = &e->next;
01919 }
01920
01921 template<typename T, size_t N>
01922 void Amm<T, N>::put(int tag, int src, T msg) noexcept
01923 {
01924 AmmEntry<T>* e = newEntry(tag, src, msg);
01925 *lasth = e;
01926 lasth = &e->next;
01927 }
01928
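// Two (tag, src) pairs match if each component is equal or at least one side holds the corresponding wildcard (MPI_ANY_TAG / MPI_ANY_SOURCE).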
01929 template<typename T, size_t N>
01930 bool Amm<T, N>::match(const int tags1[AMM_NTAGS], const int tags2[AMM_NTAGS]) const noexcept
01931 {
01932 if (tags1[AMM_TAG]==tags2[AMM_TAG] && tags1[AMM_SRC]==tags2[AMM_SRC]) {
01933
01934 return true;
01935 }
01936 else if (tags1[AMM_TAG]==tags2[AMM_TAG] && (tags1[AMM_SRC]==MPI_ANY_SOURCE || tags2[AMM_SRC]==MPI_ANY_SOURCE)) {
01937
01938 return true;
01939 }
01940 else if (tags1[AMM_SRC]==tags2[AMM_SRC] && (tags1[AMM_TAG]==MPI_ANY_TAG || tags2[AMM_TAG]==MPI_ANY_TAG)) {
01941
01942 return true;
01943 }
01944 else if ((tags1[AMM_SRC]==MPI_ANY_SOURCE || tags2[AMM_SRC]==MPI_ANY_SOURCE) && (tags1[AMM_TAG]==MPI_ANY_TAG || tags2[AMM_TAG]==MPI_ANY_TAG)) {
01945
01946 return true;
01947 }
01948 else {
01949
01950 return false;
01951 }
01952 }
01953
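// Remove and return the first queued entry matching (tag, src), copying its actual tags into rtags if provided; returns NULL when nothing matches.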
01954 template<typename T, size_t N>
01955 T Amm<T, N>::get(int tag, int src, int* rtags) noexcept
01956 {
01957 AmmEntry<T> *ent, **enth;
01958 T msg;
01959 int tags[AMM_NTAGS] = { tag, src };
01960
01961 enth = &first;
01962 while (true) {
01963 ent = *enth;
01964 if (!ent) return NULL;
01965 if (match(tags, ent->tags)) {
01966 if (rtags) memcpy(rtags, ent->tags, sizeof(int)*AMM_NTAGS);
01967 msg = ent->msg;
01968
01969 AmmEntry<T>* next = ent->next;
01970 *enth = next;
01971 if (!next) lasth = enth;
01972 deleteEntry(ent);
01973 return msg;
01974 }
01975 enth = &ent->next;
01976 }
01977 }
01978
01979 template<typename T, size_t N>
01980 T Amm<T, N>::probe(int tag, int src, int* rtags) noexcept
01981 {
01982 AmmEntry<T> *ent, **enth;
01983 T msg;
01984 int tags[AMM_NTAGS] = { tag, src };
01985 CkAssert(rtags);
01986
01987 enth = &first;
01988 while (true) {
01989 ent = *enth;
01990 if (!ent) return NULL;
01991 if (match(tags, ent->tags)) {
01992 memcpy(rtags, ent->tags, sizeof(int)*AMM_NTAGS);
01993 msg = ent->msg;
01994 return msg;
01995 }
01996 enth = &ent->next;
01997 }
01998 }
01999
02000 template<typename T, size_t N>
02001 int Amm<T, N>::size() const noexcept
02002 {
02003 int n = 0;
02004 AmmEntry<T> *e = first;
02005 while (e) {
02006 e = e->next;
02007 n++;
02008 }
02009 return n;
02010 }
02011
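// Pack the queue length followed by each entry's tags and message; on unpacking, re-post the entries in their original order.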
02012 template<typename T, size_t N>
02013 void Amm<T, N>::pup(PUP::er& p, AmmPupMessageFn msgpup) noexcept
02014 {
02015 int sz;
02016 if (!p.isUnpacking()) {
02017 sz = size();
02018 p|sz;
02019 AmmEntry<T> *doomed, *e = first;
02020 while (e) {
02021 pup_ints(&p, e->tags, AMM_NTAGS);
02022 msgpup(p, (void**)&e->msg);
02023 doomed = e;
02024 e = e->next;
02025 if (p.isDeleting()) {
02026 deleteEntry(doomed);
02027 }
02028 }
02029 } else {
02030 p|sz;
02031 for (int i=0; i<sz; i++) {
02032 T msg;
02033 int tags[AMM_NTAGS];
02034 pup_ints(&p, tags, AMM_NTAGS);
02035 msgpup(p, (void**)&msg);
02036 put(tags[0], tags[1], msg);
02037 }
02038 }
02039 }
02040
02041
02042 void ampi::init() noexcept {
02043 parent=NULL;
02044 thread=NULL;
02045
02046 #if CMK_FAULT_EVAC
02047 AsyncEvacuate(false);
02048 #endif
02049 }
02050
02051 ampi::ampi() noexcept
02052 {
02053
02054 CkAbort("Default ampi constructor should never be called");
02055 }
02056
02057 ampi::ampi(CkArrayID parent_,const ampiCommStruct &s) noexcept :parentProxy(parent_), oorder(s.getSize())
02058 {
02059 init();
02060
02061 myComm=s; myComm.setArrayID(thisArrayID);
02062 myRank=myComm.getRankForIndex(thisIndex);
02063
02064 findParent(false);
02065 }
02066
02067 ampi::ampi(CkMigrateMessage *msg) noexcept : CBase_ampi(msg)
02068 {
02069 init();
02070 }
02071
02072 void ampi::ckJustMigrated() noexcept
02073 {
02074 findParent(true);
02075 ArrayElement1D::ckJustMigrated();
02076 }
02077
02078 void ampi::ckJustRestored() noexcept
02079 {
02080 FUNCCALL_DEBUG(CkPrintf("Call just restored from ampi[%d]\n", thisIndex);)
02081 findParent(true);
02082 ArrayElement1D::ckJustRestored();
02083 }
02084
02085 void ampi::findParent(bool forMigration) noexcept {
02086 STARTUP_DEBUG("ampi> finding my parent")
02087 parent=parentProxy[thisIndex].ckLocal();
02088 #if CMK_ERROR_CHECKING
02089 if (parent==NULL) CkAbort("AMPI can't find its parent!");
02090 #endif
02091 thread=parent->registerAmpi(this,myComm,forMigration);
02092 #if CMK_ERROR_CHECKING
02093 if (thread==NULL) CkAbort("AMPI can't find its thread!");
02094 #endif
02095 }
02096
02097
02098
02099 void ampi::allInitDone() noexcept {
02100 FUNCCALL_DEBUG(CkPrintf("All mpi_init have been called!\n");)
02101 thisProxy.setInitDoneFlag();
02102 }
02103
02104 void ampi::setInitDoneFlag() noexcept {
02105 parent->ampiInitCallDone=1;
02106 parent->getTCharmThread()->start();
02107 }
02108
02109 static void AmmPupUnexpectedMsgs(PUP::er& p,void **msg) noexcept {
02110 CkPupMessage(p,msg,1);
02111 if (p.isDeleting()) delete (AmpiMsg *)*msg;
02112 }
02113
02114 static void AmmPupPostedReqs(PUP::er& p,void **msg) noexcept {
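  // Posted requests are pupped by index: the pointer is replaced with the request's index when packing, and registerAmpi() turns indices back into pointers after migration.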
02115
02116
02117
02118
02119 if (p.isPacking()) {
02120 int reqIdx = ((AmpiRequest*)*msg)->getReqIdx();
02121 CkAssert(reqIdx != MPI_REQUEST_NULL);
02122 *msg = (void*)(intptr_t)reqIdx;
02123 }
02124 pup_pointer(&p, msg);
02125 #if CMK_ERROR_CHECKING
02126 if (p.isUnpacking()) {
02127 MPI_Request reqIdx = (MPI_Request)(intptr_t)*msg;
02128 CkAssert(reqIdx != MPI_REQUEST_NULL);
02129 }
02130 #endif
02131 }
02132
02133 void ampi::pup(PUP::er &p) noexcept
02134 {
02135 p|parentProxy;
02136 p|myComm;
02137 p|myRank;
02138 p|tmpVec;
02139 p|remoteProxy;
02140 unexpectedMsgs.pup(p, AmmPupUnexpectedMsgs);
02141 postedReqs.pup(p, AmmPupPostedReqs);
02142 unexpectedBcastMsgs.pup(p, AmmPupUnexpectedMsgs);
02143 postedBcastReqs.pup(p, AmmPupPostedReqs);
02144 p|greq_classes;
02145 p|oorder;
02146 }
02147
02148 ampi::~ampi() noexcept
02149 {
02150 if (CkInRestarting() || _BgOutOfCoreFlag==1) {
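    // On restart (or BigSim out-of-core eviction), drop any unexpected messages and posted requests still sitting in the matching queues.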
02151
02152 unexpectedMsgs.flushMsgs();
02153 postedReqs.freeAll();
02154 unexpectedBcastMsgs.flushMsgs();
02155 postedBcastReqs.freeAll();
02156 }
02157 }
02158
02159
02160 class ampiSplitKey {
02161 public:
02162 int nextSplitComm;
02163 int color;
02164 int key;
02165 int rank;
02166 ampiSplitKey() noexcept {}
02167 ampiSplitKey(int nextSplitComm_,int color_,int key_,int rank_) noexcept
02168 :nextSplitComm(nextSplitComm_), color(color_), key(key_), rank(rank_) {}
02169 };
02170
02171 #define MPI_INTER 10
02172
02173
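// Implements communicator splitting: every rank contributes an ampiSplitKey to rank 0, which sorts by (color, key, rank) and creates one child ampi array per color.
// Each rank then suspends until its child element registers; the new communicator number is getNext*()-1.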
02174 void ampi::split(int color,int key,MPI_Comm *dest, int type) noexcept
02175 {
02176 #if CMK_BIGSIM_CHARM
02177 void *curLog;
02178 _TRACE_BG_TLINE_END(&curLog);
02179 #endif
02180 if (type == MPI_CART) {
02181 ampiSplitKey splitKey(parent->getNextCart(),color,key,myRank);
02182 int rootIdx=myComm.getIndexForRank(0);
02183 CkCallback cb(CkIndex_ampi::splitPhase1(0),CkArrayIndex1D(rootIdx),myComm.getProxy());
02184 contribute(sizeof(splitKey),&splitKey,CkReduction::concat,cb);
02185
02186 thread->suspend();
02187 MPI_Comm newComm=parent->getNextCart()-1;
02188 *dest=newComm;
02189 }
02190 else if (type == MPI_GRAPH) {
02191 ampiSplitKey splitKey(parent->getNextGraph(),color,key,myRank);
02192 int rootIdx=myComm.getIndexForRank(0);
02193 CkCallback cb(CkIndex_ampi::splitPhase1(0),CkArrayIndex1D(rootIdx),myComm.getProxy());
02194 contribute(sizeof(splitKey),&splitKey,CkReduction::concat,cb);
02195
02196 thread->suspend();
02197 MPI_Comm newComm=parent->getNextGraph()-1;
02198 *dest=newComm;
02199 }
02200 else if (type == MPI_DIST_GRAPH) {
02201 ampiSplitKey splitKey(parent->getNextDistGraph(),color,key,myRank);
02202 int rootIdx=myComm.getIndexForRank(0);
02203 CkCallback cb(CkIndex_ampi::splitPhase1(0),CkArrayIndex1D(rootIdx),myComm.getProxy());
02204 contribute(sizeof(splitKey),&splitKey,CkReduction::concat,cb);
02205
02206 thread->suspend();
02207 MPI_Comm newComm=parent->getNextDistGraph()-1;
02208 *dest=newComm;
02209 }
02210 else if (type == MPI_INTER) {
02211 ampiSplitKey splitKey(parent->getNextInter(),color,key,myRank);
02212 int rootIdx=myComm.getIndexForRank(0);
02213 CkCallback cb(CkIndex_ampi::splitPhaseInter(0),CkArrayIndex1D(rootIdx),myComm.getProxy());
02214 contribute(sizeof(splitKey),&splitKey,CkReduction::concat,cb);
02215
02216 thread->suspend();
02217 MPI_Comm newComm=parent->getNextInter()-1;
02218 *dest=newComm;
02219 }
02220 else {
02221 ampiSplitKey splitKey(parent->getNextSplit(),color,key,myRank);
02222 int rootIdx=myComm.getIndexForRank(0);
02223 CkCallback cb(CkIndex_ampi::splitPhase1(0),CkArrayIndex1D(rootIdx),myComm.getProxy());
02224 contribute(sizeof(splitKey),&splitKey,CkReduction::concat,cb);
02225
02226 thread->suspend();
02227 MPI_Comm newComm=parent->getNextSplit()-1;
02228 *dest=newComm;
02229 }
02230 #if CMK_BIGSIM_CHARM
02231 _TRACE_BG_SET_INFO(NULL, "SPLIT_RESUME", NULL, 0);
02232 #endif
02233 }
02234
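// qsort comparator: order split contributions by color, then key, then original rank.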
02235 CLINKAGE
02236 int compareAmpiSplitKey(const void *a_, const void *b_) {
02237 const ampiSplitKey *a=(const ampiSplitKey *)a_;
02238 const ampiSplitKey *b=(const ampiSplitKey *)b_;
02239 if (a->color!=b->color) return a->color-b->color;
02240 if (a->key!=b->key) return a->key-b->key;
02241 return a->rank-b->rank;
02242 }
02243
02244
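// Synchronously create a new, empty ampi array bound to this parent array, blocking the calling thread until the array ID comes back.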
02245 CProxy_ampi ampi::createNewChildAmpiSync() noexcept {
02246 CkArrayOptions opts;
02247 opts.bindTo(parentProxy);
02248 opts.setSectionAutoDelegate(false);
02249 opts.setNumInitial(0);
02250 CkArrayID unusedAID;
02251 ampiCommStruct unusedComm;
02252 CkCallback cb(CkCallback::resumeThread);
02253 CProxy_ampi::ckNew(unusedAID, unusedComm, opts, cb);
02254 CkArrayCreatedMsg *newAmpiMsg = static_cast<CkArrayCreatedMsg*>(cb.thread_delay());
02255 CProxy_ampi newAmpi = newAmpiMsg->aid;
02256 delete newAmpiMsg;
02257 return newAmpi;
02258 }
02259
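// Runs on the root only: sort the gathered split keys and insert one child array element per rank, grouped by color.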
02260 void ampi::splitPhase1(CkReductionMsg *msg) noexcept
02261 {
02262
02263 int nKeys=msg->getSize()/sizeof(ampiSplitKey);
02264 ampiSplitKey *keys=(ampiSplitKey *)msg->getData();
02265 if (nKeys!=myComm.getSize()) CkAbort("ampi::splitReduce expected a split contribution from every rank!");
02266 qsort(keys,nKeys,sizeof(ampiSplitKey),compareAmpiSplitKey);
02267
02268 MPI_Comm newComm = -1;
02269 for(int i=0;i<nKeys;i++){
02270 if(keys[i].nextSplitComm>newComm)
02271 newComm = keys[i].nextSplitComm;
02272 }
02273
02274
02275 int lastColor=keys[0].color-1;
02276 CProxy_ampi lastAmpi;
02277 int lastRoot=0;
02278 ampiCommStruct lastComm;
02279 for (int c=0;c<nKeys;c++) {
02280 if (keys[c].color!=lastColor)
02281 {
02282 lastColor=keys[c].color;
02283 lastRoot=c;
02284
02285 if (c!=0) lastAmpi.doneInserting();
02286 lastAmpi = createNewChildAmpiSync();
02287
02288 vector<int> indices;
02289 for (int i=c;i<nKeys;i++) {
02290 if (keys[i].color!=lastColor) break;
02291 int idx=myComm.getIndexForRank(keys[i].rank);
02292 indices.push_back(idx);
02293 }
02294
02295
02296
02297 lastComm=ampiCommStruct(newComm,lastAmpi,indices);
02298 }
02299 int newRank=c-lastRoot;
02300 int newIdx=lastComm.getIndexForRank(newRank);
02301
02302 lastAmpi[newIdx].insert(parentProxy,lastComm);
02303 }
02304 lastAmpi.doneInserting();
02305
02306 delete msg;
02307 }
02308
02309 void ampi::splitPhaseInter(CkReductionMsg *msg) noexcept
02310 {
02311
02312 int nKeys=msg->getSize()/sizeof(ampiSplitKey);
02313 ampiSplitKey *keys=(ampiSplitKey *)msg->getData();
02314 if (nKeys!=myComm.getSize()) CkAbort("ampi::splitReduce expected a split contribution from every rank!");
02315 qsort(keys,nKeys,sizeof(ampiSplitKey),compareAmpiSplitKey);
02316
02317 MPI_Comm newComm = -1;
02318 for(int i=0;i<nKeys;i++){
02319 if(keys[i].nextSplitComm>newComm)
02320 newComm = keys[i].nextSplitComm;
02321 }
02322
02323
02324 int lastColor=keys[0].color-1;
02325 CProxy_ampi lastAmpi;
02326 int lastRoot=0;
02327 ampiCommStruct lastComm;
02328
02329 lastAmpi = createNewChildAmpiSync();
02330
02331 for (int c=0;c<nKeys;c++) {
02332 vector<int> indices;
02333 if (keys[c].color!=lastColor)
02334 {
02335 lastColor=keys[c].color;
02336 lastRoot=c;
02337
02338 for (int i=c;i<nKeys;i++) {
02339 if (keys[i].color!=lastColor) break;
02340 int idx=myComm.getIndexForRank(keys[i].rank);
02341 indices.push_back(idx);
02342 }
02343
02344 if (c==0) {
02345 lastComm=ampiCommStruct(newComm,lastAmpi,indices, myComm.getRemoteIndices());
02346 for (int i=0; i<indices.size(); i++) {
02347 lastAmpi[indices[i]].insert(parentProxy,lastComm);
02348 }
02349 lastAmpi.doneInserting();
02350 }
02351 }
02352 }
02353
02354 parentProxy[0].ExchangeProxy(lastAmpi);
02355 delete msg;
02356 }
02357
02358
02359 void ampiParent::splitChildRegister(const ampiCommStruct &s) noexcept {
02360 int idx=s.getComm()-MPI_COMM_FIRST_SPLIT;
02361 if (splitComm.size()<=idx) splitComm.resize(idx+1);
02362 splitComm[idx]=new ampiCommStruct(s);
02363 thread->resume();
02364 }
02365
02366
02367
02368
02369
02370
02371
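// MPI_Comm_create path: the group's root creates and populates the child array in commCreatePhase1; members suspend until their element registers, non-members get MPI_COMM_NULL.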
02372 void ampi::commCreate(const vector<int>& vec,MPI_Comm* newcomm) noexcept {
02373 int rootIdx=vec[0];
02374 tmpVec = vec;
02375 CkCallback cb(CkReductionTarget(ampi,commCreatePhase1),CkArrayIndex1D(rootIdx),myComm.getProxy());
02376 MPI_Comm nextgroup = parent->getNextGroup();
02377 contribute(sizeof(nextgroup), &nextgroup,CkReduction::max_int,cb);
02378
02379 if(getPosOp(thisIndex,vec)>=0){
02380 thread->suspend();
02381 MPI_Comm retcomm = parent->getNextGroup()-1;
02382 *newcomm = retcomm;
02383 }else{
02384 *newcomm = MPI_COMM_NULL;
02385 }
02386 }
02387
02388 void ampi::insertNewChildAmpiElements(MPI_Comm nextComm, CProxy_ampi newAmpi) noexcept {
02389 ampiCommStruct newCommStruct = ampiCommStruct(nextComm, newAmpi, tmpVec);
02390 for (int i = 0; i < tmpVec.size(); ++i)
02391 newAmpi[tmpVec[i]].insert(parentProxy, newCommStruct);
02392 newAmpi.doneInserting();
02393 }
02394
02395 void ampi::commCreatePhase1(MPI_Comm nextGroupComm) noexcept {
02396 CProxy_ampi newAmpi = createNewChildAmpiSync();
02397 insertNewChildAmpiElements(nextGroupComm, newAmpi);
02398 }
02399
02400 void ampiParent::groupChildRegister(const ampiCommStruct &s) noexcept {
02401 int idx=s.getComm()-MPI_COMM_FIRST_GROUP;
02402 if (groupComm.size()<=idx) groupComm.resize(idx+1);
02403 groupComm[idx]=new ampiCommStruct(s);
02404 thread->resume();
02405 }
02406
02407
02408
02409
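// A zero-dimensional Cartesian communicator contains only rank 0; all other ranks get MPI_COMM_NULL.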
02410 MPI_Comm ampi::cartCreate0D() noexcept {
02411 if (getRank() == 0) {
02412 tmpVec.clear();
02413 tmpVec.push_back(0);
02414 commCreatePhase1(parent->getNextCart());
02415 MPI_Comm newComm = parent->getNextCart()-1;
02416 ampiCommStruct &newCommStruct = getAmpiParent()->getCart(newComm);
02417 ampiTopology *newTopo = newCommStruct.getTopology();
02418 newTopo->setndims(0);
02419 return newComm;
02420 }
02421 else {
02422 return MPI_COMM_NULL;
02423 }
02424 }
02425
02426 MPI_Comm ampi::cartCreate(vector<int>& vec, int ndims, const int* dims) noexcept {
02427 if (ndims == 0) {
02428 return cartCreate0D();
02429 }
02430
02431
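  // The Cartesian communicator keeps only the first prod(dims) ranks; trim the rest from vec.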
02432 int newsize = dims[0];
02433 for (int i = 1; i < ndims; i++) {
02434 newsize *= dims[i];
02435 }
02436 for (int i = vec.size(); i > newsize; i--) {
02437 vec.pop_back();
02438 }
02439
02440 int rootIdx = vec[0];
02441 tmpVec = vec;
02442 CkCallback cb(CkReductionTarget(ampi,commCreatePhase1),CkArrayIndex1D(rootIdx),myComm.getProxy());
02443
02444 MPI_Comm nextcart = parent->getNextCart();
02445 contribute(sizeof(nextcart), &nextcart,CkReduction::max_int,cb);
02446
02447 if (getPosOp(thisIndex,vec)>=0) {
02448 thread->suspend();
02449 return parent->getNextCart()-1;
02450 } else {
02451 return MPI_COMM_NULL;
02452 }
02453 }
02454
02455 void ampiParent::cartChildRegister(const ampiCommStruct &s) noexcept {
02456 int idx=s.getComm()-MPI_COMM_FIRST_CART;
02457 if (cartComm.size()<=idx) {
02458 cartComm.resize(idx+1);
02459 cartComm.length()=idx+1;
02460 }
02461 cartComm[idx]=new ampiCommStruct(s,MPI_CART);
02462 thread->resume();
02463 }
02464
02465 void ampi::graphCreate(const vector<int>& vec,MPI_Comm* newcomm) noexcept {
02466 int rootIdx=vec[0];
02467 tmpVec = vec;
02468 CkCallback cb(CkReductionTarget(ampi,commCreatePhase1),CkArrayIndex1D(rootIdx),
02469 myComm.getProxy());
02470 MPI_Comm nextgraph = parent->getNextGraph();
02471 contribute(sizeof(nextgraph), &nextgraph,CkReduction::max_int,cb);
02472
02473 if(getPosOp(thisIndex,vec)>=0){
02474 thread->suspend();
02475 MPI_Comm retcomm = parent->getNextGraph()-1;
02476 *newcomm = retcomm;
02477 }else
02478 *newcomm = MPI_COMM_NULL;
02479 }
02480
02481 void ampiParent::graphChildRegister(const ampiCommStruct &s) noexcept {
02482 int idx=s.getComm()-MPI_COMM_FIRST_GRAPH;
02483 if (graphComm.size()<=idx) {
02484 graphComm.resize(idx+1);
02485 graphComm.length()=idx+1;
02486 }
02487 graphComm[idx]=new ampiCommStruct(s,MPI_GRAPH);
02488 thread->resume();
02489 }
02490
02491 void ampi::distGraphCreate(const vector<int>& vec, MPI_Comm* newcomm) noexcept
02492 {
02493 int rootIdx = vec[0];
02494 tmpVec = vec;
02495 CkCallback cb(CkReductionTarget(ampi,commCreatePhase1), CkArrayIndex1D(rootIdx), myComm.getProxy());
02496 MPI_Comm nextDistGraph = parent->getNextDistGraph();
02497 contribute(sizeof(nextDistGraph), &nextDistGraph, CkReduction::max_int, cb);
02498
02499 if (getPosOp(thisIndex,vec) >= 0) {
02500 thread->suspend();
02501 MPI_Comm retcomm = parent->getNextDistGraph()-1;
02502 *newcomm = retcomm;
02503 }
02504 else {
02505 *newcomm = MPI_COMM_NULL;
02506 }
02507 }
02508
02509 void ampiParent::distGraphChildRegister(const ampiCommStruct &s) noexcept
02510 {
02511 int idx = s.getComm()-MPI_COMM_FIRST_DIST_GRAPH;
02512 if (distGraphComm.size() <= idx) {
02513 distGraphComm.resize(idx+1);
02514 distGraphComm.length() = idx+1;
02515 }
02516 distGraphComm[idx] = new ampiCommStruct(s,MPI_DIST_GRAPH);
02517 thread->resume();
02518 }
02519
02520 void ampi::intercommCreate(const vector<int>& remoteVec, const int root, MPI_Comm tcomm, MPI_Comm *ncomm) noexcept {
02521 if (thisIndex==root) {
02522 tmpVec = remoteVec;
02523 }
02524 CkCallback cb(CkReductionTarget(ampi, intercommCreatePhase1),CkArrayIndex1D(root),myComm.getProxy());
02525 MPI_Comm nextinter = parent->getNextInter();
02526 contribute(sizeof(nextinter), &nextinter,CkReduction::max_int,cb);
02527 thread->suspend();
02528 *ncomm = parent->getNextInter()-1;
02529 }
02530
02531 void ampi::intercommCreatePhase1(MPI_Comm nextInterComm) noexcept {
02532
02533 CProxy_ampi newAmpi = createNewChildAmpiSync();
02534 const vector<int>& lgroup = myComm.getIndices();
02535 ampiCommStruct newCommstruct = ampiCommStruct(nextInterComm,newAmpi,lgroup,tmpVec);
02536 for(int i=0;i<lgroup.size();i++){
02537 int newIdx=lgroup[i];
02538 newAmpi[newIdx].insert(parentProxy,newCommstruct);
02539 }
02540 newAmpi.doneInserting();
02541
02542 parentProxy[0].ExchangeProxy(newAmpi);
02543 }
02544
02545 void ampiParent::interChildRegister(const ampiCommStruct &s) noexcept {
02546 int idx=s.getComm()-MPI_COMM_FIRST_INTER;
02547 if (interComm.size()<=idx) interComm.resize(idx+1);
02548 interComm[idx]=new ampiCommStruct(s);
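  // Note: unlike the other *ChildRegister methods, the thread is not resumed here; it resumes once the remote group's proxy has been exchanged.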
02549
02550 }
02551
02552 void ampi::intercommMerge(int first, MPI_Comm *ncomm) noexcept {
02553 if(myRank == 0 && first == 1){
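    // Only rank 0 of the group passing first==1 assembles the merged index list (its local indices followed by the remote ones); everyone else contributes an empty vector.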
02554 vector<int> lvec = myComm.getIndices();
02555 vector<int> rvec = myComm.getRemoteIndices();
02556 int rsize = rvec.size();
02557 tmpVec = lvec;
02558 for(int i=0;i<rsize;i++)
02559 tmpVec.push_back(rvec[i]);
02560 if(tmpVec.size()==0) CkAbort("Error in ampi::intercommMerge: merging empty comms!\n");
02561 }else{
02562 tmpVec.resize(0);
02563 }
02564
02565 int rootIdx=myComm.getIndexForRank(0);
02566 CkCallback cb(CkReductionTarget(ampi, intercommMergePhase1),CkArrayIndex1D(rootIdx),myComm.getProxy());
02567 MPI_Comm nextintra = parent->getNextIntra();
02568 contribute(sizeof(nextintra), &nextintra,CkReduction::max_int,cb);
02569
02570 thread->suspend();
02571 MPI_Comm newcomm=parent->getNextIntra()-1;
02572 *ncomm=newcomm;
02573 }
02574
02575 void ampi::intercommMergePhase1(MPI_Comm nextIntraComm) noexcept {
02576
02577 if(tmpVec.size()==0) return;
02578 CProxy_ampi newAmpi = createNewChildAmpiSync();
02579 insertNewChildAmpiElements(nextIntraComm, newAmpi);
02580 }
02581
02582 void ampiParent::intraChildRegister(const ampiCommStruct &s) noexcept {
02583 int idx=s.getComm()-MPI_COMM_FIRST_INTRA;
02584 if (intraComm.size()<=idx) intraComm.resize(idx+1);
02585 intraComm[idx]=new ampiCommStruct(s);
02586 thread->resume();
02587 }
02588
02589 void ampi::topoDup(int topoType, int rank, MPI_Comm comm, MPI_Comm *newComm) noexcept
02590 {
02591 if (getAmpiParent()->isInter(comm)) {
02592 split(0, rank, newComm, MPI_INTER);
02593 } else {
02594 split(0, rank, newComm, topoType);
02595
02596 if (topoType != MPI_UNDEFINED) {
02597 ampiTopology *topo, *newTopo;
02598 if (topoType == MPI_CART) {
02599 topo = getAmpiParent()->getCart(comm).getTopology();
02600 newTopo = getAmpiParent()->getCart(*newComm).getTopology();
02601 } else if (topoType == MPI_GRAPH) {
02602 topo = getAmpiParent()->getGraph(comm).getTopology();
02603 newTopo = getAmpiParent()->getGraph(*newComm).getTopology();
02604 } else {
02605 CkAssert(topoType == MPI_DIST_GRAPH);
02606 topo = getAmpiParent()->getDistGraph(comm).getTopology();
02607 newTopo = getAmpiParent()->getDistGraph(*newComm).getTopology();
02608 }
02609 newTopo->dup(topo);
02610 }
02611 }
02612 }
02613
02614
02615 const ampiCommStruct &universeComm2CommStruct(MPI_Comm universeNo) noexcept
02616 {
02617 if (universeNo>MPI_COMM_WORLD) {
02618 int worldDex=universeNo-MPI_COMM_WORLD-1;
02619 if (worldDex>=_mpi_nworlds)
02620 CkAbort("Bad world communicator passed to universeComm2CommStruct");
02621 return mpi_worlds[worldDex];
02622 }
02623 CkAbort("Bad communicator passed to universeComm2CommStruct");
02624 return mpi_worlds[0];
02625 }
02626
02627 void ampiParent::block() noexcept {
02628 thread->suspend();
02629 }
02630
02631 void ampiParent::yield() noexcept {
02632 thread->schedule();
02633 }
02634
02635 void ampi::unblock() noexcept {
02636 thread->resume();
02637 }
02638
02639 ampiParent* ampiParent::blockOnRecv() noexcept {
02640 resumeOnRecv = true;
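  // The thread may resume on a different PE after migration, so re-acquire the ampiParent pointer via getAmpiParent() once suspend() returns.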
02641
02642
02643
02644 thread->suspend();
02645 ampiParent* dis = getAmpiParent();
02646 dis->resumeOnRecv = false;
02647 return dis;
02648 }
02649
02650 ampi* ampi::blockOnRecv() noexcept {
02651 parent->resumeOnRecv = true;
02652
02653
02654
02655 MPI_Comm comm = myComm.getComm();
02656 thread->suspend();
02657 ampi *dis = getAmpiInstance(comm);
02658 dis->parent->resumeOnRecv = false;
02659 return dis;
02660 }
02661
02662 void ampi::setBlockingReq(AmpiRequest *req) noexcept {
02663 CkAssert(parent->blockingReq == NULL);
02664 CkAssert(parent->resumeOnColl == false);
02665 parent->blockingReq = req;
02666 parent->resumeOnColl = true;
02667 }
02668
02669
02670 ampi* ampi::blockOnColl() noexcept {
02671 #if CMK_BIGSIM_CHARM
02672 void *curLog;
02673 _TRACE_BG_TLINE_END(&curLog);
02674 #if CMK_TRACE_IN_CHARM
02675 if(CpvAccess(traceOn)) traceSuspend();
02676 #endif
02677 #endif
02678
02679 CkAssert(parent->resumeOnColl == true);
02680 MPI_Comm comm = myComm.getComm();
02681 thread->suspend();
02682 ampi *dis = getAmpiInstance(comm);
02683 dis->parent->resumeOnColl = false;
02684
02685 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
02686 CpvAccess(_currentObj) = dis;
02687 #endif
02688 #if CMK_BIGSIM_CHARM
02689 #if CMK_TRACE_IN_CHARM
02690 if(CpvAccess(traceOn)) CthTraceResume(dis->thread->getThread());
02691 #endif
02692 TRACE_BG_AMPI_BREAK(dis->thread->getThread(), "RECV_RESUME", NULL, 0, 0);
02693 if (dis->parent->blockingReq->eventPe == CkMyPe()) _TRACE_BG_ADD_BACKWARD_DEP(dis->parent->blockingReq->event);
02694 #endif
02695
02696 delete dis->parent->blockingReq; dis->parent->blockingReq = NULL;
02697 return dis;
02698 }
02699
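// Acknowledgement for a synchronous send: an index of 1 wakes a sender blocked in a plain ssend; larger values encode a request index (offset by 2) to complete a nonblocking ssend.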
02700 void ampi::ssend_ack(int sreq_idx) noexcept {
02701 if (sreq_idx == 1)
02702 thread->resume();
02703 else {
02704 sreq_idx -= 2;
02705 AmpiRequestList& reqs = getReqs();
02706 AmpiRequest *sreq = reqs[sreq_idx];
02707 sreq->complete = true;
02708 handleBlockedReq(sreq);
02709 resumeThreadIfReady();
02710 }
02711 }
02712
02713 void ampi::injectMsg(int size, char* buf) noexcept
02714 {
02715 generic(makeAmpiMsg(thisIndex, 0, thisIndex, (void*)buf, size, MPI_CHAR, MPI_COMM_WORLD, 0));
02716 }
02717
02718 void ampi::generic(AmpiMsg* msg) noexcept
02719 {
02720 MSG_ORDER_DEBUG(
02721 CkPrintf("AMPI vp %d arrival: tag=%d, src=%d, comm=%d (seq %d) resumeOnRecv %d\n",
02722 thisIndex, msg->getTag(), msg->getSrcRank(), getComm(), msg->getSeq(), parent->resumeOnRecv);
02723 )
02724 #if CMK_BIGSIM_CHARM
02725 TRACE_BG_ADD_TAG("AMPI_generic");
02726 msg->event = NULL;
02727 #endif
02728
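  // Sequenced messages pass through the out-of-order buffer; a sequence number of 0 bypasses ordering entirely.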
02729 if(msg->getSeq() != 0) {
02730 int seqIdx = msg->getSeqIdx();
02731 int n=oorder.put(seqIdx,msg);
02732 if (n>0) {
02733 inorder(msg);
02734 if (n>1) {
02735 while((msg=oorder.getOutOfOrder(seqIdx))!=0) {
02736 inorder(msg);
02737 }
02738 }
02739 }
02740 } else {
02741 inorder(msg);
02742 }
02743
02744
02745 resumeThreadIfReady();
02746 }
02747
02748
02749 void ampi::bcastResult(AmpiMsg* msg) noexcept
02750 {
02751 MSG_ORDER_DEBUG(
02752 CkPrintf("AMPI vp %d bcast arrival: tag=%d, src=%d, comm=%d (seq %d) resumeOnRecv %d\n",
02753 thisIndex, msg->getTag(), msg->getSrcRank(), getComm(), msg->getSeq(), parent->resumeOnRecv);
02754 )
02755 #if CMK_BIGSIM_CHARM
02756 TRACE_BG_ADD_TAG("AMPI_generic");
02757 msg->event = NULL;
02758 #endif
02759
02760 CkAssert(msg->getSeq() != 0);
02761 int seqIdx = msg->getSeqIdx();
02762 int n=oorder.put(seqIdx,msg);
02763 if (n>0) {
02764 inorderBcast(msg, false);
02765 if (n>1) {
02766 while((msg=oorder.getOutOfOrder(seqIdx))!=0) {
02767 inorderBcast(msg, true);
02768 }
02769 }
02770 }
02771
02772 resumeThreadIfReady();
02773 }
02774
02775 inline static AmpiRequestList &getReqs() noexcept;
02776
02777 void AmpiRequestList::freeNonPersReq(int &idx) noexcept {
02778 ampiParent* pptr = getAmpiParent();
02779 if (!reqs[idx]->isPersistent()) {
02780 free(pptr->reqPool, idx, pptr->getDDT());
02781 idx = MPI_REQUEST_NULL;
02782 }
02783 }
02784
02785 void AmpiRequestList::free(AmpiRequestPool &reqPool, int idx, CkDDT *ddt) noexcept {
02786 if (idx < 0) return;
02787 reqs[idx]->free(ddt);
02788 reqPool.deleteReq(reqs[idx]);
02789 reqs[idx] = NULL;
02790 startIdx = std::min(idx, startIdx);
02791 }
02792
02793 void ampi::inorder(AmpiMsg* msg) noexcept
02794 {
02795 MSG_ORDER_DEBUG(
02796 CkPrintf("AMPI vp %d inorder: tag=%d, src=%d, comm=%d (seq %d)\n",
02797 thisIndex, msg->getTag(), msg->getSrcRank(), getComm(), msg->getSeq());
02798 )
02799
02800 #if CMK_BIGSIM_CHARM
02801 _TRACE_BG_TLINE_END(&msg->event);
02802 msg->eventPe = CkMyPe();
02803 #endif
02804
02805
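  // Try to match a posted receive; if none is found, queue the message as unexpected.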
02806 int tag = msg->getTag();
02807 int srcRank = msg->getSrcRank();
02808 AmpiRequest* req = postedReqs.get(tag, srcRank);
02809 if (req) {
02810 handleBlockedReq(req);
02811 req->receive(this, msg);
02812 } else {
02813 unexpectedMsgs.put(msg);
02814 }
02815 }
02816
02817 void ampi::inorderBcast(AmpiMsg* msg, bool deleteMsg) noexcept
02818 {
02819 MSG_ORDER_DEBUG(
02820 CkPrintf("AMPI vp %d inorder bcast: tag=%d, src=%d, comm=%d (seq %d)\n",
02821 thisIndex, msg->getTag(), msg->getSrcRank(), getComm(), msg->getSeq());
02822 )
02823
02824 #if CMK_BIGSIM_CHARM
02825 _TRACE_BG_TLINE_END(&msg->event);
02826 msg->eventPe = CkMyPe();
02827 #endif
02828
02829
02830 int tag = msg->getTag();
02831 int srcRank = msg->getSrcRank();
02832 AmpiRequest* req = postedBcastReqs.get(tag, srcRank);
02833 if (req) {
02834 handleBlockedReq(req);
02835 req->receive(this, msg, deleteMsg);
02836 } else {
02837
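    // Broadcast payloads are shared, so take a reference before queueing the message as unexpected.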
02838 CmiReference(UsrToEnv(msg));
02839 unexpectedBcastMsgs.put(msg);
02840 }
02841 }
02842
02843 static inline AmpiMsg* rdma2AmpiMsg(char *buf, int size, CMK_REFNUM_TYPE seq, int tag, int srcRank,
02844 int ssendReq) noexcept
02845 {
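  // Copy the RDMA payload into a regular AmpiMsg so it can be queued and matched like any other message.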
02846
02847 AmpiMsg* msg = new (size, 0) AmpiMsg(seq, ssendReq, tag, srcRank, size);
02848 memcpy(msg->data, buf, size);
02849 return msg;
02850 }
02851
02852
02853 void ampi::genericRdma(char* buf, int size, CMK_REFNUM_TYPE seq, int tag, int srcRank, MPI_Comm destcomm, int ssendReq) noexcept
02854 {
02855 MSG_ORDER_DEBUG(
02856 CkPrintf("[%d] in ampi::genericRdma on index %d, size=%d, seq=%d, srcRank=%d, tag=%d, comm=%d, ssendReq=%d\n",
02857 CkMyPe(), getIndexForRank(getRank()), size, seq, srcRank, tag, destcomm, ssendReq);
02858 )
02859
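  // Sequenced RDMA arrivals go through the same ordering logic as regular messages; out-of-order ones are copied into an AmpiMsg and buffered.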
02860 if (seq != 0) {
02861 int seqIdx = srcRank;
02862 int n = oorder.isInOrder(seqIdx, seq);
02863 if (n > 0) {
02864 inorderRdma(buf, size, seq, tag, srcRank, destcomm, ssendReq);
02865 if (n > 1) {
02866 AmpiMsg *msg = NULL;
02867 while ((msg = oorder.getOutOfOrder(seqIdx)) != 0) {
02868 inorder(msg);
02869 }
02870 }
02871 } else {
02872 AmpiMsg *msg = rdma2AmpiMsg(buf, size, seq, tag, srcRank, ssendReq);
02873 oorder.putOutOfOrder(seqIdx, msg);
02874 }
02875 } else {
02876 inorderRdma(buf, size, seq, tag, srcRank, destcomm, ssendReq);
02877 }
02878
02879 resumeThreadIfReady();
02880 }
02881
02882
02883 void ampi::inorderRdma(char* buf, int size, CMK_REFNUM_TYPE seq, int tag, int srcRank,
02884 MPI_Comm comm, int ssendReq) noexcept
02885 {
02886 MSG_ORDER_DEBUG(
02887 CkPrintf("AMPI vp %d inorderRdma: tag=%d, src=%d, comm=%d (seq %d)\n",
02888 thisIndex, tag, srcRank, comm, seq);
02889 )
02890
02891
02892 AmpiRequest* req = postedReqs.get(tag, srcRank);
02893 if (req) {
02894 handleBlockedReq(req);
02895 req->receiveRdma(this, buf, size, ssendReq, srcRank, comm);
02896 } else {
02897 AmpiMsg* msg = rdma2AmpiMsg(buf, size, seq, tag, srcRank, ssendReq);
02898 unexpectedMsgs.put(msg);
02899 }
02900 }
02901
02902
02903 void ampi::completedRdmaSend(CkDataMsg *msg) noexcept
02904 {
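  // The completed send's request index was stashed in the callback's reference number by sendRdmaMsg().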
02905
02906 int reqIdx = CkGetRefNum(msg);
02907
02908 MSG_ORDER_DEBUG(
02909 CkPrintf("[%d] in ampi::completedRdmaSend on index %d, reqIdx = %d\n",
02910 CkMyPe(), parent->thisIndex, reqIdx);
02911 )
02912
02913 AmpiRequestList& reqList = getReqs();
02914 AmpiRequest* sreq = reqList[reqIdx];
02915 sreq->complete = true;
02916
02917 handleBlockedReq(sreq);
02918 resumeThreadIfReady();
02919
02920 }
02921
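// MPI_BOTTOM means the caller passed absolute addresses: substitute the datatype's lower bound and mark the type as absolute.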
02922 void handle_MPI_BOTTOM(void* &buf, MPI_Datatype type) noexcept
02923 {
02924 if (buf == MPI_BOTTOM) {
02925 buf = (void*)getDDT()->getType(type)->getLB();
02926 getDDT()->getType(type)->setAbsolute(true);
02927 }
02928 }
02929
02930 void handle_MPI_BOTTOM(void* &buf1, MPI_Datatype type1, void* &buf2, MPI_Datatype type2) noexcept
02931 {
02932 if (buf1 == MPI_BOTTOM) {
02933 buf1 = (void*)getDDT()->getType(type1)->getLB();
02934 getDDT()->getType(type1)->setAbsolute(true);
02935 }
02936 if (buf2 == MPI_BOTTOM) {
02937 buf2 = (void*)getDDT()->getType(type2)->getLB();
02938 getDDT()->getType(type2)->setAbsolute(true);
02939 }
02940 }
02941
02942 AmpiMsg *ampi::makeBcastMsg(const void *buf,int count,MPI_Datatype type,int root,MPI_Comm destcomm) noexcept
02943 {
02944 CkDDT_DataType *ddt = getDDT()->getType(type);
02945 int len = ddt->getSize(count);
02946 CMK_REFNUM_TYPE seq = getSeqNo(root, destcomm, MPI_BCAST_TAG);
02947
02948 AmpiMsg *msg = new (len, 0) AmpiMsg(seq, MPI_REQUEST_NULL, MPI_BCAST_TAG, root, len);
02949 ddt->serialize((char*)buf, msg->getData(), count, msg->getLength(), PACK);
02950 return msg;
02951 }
02952
02953 AmpiMsg *ampi::makeAmpiMsg(int destRank,int t,int sRank,const void *buf,int count,
02954 MPI_Datatype type,MPI_Comm destcomm, int ssendReq) noexcept
02955 {
02956 CkDDT_DataType *ddt = getDDT()->getType(type);
02957 int len = ddt->getSize(count);
02958 CMK_REFNUM_TYPE seq = getSeqNo(destRank, destcomm, t);
02959 AmpiMsg *msg = CkpvAccess(msgPool).newAmpiMsg(seq, ssendReq, t, sRank, len);
02960 ddt->serialize((char*)buf, msg->getData(), count, msg->getLength(), PACK);
02961 return msg;
02962 }
02963
02964 MPI_Request ampi::send(int t, int sRank, const void* buf, int count, MPI_Datatype type,
02965 int rank, MPI_Comm destcomm, int ssendReq, AmpiSendType sendType) noexcept
02966 {
02967 #if CMK_TRACE_IN_CHARM
02968 TRACE_BG_AMPI_BREAK(thread->getThread(), "AMPI_SEND", NULL, 0, 1);
02969 #endif
02970
02971 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
02972 MPI_Comm disComm = myComm.getComm();
02973 ampi *dis = getAmpiInstance(disComm);
02974 CpvAccess(_currentObj) = dis;
02975 #endif
02976
02977 const ampiCommStruct &dest=comm2CommStruct(destcomm);
02978 MPI_Request req = delesend(t,sRank,buf,count,type,rank,destcomm,dest.getProxy(),ssendReq,sendType);
02979 if (sendType == BLOCKING_SEND && req != MPI_REQUEST_NULL) {
02980 AmpiRequestList& reqList = getReqs();
02981 AmpiRequest *sreq = reqList[req];
02982 sreq->wait(MPI_STATUS_IGNORE);
02983 reqList.free(parent->reqPool, req, parent->getDDT());
02984 req = MPI_REQUEST_NULL;
02985 }
02986
02987 #if CMK_TRACE_IN_CHARM
02988 TRACE_BG_AMPI_BREAK(thread->getThread(), "AMPI_SEND_END", NULL, 0, 1);
02989 #endif
02990
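  // A synchronous send blocks here until the matching receive acknowledges it via ssend_ack().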
02991 if (ssendReq == 1) {
02992
02993 parent->resumeOnRecv = false;
02994 parent->block();
02995 }
02996
02997 return req;
02998 }
02999
03000 void ampi::sendraw(int t, int sRank, void* buf, int len, CkArrayID aid, int idx) noexcept
03001 {
03002 AmpiMsg *msg = new (len, 0) AmpiMsg(0, 0, t, sRank, len);
03003 memcpy(msg->getData(), buf, len);
03004 CProxy_ampi pa(aid);
03005 pa[idx].generic(msg);
03006 }
03007
03008 CMK_REFNUM_TYPE ampi::getSeqNo(int destRank, MPI_Comm destcomm, int tag) noexcept {
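  // Collective tags (>= MPI_BCAST_TAG) share the COLL_SEQ_IDX sequence stream; point-to-point messages are sequenced per destination rank.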
03009 int seqIdx = (tag >= MPI_BCAST_TAG) ? COLL_SEQ_IDX : destRank;
03010 CMK_REFNUM_TYPE seq = 0;
03011 if (destcomm<=MPI_COMM_WORLD && tag<=MPI_BCAST_TAG) {
03012 seq = oorder.nextOutgoing(seqIdx);
03013 }
03014 return seq;
03015 }
03016
03017 MPI_Request ampi::sendRdmaMsg(int t, int sRank, const void* buf, int size, MPI_Datatype type, int destIdx,
03018 int destRank, MPI_Comm destcomm, CProxy_ampi arrProxy, int ssendReq) noexcept
03019 {
03020 CMK_REFNUM_TYPE seq = getSeqNo(destRank, destcomm, t);
03021
03022 if (ssendReq) {
03023 arrProxy[destIdx].genericRdma(CkSendBuffer(buf), size, seq, t, sRank, destcomm, ssendReq);
03024 return MPI_REQUEST_NULL;
03025 }
03026 else {
03027 MPI_Request req = postReq(parent->reqPool.newReq<SendReq>(type, destcomm, getDDT()));
03028 CkCallback completedSendCB(CkIndex_ampi::completedRdmaSend(NULL), thisProxy[thisIndex], true);
03029 completedSendCB.setRefnum(req);
03030
03031 arrProxy[destIdx].genericRdma(CkSendBuffer(buf, completedSendCB), size, seq, t, sRank, destcomm, ssendReq);
03032 return req;
03033 }
03034 }
03035
03036
03037 MPI_Request ampi::sendLocalMsg(int t, int sRank, const void* buf, int size, MPI_Datatype type, int destRank,
03038 MPI_Comm destcomm, ampi* destPtr, int ssendReq, AmpiSendType sendType) noexcept
03039 {
03040 CMK_REFNUM_TYPE seq = getSeqNo(destRank, destcomm, t);
03041
03042 destPtr->genericRdma((char*)buf, size, seq, t, sRank, destcomm, ssendReq);
03043
03044 if (ssendReq || sendType == BLOCKING_SEND) {
03045 return MPI_REQUEST_NULL;
03046 }
03047 else {
03048 return postReq(parent->reqPool.newReq<SendReq>(type, destcomm, getDDT(), AMPI_REQ_COMPLETED));
03049 }
03050 }
03051
03052 MPI_Request ampi::delesend(int t, int sRank, const void* buf, int count, MPI_Datatype type,
03053 int rank, MPI_Comm destcomm, CProxy_ampi arrProxy, int ssendReq,
03054 AmpiSendType sendType) noexcept
03055 {
03056 if (rank==MPI_PROC_NULL) return MPI_REQUEST_NULL;
03057 const ampiCommStruct &dest=comm2CommStruct(destcomm);
03058 int destIdx;
03059 if(isInter()){
03060 sRank = thisIndex;
03061 destIdx = dest.getIndexForRemoteRank(rank);
03062 arrProxy = remoteProxy;
03063 } else {
03064 destIdx = dest.getIndexForRank(rank);
03065 }
03066
03067 MSG_ORDER_DEBUG(
03068 CkPrintf("AMPI vp %d send: tag=%d, src=%d, comm=%d (to %d)\n",thisIndex,t,sRank,destcomm,destIdx);
03069 )
03070
03071 ampi *destPtr = arrProxy[destIdx].ckLocal();
03072 CkDDT_DataType *ddt = getDDT()->getType(type);
03073 int size = ddt->getSize(count);
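  // Contiguous data can be handed directly to a co-located destination or sent by RDMA when large enough; everything else is packed into an AmpiMsg.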
03074 if (ddt->isContig()) {
03075 #if AMPI_LOCAL_IMPL
03076 if (destPtr != NULL) {
03077 return sendLocalMsg(t, sRank, buf, size, type, rank, destcomm, destPtr, ssendReq, sendType);
03078 }
03079 #endif
03080 #if AMPI_RDMA_IMPL
03081 if (size >= AMPI_RDMA_THRESHOLD ||
03082 (size >= AMPI_SMP_RDMA_THRESHOLD && destLikelyWithinProcess(arrProxy, destIdx)))
03083 {
03084 return sendRdmaMsg(t, sRank, buf, size, type, destIdx, rank, destcomm, arrProxy, ssendReq);
03085 }
03086 #endif
03087 }
03088 #if AMPI_LOCAL_IMPL
03089 if (destPtr != NULL) {
03090 destPtr->generic(makeAmpiMsg(rank, t, sRank, buf, count, type, destcomm, ssendReq));
03091 return MPI_REQUEST_NULL;
03092 } else
03093 #endif
03094 {
03095 arrProxy[destIdx].generic(makeAmpiMsg(rank, t, sRank, buf, count, type, destcomm, ssendReq));
03096 return MPI_REQUEST_NULL;
03097 }
03098 }
03099
03100 void ampi::processAmpiMsg(AmpiMsg *msg, void* buf, MPI_Datatype type, int count) noexcept
03101 {
03102 int ssendReq = msg->getSsendReq();
03103 if (ssendReq > 0) {
03104 int srcRank = msg->getSrcRank();
03105 int srcIdx = getIndexForRank(srcRank);
03106 thisProxy[srcIdx].ssend_ack(ssendReq);
03107 }
03108
03109 CkDDT_DataType *ddt = getDDT()->getType(type);
03110
03111 ddt->serialize((char*)buf, msg->getData(), count, msg->getLength(), UNPACK);
03112 }
03113
03114
03115 void ampi::processRdmaMsg(const void *sbuf, int slength, int ssendReq, int srank, void* rbuf,
03116 int rcount, MPI_Datatype rtype, MPI_Comm comm) noexcept
03117 {
03118 if (ssendReq > 0) {
03119 int srcIdx = getIndexForRank(srank);
03120 thisProxy[srcIdx].ssend_ack(ssendReq);
03121 }
03122
03123 CkDDT_DataType *ddt = getDDT()->getType(rtype);
03124
03125 ddt->serialize((char*)rbuf, (char*)sbuf, rcount, slength, UNPACK);
03126 }
03127
03128 void ampi::processRednMsg(CkReductionMsg *msg, void* buf, MPI_Datatype type, int count) noexcept
03129 {
03130
03131
03132 int szhdr = (msg->getReducer() == AmpiReducer) ? sizeof(AmpiOpHeader) : 0;
03133 getDDT()->getType(type)->serialize((char*)buf, (char*)msg->getData()+szhdr, count, msg->getLength()-szhdr, UNPACK);
03134 }
03135
03136 void ampi::processNoncommutativeRednMsg(CkReductionMsg *msg, void* buf, MPI_Datatype type, int count, MPI_User_function* func) noexcept
03137 {
03138 CkReduction::tupleElement* results = NULL;
03139 int numReductions = 0;
03140 msg->toTuple(&results, &numReductions);
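  // The tuple reduction carries (source rank, contribution) pairs; apply the user's op over contributions in ascending rank order so non-commutative operations get a deterministic result.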
03141
03142
03143 char *data = (char*)(results[1].data);
03144 CkDDT_DataType *ddt = getDDT()->getType(type);
03145 int contributionSize = ddt->getSize(count);
03146 int commSize = getSize();
03147
03148
03149
03150 vector<void *> contributionData(commSize);
03151 if (commSize < std::numeric_limits<unsigned short int>::max()) {
03152 unsigned short int *srcRank = (unsigned short int*)(results[0].data);
03153 for (int i=0; i<commSize; i++) {
03154 contributionData[srcRank[i]] = &data[i * contributionSize];
03155 }
03156 }
03157 else {
03158 int *srcRank = (int*)(results[0].data);
03159 for (int i=0; i<commSize; i++) {
03160 contributionData[srcRank[i]] = &data[i * contributionSize];
03161 }
03162 }
03163
03164 if (ddt->isContig()) {
03165
03166 memcpy(buf, contributionData[0], contributionSize);
03167
03168
03169 for (int i=1; i<commSize; i++) {
03170 (*func)(contributionData[i], buf, &count, &type);
03171 }
03172 }
03173 else {
03174 int contributionExtent = ddt->getExtent() * count;
03175
03176
03177 ddt->serialize((char*)contributionData[0], (char*)buf, count, contributionExtent, UNPACK);
03178
03179
03180 vector<char> deserializedBuf(contributionExtent);
03181 for (int i=1; i<commSize; i++) {
03182 ddt->serialize((char*)contributionData[i], deserializedBuf.data(), count, contributionExtent, UNPACK);
03183 (*func)(deserializedBuf.data(), buf, &count, &type);
03184 }
03185 }
03186 delete [] results;
03187 }
03188
03189 void ampi::processGatherMsg(CkReductionMsg *msg, void* buf, MPI_Datatype type, int recvCount) noexcept
03190 {
03191 CkReduction::tupleElement* results = NULL;
03192 int numReductions = 0;
03193 msg->toTuple(&results, &numReductions);
03194 CkAssert(numReductions == 2);
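  // Unpack each rank's contribution into the receive buffer at offset srcRank * extent.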
03195
03196
03197 char *data = (char*)(results[1].data);
03198 CkDDT_DataType *ddt = getDDT()->getType(type);
03199 int contributionSize = ddt->getSize(recvCount);
03200 int contributionExtent = ddt->getExtent()*recvCount;
03201 int commSize = getSize();
03202
03203
03204 if (commSize < std::numeric_limits<unsigned short int>::max()) {
03205 unsigned short int *srcRank = (unsigned short int*)(results[0].data);
03206 for (int i=0; i<commSize; i++) {
03207 ddt->serialize(&(((char*)buf)[srcRank[i] * contributionExtent]),
03208 &data[i * contributionSize],
03209 recvCount,
03210 contributionSize,
03211 UNPACK);
03212 }
03213 }
03214 else {
03215 int *srcRank = (int*)(results[0].data);
03216 for (int i=0; i<commSize; i++) {
03217 ddt->serialize(&(((char*)buf)[srcRank[i] * contributionExtent]),
03218 &data[i * contributionSize],
03219 recvCount,
03220 contributionSize,
03221 UNPACK);
03222 }
03223 }
03224 delete [] results;
03225 }
03226
03227 void ampi::processGathervMsg(CkReductionMsg *msg, void* buf, MPI_Datatype type,
03228 int* recvCounts, int* displs) noexcept
03229 {
03230 CkReduction::tupleElement* results = NULL;
03231 int numReductions = 0;
03232 msg->toTuple(&results, &numReductions);
03233 CkAssert(numReductions == 3);
03234
03235
03236 int *dataSize = (int*)(results[1].data);
03237 char *data = (char*)(results[2].data);
03238 CkDDT_DataType *ddt = getDDT()->getType(type);
03239 int contributionSize = ddt->getSize();
03240 int contributionExtent = ddt->getExtent();
03241 int commSize = getSize();
03242 int currDataOffset = 0;
03243
03244
03245 if (commSize < std::numeric_limits<unsigned short int>::max()) {
03246 unsigned short int *srcRank = (unsigned short int*)(results[0].data);
03247 for (int i=0; i<commSize; i++) {
03248 ddt->serialize(&((char*)buf)[displs[srcRank[i]] * contributionExtent],
03249 &data[currDataOffset],
03250 recvCounts[srcRank[i]],
03251 contributionSize * recvCounts[srcRank[i]],
03252 UNPACK);
03253 currDataOffset += dataSize[i];
03254 }
03255 }
03256 else {
03257 int *srcRank = (int*)(results[0].data);
03258 for (int i=0; i<commSize; i++) {
03259 ddt->serialize(&((char*)buf)[displs[srcRank[i]] * contributionExtent],
03260 &data[currDataOffset],
03261 recvCounts[srcRank[i]],
03262 contributionSize * recvCounts[srcRank[i]],
03263 UNPACK);
03264 currDataOffset += dataSize[i];
03265 }
03266 }
03267 delete [] results;
03268 }
03269
03270 static inline void clearStatus(MPI_Status *sts) noexcept {
03271 if (sts != MPI_STATUS_IGNORE) {
03272 sts->MPI_TAG = MPI_ANY_TAG;
03273 sts->MPI_SOURCE = MPI_ANY_SOURCE;
03274 sts->MPI_COMM = MPI_COMM_NULL;
03275 sts->MPI_LENGTH = 0;
03276 sts->MPI_ERROR = MPI_SUCCESS;
03277 sts->MPI_CANCEL = 0;
03278 }
03279 }
03280
03281 static inline void clearStatus(MPI_Status sts[], int idx) noexcept {
03282 if (sts != MPI_STATUSES_IGNORE) {
03283 clearStatus(&sts[idx]);
03284 }
03285 }
03286
03287 static inline bool handle_MPI_PROC_NULL(int src, MPI_Comm comm, MPI_Status* sts) noexcept
03288 {
03289 if (src == MPI_PROC_NULL) {
03290 clearStatus(sts);
03291 if (sts != MPI_STATUS_IGNORE) sts->MPI_SOURCE = MPI_PROC_NULL;
03292 return true;
03293 }
03294 return false;
03295 }
03296
03297 int ampi::recv(int t, int s, void* buf, int count, MPI_Datatype type, MPI_Comm comm, MPI_Status *sts) noexcept
03298 {
03299 MPI_Comm disComm = myComm.getComm();
03300 if (handle_MPI_PROC_NULL(s, disComm, sts)) return 0;
03301
03302 #if CMK_BIGSIM_CHARM
03303 void *curLog;
03304 _TRACE_BG_TLINE_END(&curLog);
03305 #if CMK_TRACE_IN_CHARM
03306 if(CpvAccess(traceOn)) traceSuspend();
03307 #endif
03308 #endif
03309
03310 if (isInter()) {
03311 s = myComm.getIndexForRemoteRank(s);
03312 }
03313
03314 MSG_ORDER_DEBUG(
03315 CkPrintf("AMPI vp %d blocking recv: tag=%d, src=%d, comm=%d\n",thisIndex,t,s,comm);
03316 )
03317
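  // Fast path: the message may already be in the unexpected queue; otherwise post a blocked IReq and suspend until it is matched.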
03318 ampi *dis = getAmpiInstance(disComm);
03319 MPI_Status tmpStatus;
03320 AmpiMsg* msg = unexpectedMsgs.get(t, s, (sts == MPI_STATUS_IGNORE) ? (int*)&tmpStatus : (int*)sts);
03321 if (msg) {
03322 if (sts != MPI_STATUS_IGNORE) {
03323 sts->MPI_SOURCE = msg->getSrcRank();
03324 sts->MPI_TAG = msg->getTag();
03325 sts->MPI_COMM = comm;
03326 sts->MPI_LENGTH = msg->getLength();
03327 sts->MPI_CANCEL = 0;
03328 }
03329 processAmpiMsg(msg, buf, type, count);
03330 #if CMK_BIGSIM_CHARM
03331 TRACE_BG_AMPI_BREAK(thread->getThread(), "RECV_RESUME", NULL, 0, 0);
03332 if (msg->eventPe == CkMyPe()) _TRACE_BG_ADD_BACKWARD_DEP(msg->event);
03333 #endif
03334 CkpvAccess(msgPool).deleteAmpiMsg(msg);
03335 }
03336 else {
03337 int request = postReq(dis->parent->reqPool.newReq<IReq>(buf, count, type, s, t, comm, getDDT(), AMPI_REQ_BLOCKED));
03338 CkAssert(parent->numBlockedReqs == 0);
03339 parent->numBlockedReqs = 1;
03340 dis = dis->blockOnRecv();
03341 parent = dis->parent;
03342 AmpiRequestList& reqs = parent->getReqs();
03343 if (sts != MPI_STATUS_IGNORE) {
03344 AmpiRequest& req = *reqs[request];
03345 sts->MPI_SOURCE = req.src;
03346 sts->MPI_TAG = req.tag;
03347 sts->MPI_COMM = req.comm;
03348 sts->MPI_LENGTH = req.getNumReceivedBytes(getDDT());
03349 sts->MPI_CANCEL = 0;
03350 }
03351 reqs.freeNonPersReq(request);
03352 }
03353
03354 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
03355 CpvAccess(_currentObj) = dis;
03356 MSG_ORDER_DEBUG( printf("[%d] AMPI thread rescheduled to Index %d buf %p src %d\n",CkMyPe(),dis->thisIndex,buf,s); )
03357 #endif
03358 #if CMK_BIGSIM_CHARM && CMK_TRACE_IN_CHARM
03359
03360
03361 if(CpvAccess(traceOn)) CthTraceResume(dis->thread->getThread());
03362 #endif
03363
03364 return 0;
03365 }
03366
03367 void ampi::probe(int t, int s, MPI_Comm comm, MPI_Status *sts) noexcept
03368 {
03369 if (handle_MPI_PROC_NULL(s, comm, sts)) return;
03370
03371 #if CMK_BIGSIM_CHARM
03372 void *curLog;
03373 _TRACE_BG_TLINE_END(&curLog);
03374 #endif
03375
03376 ampi *dis = getAmpiInstance(comm);
03377 AmpiMsg *msg = NULL;
03378 while(1) {
03379 MPI_Status tmpStatus;
03380 msg = unexpectedMsgs.probe(t, s, (sts == MPI_STATUS_IGNORE) ? (int*)&tmpStatus : (int*)sts);
03381 if (msg) break;
03382
03383 dis = dis->blockOnRecv();
03384 }
03385
03386 if (sts != MPI_STATUS_IGNORE) {
03387 sts->MPI_SOURCE = msg->getSrcRank();
03388 sts->MPI_TAG = msg->getTag();
03389 sts->MPI_COMM = comm;
03390 sts->MPI_LENGTH = msg->getLength();
03391 sts->MPI_CANCEL = 0;
03392 }
03393
03394 #if CMK_BIGSIM_CHARM
03395 _TRACE_BG_SET_INFO((char *)msg, "PROBE_RESUME", &curLog, 1);
03396 #endif
03397 }
03398
03399 void ampi::mprobe(int t, int s, MPI_Comm comm, MPI_Status *sts, MPI_Message *message) noexcept
03400 {
03401 if (handle_MPI_PROC_NULL(s, comm, sts)) {
03402 *message = MPI_MESSAGE_NO_PROC;
03403 return;
03404 }
03405
03406 #if CMK_BIGSIM_CHARM
03407 void *curLog;
03408 _TRACE_BG_TLINE_END(&curLog);
03409 #endif
03410
03411 ampi *dis = this;
03412 AmpiMsg *msg = NULL;
03413 while(1) {
03414 MPI_Status tmpStatus;
03415
03416
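    // Unlike probe(), mprobe() removes the message from the unexpected queue so it can only be received through the returned MPI_Message handle.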
03417 msg = unexpectedMsgs.get(t, s, (sts == MPI_STATUS_IGNORE) ? (int*)&tmpStatus : (int*)sts);
03418 if (msg)
03419 break;
03420
03421 dis = dis->blockOnRecv();
03422 }
03423
03424 msg->setComm(comm);
03425 *message = parent->putMatchedMsg(msg);
03426
03427 if (sts != MPI_STATUS_IGNORE) {
03428 sts->MPI_SOURCE = msg->getSrcRank();
03429 sts->MPI_TAG = msg->getTag();
03430 sts->MPI_COMM = msg->getComm();
03431 sts->MPI_LENGTH = msg->getLength();
03432 sts->MPI_CANCEL = 0;
03433 }
03434
03435 #if CMK_BIGSIM_CHARM
03436 _TRACE_BG_SET_INFO((char *)msg, "MPROBE_RESUME", &curLog, 1);
03437 #endif
03438 }
03439
03440 int ampi::iprobe(int t, int s, MPI_Comm comm, MPI_Status *sts) noexcept
03441 {
03442 if (handle_MPI_PROC_NULL(s, comm, sts)) return 1;
03443
03444 MPI_Status tmpStatus;
03445 AmpiMsg* msg = unexpectedMsgs.probe(t, s, (sts == MPI_STATUS_IGNORE) ? (int*)&tmpStatus : (int*)sts);
03446 if (msg) {
03447 msg->setComm(comm);
03448 if (sts != MPI_STATUS_IGNORE) {
03449 sts->MPI_SOURCE = msg->getSrcRank();
03450 sts->MPI_TAG = msg->getTag();
03451 sts->MPI_COMM = msg->getComm();
03452 sts->MPI_LENGTH = msg->getLength();
03453 sts->MPI_CANCEL = 0;
03454 }
03455 return 1;
03456 }
03457 #if CMK_BIGSIM_CHARM
03458 void *curLog;
03459 _TRACE_BG_TLINE_END(&curLog);
03460 #endif
03461 thread->schedule();
03462 #if CMK_BIGSIM_CHARM
03463 _TRACE_BG_SET_INFO(NULL, "IPROBE_RESUME", &curLog, 1);
03464 #endif
03465 return 0;
03466 }
03467
03468 int ampi::improbe(int tag, int source, MPI_Comm comm, MPI_Status *sts,
03469 MPI_Message *message) noexcept
03470 {
03471 if (handle_MPI_PROC_NULL(source, comm, sts)) {
03472 *message = MPI_MESSAGE_NO_PROC;
03473 return 1;
03474 }
03475
03476 MPI_Status tmpStatus;
03477
03478
03479 AmpiMsg* msg = unexpectedMsgs.get(tag, source, (sts == MPI_STATUS_IGNORE) ? (int*)&tmpStatus : (int*)sts);
03480 if (msg) {
03481 msg->setComm(comm);
03482 *message = parent->putMatchedMsg(msg);
03483 if (sts != MPI_STATUS_IGNORE) {
03484 sts->MPI_SOURCE = msg->getSrcRank();
03485 sts->MPI_TAG = msg->getTag();
03486 sts->MPI_COMM = comm;
03487 sts->MPI_LENGTH = msg->getLength();
03488 sts->MPI_CANCEL = 0;
03489 }
03490 return 1;
03491 }
03492
03493 #if CMK_BIGSIM_CHARM
03494 void *curLog;
03495 _TRACE_BG_TLINE_END(&curLog);
03496 #endif
03497 thread->schedule();
03498 #if CMK_BIGSIM_CHARM
03499 _TRACE_BG_SET_INFO(NULL, "IMPROBE_RESUME", &curLog, 1);
03500 #endif
03501 return 0;
03502 }
03503
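// Blocking broadcast over an intracommunicator: every rank posts a receive for the broadcast;
// the root then delivers the payload to the whole array via bcastResult(), and all ranks wait
// for their receive to complete.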
03504 void ampi::bcast(int root, void* buf, int count, MPI_Datatype type, MPI_Comm destcomm) noexcept
03505 {
03506 MPI_Request req;
03507
03508 if (root==getRank()) {
03509 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
03510 CpvAccess(_currentObj) = this;
03511 #endif
03512 irecvBcast(buf, count, type, root, destcomm, &req);
03513 thisProxy.bcastResult(makeBcastMsg(buf, count, type, root, destcomm));
03514 }
03515 else {
03516 oorder.incCollSeqOutgoing();
03517 irecvBcast(buf, count, type, root, destcomm, &req);
03518 }
03519
03520 MPI_Wait(&req, MPI_STATUS_IGNORE);
03521 }
03522
03523 int ampi::intercomm_bcast(int root, void* buf, int count, MPI_Datatype type, MPI_Comm intercomm) noexcept
03524 {
03525 if (root==MPI_ROOT) {
03526 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
03527 CpvAccess(_currentObj) = this;
03528 #endif
03529 remoteProxy.bcastResult(makeBcastMsg(buf, count, type, getRank(), intercomm));
03530 }
03531 else {
03532 oorder.incCollSeqOutgoing();
03533 }
03534
03535 if (root!=MPI_PROC_NULL && root!=MPI_ROOT) {
03536
03537 MPI_Request req;
03538 irecvBcast(buf, count, type, root, intercomm, &req);
03539 MPI_Wait(&req, MPI_STATUS_IGNORE);
03540 }
03541 return MPI_SUCCESS;
03542 }
03543
03544 void ampi::ibcast(int root, void* buf, int count, MPI_Datatype type, MPI_Comm destcomm, MPI_Request* request) noexcept
03545 {
03546 if (root==getRank()) {
03547 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
03548 CpvAccess(_currentObj) = this;
03549 #endif
03550 thisProxy.bcastResult(makeBcastMsg(buf, count, type, getRank(), destcomm));
03551 }
03552 else {
03553 oorder.incCollSeqOutgoing();
03554 }
03555
03556
03557 irecvBcast(buf, count, type, root, destcomm, request);
03558 }
03559
03560 int ampi::intercomm_ibcast(int root, void* buf, int count, MPI_Datatype type, MPI_Comm intercomm, MPI_Request *request) noexcept
03561 {
03562 if (root==MPI_ROOT) {
03563 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
03564 CpvAccess(_currentObj) = this;
03565 #endif
03566 remoteProxy.bcastResult(makeBcastMsg(buf, count, type, getRank(), intercomm));
03567 }
03568 else {
03569 oorder.incCollSeqOutgoing();
03570 }
03571
03572 if (root!=MPI_PROC_NULL && root!=MPI_ROOT) {
03573
03574 irecvBcast(buf, count, type, root, intercomm, request);
03575 }
03576 return MPI_SUCCESS;
03577 }
03578
03579 void ampi::bcastraw(void* buf, int len, CkArrayID aid) noexcept
03580 {
03581 AmpiMsg *msg = new (len, 0) AmpiMsg(0, 0, MPI_BCAST_TAG, 0, len);
03582 memcpy(msg->getData(), buf, len);
03583 CProxy_ampi pa(aid);
03584 pa.generic(msg);
03585 }
03586
03587 int ampi::intercomm_scatter(int root, const void *sendbuf, int sendcount, MPI_Datatype sendtype,
03588 void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm intercomm) noexcept
03589 {
03590 if (root == MPI_ROOT) {
03591 int remote_size = getRemoteIndices().size();
03592
03593 CkDDT_DataType* dttype = getDDT()->getType(sendtype) ;
03594 int itemsize = dttype->getSize(sendcount) ;
03595 for(int i = 0; i < remote_size; i++) {
03596 send(MPI_SCATTER_TAG, getRank(), ((char*)sendbuf)+(itemsize*i),
03597 sendcount, sendtype, i, intercomm);
03598 }
03599 }
03600
03601 if (root!=MPI_PROC_NULL && root!=MPI_ROOT) {
03602 if(-1==recv(MPI_SCATTER_TAG, root, recvbuf, recvcount, recvtype, intercomm))
03603 CkAbort("AMPI> Error in intercomm MPI_Scatter recv");
03604 }
03605
03606 return MPI_SUCCESS;
03607 }
03608
03609 int ampi::intercomm_iscatter(int root, const void *sendbuf, int sendcount, MPI_Datatype sendtype,
03610 void *recvbuf, int recvcount, MPI_Datatype recvtype,
03611 MPI_Comm intercomm, MPI_Request *request) noexcept
03612 {
03613 if (root == MPI_ROOT) {
03614 int remote_size = getRemoteIndices().size();
03615
03616 CkDDT_DataType* dttype = getDDT()->getType(sendtype) ;
03617 int itemsize = dttype->getSize(sendcount) ;
03618
03619 ATAReq *newreq = new ATAReq(remote_size);
03620 for(int i = 0; i < remote_size; i++) {
03621 newreq->reqs[i] = send(MPI_SCATTER_TAG, getRank(), ((char*)sendbuf)+(itemsize*i),
03622 sendcount, sendtype, i, intercomm, 0, I_SEND);
03623 }
03624 *request = postReq(newreq);
03625 }
03626
03627 if (root!=MPI_PROC_NULL && root!=MPI_ROOT) {
03628
03629 irecv(recvbuf,recvcount,recvtype,root,MPI_SCATTER_TAG,intercomm,request);
03630 }
03631
03632 return MPI_SUCCESS;
03633 }
03634
03635 int ampi::intercomm_scatterv(int root, const void* sendbuf, const int* sendcounts, const int* displs,
03636 MPI_Datatype sendtype, void* recvbuf, int recvcount,
03637 MPI_Datatype recvtype, MPI_Comm intercomm) noexcept
03638 {
03639 if (root == MPI_ROOT) {
03640 int remote_size = getRemoteIndices().size();
03641
03642 CkDDT_DataType* dttype = getDDT()->getType(sendtype);
03643 int itemsize = dttype->getSize();
03644 for (int i = 0; i < remote_size; i++) {
03645 send(MPI_SCATTER_TAG, getRank(), ((char*)sendbuf)+(itemsize*displs[i]),
03646 sendcounts[i], sendtype, i, intercomm);
03647 }
03648 }
03649
03650 if (root != MPI_PROC_NULL && root != MPI_ROOT) {
03651 if (-1 == recv(MPI_SCATTER_TAG, root, recvbuf, recvcount, recvtype, intercomm))
03652 CkAbort("AMPI> Error in intercomm MPI_Scatterv recv");
03653 }
03654
03655 return MPI_SUCCESS;
03656 }
03657
03658 int ampi::intercomm_iscatterv(int root, const void* sendbuf, const int* sendcounts, const int* displs,
03659 MPI_Datatype sendtype, void* recvbuf, int recvcount,
03660 MPI_Datatype recvtype, MPI_Comm intercomm, MPI_Request* request) noexcept
03661 {
03662 if (root == MPI_ROOT) {
03663 int remote_size = getRemoteIndices().size();
03664
03665 CkDDT_DataType* dttype = getDDT()->getType(sendtype);
03666 int itemsize = dttype->getSize();
03667
03668 ATAReq *newreq = new ATAReq(remote_size);
03669 for (int i = 0; i < remote_size; i++) {
03670 newreq->reqs[i] = send(MPI_SCATTER_TAG, getRank(), ((char*)sendbuf)+(itemsize*displs[i]),
03671 sendcounts[i], sendtype, i, intercomm, 0, I_SEND);
03672 }
03673 *request = postReq(newreq);
03674 }
03675
03676 if (root != MPI_PROC_NULL && root != MPI_ROOT) {
03677
03678 irecv(recvbuf, recvcount, recvtype, root, MPI_SCATTER_TAG, intercomm, request);
03679 }
03680
03681 return MPI_SUCCESS;
03682 }
03683
03684 int MPI_comm_null_copy_fn(MPI_Comm comm, int keyval, void *extra_state,
03685 void *attr_in, void *attr_out, int *flag){
03686 (*flag) = 0;
03687 return (MPI_SUCCESS);
03688 }
03689
03690 int MPI_comm_dup_fn(MPI_Comm comm, int keyval, void *extra_state,
03691 void *attr_in, void *attr_out, int *flag){
03692 (*(void **)attr_out) = attr_in;
03693 (*flag) = 1;
03694 return (MPI_SUCCESS);
03695 }
03696
03697 int MPI_comm_null_delete_fn(MPI_Comm comm, int keyval, void *attr, void *extra_state){
03698 return (MPI_SUCCESS);
03699 }
03700
03701 int MPI_type_null_copy_fn(MPI_Datatype type, int keyval, void *extra_state,
03702 void *attr_in, void *attr_out, int *flag){
03703 (*flag) = 0;
03704 return (MPI_SUCCESS);
03705 }
03706
03707 int MPI_type_dup_fn(MPI_Datatype type, int keyval, void *extra_state,
03708 void *attr_in, void *attr_out, int *flag){
03709 (*(void **)attr_out) = attr_in;
03710 (*flag) = 1;
03711 return (MPI_SUCCESS);
03712 }
03713
03714 int MPI_type_null_delete_fn(MPI_Datatype type, int keyval, void *attr, void *extra_state){
03715 return (MPI_SUCCESS);
03716 }
03717
03718 void AmpiSeqQ::pup(PUP::er &p) noexcept {
03719 p|out;
03720 p|elements;
03721 }
03722
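// Queue a message that arrived ahead of its sequence number so it can be delivered later
// by getOutOfOrder().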
03723 void AmpiSeqQ::putOutOfOrder(int seqIdx, AmpiMsg *msg) noexcept
03724 {
03725 AmpiOtherElement &el=elements[seqIdx];
03726 #if CMK_ERROR_CHECKING
03727 if (msg->getSeq() < el.getSeqIncoming())
03728 CkAbort("AMPI Logic error: received late out-of-order message!\n");
03729 #endif
03730 if (seqIdx == COLL_SEQ_IDX) CmiReference(UsrToEnv(msg));
03731 out.enq(msg);
03732 el.incNumOutOfOrder();
03733 }
03734
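// Return the next in-sequence message queued for seqIdx, or NULL if it has not arrived yet.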
03735 AmpiMsg *AmpiSeqQ::getOutOfOrder(int seqIdx) noexcept
03736 {
03737 AmpiOtherElement &el=elements[seqIdx];
03738 if (el.getNumOutOfOrder()==0) return 0;
03739
03740 for (int i=0;i<out.length();i++) {
03741 AmpiMsg *msg=out.deq();
03742 if (msg->getSeqIdx()==seqIdx && msg->getSeq()==el.getSeqIncoming()) {
03743 el.incSeqIncoming();
03744 el.decNumOutOfOrder();
03745 return msg;
03746 }
03747 else
03748 out.enq(msg);
03749 }
03750
03751 return 0;
03752 }
03753
03754 void AmpiRequest::print() const noexcept {
03755 CkPrintf("In AmpiRequest: buf=%p, count=%d, type=%d, src=%d, tag=%d, comm=%d, reqIdx=%d, complete=%d, blocked=%d\n",
03756 buf, count, type, src, tag, comm, reqIdx, (int)complete, (int)blocked);
03757 }
03758
03759 void IReq::print() const noexcept {
03760 AmpiRequest::print();
03761 CkPrintf("In IReq: this=%p, length=%d, cancelled=%d, persistent=%d\n", this, length, (int)cancelled, (int)persistent);
03762 }
03763
03764 void RednReq::print() const noexcept {
03765 AmpiRequest::print();
03766 CkPrintf("In RednReq: this=%p, op=%d\n", this, op);
03767 }
03768
03769 void GatherReq::print() const noexcept {
03770 AmpiRequest::print();
03771 CkPrintf("In GatherReq: this=%p\n", this);
03772 }
03773
03774 void GathervReq::print() const noexcept {
03775 AmpiRequest::print();
03776 CkPrintf("In GathervReq: this=%p\n", this);
03777 }
03778
03779 void ATAReq::print() const noexcept {
03780 AmpiRequest::print();
03781 CkPrintf("In ATAReq: num_reqs=%d\n", reqs.size());
03782 }
03783
03784 void GReq::print() const noexcept {
03785 AmpiRequest::print();
03786 CkPrintf("In GReq: this=%p\n", this);
03787 }
03788
03789 void SendReq::print() const noexcept {
03790 AmpiRequest::print();
03791 CkPrintf("In SendReq: this=%p, persistent=%d\n", this, (int)persistent);
03792 }
03793
03794 void SsendReq::print() const noexcept {
03795 AmpiRequest::print();
03796 CkPrintf("In SsendReq: this=%p, persistent=%d\n", this, (int)persistent);
03797 }
03798
03799 void AmpiRequestList::pup(PUP::er &p, AmpiRequestPool* pool) noexcept {
03800 if (p.isUnpacking()) {
03801 CkAssert(pool);
03802 reqPool = pool;
03803 }
03804 if(!CmiMemoryIs(CMI_MEMORY_IS_ISOMALLOC)){
03805 return;
03806 }
03807
03808 p|startIdx;
03809 int size;
03810 if(!p.isUnpacking()){
03811 size = reqs.size();
03812 }
03813 p|size;
03814 if(p.isUnpacking()){
03815 reqs.resize(size);
03816 }
03817
03818 for(int i=0;i<size;i++){
03819 AmpiReqType reqType;
03820 if(!p.isUnpacking()){
03821 if(reqs[i] == NULL){
03822 reqType = AMPI_INVALID_REQ;
03823 }else{
03824 reqType = reqs[i]->getType();
03825 }
03826 }
03827 p|reqType;
03828 if(reqType != AMPI_INVALID_REQ){
03829 if(p.isUnpacking()){
03830 switch(reqType){
03831 case AMPI_I_REQ:
03832 reqs[i] = reqPool->newReq<IReq>();
03833 break;
03834 case AMPI_REDN_REQ:
03835 reqs[i] = new RednReq;
03836 break;
03837 case AMPI_GATHER_REQ:
03838 reqs[i] = new GatherReq;
03839 break;
03840 case AMPI_GATHERV_REQ:
03841 reqs[i] = new GathervReq;
03842 break;
03843 case AMPI_SEND_REQ:
03844 reqs[i] = reqPool->newReq<SendReq>();
03845 break;
03846 case AMPI_SSEND_REQ:
03847 reqs[i] = reqPool->newReq<SsendReq>();
03848 break;
03849 case AMPI_ATA_REQ:
03850 reqs[i] = new ATAReq;
03851 break;
03852 case AMPI_G_REQ:
03853 reqs[i] = new GReq;
03854 break;
03855 #if CMK_CUDA
03856 case AMPI_GPU_REQ:
03857 CkAbort("AMPI> error trying to PUP a non-migratable GPU request!");
03858 break;
03859 #endif
03860 case AMPI_INVALID_REQ:
03861 CkAbort("AMPI> error trying to PUP an invalid request!");
03862 break;
03863 }
03864 }
03865 reqs[i]->pup(p);
03866 }else{
03867 reqs[i] = NULL;
03868 }
03869 }
03870 if(p.isDeleting()){
03871 reqs.clear();
03872 }
03873 }
03874
03875
03876 ampiParent *getAmpiParent() noexcept {
03877 ampiParent *p = CtvAccess(ampiPtr);
03878 #if CMK_ERROR_CHECKING
03879 if (p==NULL) CkAbort("Cannot call MPI routines before AMPI is initialized.\n");
03880 #endif
03881 return p;
03882 }
03883
03884 ampi *getAmpiInstance(MPI_Comm comm) noexcept {
03885 ampi *ptr=getAmpiParent()->comm2ampi(comm);
03886 #if CMK_ERROR_CHECKING
03887 if (ptr==NULL) CkAbort("AMPI's getAmpiInstance> null pointer\n");
03888 #endif
03889 return ptr;
03890 }
03891
03892 bool isAmpiThread() noexcept {
03893 return (CtvAccess(ampiPtr) != NULL);
03894 }
03895
03896 inline static AmpiRequestList &getReqs() noexcept {
03897 return getAmpiParent()->ampiReqs;
03898 }
03899
03900 inline void checkComm(MPI_Comm comm) noexcept {
03901 #if AMPI_ERROR_CHECKING
03902 getAmpiParent()->checkComm(comm);
03903 #endif
03904 }
03905
03906 inline void checkRequest(MPI_Request req) noexcept {
03907 #if AMPI_ERROR_CHECKING
03908 getReqs().checkRequest(req);
03909 #endif
03910 }
03911
03912 inline void checkRequests(int n, MPI_Request* reqs) noexcept {
03913 #if AMPI_ERROR_CHECKING
03914 AmpiRequestList& reqlist = getReqs();
03915 for(int i=0;i<n;i++)
03916 reqlist.checkRequest(reqs[i]);
03917 #endif
03918 }
03919
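// Test a single request: if it has completed, fill in the status and free the request
// unless it is persistent.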
03920 int testRequest(MPI_Request *reqIdx, int *flag, MPI_Status *sts) noexcept {
03921 if(*reqIdx==MPI_REQUEST_NULL){
03922 *flag = 1;
03923 clearStatus(sts);
03924 return MPI_SUCCESS;
03925 }
03926 checkRequest(*reqIdx);
03927 ampiParent* pptr = getAmpiParent();
03928 AmpiRequestList& reqList = pptr->getReqs();
03929 AmpiRequest& req = *reqList[*reqIdx];
03930 if(1 == (*flag = req.test())){
03931 req.wait(sts);
03932 reqList.freeNonPersReq(*reqIdx);
03933 }
03934 return MPI_SUCCESS;
03935 }
03936
03937 int testRequestNoFree(MPI_Request *reqIdx, int *flag, MPI_Status *sts) noexcept {
03938 if(*reqIdx==MPI_REQUEST_NULL){
03939 *flag = 1;
03940 clearStatus(sts);
03941 return MPI_SUCCESS;
03942 }
03943 checkRequest(*reqIdx);
03944 AmpiRequestList& reqList = getReqs();
03945 AmpiRequest& req = *reqList[*reqIdx];
03946 *flag = req.test();
03947 if(*flag)
03948 req.wait(sts);
03949 return MPI_SUCCESS;
03950 }
03951
03952 AMPI_API_IMPL(int, MPI_Is_thread_main, int *flag)
03953 {
03954 AMPI_API_INIT("AMPI_Is_thread_main");
03955 if (isAmpiThread()) {
03956 *flag = 1;
03957 } else {
03958 *flag = 0;
03959 }
03960 return MPI_SUCCESS;
03961 }
03962
03963 AMPI_API_IMPL(int, MPI_Query_thread, int *provided)
03964 {
03965 AMPI_API("AMPI_Query_thread");
03966 *provided = CkpvAccess(ampiThreadLevel);
03967 return MPI_SUCCESS;
03968 }
03969
03970 AMPI_API_IMPL(int, MPI_Init_thread, int *p_argc, char*** p_argv, int required, int *provided)
03971 {
03972 if (nodeinit_has_been_called) {
03973 AMPI_API_INIT("AMPI_Init_thread");
03974
03975 #if AMPI_ERROR_CHECKING
03976 if (required < MPI_THREAD_SINGLE || required > MPI_THREAD_MULTIPLE) {
03977 return ampiErrhandler("AMPI_Init_thread", MPI_ERR_ARG);
03978 }
03979 #endif
03980
03981 if (required == MPI_THREAD_SINGLE) {
03982 CkpvAccess(ampiThreadLevel) = MPI_THREAD_SINGLE;
03983 }
03984 else {
03985 CkpvAccess(ampiThreadLevel) = MPI_THREAD_FUNNELED;
03986 }
03987
03988
03989 *provided = CkpvAccess(ampiThreadLevel);
03990 return MPI_Init(p_argc, p_argv);
03991 }
03992 else
03993 {
03994 CkAbort("MPI_Init_thread> AMPI has not been initialized! Possibly due to AMPI requiring '#include \"mpi.h\" be in the same file as main() in C/C++ programs and \'program main\' be renamed to \'subroutine mpi_main\' in Fortran programs!");
03995 return MPI_SUCCESS;
03996 }
03997 }
03998
03999 AMPI_API_IMPL(int, MPI_Init, int *p_argc, char*** p_argv)
04000 {
04001 if (nodeinit_has_been_called) {
04002 AMPI_API_INIT("AMPI_Init");
04003 char **argv;
04004 if (p_argv) argv=*p_argv;
04005 else argv=CkGetArgv();
04006 ampiInit(argv);
04007 if (p_argc) *p_argc=CmiGetArgc(argv);
04008 }
04009 else
04010 {
04011 CkAbort("MPI_Init> AMPI has not been initialized! Possibly due to AMPI requiring '#include \"mpi.h\" be in the same file as main() in C/C++ programs and \'program main\' be renamed to \'subroutine mpi_main\' in Fortran programs!");
04012 }
04013
04014 return MPI_SUCCESS;
04015 }
04016
04017 AMPI_API_IMPL(int, MPI_Initialized, int *isInit)
04018 {
04019 if (nodeinit_has_been_called) {
04020 AMPI_API_INIT("AMPI_Initialized");
04021 *isInit=CtvAccess(ampiInitDone);
04022 }
04023 else {
04024 *isInit=0;
04025 }
04026 return MPI_SUCCESS;
04027 }
04028
04029 AMPI_API_IMPL(int, MPI_Finalized, int *isFinalized)
04030 {
04031 AMPI_API_INIT("AMPI_Finalized");
04032 *isFinalized=(CtvAccess(ampiFinalized)) ? 1 : 0;
04033 return MPI_SUCCESS;
04034 }
04035
04036 AMPI_API_IMPL(int, MPI_Comm_rank, MPI_Comm comm, int *rank)
04037 {
04038 AMPI_API("AMPI_Comm_rank");
04039
04040 #if AMPI_ERROR_CHECKING
04041 int ret = checkCommunicator("AMPI_Comm_rank", comm);
04042 if(ret != MPI_SUCCESS)
04043 return ret;
04044 #endif
04045
04046 #if AMPIMSGLOG
04047 ampiParent* pptr = getAmpiParent();
04048 if(msgLogRead){
04049 PUParray(*(pptr->fromPUPer), (char*)rank, sizeof(int));
04050 return MPI_SUCCESS;
04051 }
04052 #endif
04053
04054 *rank = getAmpiInstance(comm)->getRank();
04055
04056 #if AMPIMSGLOG
04057 if(msgLogWrite && record_msglog(pptr->thisIndex)){
04058 PUParray(*(pptr->toPUPer), (char*)rank, sizeof(int));
04059 }
04060 #endif
04061 return MPI_SUCCESS;
04062 }
04063
04064 AMPI_API_IMPL(int, MPI_Comm_size, MPI_Comm comm, int *size)
04065 {
04066 AMPI_API("AMPI_Comm_size");
04067
04068 #if AMPI_ERROR_CHECKING
04069 int ret = checkCommunicator("AMPI_Comm_size", comm);
04070 if(ret != MPI_SUCCESS)
04071 return ret;
04072 #endif
04073
04074 #if AMPIMSGLOG
04075 ampiParent* pptr = getAmpiParent();
04076 if(msgLogRead){
04077 PUParray(*(pptr->fromPUPer), (char*)size, sizeof(int));
04078 return MPI_SUCCESS;
04079 }
04080 #endif
04081
04082 *size = getAmpiInstance(comm)->getSize();
04083
04084 #if AMPIMSGLOG
04085 if(msgLogWrite && record_msglog(pptr->thisIndex)){
04086 PUParray(*(pptr->toPUPer), (char*)size, sizeof(int));
04087 }
04088 #endif
04089
04090 return MPI_SUCCESS;
04091 }
04092
04093 AMPI_API_IMPL(int, MPI_Comm_compare, MPI_Comm comm1, MPI_Comm comm2, int *result)
04094 {
04095 AMPI_API("AMPI_Comm_compare");
04096
04097 #if AMPI_ERROR_CHECKING
04098 int ret;
04099 ret = checkCommunicator("AMPI_Comm_compare", comm1);
04100 if(ret != MPI_SUCCESS)
04101 return ret;
04102 ret = checkCommunicator("AMPI_Comm_compare", comm2);
04103 if(ret != MPI_SUCCESS)
04104 return ret;
04105 #endif
04106
04107 if(comm1==comm2) *result=MPI_IDENT;
04108 else{
04109 int congruent=1;
04110 vector<int> ind1, ind2;
04111 ind1 = getAmpiInstance(comm1)->getIndices();
04112 ind2 = getAmpiInstance(comm2)->getIndices();
04113 if(ind1.size()==ind2.size()){
04114 for(int i=0;i<ind1.size();i++){
04115 int equal=0;
04116 for(int j=0;j<ind2.size();j++){
04117 if(ind1[i]==ind2[j]){
04118 equal=1;
04119 if(i!=j) congruent=0;
04120 }
04121 }
04122 if(!equal){
04123 *result=MPI_UNEQUAL;
04124 return MPI_SUCCESS;
04125 }
04126 }
04127 }
04128 else{
04129 *result=MPI_UNEQUAL;
04130 return MPI_SUCCESS;
04131 }
04132 if(congruent==1) *result=MPI_CONGRUENT;
04133 else *result=MPI_SIMILAR;
04134 }
04135 return MPI_SUCCESS;
04136 }
04137
04138 static bool atexit_called = false;
04139
04140 CLINKAGE
04141 void ampiMarkAtexit()
04142 {
04143 atexit_called = true;
04144 }
04145
04146 CLINKAGE
04147 void AMPI_Exit(int exitCode)
04148 {
04149
04150
04151 AMPI_API_INIT("AMPI_Exit");
04152 CkpvAccess(msgPool).clear();
04153
04154 if (!atexit_called)
04155 TCHARM_Done(exitCode);
04156 }
04157
04158 FLINKAGE
04159 void FTN_NAME(MPI_EXIT,mpi_exit)(int *exitCode)
04160 {
04161 AMPI_Exit(*exitCode);
04162 }
04163
04164 AMPI_API_IMPL(int, MPI_Finalize, void)
04165 {
04166 {
04167
04168
04169 AMPI_API("AMPI_Finalize");
04170
04171 #if AMPI_PRINT_IDLE
04172 CkPrintf("[%d] Idle time %fs.\n", CkMyPe(), totalidle);
04173 #endif
04174 CtvAccess(ampiFinalized)=true;
04175
04176 #if AMPI_PRINT_MSG_SIZES
04177 getAmpiParent()->printMsgSizes();
04178 #endif
04179
04180 #if CMK_BIGSIM_CHARM && CMK_TRACE_IN_CHARM
04181 if(CpvAccess(traceOn)) traceSuspend();
04182 #endif
04183 }
04184
04185 AMPI_Exit(0);
04186 return MPI_SUCCESS;
04187 }
04188
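// Register a new request in the request list and, if it has not already been matched to an
// incoming message, post it so arriving messages can match against it.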
04189 MPI_Request ampi::postReq(AmpiRequest* newreq) noexcept
04190 {
04191
04192 MPI_Request request = getReqs().insert(newreq);
04193
04194
04195
04196 if (newreq->isUnmatched()) {
04197 postedReqs.put(newreq);
04198 }
04199 return request;
04200 }
04201
04202 AMPI_API_IMPL(int, MPI_Send, const void *msg, int count, MPI_Datatype type,
04203 int dest, int tag, MPI_Comm comm)
04204 {
04205 AMPI_API("AMPI_Send");
04206
04207 handle_MPI_BOTTOM((void*&)msg, type);
04208
04209 #if AMPI_ERROR_CHECKING
04210 int ret;
04211 ret = errorCheck("AMPI_Send", comm, 1, count, 1, type, 1, tag, 1, dest, 1, msg, 1);
04212 if(ret != MPI_SUCCESS)
04213 return ret;
04214 #endif
04215
04216 #if AMPIMSGLOG
04217 if(msgLogRead){
04218 return MPI_SUCCESS;
04219 }
04220 #endif
04221
04222 ampi *ptr = getAmpiInstance(comm);
04223 ptr->send(tag, ptr->getRank(), msg, count, type, dest, comm);
04224
04225 return MPI_SUCCESS;
04226 }
04227
04228 AMPI_API_IMPL(int, MPI_Bsend, const void *buf, int count, MPI_Datatype datatype,
04229 int dest, int tag, MPI_Comm comm)
04230 {
04231 AMPI_API("AMPI_Bsend");
04232
04233
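// The buffered send is performed as a standard send; the buffer attached via
// MPI_Buffer_attach is not used for copying.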
04234 return MPI_Send(buf, count, datatype, dest, tag, comm);
04235 }
04236
04237 AMPI_API_IMPL(int, MPI_Buffer_attach, void *buffer, int size)
04238 {
04239 AMPI_API("AMPI_Buffer_attach");
04240 #if AMPI_ERROR_CHECKING
04241 if (size < 0) {
04242 return ampiErrhandler("AMPI_Buffer_attach", MPI_ERR_ARG);
04243 }
04244 #endif
04245
04246
04247
04248 getAmpiParent()->attachBuffer(buffer, size);
04249 return MPI_SUCCESS;
04250 }
04251
04252 AMPI_API_IMPL(int, MPI_Buffer_detach, void *buffer, int *size)
04253 {
04254 AMPI_API("AMPI_Buffer_detach");
04255 getAmpiParent()->detachBuffer(buffer, size);
04256 return MPI_SUCCESS;
04257 }
04258
04259 AMPI_API_IMPL(int, MPI_Rsend, const void *buf, int count, MPI_Datatype datatype,
04260 int dest, int tag, MPI_Comm comm)
04261 {
04262
04263 AMPI_API("AMPI_Rsend");
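// A ready-mode send is always allowed to be implemented as a standard send.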
04264 return MPI_Send(buf, count, datatype, dest, tag, comm);
04265 }
04266
04267 AMPI_API_IMPL(int, MPI_Ssend, const void *msg, int count, MPI_Datatype type,
04268 int dest, int tag, MPI_Comm comm)
04269 {
04270 AMPI_API("AMPI_Ssend");
04271
04272 handle_MPI_BOTTOM((void*&)msg, type);
04273
04274 #if AMPI_ERROR_CHECKING
04275 int ret = errorCheck("AMPI_Ssend", comm, 1, count, 1, type, 1, tag, 1, dest, 1, msg, 1);
04276 if(ret != MPI_SUCCESS)
04277 return ret;
04278 #endif
04279
04280 #if AMPIMSGLOG
04281 if(msgLogRead){
04282 return MPI_SUCCESS;
04283 }
04284 #endif
04285
04286 ampi *ptr = getAmpiInstance(comm);
04287 ptr->send(tag, ptr->getRank(), msg, count, type, dest, comm, 1);
04288
04289 return MPI_SUCCESS;
04290 }
04291
04292 AMPI_API_IMPL(int, MPI_Issend, const void *buf, int count, MPI_Datatype type, int dest,
04293 int tag, MPI_Comm comm, MPI_Request *request)
04294 {
04295 AMPI_API("AMPI_Issend");
04296
04297 handle_MPI_BOTTOM((void*&)buf, type);
04298
04299 #if AMPI_ERROR_CHECKING
04300 int ret = errorCheck("AMPI_Issend", comm, 1, count, 1, type, 1, tag, 1, dest, 1, buf, 1);
04301 if(ret != MPI_SUCCESS){
04302 *request = MPI_REQUEST_NULL;
04303 return ret;
04304 }
04305 #endif
04306
04307 #if AMPIMSGLOG
04308 ampiParent* pptr = getAmpiParent();
04309 if(msgLogRead){
04310 PUParray(*(pptr->fromPUPer), (char *)request, sizeof(MPI_Request));
04311 return MPI_SUCCESS;
04312 }
04313 #endif
04314
04315 USER_CALL_DEBUG("AMPI_Issend("<<type<<","<<dest<<","<<tag<<","<<comm<<")");
04316 ampiParent* pptr = getAmpiParent();
04317 ampi *ptr = getAmpiInstance(comm);
04318 *request = ptr->postReq(pptr->reqPool.newReq<SsendReq>(type, comm, pptr->getDDT()));
04319
04320
04321 ptr->send(tag, ptr->getRank(), buf, count, type, dest, comm, *request+2, I_SEND);
04322
04323 #if AMPIMSGLOG
04324 if(msgLogWrite && record_msglog(pptr->thisIndex)){
04325 PUParray(*(pptr->toPUPer), (char *)request, sizeof(MPI_Request));
04326 }
04327 #endif
04328
04329 return MPI_SUCCESS;
04330 }
04331
04332 AMPI_API_IMPL(int, MPI_Recv, void *msg, int count, MPI_Datatype type, int src, int tag,
04333 MPI_Comm comm, MPI_Status *status)
04334 {
04335 AMPI_API("AMPI_Recv");
04336
04337 handle_MPI_BOTTOM(msg, type);
04338
04339 #if AMPI_ERROR_CHECKING
04340 int ret = errorCheck("AMPI_Recv", comm, 1, count, 1, type, 1, tag, 1, src, 1, msg, 1);
04341 if(ret != MPI_SUCCESS)
04342 return ret;
04343 #endif
04344
04345 #if AMPIMSGLOG
04346 ampiParent* pptr = getAmpiParent();
04347 if(msgLogRead){
04348 (*(pptr->fromPUPer))|(pptr->pupBytes);
04349 PUParray(*(pptr->fromPUPer), (char *)msg, (pptr->pupBytes));
04350 PUParray(*(pptr->fromPUPer), (char *)status, sizeof(MPI_Status));
04351 return MPI_SUCCESS;
04352 }
04353 #endif
04354
04355 ampi *ptr = getAmpiInstance(comm);
04356 if(-1==ptr->recv(tag,src,msg,count,type,comm,status)) CkAbort("AMPI> Error in MPI_Recv");
04357
04358 #if AMPIMSGLOG
04359 if(msgLogWrite && record_msglog(pptr->thisIndex)){
04360 (pptr->pupBytes) = getDDT()->getSize(type) * count;
04361 (*(pptr->toPUPer))|(pptr->pupBytes);
04362 PUParray(*(pptr->toPUPer), (char *)msg, (pptr->pupBytes));
04363 PUParray(*(pptr->toPUPer), (char *)status, sizeof(MPI_Status));
04364 }
04365 #endif
04366
04367 return MPI_SUCCESS;
04368 }
04369
04370 AMPI_API_IMPL(int, MPI_Probe, int src, int tag, MPI_Comm comm, MPI_Status *status)
04371 {
04372 AMPI_API("AMPI_Probe");
04373
04374 #if AMPI_ERROR_CHECKING
04375 int ret = errorCheck("AMPI_Probe", comm, 1, 0, 0, 0, 0, tag, 1, src, 1, 0, 0);
04376 if(ret != MPI_SUCCESS)
04377 return ret;
04378 #endif
04379
04380 ampi *ptr = getAmpiInstance(comm);
04381 ptr->probe(tag, src, comm, status);
04382 return MPI_SUCCESS;
04383 }
04384
04385 AMPI_API_IMPL(int, MPI_Iprobe, int src, int tag, MPI_Comm comm, int *flag, MPI_Status *status)
04386 {
04387 AMPI_API("AMPI_Iprobe");
04388
04389 #if AMPI_ERROR_CHECKING
04390 int ret = errorCheck("AMPI_Iprobe", comm, 1, 0, 0, 0, 0, tag, 1, src, 1, 0, 0);
04391 if(ret != MPI_SUCCESS)
04392 return ret;
04393 #endif
04394
04395 ampi *ptr = getAmpiInstance(comm);
04396 *flag = ptr->iprobe(tag, src, comm, status);
04397 return MPI_SUCCESS;
04398 }
04399
04400 AMPI_API_IMPL(int, MPI_Improbe, int source, int tag, MPI_Comm comm, int *flag,
04401 MPI_Message *message, MPI_Status *status)
04402 {
04403 AMPI_API("AMPI_Improbe");
04404
04405 #if AMPI_ERROR_CHECKING
04406 int ret = errorCheck("AMPI_Improbe", comm, 1, 0, 0, 0, 0, tag, 1, source, 1, 0, 0);
04407 if(ret != MPI_SUCCESS)
04408 return ret;
04409 #endif
04410
04411 ampi *ptr = getAmpiInstance(comm);
04412 *flag = ptr->improbe(tag, source, comm, status, message);
04413
04414 return MPI_SUCCESS;
04415 }
04416
04417 AMPI_API_IMPL(int, MPI_Imrecv, void* buf, int count, MPI_Datatype datatype, MPI_Message *message,
04418 MPI_Request *request)
04419 {
04420 AMPI_API("AMPI_Imrecv");
04421
04422 #if AMPI_ERROR_CHECKING
04423 if (*message == MPI_MESSAGE_NULL) {
04424 return ampiErrhandler("AMPI_Imrecv", MPI_ERR_REQUEST);
04425 }
04426 #endif
04427
04428 if (*message == MPI_MESSAGE_NO_PROC) {
04429 *message = MPI_MESSAGE_NULL;
04430 IReq *newreq = getAmpiParent()->reqPool.newReq<IReq>(buf, count, datatype, MPI_PROC_NULL, MPI_ANY_TAG,
04431 MPI_COMM_NULL, getDDT(), AMPI_REQ_COMPLETED);
04432 *request = getReqs().insert(newreq);
04433 return MPI_SUCCESS;
04434 }
04435
04436 handle_MPI_BOTTOM(buf, datatype);
04437
04438 #if AMPI_ERROR_CHECKING
04439 int ret = errorCheck("AMPI_Imrecv", 0, 0, count, 1, datatype, 1, 0, 0, 0, 0, buf, 1);
04440 if(ret != MPI_SUCCESS){
04441 *request = MPI_REQUEST_NULL;
04442 return ret;
04443 }
04444 #endif
04445
04446 USER_CALL_DEBUG("AMPI_Imrecv("<<datatype<<","<<src<<","<<tag<<","<<comm<<")");
04447 ampiParent* parent = getAmpiParent();
04448 AmpiMsg* msg = parent->getMatchedMsg(*message);
04449 CkAssert(msg);
04450 MPI_Comm comm = msg->getComm();
04451 int tag = msg->getTag();
04452 int src = msg->getSrcRank();
04453
04454 ampi *ptr = getAmpiInstance(comm);
04455 AmpiRequestList& reqs = getReqs();
04456 IReq *newreq = parent->reqPool.newReq<IReq>(buf, count, datatype, src, tag, comm, parent->getDDT());
04457 *request = reqs.insert(newreq);
04458
04459 newreq->receive(ptr, msg);
04460 *message = MPI_MESSAGE_NULL;
04461
04462 return MPI_SUCCESS;
04463 }
04464
04465 AMPI_API_IMPL(int, MPI_Mprobe, int source, int tag, MPI_Comm comm, MPI_Message *message,
04466 MPI_Status *status)
04467 {
04468 AMPI_API("AMPI_Mprobe");
04469
04470 #if AMPI_ERROR_CHECKING
04471 int ret = errorCheck("AMPI_Mprobe", comm, 1, 0, 0, 0, 0, tag, 1, source, 1, 0, 0);
04472 if(ret != MPI_SUCCESS)
04473 return ret;
04474 #endif
04475
04476 ampi *ptr = getAmpiInstance(comm);
04477 ptr->mprobe(tag, source, comm, status, message);
04478
04479 return MPI_SUCCESS;
04480 }
04481
04482 AMPI_API_IMPL(int, MPI_Mrecv, void* buf, int count, MPI_Datatype datatype, MPI_Message *message,
04483 MPI_Status *status)
04484 {
04485 AMPI_API("AMPI_Mrecv");
04486
04487 #if AMPI_ERROR_CHECKING
04488 if (*message == MPI_MESSAGE_NULL) {
04489 return ampiErrhandler("AMPI_Mrecv", MPI_ERR_REQUEST);
04490 }
04491 #endif
04492
04493 if (*message == MPI_MESSAGE_NO_PROC) {
04494 if (status != MPI_STATUS_IGNORE) {
04495 status->MPI_SOURCE = MPI_PROC_NULL;
04496 status->MPI_TAG = MPI_ANY_TAG;
04497 status->MPI_LENGTH = 0;
04498 }
04499 *message = MPI_MESSAGE_NULL;
04500 return MPI_SUCCESS;
04501 }
04502
04503 #if AMPI_ERROR_CHECKING
04504 int ret = errorCheck("AMPI_Mrecv", 0, 0, count, 1, datatype, 1, 0, 0, 0, 0, buf, 1);
04505 if(ret != MPI_SUCCESS)
04506 return ret;
04507 #endif
04508
04509 handle_MPI_BOTTOM(buf, datatype);
04510
04511 ampiParent* parent = getAmpiParent();
04512 AmpiMsg *msg = parent->getMatchedMsg(*message);
04513 CkAssert(msg);
04514 MPI_Comm comm = msg->getComm();
04515 int src = msg->getSrcRank();
04516 int tag = msg->getTag();
04517
04518 #if AMPIMSGLOG
04519 ampiParent* pptr = getAmpiParent();
04520 if(msgLogRead){
04521 (*(pptr->fromPUPer))|(pptr->pupBytes);
04522 PUParray(*(pptr->fromPUPer), (char *)buf, (pptr->pupBytes));
04523 PUParray(*(pptr->fromPUPer), (char *)status, sizeof(MPI_Status));
04524 return MPI_SUCCESS;
04525 }
04526 #endif
04527
04528 ampi *ptr = getAmpiInstance(comm);
04529 if (status != MPI_STATUS_IGNORE) {
04530 status->MPI_SOURCE = msg->getSrcRank();
04531 status->MPI_TAG = msg->getTag();
04532 status->MPI_COMM = comm;
04533 status->MPI_LENGTH = msg->getLength();
04534 status->MPI_CANCEL = 0;
04535 }
04536 ptr->processAmpiMsg(msg, buf, datatype, count);
04537 CkpvAccess(msgPool).deleteAmpiMsg(msg);
04538 *message = MPI_MESSAGE_NULL;
04539
04540 #if AMPIMSGLOG
04541 if(msgLogWrite && record_msglog(pptr->thisIndex)){
04542 (pptr->pupBytes) = getDDT()->getSize(datatype) * count;
04543 (*(pptr->toPUPer))|(pptr->pupBytes);
04544 PUParray(*(pptr->toPUPer), (char *)buf, (pptr->pupBytes));
04545 PUParray(*(pptr->toPUPer), (char *)status, sizeof(MPI_Status));
04546 }
04547 #endif
04548
04549 return MPI_SUCCESS;
04550 }
04551
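// Post the receive first, then the send, and wait on both; the status handed back to the
// caller is that of the receive.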
04552 void ampi::sendrecv(const void *sbuf, int scount, MPI_Datatype stype, int dest, int stag,
04553 void *rbuf, int rcount, MPI_Datatype rtype, int src, int rtag,
04554 MPI_Comm comm, MPI_Status *sts) noexcept
04555 {
04556 MPI_Request reqs[2];
04557 irecv(rbuf, rcount, rtype, src, rtag, comm, &reqs[0]);
04558
04559 reqs[1] = send(stag, getRank(), sbuf, scount, stype, dest, comm, 0, I_SEND);
04560
04561 if (sts == MPI_STATUS_IGNORE) {
04562 MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);
04563 }
04564 else {
04565 MPI_Status statuses[2];
04566 MPI_Waitall(2, reqs, statuses);
04567 *sts = statuses[0];
04568 }
04569 }
04570
04571 AMPI_API_IMPL(int, MPI_Sendrecv, const void *sbuf, int scount, MPI_Datatype stype, int dest,
04572 int stag, void *rbuf, int rcount, MPI_Datatype rtype,
04573 int src, int rtag, MPI_Comm comm, MPI_Status *sts)
04574 {
04575 AMPI_API("AMPI_Sendrecv");
04576
04577 handle_MPI_BOTTOM((void*&)sbuf, stype, rbuf, rtype);
04578
04579 #if AMPI_ERROR_CHECKING
04580 if(sbuf == MPI_IN_PLACE || rbuf == MPI_IN_PLACE)
04581 CkAbort("MPI_sendrecv does not accept MPI_IN_PLACE; use MPI_Sendrecv_replace instead.");
04582 int ret;
04583 ret = errorCheck("AMPI_Sendrecv", comm, 1, scount, 1, stype, 1, stag, 1, dest, 1, sbuf, 1);
04584 if(ret != MPI_SUCCESS)
04585 return ret;
04586 ret = errorCheck("AMPI_Sendrecv", comm, 1, rcount, 1, rtype, 1, rtag, 1, src, 1, rbuf, 1);
04587 if(ret != MPI_SUCCESS)
04588 return ret;
04589 #endif
04590
04591 ampi *ptr = getAmpiInstance(comm);
04592
04593 ptr->sendrecv(sbuf, scount, stype, dest, stag,
04594 rbuf, rcount, rtype, src, rtag,
04595 comm, sts);
04596
04597 return MPI_SUCCESS;
04598 }
04599
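// Pack the outgoing data into a temporary buffer first so the incoming message can safely
// overwrite buf.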
04600 void ampi::sendrecv_replace(void* buf, int count, MPI_Datatype datatype,
04601 int dest, int sendtag, int source, int recvtag,
04602 MPI_Comm comm, MPI_Status *status) noexcept
04603 {
04604 CkDDT_DataType* ddt = getDDT()->getType(datatype);
04605 vector<char> tmpBuf(ddt->getSize(count));
04606 ddt->serialize((char*)buf, tmpBuf.data(), count, ddt->getSize(count), PACK);
04607
04608 MPI_Request reqs[2];
04609 irecv(buf, count, datatype, source, recvtag, comm, &reqs[0]);
04610
04611
04612 reqs[1] = send(sendtag, getRank(), tmpBuf.data(), count, datatype, dest, comm, 0, I_SEND);
04613
04614 if (status == MPI_STATUS_IGNORE) {
04615 MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);
04616 }
04617 else {
04618 MPI_Status statuses[2];
04619 MPI_Waitall(2, reqs, statuses);
04620 *status = statuses[0];
04621 }
04622 }
04623
04624 AMPI_API_IMPL(int, MPI_Sendrecv_replace, void* buf, int count, MPI_Datatype datatype,
04625 int dest, int sendtag, int source, int recvtag,
04626 MPI_Comm comm, MPI_Status *status)
04627 {
04628 AMPI_API("AMPI_Sendrecv_replace");
04629
04630 handle_MPI_BOTTOM(buf, datatype);
04631
04632 #if AMPI_ERROR_CHECKING
04633 int ret;
04634 ret = errorCheck("AMPI_Sendrecv_replace", comm, 1, count, 1, datatype, 1, sendtag, 1, dest, 1, buf, 1);
04635 if(ret != MPI_SUCCESS)
04636 return ret;
04637 ret = errorCheck("AMPI_Sendrecv_replace", comm, 1, count, 1, datatype, 1, recvtag, 1, source, 1, buf, 1);
04638 if(ret != MPI_SUCCESS)
04639 return ret;
04640 #endif
04641
04642 ampi* ptr = getAmpiInstance(comm);
04643
04644 ptr->sendrecv_replace(buf, count, datatype, dest, sendtag, source, recvtag, comm, status);
04645
04646 return MPI_SUCCESS;
04647 }
04648
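// Blocking barrier: contribute to an empty reduction and suspend this thread until
// barrierResult() resumes it.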
04649 void ampi::barrier() noexcept
04650 {
04651 CkAssert(parent->resumeOnColl == false);
04652 parent->resumeOnColl = true;
04653 CkCallback barrierCB(CkReductionTarget(ampi, barrierResult), getProxy());
04654 contribute(barrierCB);
04655 thread->suspend();
04656 getAmpiParent()->resumeOnColl = false;
04657 }
04658
04659 void ampi::barrierResult() noexcept
04660 {
04661 MSG_ORDER_DEBUG(CkPrintf("[%d] barrierResult called\n", thisIndex));
04662 CkAssert(parent->resumeOnColl == true);
04663 thread->resume();
04664 }
04665
04666 AMPI_API_IMPL(int, MPI_Barrier, MPI_Comm comm)
04667 {
04668 AMPI_API("AMPI_Barrier");
04669
04670 #if AMPI_ERROR_CHECKING
04671 int ret = checkCommunicator("AMPI_Barrier", comm);
04672 if(ret != MPI_SUCCESS)
04673 return ret;
04674 #endif
04675
04676 #if CMK_BIGSIM_CHARM
04677 TRACE_BG_AMPI_LOG(MPI_BARRIER, 0);
04678 #endif
04679
04680 ampi *ptr = getAmpiInstance(comm);
04681 MSG_ORDER_DEBUG(CkPrintf("[%d] AMPI_Barrier called on comm %d\n", ptr->thisIndex, comm));
04682
04683 if (ptr->getSize() == 1 && !getAmpiParent()->isInter(comm))
04684 return MPI_SUCCESS;
04685
04686
04687
04688 ptr->barrier();
04689
04690 return MPI_SUCCESS;
04691 }
04692
04693 void ampi::ibarrier(MPI_Request *request) noexcept
04694 {
04695 *request = postReq(parent->reqPool.newReq<IReq>(nullptr, 0, MPI_INT, AMPI_COLL_SOURCE, MPI_ATA_TAG, myComm.getComm(), getDDT()));
04696 CkCallback ibarrierCB(CkReductionTarget(ampi, ibarrierResult), getProxy());
04697 contribute(ibarrierCB);
04698 }
04699
04700 void ampi::ibarrierResult() noexcept
04701 {
04702 MSG_ORDER_DEBUG(CkPrintf("[%d] ibarrierResult called\n", thisIndex));
04703 ampi::sendraw(MPI_ATA_TAG, AMPI_COLL_SOURCE, NULL, 0, thisArrayID, thisIndex);
04704 }
04705
04706 AMPI_API_IMPL(int, MPI_Ibarrier, MPI_Comm comm, MPI_Request *request)
04707 {
04708 AMPI_API("AMPI_Ibarrier");
04709
04710 #if AMPI_ERROR_CHECKING
04711 int ret = checkCommunicator("AMPI_Ibarrier", comm);
04712 if(ret != MPI_SUCCESS){
04713 *request = MPI_REQUEST_NULL;
04714 return ret;
04715 }
04716 #endif
04717
04718 ampi *ptr = getAmpiInstance(comm);
04719
04720 if (ptr->getSize() == 1 && !getAmpiParent()->isInter(comm)) {
04721 *request = ptr->postReq(getAmpiParent()->reqPool.newReq<IReq>(nullptr, 0, MPI_INT, AMPI_COLL_SOURCE, MPI_ATA_TAG, AMPI_COLL_COMM,
04722 getDDT(), AMPI_REQ_COMPLETED));
04723 return MPI_SUCCESS;
04724 }
04725
04726
04727
04728 #if CMK_BIGSIM_CHARM
04729 TRACE_BG_AMPI_LOG(MPI_BARRIER, 0);
04730 #endif
04731
04732 MSG_ORDER_DEBUG(CkPrintf("[%d] AMPI_Ibarrier called on comm %d\n", ptr->thisIndex, comm));
04733
04734 ptr->ibarrier(request);
04735
04736 return MPI_SUCCESS;
04737 }
04738
04739 AMPI_API_IMPL(int, MPI_Bcast, void *buf, int count, MPI_Datatype type, int root, MPI_Comm comm)
04740 {
04741 AMPI_API("AMPI_Bcast");
04742
04743 handle_MPI_BOTTOM(buf, type);
04744
04745 #if AMPI_ERROR_CHECKING
04746 int validateBuf = 1;
04747 if (getAmpiParent()->isInter(comm)) {
04748
04749
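// In an intercomm broadcast, a process that passes root==MPI_PROC_NULL does not participate,
// so its buffer need not be validated.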
04750 if (root==MPI_PROC_NULL) validateBuf = 0;
04751 }
04752 int ret = errorCheck("AMPI_Bcast", comm, 1, count, 1, type, 1, 0, 0, root, 1, buf, validateBuf);
04753
04754 if(ret != MPI_SUCCESS)
04755 return ret;
04756 #endif
04757
04758 ampi* ptr = getAmpiInstance(comm);
04759
04760 if(getAmpiParent()->isInter(comm)) {
04761 return ptr->intercomm_bcast(root, buf, count, type, comm);
04762 }
04763 if(ptr->getSize() == 1)
04764 return MPI_SUCCESS;
04765
04766 #if AMPIMSGLOG
04767 ampiParent* pptr = getAmpiParent();
04768 if(msgLogRead){
04769 (*(pptr->fromPUPer))|(pptr->pupBytes);
04770 PUParray(*(pptr->fromPUPer), (char *)buf, (pptr->pupBytes));
04771 return MPI_SUCCESS;
04772 }
04773 #endif
04774
04775 ptr->bcast(root, buf, count, type,comm);
04776
04777 #if AMPIMSGLOG
04778 if(msgLogWrite && record_msglog(pptr->thisIndex)) {
04779 (pptr->pupBytes) = getDDT()->getSize(type) * count;
04780 (*(pptr->toPUPer))|(pptr->pupBytes);
04781 PUParray(*(pptr->toPUPer), (char *)buf, (pptr->pupBytes));
04782 }
04783 #endif
04784
04785 return MPI_SUCCESS;
04786 }
04787
04788 AMPI_API_IMPL(int, MPI_Ibcast, void *buf, int count, MPI_Datatype type, int root,
04789 MPI_Comm comm, MPI_Request *request)
04790 {
04791 AMPI_API("AMPI_Ibcast");
04792
04793 handle_MPI_BOTTOM(buf, type);
04794
04795 #if AMPI_ERROR_CHECKING
04796 int validateBuf = 1;
04797 if (getAmpiParent()->isInter(comm)) {
04798
04799
04800 if (root==MPI_PROC_NULL) validateBuf = 0;
04801 }
04802 int ret = errorCheck("AMPI_Ibcast", comm, 1, count, 1, type, 1, 0, 0, root, 1, buf, validateBuf);
04803
04804 if(ret != MPI_SUCCESS){
04805 *request = MPI_REQUEST_NULL;
04806 return ret;
04807 }
04808 #endif
04809
04810 ampi* ptr = getAmpiInstance(comm);
04811
04812 if(getAmpiParent()->isInter(comm)) {
04813 return ptr->intercomm_ibcast(root, buf, count, type, comm, request);
04814 }
04815 if(ptr->getSize() == 1){
04816 *request = ptr->postReq(getAmpiParent()->reqPool.newReq<IReq>(buf, count, type, root, MPI_BCAST_TAG, comm,
04817 getDDT(), AMPI_REQ_COMPLETED));
04818 return MPI_SUCCESS;
04819 }
04820
04821 #if AMPIMSGLOG
04822 ampiParent* pptr = getAmpiParent();
04823 if(msgLogRead){
04824 (*(pptr->fromPUPer))|(pptr->pupBytes);
04825 PUParray(*(pptr->fromPUPer), (char *)buf, (pptr->pupBytes));
04826 return MPI_SUCCESS;
04827 }
04828 #endif
04829
04830 ptr->ibcast(root, buf, count, type, comm, request);
04831
04832 #if AMPIMSGLOG
04833 if(msgLogWrite && record_msglog(pptr->thisIndex)) {
04834 (pptr->pupBytes) = getDDT()->getSize(type) * count;
04835 (*(pptr->toPUPer))|(pptr->pupBytes);
04836 PUParray(*(pptr->toPUPer), (char *)buf, (pptr->pupBytes));
04837 }
04838 #endif
04839
04840 return MPI_SUCCESS;
04841 }
04842
04843
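// Reduction client for blocking collectives: deliver the result to the blocking request and
// resume the suspended thread.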
04844 void ampi::rednResult(CkReductionMsg *msg) noexcept
04845 {
04846 MSG_ORDER_DEBUG(CkPrintf("[%d] rednResult called on comm %d\n", thisIndex, myComm.getComm()));
04847
04848 #if CMK_ERROR_CHECKING
04849 if (parent->blockingReq == NULL) {
04850 CkAbort("AMPI> recv'ed a blocking reduction unexpectedly!\n");
04851 }
04852 #endif
04853
04854 #if CMK_BIGSIM_CHARM
04855 TRACE_BG_ADD_TAG("AMPI_generic");
04856 msg->event = NULL;
04857 _TRACE_BG_TLINE_END(&msg->event);
04858 msg->eventPe = CkMyPe();
04859 #endif
04860
04861 parent->blockingReq->receive(this, msg);
04862
04863 CkAssert(parent->resumeOnColl);
04864 thread->resume();
04865
04866 }
04867
04868
04869 void ampi::irednResult(CkReductionMsg *msg) noexcept
04870 {
04871 MSG_ORDER_DEBUG(CkPrintf("[%d] irednResult called on comm %d\n", thisIndex, myComm.getComm()));
04872
04873 AmpiRequest* req = postedReqs.get(MPI_REDN_TAG, AMPI_COLL_SOURCE);
04874 if (req == NULL)
04875 CkAbort("AMPI> recv'ed a non-blocking reduction unexpectedly!\n");
04876
04877 #if CMK_BIGSIM_CHARM
04878 TRACE_BG_ADD_TAG("AMPI_generic");
04879 msg->event = NULL;
04880 _TRACE_BG_TLINE_END(&msg->event);
04881 msg->eventPe = CkMyPe();
04882 #endif
04883 #if AMPIMSGLOG
04884 if(msgLogRead){
04885 PUParray(*(getAmpiParent()->fromPUPer), (char *)req, sizeof(int));
04886 return;
04887 }
04888 #endif
04889
04890 handleBlockedReq(req);
04891 req->receive(this, msg);
04892
04893 #if AMPIMSGLOG
04894 if(msgLogWrite && record_msglog(getAmpiParent()->thisIndex)){
04895 PUParray(*(getAmpiParent()->toPUPer), (char *)req, sizeof(int));
04896 }
04897 #endif
04898
04899 if (parent->resumeOnColl && parent->numBlockedReqs==0) {
04900 thread->resume();
04901 }
04902
04903 }
04904
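// Build this rank's reduction contribution: predefined (type, op) pairs map to a Charm++
// built-in reducer, commutative ops on contiguous data use the custom AmpiReducer, and
// everything else contributes a (rank, packed data) tuple via concatenation.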
04905 static CkReductionMsg *makeRednMsg(CkDDT_DataType *ddt, const void *inbuf, int count, int type,
04906 int rank, int size, MPI_Op op) noexcept
04907 {
04908 CkReductionMsg *msg;
04909 ampiParent *parent = getAmpiParent();
04910 int szdata = ddt->getSize(count);
04911 CkReduction::reducerType reducer = getBuiltinReducerType(type, op);
04912
04913 if (reducer != CkReduction::invalid) {
04914
04915 AMPI_DEBUG("[%d] In makeRednMsg, using Charm++ built-in reducer type for a predefined op\n", parent->thisIndex);
04916 msg = CkReductionMsg::buildNew(szdata, NULL, reducer);
04917 ddt->serialize((char*)inbuf, (char*)msg->getData(), count, msg->getLength(), PACK);
04918 }
04919 else if (parent->opIsCommutative(op) && ddt->isContig()) {
04920
04921
04922 AMPI_DEBUG("[%d] In makeRednMsg, using custom AmpiReducer type for a commutative op\n", parent->thisIndex);
04923 AmpiOpHeader newhdr = parent->op2AmpiOpHeader(op, type, count);
04924 int szhdr = sizeof(AmpiOpHeader);
04925 msg = CkReductionMsg::buildNew(szdata+szhdr, NULL, AmpiReducer);
04926 memcpy(msg->getData(), &newhdr, szhdr);
04927 ddt->serialize((char*)inbuf, (char*)msg->getData()+szhdr, count, msg->getLength()-szhdr, PACK);
04928 }
04929 else {
04930
04931
04932 AMPI_DEBUG("[%d] In makeRednMsg, using a non-commutative user-defined operation\n", parent->thisIndex);
04933 const int tupleSize = 2;
04934 CkReduction::tupleElement tupleRedn[tupleSize];
04935
04936
04937 unsigned short int ushortRank;
04938 if (size < std::numeric_limits<unsigned short int>::max()) {
04939 ushortRank = static_cast<unsigned short int>(rank);
04940 tupleRedn[0] = CkReduction::tupleElement(sizeof(unsigned short int), &ushortRank, CkReduction::concat);
04941 } else {
04942 tupleRedn[0] = CkReduction::tupleElement(sizeof(int), &rank, CkReduction::concat);
04943 }
04944
04945 vector<char> sbuf;
04946 if (!ddt->isContig()) {
04947 sbuf.resize(szdata);
04948 ddt->serialize((char*)inbuf, sbuf.data(), count, szdata, PACK);
04949 tupleRedn[1] = CkReduction::tupleElement(szdata, sbuf.data(), CkReduction::concat);
04950 }
04951 else {
04952 tupleRedn[1] = CkReduction::tupleElement(szdata, (void*)inbuf, CkReduction::concat);
04953 }
04954 msg = CkReductionMsg::buildFromTuple(tupleRedn, tupleSize);
04955 }
04956 return msg;
04957 }
04958
04959
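// Local copy between possibly different or non-contiguous datatypes; used by collectives
// when the communicator has a single rank.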
04960 static int copyDatatype(MPI_Datatype sendtype, int sendcount, MPI_Datatype recvtype,
04961 int recvcount, const void *inbuf, void *outbuf) noexcept
04962 {
04963 if (inbuf == outbuf) return MPI_SUCCESS;
04964
04965 CkDDT_DataType *sddt = getDDT()->getType(sendtype);
04966 CkDDT_DataType *rddt = getDDT()->getType(recvtype);
04967
04968 if (sddt->isContig() && rddt->isContig()) {
04969 int slen = sddt->getSize(sendcount);
04970 memcpy(outbuf, inbuf, slen);
04971 } else if (sddt->isContig()) {
04972 rddt->serialize((char*)outbuf, (char*)inbuf, recvcount, sddt->getSize(sendcount), UNPACK);
04973 } else if (rddt->isContig()) {
04974 sddt->serialize((char*)inbuf, (char*)outbuf, sendcount, rddt->getSize(recvcount), PACK);
04975 } else {
04976
04977
04978 int slen = sddt->getSize(sendcount);
04979 vector<char> serialized(slen);
04980 sddt->serialize((char*)inbuf, serialized.data(), sendcount, rddt->getSize(recvcount), PACK);
04981 rddt->serialize((char*)outbuf, serialized.data(), recvcount, sddt->getSize(sendcount), UNPACK);
04982 }
04983
04984 return MPI_SUCCESS;
04985 }
04986
04987 static void handle_MPI_IN_PLACE(void* &inbuf, void* &outbuf) noexcept
04988 {
04989 if (inbuf == MPI_IN_PLACE) inbuf = outbuf;
04990 if (outbuf == MPI_IN_PLACE) outbuf = inbuf;
04991 CkAssert(inbuf != MPI_IN_PLACE && outbuf != MPI_IN_PLACE);
04992 }
04993
04994 static void handle_MPI_IN_PLACE_gather(void* &sendbuf, void* recvbuf, int &sendcount,
04995 MPI_Datatype &sendtype, int recvdispl,
04996 int recvcount, MPI_Datatype recvtype) noexcept
04997 {
04998 if (sendbuf == MPI_IN_PLACE) {
04999
05000
05001
05002 sendbuf = (char*)recvbuf + (recvdispl * getDDT()->getExtent(recvtype));
05003 sendcount = recvcount;
05004 sendtype = recvtype;
05005 }
05006 CkAssert(recvbuf != MPI_IN_PLACE);
05007 }
05008
05009 static void handle_MPI_IN_PLACE_gatherv(void* &sendbuf, void* recvbuf, int &sendcount,
05010 MPI_Datatype &sendtype, const int recvdispls[],
05011 const int recvcounts[], int rank,
05012 MPI_Datatype recvtype) noexcept
05013 {
05014 if (sendbuf == MPI_IN_PLACE) {
05015
05016
05017
05018 CkAssert(recvbuf != NULL && recvdispls != NULL && recvcounts != NULL);
05019 sendbuf = (char*)recvbuf + (recvdispls[rank] * getDDT()->getExtent(recvtype));
05020 sendcount = recvcounts[rank];
05021 sendtype = recvtype;
05022 }
05023 CkAssert(recvbuf != MPI_IN_PLACE);
05024 }
05025
05026 static void handle_MPI_IN_PLACE_alltoall(void* &sendbuf, void* recvbuf, int &sendcount,
05027 MPI_Datatype &sendtype, int recvcount,
05028 MPI_Datatype recvtype) noexcept
05029 {
05030 if (sendbuf == MPI_IN_PLACE) {
05031 sendbuf = recvbuf;
05032 sendcount = recvcount;
05033 sendtype = recvtype;
05034 }
05035 CkAssert(recvbuf != MPI_IN_PLACE);
05036 }
05037
05038 static void handle_MPI_IN_PLACE_alltoallv(void* &sendbuf, void* recvbuf, int* &sendcounts,
05039 MPI_Datatype &sendtype, int* &sdispls,
05040 const int* recvcounts, MPI_Datatype recvtype,
05041 const int* rdispls) noexcept
05042 {
05043 if (sendbuf == MPI_IN_PLACE) {
05044 sendbuf = recvbuf;
05045 sendcounts = (int*)recvcounts;
05046 sendtype = recvtype;
05047 sdispls = (int*)rdispls;
05048 }
05049 CkAssert(recvbuf != MPI_IN_PLACE);
05050 }
05051
05052 static void handle_MPI_IN_PLACE_alltoallw(void* &sendbuf, void* recvbuf, int* &sendcounts,
05053 MPI_Datatype* &sendtypes, int* &sdispls,
05054 const int* recvcounts, const MPI_Datatype* recvtypes,
05055 const int* rdispls) noexcept
05056 {
05057 if (sendbuf == MPI_IN_PLACE) {
05058 sendbuf = recvbuf;
05059 sendcounts = (int*)recvcounts;
05060 sendtypes = (MPI_Datatype*)recvtypes;
05061 sdispls = (int*)rdispls;
05062 }
05063 CkAssert(recvbuf != MPI_IN_PLACE);
05064 }
05065
05066 #define AMPI_SYNC_REDUCE 0
05067
05068 AMPI_API_IMPL(int, MPI_Reduce, const void *inbuf, void *outbuf, int count, MPI_Datatype type,
05069 MPI_Op op, int root, MPI_Comm comm)
05070 {
05071 AMPI_API("AMPI_Reduce");
05072
05073 handle_MPI_BOTTOM((void*&)inbuf, type, outbuf, type);
05074 handle_MPI_IN_PLACE((void*&)inbuf, outbuf);
05075
05076 #if AMPI_ERROR_CHECKING
05077 if(op == MPI_OP_NULL)
05078 return ampiErrhandler("AMPI_Reduce", MPI_ERR_OP);
05079 int ret = errorCheck("AMPI_Reduce", comm, 1, count, 1, type, 1, 0, 0, root, 1, inbuf, 1,
05080 outbuf, getAmpiInstance(comm)->getRank() == root);
05081 if(ret != MPI_SUCCESS)
05082 return ret;
05083 #endif
05084
05085 ampi *ptr = getAmpiInstance(comm);
05086 int rank = ptr->getRank();
05087 int size = ptr->getSize();
05088
05089 if(ptr->isInter())
05090 CkAbort("AMPI does not implement MPI_Reduce for Inter-communicators!");
05091 if(size == 1)
05092 return copyDatatype(type,count,type,count,inbuf,outbuf);
05093
05094 #if AMPIMSGLOG
05095 ampiParent* pptr = getAmpiParent();
05096 if(msgLogRead){
05097 (*(pptr->fromPUPer))|(pptr->pupBytes);
05098 PUParray(*(pptr->fromPUPer), (char *)outbuf, (pptr->pupBytes));
05099 return MPI_SUCCESS;
05100 }
05101 #endif
05102
05103 if (rank == root) {
05104 ptr->setBlockingReq(new RednReq(outbuf, count, type, comm, op, getDDT()));
05105 }
05106
05107 int rootIdx=ptr->comm2CommStruct(comm).getIndexForRank(root);
05108 CkReductionMsg *msg=makeRednMsg(ptr->getDDT()->getType(type),inbuf,count,type,rank,size,op);
05109 CkCallback reduceCB(CkIndex_ampi::rednResult(0),CkArrayIndex1D(rootIdx),ptr->getProxy());
05110 msg->setCallback(reduceCB);
05111 MSG_ORDER_DEBUG(CkPrintf("[%d] AMPI_Reduce called on comm %d root %d \n",ptr->thisIndex,comm,rootIdx));
05112 ptr->contribute(msg);
05113
05114 if (rank == root) {
05115 ptr = ptr->blockOnColl();
05116
05117 #if AMPI_SYNC_REDUCE
05118 AmpiMsg *msg = new (0, 0) AmpiMsg(0, 0, MPI_REDN_TAG, -1, rootIdx, 0);
05119 CProxy_ampi pa(ptr->getProxy());
05120 pa.generic(msg);
05121 #endif
05122 }
05123 #if AMPI_SYNC_REDUCE
05124 ptr->recv(MPI_REDN_TAG, AMPI_COLL_SOURCE, NULL, 0, type, comm);
05125 #endif
05126
05127 #if AMPIMSGLOG
05128 if(msgLogWrite && record_msglog(pptr->thisIndex)){
05129 (pptr->pupBytes) = getDDT()->getSize(type) * count;
05130 (*(pptr->toPUPer))|(pptr->pupBytes);
05131 PUParray(*(pptr->toPUPer), (char *)outbuf, (pptr->pupBytes));
05132 }
05133 #endif
05134
05135 return MPI_SUCCESS;
05136 }
05137
05138 AMPI_API_IMPL(int, MPI_Allreduce, const void *inbuf, void *outbuf, int count, MPI_Datatype type,
05139 MPI_Op op, MPI_Comm comm)
05140 {
05141 AMPI_API("AMPI_Allreduce");
05142
05143 handle_MPI_BOTTOM((void*&)inbuf, type, outbuf, type);
05144 handle_MPI_IN_PLACE((void*&)inbuf, outbuf);
05145
05146 #if AMPI_ERROR_CHECKING
05147 if(op == MPI_OP_NULL)
05148 return ampiErrhandler("AMPI_Allreduce", MPI_ERR_OP);
05149 int ret = errorCheck("AMPI_Allreduce", comm, 1, count, 1, type, 1, 0, 0, 0, 0, inbuf, 1, outbuf, 1);
05150 if(ret != MPI_SUCCESS)
05151 return ret;
05152 #endif
05153
05154 ampi *ptr = getAmpiInstance(comm);
05155 int rank = ptr->getRank();
05156 int size = ptr->getSize();
05157
05158 if(ptr->isInter())
05159 CkAbort("AMPI does not implement MPI_Allreduce for Inter-communicators!");
05160 if(size == 1)
05161 return copyDatatype(type,count,type,count,inbuf,outbuf);
05162
05163 #if CMK_BIGSIM_CHARM
05164 TRACE_BG_AMPI_LOG(MPI_ALLREDUCE, getAmpiInstance(comm)->getDDT()->getType(type)->getSize(count));
05165 #endif
05166
05167 #if AMPIMSGLOG
05168 ampiParent* pptr = getAmpiParent();
05169 if(msgLogRead){
05170 (*(pptr->fromPUPer))|(pptr->pupBytes);
05171 PUParray(*(pptr->fromPUPer), (char *)outbuf, (pptr->pupBytes));
05172 return MPI_SUCCESS;
05173 }
05174 #endif
05175
05176 ptr->setBlockingReq(new RednReq(outbuf, count, type, comm, op, getDDT()));
05177
05178 CkReductionMsg *msg=makeRednMsg(ptr->getDDT()->getType(type), inbuf, count, type, rank, size, op);
05179 CkCallback allreduceCB(CkIndex_ampi::rednResult(0),ptr->getProxy());
05180 msg->setCallback(allreduceCB);
05181 ptr->contribute(msg);
05182
05183 ptr->blockOnColl();
05184
05185 #if AMPIMSGLOG
05186 if(msgLogWrite && record_msglog(pptr->thisIndex)){
05187 (pptr->pupBytes) = getDDT()->getSize(type) * count;
05188 (*(pptr->toPUPer))|(pptr->pupBytes);
05189 PUParray(*(pptr->toPUPer), (char *)outbuf, (pptr->pupBytes));
05190 }
05191 #endif
05192
05193 return MPI_SUCCESS;
05194 }
05195
05196 AMPI_API_IMPL(int, MPI_Iallreduce, const void *inbuf, void *outbuf, int count, MPI_Datatype type,
05197 MPI_Op op, MPI_Comm comm, MPI_Request* request)
05198 {
05199 AMPI_API("AMPI_Iallreduce");
05200
05201 handle_MPI_BOTTOM((void*&)inbuf, type, outbuf, type);
05202 handle_MPI_IN_PLACE((void*&)inbuf, outbuf);
05203
05204 #if AMPI_ERROR_CHECKING
05205 if(op == MPI_OP_NULL)
05206 return ampiErrhandler("AMPI_Iallreduce", MPI_ERR_OP);
05207 int ret = errorCheck("AMPI_Iallreduce", comm, 1, count, 1, type, 1, 0, 0, 0, 0, inbuf, 1, outbuf, 1);
05208 if(ret != MPI_SUCCESS){
05209 *request = MPI_REQUEST_NULL;
05210 return ret;
05211 }
05212 #endif
05213
05214 ampi *ptr = getAmpiInstance(comm);
05215 int rank = ptr->getRank();
05216 int size = ptr->getSize();
05217
05218 if(ptr->isInter())
05219 CkAbort("AMPI does not implement MPI_Iallreduce for Inter-communicators!");
05220 if(size == 1){
05221 *request = ptr->postReq(new RednReq(outbuf,count,type,comm,op,getDDT(),AMPI_REQ_COMPLETED));
05222 return copyDatatype(type,count,type,count,inbuf,outbuf);
05223 }
05224
05225 *request = ptr->postReq(new RednReq(outbuf,count,type,comm,op,getDDT()));
05226
05227 CkReductionMsg *msg=makeRednMsg(ptr->getDDT()->getType(type),inbuf,count,type,rank,size,op);
05228 CkCallback allreduceCB(CkIndex_ampi::irednResult(0),ptr->getProxy());
05229 msg->setCallback(allreduceCB);
05230 ptr->contribute(msg);
05231
05232 return MPI_SUCCESS;
05233 }
05234
05235 AMPI_API_IMPL(int, MPI_Reduce_local, const void *inbuf, void *outbuf, int count,
05236 MPI_Datatype type, MPI_Op op)
05237 {
05238 AMPI_API("AMPI_Reduce_local");
05239
05240 handle_MPI_BOTTOM((void*&)inbuf, type, outbuf, type);
05241
05242 #if AMPI_ERROR_CHECKING
05243 if(op == MPI_OP_NULL)
05244 return ampiErrhandler("AMPI_Reduce_local", MPI_ERR_OP);
05245 if(inbuf == MPI_IN_PLACE || outbuf == MPI_IN_PLACE)
05246 CkAbort("MPI_Reduce_local does not accept MPI_IN_PLACE!");
05247 int ret = errorCheck("AMPI_Reduce_local", MPI_COMM_SELF, 1, count, 1, type, 1, 0, 0, 0, 1, inbuf, 1, outbuf, 1);
05248 if(ret != MPI_SUCCESS)
05249 return ret;
05250 #endif
05251
05252 getAmpiParent()->applyOp(type, op, count, inbuf, outbuf);
05253 return MPI_SUCCESS;
05254 }
05255
05256 AMPI_API_IMPL(int, MPI_Reduce_scatter_block, const void* sendbuf, void* recvbuf, int count,
05257 MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
05258 {
05259 AMPI_API("AMPI_Reduce_scatter_block");
05260
05261 handle_MPI_BOTTOM((void*&)sendbuf, datatype, recvbuf, datatype);
05262 handle_MPI_IN_PLACE((void*&)sendbuf, recvbuf);
05263
05264 #if AMPI_ERROR_CHECKING
05265 if(op == MPI_OP_NULL)
05266 return ampiErrhandler("AMPI_Reduce_scatter_block", MPI_ERR_OP);
05267 int ret = errorCheck("AMPI_Reduce_scatter_block", comm, 1, 0, 0, datatype, 1, 0, 0, 0, 0, sendbuf, 1, recvbuf, 1);
05268 if(ret != MPI_SUCCESS)
05269 return ret;
05270 #endif
05271
05272 ampi *ptr = getAmpiInstance(comm);
05273 int size = ptr->getSize();
05274
05275 if(getAmpiParent()->isInter(comm))
05276 CkAbort("AMPI does not implement MPI_Reduce_scatter_block for Inter-communicators!");
05277 if(size == 1)
05278 return copyDatatype(datatype, count, datatype, count, sendbuf, recvbuf);
05279
05280 vector<char> tmpbuf(ptr->getDDT()->getType(datatype)->getSize(count)*size);
05281
05282 MPI_Reduce(sendbuf, &tmpbuf[0], count*size, datatype, op, AMPI_COLL_SOURCE, comm);
05283 MPI_Scatter(&tmpbuf[0], count, datatype, recvbuf, count, datatype, AMPI_COLL_SOURCE, comm);
05284
05285 return MPI_SUCCESS;
05286 }
05287
05288 AMPI_API_IMPL(int, MPI_Ireduce_scatter_block, const void* sendbuf, void* recvbuf, int count,
05289 MPI_Datatype datatype, MPI_Op op, MPI_Comm comm,
05290 MPI_Request* request)
05291 {
05292 AMPI_API("AMPI_Ireduce_scatter_block");
05293
05294 int ret = MPI_Reduce_scatter_block(sendbuf, recvbuf, count, datatype, op, comm);
05295 *request = MPI_REQUEST_NULL;
05296 return ret;
05297 }
05298
05299 AMPI_API_IMPL(int, MPI_Reduce_scatter, const void* sendbuf, void* recvbuf, const int *recvcounts,
05300 MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
05301 {
05302 AMPI_API("AMPI_Reduce_scatter");
05303
05304 handle_MPI_BOTTOM((void*&)sendbuf, datatype, recvbuf, datatype);
05305 handle_MPI_IN_PLACE((void*&)sendbuf, recvbuf);
05306
05307 #if AMPI_ERROR_CHECKING
05308 if(op == MPI_OP_NULL)
05309 return ampiErrhandler("AMPI_Reduce_scatter", MPI_ERR_OP);
05310 int ret = errorCheck("AMPI_Reduce_scatter", comm, 1, 0, 0, datatype, 1, 0, 0, 0, 0, sendbuf, 1, recvbuf, 1);
05311 if(ret != MPI_SUCCESS)
05312 return ret;
05313 #endif
05314
05315 ampi *ptr = getAmpiInstance(comm);
05316 int size = ptr->getSize();
05317
05318 if(getAmpiParent()->isInter(comm))
05319 CkAbort("AMPI does not implement MPI_Reduce_scatter for Inter-communicators!");
05320 if(size == 1)
05321 return copyDatatype(datatype,recvcounts[0],datatype,recvcounts[0],sendbuf,recvbuf);
05322
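// Compute each rank's displacement and the total element count, then reduce the
// whole buffer to AMPI_COLL_SOURCE and scatterv each rank's share back out.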
05323 int count=0;
05324 vector<int> displs(size);
05326
05327
05328 for(int i=0;i<size;i++){
05329 displs[i] = count;
05330 count+= recvcounts[i];
05331 }
05332 vector<char> tmpbuf(ptr->getDDT()->getType(datatype)->getSize(count));
05333 MPI_Reduce(sendbuf, tmpbuf.data(), count, datatype, op, AMPI_COLL_SOURCE, comm);
05334 MPI_Scatterv(tmpbuf.data(), recvcounts, displs.data(), datatype,
05335 recvbuf, recvcounts[ptr->getRank()], datatype, AMPI_COLL_SOURCE, comm);
05336 return MPI_SUCCESS;
05337 }
05338
05339 AMPI_API_IMPL(int, MPI_Ireduce_scatter, const void* sendbuf, void* recvbuf, const int *recvcounts,
05340 MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request* request)
05341 {
05342 AMPI_API("AMPI_Ireduce_scatter");
05343
05344 int ret = MPI_Reduce_scatter(sendbuf, recvbuf, recvcounts, datatype, op, comm);
05345 *request = MPI_REQUEST_NULL;
05346 return ret;
05347 }
05348
05349 AMPI_API_IMPL(int, MPI_Scan, const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype,
05350 MPI_Op op, MPI_Comm comm)
05351 {
05352 AMPI_API("AMPI_Scan");
05353
05354 handle_MPI_BOTTOM((void*&)sendbuf, datatype, recvbuf, datatype);
05355 handle_MPI_IN_PLACE((void*&)sendbuf,recvbuf);
05356
05357 #if AMPI_ERROR_CHECKING
05358 if(op == MPI_OP_NULL)
05359 return ampiErrhandler("AMPI_Scan", MPI_ERR_OP);
05360 int ret = errorCheck("AMPI_Scan", comm, 1, count, 1, datatype, 1, 0, 0, 0, 0, sendbuf, 1, recvbuf, 1);
05361 if(ret != MPI_SUCCESS)
05362 return ret;
05363 #endif
05364
05365 ampi *ptr = getAmpiInstance(comm);
05366 int size = ptr->getSize();
05367
05368 if (size == 1 && !getAmpiParent()->isInter(comm))
05369 return copyDatatype(datatype, count, datatype, count, sendbuf, recvbuf);
05370
05371 int blklen = ptr->getDDT()->getType(datatype)->getSize(count);
05372 int rank = ptr->getRank();
05373 int mask = 0x1;
05374 int dst;
05375 vector<char> tmp_buf(blklen);
05376 vector<char> partial_scan(blklen);
05377
05378 memcpy(recvbuf, sendbuf, blklen);
05379 memcpy(partial_scan.data(), sendbuf, blklen);
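// Recursive-doubling inclusive scan: at each step exchange the running partial
// result with partner rank^mask, folding only lower-ranked contributions into recvbuf.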
05380 while(mask < size){
05381 dst = rank^mask;
05382 if(dst < size){
05383 ptr->sendrecv(partial_scan.data(), count, datatype, dst, MPI_SCAN_TAG,
05384 tmp_buf.data(), count, datatype, dst, MPI_SCAN_TAG, comm, MPI_STATUS_IGNORE);
05385 if(rank > dst){
05386 getAmpiParent()->applyOp(datatype, op, count, tmp_buf.data(), partial_scan.data());
05387 getAmpiParent()->applyOp(datatype, op, count, tmp_buf.data(), recvbuf);
05388 }else {
05389 getAmpiParent()->applyOp(datatype, op, count, partial_scan.data(), tmp_buf.data());
05390 memcpy(partial_scan.data(), tmp_buf.data(), blklen);
05391 }
05392 }
05393 mask <<= 1;
05394 }
05395
05396 return MPI_SUCCESS;
05397 }
05398
05399 AMPI_API_IMPL(int, MPI_Iscan, const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype,
05400 MPI_Op op, MPI_Comm comm, MPI_Request* request)
05401 {
05402 AMPI_API("AMPI_Iscan");
05403
05404 int ret = MPI_Scan(sendbuf, recvbuf, count, datatype, op, comm);
05405 *request = MPI_REQUEST_NULL;
05406 return ret;
05407 }
05408
05409 AMPI_API_IMPL(int, MPI_Exscan, const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype,
05410 MPI_Op op, MPI_Comm comm)
05411 {
05412 AMPI_API("AMPI_Exscan");
05413
05414 handle_MPI_BOTTOM((void*&)sendbuf, datatype, recvbuf, datatype);
05415 handle_MPI_IN_PLACE((void*&)sendbuf,recvbuf);
05416
05417 #if AMPI_ERROR_CHECKING
05418 if(op == MPI_OP_NULL)
05419 return ampiErrhandler("AMPI_Exscan", MPI_ERR_OP);
05420 int ret = errorCheck("AMPI_Exscan", comm, 1, count, 1, datatype, 1, 0, 0, 0, 0, sendbuf, 1, recvbuf, 1);
05421 if(ret != MPI_SUCCESS)
05422 return ret;
05423 #endif
05424
05425 ampi *ptr = getAmpiInstance(comm);
05426 int size = ptr->getSize();
05427
05428 if (size == 1 && !getAmpiParent()->isInter(comm))
05429 return MPI_SUCCESS;
05430
05431 int blklen = ptr->getDDT()->getType(datatype)->getSize(count);
05432 int rank = ptr->getRank();
05433 int mask = 0x1;
05434 int dst, flag;
05435 vector<char> tmp_buf(blklen);
05436 vector<char> partial_scan(blklen);
05437
05438 if (rank > 0) memcpy(recvbuf, sendbuf, blklen);
05439 memcpy(partial_scan.data(), sendbuf, blklen);
05440 flag = 0;
05441 mask = 0x1;
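// Recursive-doubling exclusive scan: 'flag' records whether recvbuf has received its first
// lower-ranked contribution; rank 0's recvbuf is left untouched, as MPI leaves it undefined.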
05442 while(mask < size){
05443 dst = rank^mask;
05444 if(dst < size){
05445 ptr->sendrecv(partial_scan.data(), count, datatype, dst, MPI_EXSCAN_TAG,
05446 tmp_buf.data(), count, datatype, dst, MPI_EXSCAN_TAG, comm, MPI_STATUS_IGNORE);
05447 if(rank > dst){
05448 getAmpiParent()->applyOp(datatype, op, count, tmp_buf.data(), partial_scan.data());
05449 if(rank != 0){
05450 if(flag == 0){
05451 memcpy(recvbuf, tmp_buf.data(), blklen);
05452 flag = 1;
05453 }
05454 else{
05455 getAmpiParent()->applyOp(datatype, op, count, tmp_buf.data(), recvbuf);
05456 }
05457 }
05458 }
05459 else{
05460 getAmpiParent()->applyOp(datatype, op, count, partial_scan.data(), tmp_buf.data());
05461 memcpy(partial_scan.data(), tmp_buf.data(), blklen);
05462 }
05463 mask <<= 1;
05464 }
05465 }
05466
05467 return MPI_SUCCESS;
05468 }
05469
05470 AMPI_API_IMPL(int, MPI_Iexscan, const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype,
05471 MPI_Op op, MPI_Comm comm, MPI_Request* request)
05472 {
05473 AMPI_API("AMPI_Iexscan");
05474
05475 int ret = MPI_Exscan(sendbuf, recvbuf, count, datatype, op, comm);
05476 *request = MPI_REQUEST_NULL;
05477 return ret;
05478 }
05479
05480 AMPI_API_IMPL(int, MPI_Op_create, MPI_User_function *function, int commute, MPI_Op *op)
05481 {
05482 AMPI_API("AMPI_Op_create");
05483 *op = getAmpiParent()->createOp(function, commute);
05484 return MPI_SUCCESS;
05485 }
05486
05487 AMPI_API_IMPL(int, MPI_Op_free, MPI_Op *op)
05488 {
05489 AMPI_API("AMPI_Op_free");
05490 getAmpiParent()->freeOp(*op);
05491 *op = MPI_OP_NULL;
05492 return MPI_SUCCESS;
05493 }
05494
05495 AMPI_API_IMPL(int, MPI_Op_commutative, MPI_Op op, int *commute)
05496 {
05497 AMPI_API("AMPI_Op_commutative");
05498 if (op == MPI_OP_NULL)
05499 return ampiErrhandler("AMPI_Op_commutative", MPI_ERR_OP);
05500 *commute = (int)getAmpiParent()->opIsCommutative(op);
05501 return MPI_SUCCESS;
05502 }
05503
05504 AMPI_API_IMPL(double, MPI_Wtime, void)
05505 {
05506
05507
05508 #if AMPIMSGLOG
05509 double ret=TCHARM_Wall_timer();
05510 ampiParent* pptr = getAmpiParent();
05511 if(msgLogRead){
05512 (*(pptr->fromPUPer))|ret;
05513 return ret;
05514 }
05515
05516 if(msgLogWrite && record_msglog(pptr->thisIndex)){
05517 (*(pptr->toPUPer))|ret;
05518 }
05519 #endif
05520
05521 #if CMK_BIGSIM_CHARM
05522 return BgGetTime();
05523 #else
05524 return TCHARM_Wall_timer();
05525 #endif
05526 }
05527
05528 AMPI_API_IMPL(double, MPI_Wtick, void)
05529 {
05530
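// Resolution of MPI_Wtime in seconds (the wall-clock timer is assumed to have microsecond granularity).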
05531 return 1e-6;
05532 }
05533
05534 AMPI_API_IMPL(int, MPI_Start, MPI_Request *request)
05535 {
05536 AMPI_API("AMPI_Start");
05537 checkRequest(*request);
05538 AmpiRequestList& reqs = getReqs();
05539 #if AMPI_ERROR_CHECKING
05540 if (!reqs[*request]->isPersistent())
05541 return ampiErrhandler("AMPI_Start", MPI_ERR_REQUEST);
05542 #endif
05543 reqs[*request]->start(*request);
05544 return MPI_SUCCESS;
05545 }
05546
05547 AMPI_API_IMPL(int, MPI_Startall, int count, MPI_Request *requests)
05548 {
05549 AMPI_API("AMPI_Startall");
05550 checkRequests(count,requests);
05551 AmpiRequestList& reqs = getReqs();
05552 for(int i=0;i<count;i++){
05553 #if AMPI_ERROR_CHECKING
05554 if (!reqs[requests[i]]->isPersistent())
05555 return ampiErrhandler("AMPI_Startall", MPI_ERR_REQUEST);
05556 #endif
05557 reqs[requests[i]]->start(requests[i]);
05558 }
05559 return MPI_SUCCESS;
05560 }
05561
05562 void IReq::start(MPI_Request reqIdx) noexcept {
05563 CkAssert(persistent);
05564 complete = false;
05565 ampi* ptr = getAmpiInstance(comm);
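// Restarting a persistent receive: consume a matching message from the unexpected
// queue if one has already arrived, otherwise (re)post the request.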
05566 AmpiMsg* msg = ptr->unexpectedMsgs.get(tag, src);
05567 if (msg) {
05568 receive(ptr, msg);
05569 }
05570 else {
05571 ptr->postedReqs.put(this);
05572 }
05573 }
05574
05575 void SendReq::start(MPI_Request reqIdx) noexcept {
05576 CkAssert(persistent);
05577 complete = false;
05578 ampi* ptr = getAmpiInstance(comm);
05579 ptr->send(tag, ptr->getRank(), buf, count, type, src, comm); // here 'src' is passed as the destination rank
05580 complete = true;
05581 }
05582
05583 void SsendReq::start(MPI_Request reqIdx) noexcept {
05584 CkAssert(persistent);
05585 complete = false;
05586 ampi* ptr = getAmpiInstance(comm);
05587 ptr->send(tag, ptr->getRank(), buf, count, type, src, comm, reqIdx+2, I_SEND);
05588 }
05589
05590 int IReq::wait(MPI_Status *sts) noexcept {
05591
05592 ampiParent *parent = getAmpiParent();
05593
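// Block this user-level thread until the receive completes; the thread may migrate
// while blocked, so the parent object is re-fetched after each wakeup.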
05594 while (!complete) {
05595
05596 parent->resumeOnRecv = true;
05597 parent->numBlockedReqs = 1;
05598 setBlocked(true);
05599 parent->block();
05600 setBlocked(false);
05601 parent = getAmpiParent();
05602
05603 if (cancelled) {
05604 if (sts != MPI_STATUS_IGNORE) sts->MPI_CANCEL = 1;
05605 complete = true;
05606 parent->resumeOnRecv = false;
05607 return 0;
05608 }
05609
05610 #if CMK_BIGSIM_CHARM
05611
05612
05613
05614 if(_BgInOutOfCoreMode)
05615 return -1;
05616 #endif
05617 }
05618 parent->resumeOnRecv = false;
05619
05620 AMPI_DEBUG("IReq::wait has resumed\n");
05621
05622 if(sts!=MPI_STATUS_IGNORE) {
05623 AMPI_DEBUG("Setting sts->MPI_TAG to this->tag=%d in IReq::wait this=%p\n", (int)this->tag, this);
05624 sts->MPI_TAG = tag;
05625 sts->MPI_SOURCE = src;
05626 sts->MPI_COMM = comm;
05627 sts->MPI_LENGTH = length;
05628 sts->MPI_CANCEL = 0;
05629 }
05630
05631 return 0;
05632 }
05633
05634 int RednReq::wait(MPI_Status *sts) noexcept {
05635
05636 ampiParent *parent = getAmpiParent();
05637
05638 while (!complete) {
05639 parent->resumeOnColl = true;
05640 parent->numBlockedReqs = 1;
05641 setBlocked(true);
05642 parent->block();
05643 setBlocked(false);
05644 parent = getAmpiParent();
05645
05646 #if CMK_BIGSIM_CHARM
05647
05648
05649
05650 if (_BgInOutOfCoreMode)
05651 return -1;
05652 #endif
05653 }
05654 parent->resumeOnColl = false;
05655
05656 AMPI_DEBUG("RednReq::wait has resumed\n");
05657
05658 if (sts != MPI_STATUS_IGNORE) {
05659 sts->MPI_TAG = tag;
05660 sts->MPI_SOURCE = src;
05661 sts->MPI_COMM = comm;
05662 sts->MPI_CANCEL = 0;
05663 }
05664 return 0;
05665 }
05666
05667 int GatherReq::wait(MPI_Status *sts) noexcept {
05668
05669 ampiParent *parent = getAmpiParent();
05670
05671 while (!complete) {
05672 parent->resumeOnColl = true;
05673 parent->numBlockedReqs = 1;
05674 setBlocked(true);
05675 parent->block();
05676 setBlocked(false);
05677 parent = getAmpiParent();
05678
05679 #if CMK_BIGSIM_CHARM
05680
05681
05682
05683 if (_BgInOutOfCoreMode)
05684 return -1;
05685 #endif
05686 }
05687 parent->resumeOnColl = false;
05688
05689 AMPI_DEBUG("GatherReq::wait has resumed\n");
05690
05691 if (sts != MPI_STATUS_IGNORE) {
05692 sts->MPI_TAG = tag;
05693 sts->MPI_SOURCE = src;
05694 sts->MPI_COMM = comm;
05695 sts->MPI_CANCEL = 0;
05696 }
05697 return 0;
05698 }
05699
05700 int GathervReq::wait(MPI_Status *sts) noexcept {
05701
05702 ampiParent *parent = getAmpiParent();
05703
05704 while (!complete) {
05705 parent->resumeOnColl = true;
05706 parent->numBlockedReqs = 1;
05707 setBlocked(true);
05708 parent->block();
05709 setBlocked(false);
05710 parent = getAmpiParent();
05711
05712 #if CMK_BIGSIM_CHARM
05713
05714
05715
05716 if (_BgInOutOfCoreMode)
05717 return -1;
05718 #endif
05719 }
05720 parent->resumeOnColl = false;
05721
05722 AMPI_DEBUG("GathervReq::wait has resumed\n");
05723
05724 if (sts != MPI_STATUS_IGNORE) {
05725 sts->MPI_TAG = tag;
05726 sts->MPI_SOURCE = src;
05727 sts->MPI_COMM = comm;
05728 sts->MPI_CANCEL = 0;
05729 }
05730 return 0;
05731 }
05732
05733 int SendReq::wait(MPI_Status *sts) noexcept {
05734 ampiParent *parent = getAmpiParent();
05735 while (!complete) {
05736 parent->resumeOnRecv = true;
05737 parent->numBlockedReqs = 1;
05738 setBlocked(true);
05739 parent->block();
05740 setBlocked(false);
05741
05742 parent = getAmpiParent();
05743 }
05744 parent->resumeOnRecv = false;
05745 AMPI_DEBUG("SendReq::wait has resumed\n");
05746 if (sts != MPI_STATUS_IGNORE) {
05747 sts->MPI_COMM = comm;
05748 sts->MPI_CANCEL = 0;
05749 }
05750 return 0;
05751 }
05752
05753 int SsendReq::wait(MPI_Status *sts) noexcept {
05754 ampiParent *parent = getAmpiParent();
05755 while (!complete) {
05756
05757 parent = parent->blockOnRecv();
05758 }
05759 if (sts != MPI_STATUS_IGNORE) {
05760 sts->MPI_COMM = comm;
05761 sts->MPI_CANCEL = 0;
05762 }
05763 return 0;
05764 }
05765
05766 int ATAReq::wait(MPI_Status *sts) noexcept {
05767 MPI_Waitall(reqs.size(), reqs.data(), MPI_STATUSES_IGNORE);
05768 reqs.clear();
05769 complete = true;
05770 return 0;
05771 }
05772
05773 int GReq::wait(MPI_Status *sts) noexcept {
05774 MPI_Status tmpStatus;
05775 if (pollFn)
05776 (*pollFn)(extraState, (sts == MPI_STATUS_IGNORE || sts == MPI_STATUSES_IGNORE) ? &tmpStatus : sts);
05777 (*queryFn)(extraState, (sts == MPI_STATUS_IGNORE || sts == MPI_STATUSES_IGNORE) ? &tmpStatus : sts);
05778 complete = true;
05779 return 0;
05780 }
05781
05782 AMPI_API_IMPL(int, MPI_Wait, MPI_Request *request, MPI_Status *sts)
05783 {
05784 AMPI_API("AMPI_Wait");
05785
05786 if(*request == MPI_REQUEST_NULL){
05787 clearStatus(sts);
05788 return MPI_SUCCESS;
05789 }
05790 checkRequest(*request);
05791 ampiParent* pptr = getAmpiParent();
05792 AmpiRequestList& reqs = pptr->getReqs();
05793
05794 #if AMPIMSGLOG
05795 if(msgLogRead){
05796 (*(pptr->fromPUPer))|(pptr->pupBytes);
05797 PUParray(*(pptr->fromPUPer), (char *)(reqs[*request]->buf), (pptr->pupBytes));
05798 PUParray(*(pptr->fromPUPer), (char *)sts, sizeof(MPI_Status));
05799 return MPI_SUCCESS;
05800 }
05801 #endif
05802
05803 #if CMK_BIGSIM_CHARM
05804 void *curLog;
05805 _TRACE_BG_TLINE_END(&curLog);
05806 #endif
05807
05808 AMPI_DEBUG("AMPI_Wait request=%d reqs[*request]=%p reqs[*request]->tag=%d\n",
05809 *request, reqs[*request], (int)(reqs[*request]->tag));
05810 AMPI_DEBUG("MPI_Wait: request=%d, reqs.size=%zu, &reqs=%p\n",
05811 *request, (size_t)reqs.size(), (void*)&reqs);
05812 CkAssert(pptr->numBlockedReqs == 0);
05813 int waitResult = -1;
05814 do{
05815 AmpiRequest& waitReq = *reqs[*request];
05816 waitResult = waitReq.wait(sts);
05817 #if CMK_BIGSIM_CHARM
05818 if(_BgInOutOfCoreMode){
05819 reqs = getReqs();
05820 }
05821 #endif
05822 }while(waitResult==-1);
05823
05824 CkAssert(pptr->numBlockedReqs == 0);
05825 AMPI_DEBUG("AMPI_Wait after calling wait, request=%d reqs[*request]=%p reqs[*request]->tag=%d\n",
05826 *request, reqs[*request], (int)(reqs[*request]->tag));
05827
05828 #if AMPIMSGLOG
05829 if(msgLogWrite && record_msglog(pptr->thisIndex)){
05830 (pptr->pupBytes) = getDDT()->getSize(reqs[*request]->type) * (reqs[*request]->count);
05831 (*(pptr->toPUPer))|(pptr->pupBytes);
05832 PUParray(*(pptr->toPUPer), (char *)(reqs[*request]->buf), (pptr->pupBytes));
05833 PUParray(*(pptr->toPUPer), (char *)sts, sizeof(MPI_Status));
05834 }
05835 #endif
05836
05837 #if CMK_BIGSIM_CHARM
05838 TRACE_BG_AMPI_WAIT(&reqs);
05839 #endif
05840
05841 reqs.freeNonPersReq(*request);
05842
05843 AMPI_DEBUG("End of AMPI_Wait\n");
05844
05845 return MPI_SUCCESS;
05846 }
05847
05848 AMPI_API_IMPL(int, MPI_Waitall, int count, MPI_Request request[], MPI_Status sts[])
05849 {
05850 AMPI_API("AMPI_Waitall");
05851
05852 checkRequests(count, request);
05853 if (count == 0) return MPI_SUCCESS;
05854
05855 ampiParent* pptr = getAmpiParent();
05856 AmpiRequestList& reqs = pptr->getReqs();
05857 CkAssert(pptr->numBlockedReqs == 0);
05858
05859 #if AMPIMSGLOG
05860 if(msgLogRead){
05861 for(int i=0;i<count;i++){
05862 if(request[i] == MPI_REQUEST_NULL){
05863 clearStatus(sts, i);
05864 continue;
05865 }
05866 AmpiRequest *waitReq = reqs[request[i]];
05867 (*(pptr->fromPUPer))|(pptr->pupBytes);
05868 PUParray(*(pptr->fromPUPer), (char *)(waitReq->buf), pptr->pupBytes);
05869 PUParray(*(pptr->fromPUPer), (char *)(&sts[i]), sizeof(MPI_Status));
05870 }
05871 return MPI_SUCCESS;
05872 }
05873 #endif
05874 #if CMK_BIGSIM_CHARM
05875 void *curLog;
05876 _TRACE_BG_TLINE_END(&curLog);
05877 #endif
05878
05879
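// First pass: finish and free any requests that have already completed;
// mark the rest as blocked and count them.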
05880 for (int i=0; i<count; i++) {
05881 if (request[i] == MPI_REQUEST_NULL) {
05882 clearStatus(sts, i);
05883 continue;
05884 }
05885 AmpiRequest& req = *reqs[request[i]];
05886 if (req.test()) {
05887 req.wait((sts == MPI_STATUSES_IGNORE) ? MPI_STATUS_IGNORE : &sts[i]);
05888 req.setBlocked(false);
05889 #if AMPIMSGLOG
05890 if(msgLogWrite && record_msglog(pptr->thisIndex)){
05891 (pptr->pupBytes) = getDDT()->getSize(req.type) * req.count;
05892 (*(pptr->toPUPer))|(pptr->pupBytes);
05893 PUParray(*(pptr->toPUPer), (char *)(req.buf), pptr->pupBytes);
05894 PUParray(*(pptr->toPUPer), (char *)(&sts[i]), sizeof(MPI_Status));
05895 }
05896 #endif
05897 reqs.freeNonPersReq(request[i]);
05898 }
05899 else {
05900 req.setBlocked(true);
05901 pptr->numBlockedReqs++;
05902 }
05903 }
05904
05905
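// If any requests are still pending, block until all of them have completed,
// then finish and free them.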
05906 if (pptr->numBlockedReqs > 0) {
05907 getAmpiParent()->blockOnRecv();
05908 pptr = getAmpiParent();
05909 reqs = pptr->getReqs();
05910
05911 for (int i=0; i<count; i++) {
05912 if (request[i] == MPI_REQUEST_NULL) {
05913 continue;
05914 }
05915 AmpiRequest& req = *reqs[request[i]];
05916 #if CMK_ERROR_CHECKING
05917 if (!req.test())
05918 CkAbort("In AMPI_Waitall, all requests should have completed by now!");
05919 #endif
05920 req.wait((sts == MPI_STATUSES_IGNORE) ? MPI_STATUS_IGNORE : &sts[i]);
05921 req.setBlocked(false);
05922 #if AMPIMSGLOG
05923 if(msgLogWrite && record_msglog(pptr->thisIndex)){
05924 (pptr->pupBytes) = getDDT()->getSize(req.type) * req.count;
05925 (*(pptr->toPUPer))|(pptr->pupBytes);
05926 PUParray(*(pptr->toPUPer), (char *)(req.buf), pptr->pupBytes);
05927 PUParray(*(pptr->toPUPer), (char *)(&sts[i]), sizeof(MPI_Status));
05928 }
05929 #endif
05930 reqs.freeNonPersReq(request[i]);
05931 }
05932 }
05933
05934 CkAssert(getAmpiParent()->numBlockedReqs == 0);
05935
05936 #if CMK_BIGSIM_CHARM
05937 TRACE_BG_AMPI_WAITALL(&reqs);
05938 #endif
05939
05940 return MPI_SUCCESS;
05941 }
05942
05943 AMPI_API_IMPL(int, MPI_Waitany, int count, MPI_Request *request, int *idx, MPI_Status *sts)
05944 {
05945 AMPI_API("AMPI_Waitany");
05946
05947 checkRequests(count, request);
05948 if (count == 0) {
05949 *idx = MPI_UNDEFINED;
05950 return MPI_SUCCESS;
05951 }
05952
05953 ampiParent* pptr = getAmpiParent();
05954 CkAssert(pptr->numBlockedReqs == 0);
05955 AmpiRequestList& reqs = pptr->getReqs();
05956 int nullReqs = 0;
05957
05958
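// First pass: if some request has already completed, finish it and return
// immediately; otherwise mark each live request as blocked.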
05959 for (int i=0; i<count; i++) {
05960 if (request[i] == MPI_REQUEST_NULL) {
05961 nullReqs++;
05962 continue;
05963 }
05964 AmpiRequest& req = *reqs[request[i]];
05965 if (req.test()) {
05966 req.wait(sts);
05967 reqs.unblockReqs(&request[0], i);
05968 reqs.freeNonPersReq(request[i]);
05969 *idx = i;
05970 CkAssert(pptr->numBlockedReqs == 0);
05971 return MPI_SUCCESS;
05972 }
05973
05974 req.setBlocked(true);
05975 }
05976
05977 if (nullReqs == count) {
05978 clearStatus(sts);
05979 *idx = MPI_UNDEFINED;
05980 CkAssert(pptr->numBlockedReqs == 0);
05981 return MPI_SUCCESS;
05982 }
05983
05984
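// Block until one of the requests completes, then locate it, finish it,
// and unblock the remaining requests.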
05985 pptr->numBlockedReqs = 1;
05986 pptr = pptr->blockOnRecv();
05987 reqs = pptr->getReqs();
05988
05989 for (int i=0; i<count; i++) {
05990 if (request[i] == MPI_REQUEST_NULL) {
05991 continue;
05992 }
05993 AmpiRequest& req = *reqs[request[i]];
05994 if (req.test()) {
05995 req.wait(sts);
05996 reqs.unblockReqs(&request[i], count-i);
05997 reqs.freeNonPersReq(request[i]);
05998 *idx = i;
05999 CkAssert(pptr->numBlockedReqs == 0);
06000 return MPI_SUCCESS;
06001 }
06002
06003 req.setBlocked(false);
06004 }
06005 #if CMK_ERROR_CHECKING
06006 CkAbort("In AMPI_Waitany, a request should have completed by now!");
06007 #endif
06008 return MPI_SUCCESS;
06009 }
06010
06011 AMPI_API_IMPL(int, MPI_Waitsome, int incount, MPI_Request *array_of_requests, int *outcount,
06012 int *array_of_indices, MPI_Status *array_of_statuses)
06013 {
06014 AMPI_API("AMPI_Waitsome");
06015
06016 checkRequests(incount, array_of_requests);
06017 if (incount == 0) {
06018 *outcount = MPI_UNDEFINED;
06019 return MPI_SUCCESS;
06020 }
06021
06022 ampiParent* pptr = getAmpiParent();
06023 CkAssert(pptr->numBlockedReqs == 0);
06024 AmpiRequestList& reqs = pptr->getReqs();
06025 MPI_Status sts;
06026 int nullReqs = 0;
06027 *outcount = 0;
06028
06029 for (int i=0; i<incount; i++) {
06030 if (array_of_requests[i] == MPI_REQUEST_NULL) {
06031 clearStatus(array_of_statuses, i);
06032 nullReqs++;
06033 continue;
06034 }
06035 AmpiRequest& req = *reqs[array_of_requests[i]];
06036 if (req.test()) {
06037 req.wait(&sts);
06038 array_of_indices[(*outcount)] = i;
06039 if (array_of_statuses != MPI_STATUSES_IGNORE)
06040 array_of_statuses[(*outcount)] = sts; // store the status at the same slot as the index
06041 (*outcount)++;
06042 reqs.freeNonPersReq(array_of_requests[i]);
06043 }
06044 else {
06045 req.setBlocked(true);
06046 }
06047 }
06048
06049 if (*outcount > 0) {
06050 reqs.unblockReqs(&array_of_requests[0], incount);
06051 CkAssert(pptr->numBlockedReqs == 0);
06052 return MPI_SUCCESS;
06053 }
06054 else if (nullReqs == incount) {
06055 *outcount = MPI_UNDEFINED;
06056 CkAssert(pptr->numBlockedReqs == 0);
06057 return MPI_SUCCESS;
06058 }
06059 else {
06060 pptr->numBlockedReqs = 1;
06061 pptr = pptr->blockOnRecv();
06062 reqs = pptr->getReqs();
06063
06064 for (int i=0; i<incount; i++) {
06065 if (array_of_requests[i] == MPI_REQUEST_NULL) {
06066 continue;
06067 }
06068 AmpiRequest& req = *reqs[array_of_requests[i]];
06069 if (req.test()) {
06070 req.wait(&sts);
06071 array_of_indices[(*outcount)] = i;
06072 if (array_of_statuses != MPI_STATUSES_IGNORE)
06073 array_of_statuses[(*outcount)] = sts; // store the status at the same slot as the index
06074 (*outcount)++;
06075 reqs.unblockReqs(&array_of_requests[i], incount-i);
06076 reqs.freeNonPersReq(array_of_requests[i]);
06077 CkAssert(pptr->numBlockedReqs == 0);
06078 return MPI_SUCCESS;
06079 }
06080 else {
06081 req.setBlocked(false);
06082 }
06083 }
06084 #if CMK_ERROR_CHECKING
06085 CkAbort("In AMPI_Waitsome, a request should have completed by now!");
06086 #endif
06087 return MPI_SUCCESS;
06088 }
06089 }
06090
06091 bool IReq::test(MPI_Status *sts) noexcept {
06092 if (sts != MPI_STATUS_IGNORE) {
06093 if (cancelled) {
06094 sts->MPI_CANCEL = 1;
06095 complete = true;
06096 }
06097 else if (complete) {
06098 sts->MPI_SOURCE = src;
06099 sts->MPI_TAG = tag;
06100 sts->MPI_COMM = comm;
06101 sts->MPI_LENGTH = length;
06102 sts->MPI_CANCEL = 0;
06103 }
06104 }
06105 else if (cancelled) {
06106 complete = true;
06107 }
06108 return complete;
06109 }
06110
06111 bool RednReq::test(MPI_Status *sts) noexcept {
06112 return complete;
06113 }
06114
06115 bool GatherReq::test(MPI_Status *sts) noexcept {
06116 return complete;
06117 }
06118
06119 bool GathervReq::test(MPI_Status *sts) noexcept {
06120 return complete;
06121 }
06122
06123 bool SendReq::test(MPI_Status *sts) noexcept {
06124 return complete;
06125 }
06126
06127 bool SsendReq::test(MPI_Status *sts) noexcept {
06128 return complete;
06129 }
06130
06131 bool GReq::test(MPI_Status *sts) noexcept {
06132 MPI_Status tmpStatus;
06133 if (pollFn)
06134 (*pollFn)(extraState, (sts == MPI_STATUS_IGNORE || sts == MPI_STATUSES_IGNORE) ? &tmpStatus : sts);
06135 (*queryFn)(extraState, (sts == MPI_STATUS_IGNORE || sts == MPI_STATUSES_IGNORE) ? &tmpStatus : sts);
06136 return complete;
06137 }
06138
06139 bool ATAReq::test(MPI_Status *sts) noexcept {
06140 AmpiRequestList& reqList = getReqs();
06141 int i = 0;
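// Compact the list of sub-requests: drop entries that are null or have completed.
// The all-to-all request as a whole is complete once the list is empty.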
06142 while (i < reqs.size()) {
06143 if (reqs[i] == MPI_REQUEST_NULL) {
06144 std::swap(reqs[i], reqs.back());
06145 reqs.pop_back();
06146 continue;
06147 }
06148 AmpiRequest& req = *reqList[reqs[i]];
06149 if (req.test()) {
06150 req.wait(sts);
06151 reqList.freeNonPersReq(reqs[i]);
06152 std::swap(reqs[i], reqs.back());
06153 reqs.pop_back();
06154 continue;
06155 }
06156 i++;
06157 }
06158 complete = reqs.empty();
06159 return complete;
06160 }
06161
06162 void IReq::receive(ampi *ptr, AmpiMsg *msg, bool deleteMsg) noexcept
06163 {
06164 ptr->processAmpiMsg(msg, buf, type, count);
06165 complete = true;
06166 length = msg->getLength();
06167 this->tag = msg->getTag();
06168 src = msg->getSrcRank();
06169 comm = ptr->getComm();
06170 AMPI_DEBUG("Setting this->tag to %d in IReq::receive this=%p\n", tag, this);
06171 #if CMK_BIGSIM_CHARM
06172 event = msg->event;
06173 eventPe = msg->eventPe;
06174 #endif
06175
06176 if (deleteMsg) {
06177 CkpvAccess(msgPool).deleteAmpiMsg(msg);
06178 }
06179 }
06180
06181 void IReq::receiveRdma(ampi *ptr, char *sbuf, int slength, int ssendReq, int srcRank, MPI_Comm scomm) noexcept
06182 {
06183 ptr->processRdmaMsg(sbuf, slength, ssendReq, srcRank, buf, count, type, scomm);
06184 complete = true;
06185 length = slength;
06186 comm = scomm;
06187
06188 }
06189
06190 void RednReq::receive(ampi *ptr, CkReductionMsg *msg) noexcept
06191 {
06192 if (ptr->opIsCommutative(op) && ptr->getDDT()->isContig(type)) {
06193 ptr->processRednMsg(msg, buf, type, count);
06194 } else {
06195 MPI_User_function* func = ptr->op2User_function(op);
06196 ptr->processNoncommutativeRednMsg(msg, const_cast<void*>(buf), type, count, func);
06197 }
06198 complete = true;
06199 comm = ptr->getComm();
06200 #if CMK_BIGSIM_CHARM
06201 event = msg->event;
06202 eventPe = msg->eventPe;
06203 #endif
06204
06205 }
06206
06207 void GatherReq::receive(ampi *ptr, CkReductionMsg *msg) noexcept
06208 {
06209 ptr->processGatherMsg(msg, buf, type, count);
06210 complete = true;
06211 comm = ptr->getComm();
06212 #if CMK_BIGSIM_CHARM
06213 event = msg->event;
06214 eventPe = msg->eventPe;
06215 #endif
06216
06217 }
06218
06219 void GathervReq::receive(ampi *ptr, CkReductionMsg *msg) noexcept
06220 {
06221 ptr->processGathervMsg(msg, buf, type, recvCounts.data(), displs.data());
06222 complete = true;
06223 comm = ptr->getComm();
06224 #if CMK_BIGSIM_CHARM
06225 event = msg->event;
06226 eventPe = msg->eventPe;
06227 #endif
06228
06229 }
06230
06231 AMPI_API_IMPL(int, MPI_Request_get_status, MPI_Request request, int *flag, MPI_Status *sts)
06232 {
06233 AMPI_API("AMPI_Request_get_status");
06234 testRequestNoFree(&request, flag, sts);
06235 if(*flag != 1)
06236 getAmpiParent()->yield();
06237 return MPI_SUCCESS;
06238 }
06239
06240 AMPI_API_IMPL(int, MPI_Test, MPI_Request *request, int *flag, MPI_Status *sts)
06241 {
06242 AMPI_API("AMPI_Test");
06243 testRequest(request, flag, sts);
06244 if(*flag != 1)
06245 getAmpiParent()->yield();
06246 return MPI_SUCCESS;
06247 }
06248
06249 AMPI_API_IMPL(int, MPI_Testany, int count, MPI_Request *request, int *index, int *flag, MPI_Status *sts)
06250 {
06251 AMPI_API("AMPI_Testany");
06252
06253 checkRequests(count, request);
06254
06255 if (count == 0) {
06256 *flag = 1;
06257 *index = MPI_UNDEFINED;
06258 clearStatus(sts);
06259 return MPI_SUCCESS;
06260 }
06261
06262 int nullReqs = 0;
06263 *flag = 0;
06264
06265 for (int i=0; i<count; i++) {
06266 if (request[i] == MPI_REQUEST_NULL) {
06267 nullReqs++;
06268 continue;
06269 }
06270 testRequest(&request[i], flag, sts);
06271 if (*flag) {
06272 *index = i;
06273 return MPI_SUCCESS;
06274 }
06275 }
06276
06277 *index = MPI_UNDEFINED;
06278 if (nullReqs == count) {
06279 *flag = 1;
06280 clearStatus(sts);
06281 }
06282 else {
06283 getAmpiParent()->yield();
06284 }
06285
06286 return MPI_SUCCESS;
06287 }
06288
06289 AMPI_API_IMPL(int, MPI_Testall, int count, MPI_Request *request, int *flag, MPI_Status *sts)
06290 {
06291 AMPI_API("AMPI_Testall");
06292
06293 checkRequests(count, request);
06294 if (count == 0) {
06295 *flag = 1;
06296 return MPI_SUCCESS;
06297 }
06298
06299 ampiParent* pptr = getAmpiParent();
06300 AmpiRequestList& reqs = pptr->getReqs();
06301 int nullReqs = 0;
06302 *flag = 1;
06303
06304 for (int i=0; i<count; i++) {
06305 if (request[i] == MPI_REQUEST_NULL) {
06306 clearStatus(sts, i);
06307 nullReqs++;
06308 continue;
06309 }
06310 if (!reqs[request[i]]->test()) {
06311 *flag = 0;
06312 pptr->yield();
06313 return MPI_SUCCESS;
06314 }
06315 }
06316
06317 if (nullReqs != count) {
06318 for (int i=0; i<count; i++) {
06319 int reqIdx = request[i];
06320 if (reqIdx != MPI_REQUEST_NULL) {
06321 AmpiRequest& req = *reqs[reqIdx];
06322 req.wait((sts == MPI_STATUSES_IGNORE) ? MPI_STATUS_IGNORE : &sts[i]);
06323 reqs.freeNonPersReq(request[i]);
06324 }
06325 }
06326 }
06327
06328 return MPI_SUCCESS;
06329 }
06330
06331 AMPI_API_IMPL(int, MPI_Testsome, int incount, MPI_Request *array_of_requests, int *outcount,
06332 int *array_of_indices, MPI_Status *array_of_statuses)
06333 {
06334 AMPI_API("AMPI_Testsome");
06335
06336 checkRequests(incount, array_of_requests);
06337 if (incount == 0) {
06338 *outcount = MPI_UNDEFINED;
06339 return MPI_SUCCESS;
06340 }
06341
06342 MPI_Status sts;
06343 int flag = 0, nullReqs = 0;
06344 *outcount = 0;
06345
06346 for (int i=0; i<incount; i++) {
06347 if (array_of_requests[i] == MPI_REQUEST_NULL) {
06348 clearStatus(array_of_statuses, i);
06349 nullReqs++;
06350 continue;
06351 }
06352 testRequest(&array_of_requests[i], &flag, &sts);
06353 if (flag) {
06354 array_of_indices[(*outcount)] = i;
06355 if (array_of_statuses != MPI_STATUSES_IGNORE)
06356 array_of_statuses[(*outcount)] = sts; // store the status at the same slot as the index
06357 (*outcount)++;
06358 }
06359 }
06360
06361 if (nullReqs == incount) {
06362 *outcount = MPI_UNDEFINED;
06363 }
06364 else if (*outcount == 0) {
06365 getAmpiParent()->yield();
06366 }
06367
06368 return MPI_SUCCESS;
06369 }
06370
06371 AMPI_API_IMPL(int, MPI_Request_free, MPI_Request *request)
06372 {
06373 AMPI_API("AMPI_Request_free");
06374 if(*request==MPI_REQUEST_NULL) return MPI_SUCCESS;
06375 checkRequest(*request);
06376 ampiParent* pptr = getAmpiParent();
06377 AmpiRequestList& reqs = pptr->getReqs();
06378 reqs.free(pptr->reqPool, *request, pptr->getDDT());
06379 *request = MPI_REQUEST_NULL;
06380 return MPI_SUCCESS;
06381 }
06382
06383 AMPI_API_IMPL(int, MPI_Grequest_start, MPI_Grequest_query_function *query_fn, MPI_Grequest_free_function *free_fn,
06384 MPI_Grequest_cancel_function *cancel_fn, void *extra_state, MPI_Request *request)
06385 {
06386 AMPI_API("AMPI_Grequest_start");
06387
06388 ampi* ptr = getAmpiInstance(MPI_COMM_SELF);
06389 GReq *newreq = new GReq(query_fn, free_fn, cancel_fn, extra_state);
06390 *request = ptr->postReq(newreq);
06391
06392 return MPI_SUCCESS;
06393 }
06394
06395 AMPI_API_IMPL(int, MPI_Grequest_complete, MPI_Request request)
06396 {
06397 AMPI_API("AMPI_Grequest_complete");
06398
06399 #if AMPI_ERROR_CHECKING
06400 if (request == MPI_REQUEST_NULL) {
06401 return ampiErrhandler("AMPI_Grequest_complete", MPI_ERR_REQUEST);
06402 }
06403 if (getReqs()[request]->getType() != AMPI_G_REQ) {
06404 return ampiErrhandler("AMPI_Grequest_complete", MPI_ERR_REQUEST);
06405 }
06406 #endif
06407
06408 ampiParent* parent = getAmpiParent();
06409 AmpiRequestList& reqs = parent->getReqs();
06410 reqs[request]->complete = true;
06411
06412 return MPI_SUCCESS;
06413 }
06414
06415 AMPI_API_IMPL(int, MPI_Cancel, MPI_Request *request)
06416 {
06417 AMPI_API("AMPI_Cancel");
06418 if(*request == MPI_REQUEST_NULL) return MPI_SUCCESS;
06419 checkRequest(*request);
06420 AmpiRequestList& reqs = getReqs();
06421 AmpiRequest& req = *reqs[*request];
06422 if(req.getType() == AMPI_I_REQ || req.getType() == AMPI_G_REQ) {
06423 req.cancel();
06424 return MPI_SUCCESS;
06425 }
06426 else {
06427 return ampiErrhandler("AMPI_Cancel", MPI_ERR_REQUEST);
06428 }
06429 }
06430
06431 AMPI_API_IMPL(int, MPI_Test_cancelled, const MPI_Status* status, int* flag)
06432 {
06433 AMPI_API("AMPI_Test_cancelled");
06434
06435
06436 *flag = status->MPI_CANCEL;
06437 return MPI_SUCCESS;
06438 }
06439
06440 AMPI_API_IMPL(int, MPI_Status_set_cancelled, MPI_Status *status, int flag)
06441 {
06442 AMPI_API("AMPI_Status_set_cancelled");
06443 status->MPI_CANCEL = flag;
06444 return MPI_SUCCESS;
06445 }
06446
06447 AMPI_API_IMPL(int, MPI_Recv_init, void *buf, int count, MPI_Datatype type, int src,
06448 int tag, MPI_Comm comm, MPI_Request *req)
06449 {
06450 AMPI_API("AMPI_Recv_init");
06451
06452 handle_MPI_BOTTOM(buf, type);
06453
06454 #if AMPI_ERROR_CHECKING
06455 int ret = errorCheck("AMPI_Recv_init", comm, 1, count, 1, type, 1, tag, 1, src, 1, buf, 1);
06456 if(ret != MPI_SUCCESS){
06457 *req = MPI_REQUEST_NULL;
06458 return ret;
06459 }
06460 #endif
06461
06462 IReq* ireq = getAmpiParent()->reqPool.newReq<IReq>(buf,count,type,src,tag,comm,getDDT());
06463 ireq->setPersistent(true);
06464 *req = getAmpiInstance(comm)->postReq(ireq);
06465 return MPI_SUCCESS;
06466 }
06467
06468 AMPI_API_IMPL(int, MPI_Send_init, const void *buf, int count, MPI_Datatype type, int dest,
06469 int tag, MPI_Comm comm, MPI_Request *req)
06470 {
06471 AMPI_API("AMPI_Send_init");
06472
06473 handle_MPI_BOTTOM((void*&)buf, type);
06474
06475 #if AMPI_ERROR_CHECKING
06476 int ret = errorCheck("AMPI_Send_init", comm, 1, count, 1, type, 1, tag, 1, dest, 1, buf, 1);
06477 if(ret != MPI_SUCCESS){
06478 *req = MPI_REQUEST_NULL;
06479 return ret;
06480 }
06481 #endif
06482
06483 SendReq* sreq = getAmpiParent()->reqPool.newReq<SendReq>((void*)buf, count, type, dest, tag, comm, getDDT());
06484 sreq->setPersistent(true);
06485 *req = getAmpiInstance(comm)->postReq(sreq);
06486 return MPI_SUCCESS;
06487 }
06488
06489 AMPI_API_IMPL(int, MPI_Rsend_init, const void *buf, int count, MPI_Datatype type, int dest,
06490 int tag, MPI_Comm comm, MPI_Request *req)
06491 {
06492 AMPI_API("AMPI_Rsend_init");
06493 return MPI_Send_init(buf, count, type, dest, tag, comm, req);
06494 }
06495
06496 AMPI_API_IMPL(int, MPI_Bsend_init, const void *buf, int count, MPI_Datatype type, int dest,
06497 int tag, MPI_Comm comm, MPI_Request *req)
06498 {
06499 AMPI_API("AMPI_Bsend_init");
06500 return MPI_Send_init(buf, count, type, dest, tag, comm, req);
06501 }
06502
06503 AMPI_API_IMPL(int, MPI_Ssend_init, const void *buf, int count, MPI_Datatype type, int dest,
06504 int tag, MPI_Comm comm, MPI_Request *req)
06505 {
06506 AMPI_API("AMPI_Ssend_init");
06507
06508 handle_MPI_BOTTOM((void*&)buf, type);
06509
06510 #if AMPI_ERROR_CHECKING
06511 int ret = errorCheck("AMPI_Ssend_init", comm, 1, count, 1, type, 1, tag, 1, dest, 1, buf, 1);
06512 if(ret != MPI_SUCCESS){
06513 *req = MPI_REQUEST_NULL;
06514 return ret;
06515 }
06516 #endif
06517
06518 ampi* ptr = getAmpiInstance(comm);
06519 SsendReq* sreq = getAmpiParent()->reqPool.newReq<SsendReq>((void*)buf, count, type, dest, tag, comm, ptr->getRank(), getDDT());
06520 sreq->setPersistent(true);
06521 *req = ptr->postReq(sreq);
06522 return MPI_SUCCESS;
06523 }
06524
06525 AMPI_API_IMPL(int, MPI_Type_contiguous, int count, MPI_Datatype oldtype, MPI_Datatype *newtype)
06526 {
06527 AMPI_API("AMPI_Type_contiguous");
06528
06529 #if AMPI_ERROR_CHECKING
06530 int ret = checkData("AMPI_Type_contiguous", oldtype);
06531 if (ret!=MPI_SUCCESS)
06532 return ret;
06533 #endif
06534
06535 getDDT()->newContiguous(count, oldtype, newtype);
06536 return MPI_SUCCESS;
06537 }
06538
06539 AMPI_API_IMPL(int, MPI_Type_vector, int count, int blocklength, int stride,
06540 MPI_Datatype oldtype, MPI_Datatype* newtype)
06541 {
06542 AMPI_API("AMPI_Type_vector");
06543
06544 #if AMPI_ERROR_CHECKING
06545 int ret = checkData("AMPI_Type_vector", oldtype);
06546 if (ret!=MPI_SUCCESS)
06547 return ret;
06548 #endif
06549
06550 getDDT()->newVector(count, blocklength, stride, oldtype, newtype);
06551 return MPI_SUCCESS;
06552 }
06553
06554 AMPI_API_IMPL(int, MPI_Type_create_hvector, int count, int blocklength, MPI_Aint stride,
06555 MPI_Datatype oldtype, MPI_Datatype* newtype)
06556 {
06557 AMPI_API("AMPI_Type_create_hvector");
06558
06559 #if AMPI_ERROR_CHECKING
06560 int ret = checkData("AMPI_Type_create_hvector", oldtype);
06561 if (ret!=MPI_SUCCESS)
06562 return ret;
06563 #endif
06564
06565 getDDT()->newHVector(count, blocklength, stride, oldtype, newtype);
06566 return MPI_SUCCESS;
06567 }
06568
06569 AMPI_API_IMPL(int, MPI_Type_hvector, int count, int blocklength, MPI_Aint stride,
06570 MPI_Datatype oldtype, MPI_Datatype* newtype)
06571 {
06572 AMPI_API("AMPI_Type_hvector");
06573
06574 #if AMPI_ERROR_CHECKING
06575 int ret = checkData("AMPI_Type_hvector", oldtype);
06576 if (ret!=MPI_SUCCESS)
06577 return ret;
06578 #endif
06579
06580 return MPI_Type_create_hvector(count, blocklength, stride, oldtype, newtype);
06581 }
06582
06583 AMPI_API_IMPL(int, MPI_Type_indexed, int count, const int* arrBlength, const int* arrDisp,
06584 MPI_Datatype oldtype, MPI_Datatype* newtype)
06585 {
06586 AMPI_API("AMPI_Type_indexed");
06587
06588 #if AMPI_ERROR_CHECKING
06589 int ret = checkData("AMPI_Type_indexed", oldtype);
06590 if (ret!=MPI_SUCCESS)
06591 return ret;
06592 #endif
06593
06594
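// Promote the int displacements to MPI_Aint so the general indexed constructor can be reused.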
06595 vector<MPI_Aint> arrDispAint(count);
06596 for(int i=0; i<count; i++)
06597 arrDispAint[i] = (MPI_Aint)(arrDisp[i]);
06598 getDDT()->newIndexed(count, arrBlength, arrDispAint.data(), oldtype, newtype);
06599 return MPI_SUCCESS;
06600 }
06601
06602 AMPI_API_IMPL(int, MPI_Type_create_hindexed, int count, const int* arrBlength, const MPI_Aint* arrDisp,
06603 MPI_Datatype oldtype, MPI_Datatype* newtype)
06604 {
06605 AMPI_API("AMPI_Type_create_hindexed");
06606
06607 #if AMPI_ERROR_CHECKING
06608 int ret = checkData("AMPI_Type_create_hindexed", oldtype);
06609 if (ret!=MPI_SUCCESS)
06610 return ret;
06611 #endif
06612
06613 getDDT()->newHIndexed(count, arrBlength, arrDisp, oldtype, newtype);
06614 return MPI_SUCCESS;
06615 }
06616
06617 AMPI_API_IMPL(int, MPI_Type_hindexed, int count, int* arrBlength, MPI_Aint* arrDisp,
06618 MPI_Datatype oldtype, MPI_Datatype* newtype)
06619 {
06620 AMPI_API("AMPI_Type_hindexed");
06621
06622 #if AMPI_ERROR_CHECKING
06623 int ret = checkData("AMPI_Type_hindexed", oldtype);
06624 if (ret!=MPI_SUCCESS)
06625 return ret;
06626 #endif
06627
06628 return MPI_Type_create_hindexed(count, arrBlength, arrDisp, oldtype, newtype);
06629 }
06630
06631 AMPI_API_IMPL(int, MPI_Type_create_indexed_block, int count, int Blength, const int *arr,
06632 MPI_Datatype oldtype, MPI_Datatype *newtype)
06633 {
06634 AMPI_API("AMPI_Type_create_indexed_block");
06635
06636 #if AMPI_ERROR_CHECKING
06637 int ret = checkData("AMPI_Type_create_indexed_block", oldtype);
06638 if (ret!=MPI_SUCCESS)
06639 return ret;
06640 #endif
06641
06642 getDDT()->newIndexedBlock(count,Blength, arr, oldtype, newtype);
06643 return MPI_SUCCESS;
06644 }
06645
06646 AMPI_API_IMPL(int, MPI_Type_create_hindexed_block, int count, int Blength, const MPI_Aint *arr,
06647 MPI_Datatype oldtype, MPI_Datatype *newtype)
06648 {
06649 AMPI_API("AMPI_Type_create_hindexed_block");
06650
06651 #if AMPI_ERROR_CHECKING
06652 int ret = checkData("AMPI_Type_create_hindexed_block", oldtype);
06653 if (ret!=MPI_SUCCESS)
06654 return ret;
06655 #endif
06656
06657 getDDT()->newHIndexedBlock(count,Blength, arr, oldtype, newtype);
06658 return MPI_SUCCESS;
06659 }
06660
06661 AMPI_API_IMPL(int, MPI_Type_create_struct, int count, const int* arrBlength, const MPI_Aint* arrDisp,
06662 const MPI_Datatype* oldtype, MPI_Datatype* newtype)
06663 {
06664 AMPI_API("AMPI_Type_create_struct");
06665 getDDT()->newStruct(count, arrBlength, arrDisp, oldtype, newtype);
06666 return MPI_SUCCESS;
06667 }
06668
06669 AMPI_API_IMPL(int, MPI_Type_struct, int count, int* arrBlength, MPI_Aint* arrDisp,
06670 MPI_Datatype* oldtype, MPI_Datatype* newtype)
06671 {
06672 AMPI_API("AMPI_Type_struct");
06673 return MPI_Type_create_struct(count, arrBlength, arrDisp, oldtype, newtype);
06674 }
06675
06676 AMPI_API_IMPL(int, MPI_Type_commit, MPI_Datatype *datatype)
06677 {
06678 AMPI_API("AMPI_Type_commit");
06679
06680 #if AMPI_ERROR_CHECKING
06681 int ret = checkData("AMPI_Type_commit", *datatype);
06682 if (ret!=MPI_SUCCESS)
06683 return ret;
06684 #endif
06685
06686 return MPI_SUCCESS;
06687 }
06688
06689 AMPI_API_IMPL(int, MPI_Type_free, MPI_Datatype *datatype)
06690 {
06691 AMPI_API("AMPI_Type_free");
06692
06693 #if AMPI_ERROR_CHECKING
06694 if (datatype == nullptr) { // check the pointer before dereferencing it below
06695 return ampiErrhandler("AMPI_Type_free", MPI_ERR_ARG);
06696 }
06697 int ret = checkData("AMPI_Type_free", *datatype);
06698 if (ret!=MPI_SUCCESS)
06699 return ret;
06700 if (*datatype <= AMPI_MAX_PREDEFINED_TYPE) {
06701 return ampiErrhandler("AMPI_Type_free", MPI_ERR_TYPE);
06702 }
06703 #endif
06704 getDDT()->freeType(*datatype);
06705 *datatype = MPI_DATATYPE_NULL;
06706 return MPI_SUCCESS;
06707 }
06708
06709 AMPI_API_IMPL(int, MPI_Type_get_extent, MPI_Datatype datatype, MPI_Aint *lb, MPI_Aint *extent)
06710 {
06711 AMPI_API("AMPI_Type_get_extent");
06712
06713 #if AMPI_ERROR_CHECKING
06714 int ret = checkData("AMPI_Type_get_extent", datatype);
06715 if (ret!=MPI_SUCCESS)
06716 return(ret);
06717 #endif
06718
06719 *lb = getDDT()->getLB(datatype);
06720 *extent = getDDT()->getExtent(datatype);
06721 return MPI_SUCCESS;
06722 }
06723
06724 AMPI_API_IMPL(int, MPI_Type_get_extent_x, MPI_Datatype datatype, MPI_Count *lb, MPI_Count *extent)
06725 {
06726 AMPI_API("AMPI_Type_get_extent_x");
06727
06728 #if AMPI_ERROR_CHECKING
06729 int ret = checkData("AMPI_Type_get_extent_x", datatype);
06730 if (ret!=MPI_SUCCESS)
06731 return(ret);
06732 #endif
06733
06734 *lb = getDDT()->getLB(datatype);
06735 *extent = getDDT()->getExtent(datatype);
06736 return MPI_SUCCESS;
06737 }
06738
06739 AMPI_API_IMPL(int, MPI_Type_extent, MPI_Datatype datatype, MPI_Aint *extent)
06740 {
06741 AMPI_API("AMPI_Type_extent");
06742
06743 #if AMPI_ERROR_CHECKING
06744 int ret = checkData("AMPI_Type_extent", datatype);
06745 if (ret!=MPI_SUCCESS)
06746 return ret;
06747 #endif
06748
06749 MPI_Aint tmpLB;
06750 return MPI_Type_get_extent(datatype, &tmpLB, extent);
06751 }
06752
06753 AMPI_API_IMPL(int, MPI_Type_get_true_extent, MPI_Datatype datatype, MPI_Aint *true_lb, MPI_Aint *true_extent)
06754 {
06755 AMPI_API("AMPI_Type_get_true_extent");
06756
06757 #if AMPI_ERROR_CHECKING
06758 int ret = checkData("AMPI_Type_get_true_extent", datatype);
06759 if (ret!=MPI_SUCCESS)
06760 return(ret);
06761 #endif
06762
06763 *true_lb = getDDT()->getTrueLB(datatype);
06764 *true_extent = getDDT()->getTrueExtent(datatype);
06765 return MPI_SUCCESS;
06766 }
06767
06768 AMPI_API_IMPL(int, MPI_Type_get_true_extent_x, MPI_Datatype datatype, MPI_Count *true_lb, MPI_Count *true_extent)
06769 {
06770 AMPI_API("AMPI_Type_get_true_extent_x");
06771
06772 #if AMPI_ERROR_CHECKING
06773 int ret = checkData("AMPI_Type_get_true_extent_x", datatype);
06774 if (ret!=MPI_SUCCESS)
06775 return(ret);
06776 #endif
06777
06778 *true_lb = getDDT()->getTrueLB(datatype);
06779 *true_extent = getDDT()->getTrueExtent(datatype);
06780 return MPI_SUCCESS;
06781 }
06782
06783 AMPI_API_IMPL(int, MPI_Type_size, MPI_Datatype datatype, int *size)
06784 {
06785 AMPI_API("AMPI_Type_size");
06786
06787 #if AMPI_ERROR_CHECKING
06788 int ret = checkData("AMPI_Type_size", datatype);
06789 if (ret!=MPI_SUCCESS)
06790 return ret;
06791 #endif
06792
06793 *size=getDDT()->getSize(datatype);
06794 return MPI_SUCCESS;
06795 }
06796
06797 AMPI_API_IMPL(int, MPI_Type_size_x, MPI_Datatype datatype, MPI_Count *size)
06798 {
06799 AMPI_API("AMPI_Type_size_x");
06800
06801 #if AMPI_ERROR_CHECKING
06802 int ret = checkData("AMPI_Type_size_x", datatype);
06803 if (ret!=MPI_SUCCESS)
06804 return ret;
06805 #endif
06806
06807 *size=getDDT()->getSize(datatype);
06808 return MPI_SUCCESS;
06809 }
06810
06811 AMPI_API_IMPL(int, MPI_Type_set_name, MPI_Datatype datatype, const char *name)
06812 {
06813 AMPI_API("AMPI_Type_set_name");
06814
06815 #if AMPI_ERROR_CHECKING
06816 int ret = checkData("AMPI_Type_set_name", datatype);
06817 if (ret!=MPI_SUCCESS)
06818 return ret;
06819 #endif
06820
06821 getDDT()->setName(datatype, name);
06822 return MPI_SUCCESS;
06823 }
06824
06825 AMPI_API_IMPL(int, MPI_Type_get_name, MPI_Datatype datatype, char *name, int *resultlen)
06826 {
06827 AMPI_API("AMPI_Type_get_name");
06828
06829 #if AMPI_ERROR_CHECKING
06830 int ret = checkData("AMPI_Type_get_name", datatype);
06831 if (ret!=MPI_SUCCESS)
06832 return ret;
06833 #endif
06834
06835 getDDT()->getName(datatype, name, resultlen);
06836 return MPI_SUCCESS;
06837 }
06838
06839 AMPI_API_IMPL(int, MPI_Type_create_resized, MPI_Datatype oldtype, MPI_Aint lb,
06840 MPI_Aint extent, MPI_Datatype *newtype)
06841 {
06842 AMPI_API("AMPI_Type_create_resized");
06843
06844 #if AMPI_ERROR_CHECKING
06845 int ret = checkData("AMPI_Type_create_resized", oldtype);
06846 if (ret!=MPI_SUCCESS)
06847 return ret;
06848 #endif
06849
06850 getDDT()->createResized(oldtype, lb, extent, newtype);
06851 return MPI_SUCCESS;
06852 }
06853
06854 AMPI_API_IMPL(int, MPI_Type_dup, MPI_Datatype oldtype, MPI_Datatype *newtype)
06855 {
06856 AMPI_API("AMPI_Type_dup");
06857
06858 #if AMPI_ERROR_CHECKING
06859 int ret = checkData("AMPI_Type_dup", oldtype);
06860 if (ret!=MPI_SUCCESS)
06861 return ret;
06862 #endif
06863
06864 getDDT()->createDup(oldtype, newtype);
06865 return MPI_SUCCESS;
06866 }
06867
06868 AMPI_API_IMPL(int, MPI_Type_set_attr, MPI_Datatype datatype, int keyval, void *attribute_val)
06869 {
06870 AMPI_API("AMPI_Type_set_attr");
06871
06872 #if AMPI_ERROR_CHECKING
06873 int ret = checkData("AMPI_Type_set_attr", datatype);
06874 if (ret!=MPI_SUCCESS)
06875 return ret;
06876 #endif
06877
06878 ampiParent *parent = getAmpiParent();
06879 vector<int>& keyvals = parent->getDDT()->getType(datatype)->getKeyvals();
06880 int err = parent->setAttr(datatype, keyvals, keyval, attribute_val);
06881 return ampiErrhandler("AMPI_Type_set_attr", err);
06882 }
06883
06884 AMPI_API_IMPL(int, MPI_Type_get_attr, MPI_Datatype datatype, int keyval,
06885 void *attribute_val, int *flag)
06886 {
06887 AMPI_API("AMPI_Type_get_attr");
06888
06889 #if AMPI_ERROR_CHECKING
06890 int ret = checkData("AMPI_Type_get_attr", datatype);
06891 if (ret!=MPI_SUCCESS)
06892 return ret;
06893 #endif
06894
06895 ampiParent *parent = getAmpiParent();
06896 vector<int>& keyvals = parent->getDDT()->getType(datatype)->getKeyvals();
06897 int err = parent->getAttr(datatype, keyvals, keyval, attribute_val, flag);
06898 return ampiErrhandler("AMPI_Type_get_attr", err);
06899 }
06900
06901 AMPI_API_IMPL(int, MPI_Type_delete_attr, MPI_Datatype datatype, int keyval)
06902 {
06903 AMPI_API("AMPI_Type_delete_attr");
06904
06905 #if AMPI_ERROR_CHECKING
06906 int ret = checkData("AMPI_Type_delete_attr", datatype);
06907 if (ret!=MPI_SUCCESS)
06908 return ret;
06909 #endif
06910
06911 ampiParent *parent = getAmpiParent();
06912 vector<int>& keyvals = parent->getDDT()->getType(datatype)->getKeyvals();
06913 int err = parent->deleteAttr(datatype, keyvals, keyval);
06914 return ampiErrhandler("AMPI_Type_delete_attr", err);
06915 }
06916
06917 AMPI_API_IMPL(int, MPI_Type_create_keyval, MPI_Type_copy_attr_function *copy_fn,
06918 MPI_Type_delete_attr_function *delete_fn,
06919 int *keyval, void *extra_state)
06920 {
06921 AMPI_API("AMPI_Type_create_keyval");
06922 return MPI_Comm_create_keyval(copy_fn, delete_fn, keyval, extra_state);
06923 }
06924
06925 AMPI_API_IMPL(int, MPI_Type_free_keyval, int *keyval)
06926 {
06927 AMPI_API("AMPI_Type_free_keyval");
06928 return MPI_Comm_free_keyval(keyval);
06929 }
06930
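// Helper for MPI_Type_create_darray: builds the datatype for one block-distributed
// dimension and returns this rank's starting offset (in elements) along that dimension.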
06931 static int MPIOI_Type_block(const int array_of_gsizes[], int dim, int ndims, int nprocs,
06932 int rank, int darg, int order, MPI_Aint orig_extent,
06933 MPI_Datatype type_old, MPI_Datatype *type_new,
06934 MPI_Aint *st_offset)
06935 {
06936
06937
06938 int blksize, global_size, mysize, i, j;
06939 MPI_Aint stride;
06940
06941 global_size = array_of_gsizes[dim];
06942
06943 if (darg == MPI_DISTRIBUTE_DFLT_DARG)
06944 blksize = (global_size + nprocs - 1)/nprocs;
06945 else {
06946 blksize = darg;
06947
06948
06949 if (blksize <= 0) {
06950 return MPI_ERR_ARG;
06951 }
06952
06953 if (blksize * nprocs < global_size) {
06954 return MPI_ERR_ARG;
06955 }
06956
06957 }
06958
06959 j = global_size - blksize*rank;
06960 mysize = std::min(blksize, j);
06961 if (mysize < 0) mysize = 0;
06962
06963 stride = orig_extent;
06964 if (order == MPI_ORDER_FORTRAN) {
06965 if (dim == 0)
06966 MPI_Type_contiguous(mysize, type_old, type_new);
06967 else {
06968 for (i=0; i<dim; i++) stride *= (MPI_Aint)array_of_gsizes[i];
06969 MPI_Type_hvector(mysize, 1, stride, type_old, type_new);
06970 }
06971 }
06972 else {
06973 if (dim == ndims-1)
06974 MPI_Type_contiguous(mysize, type_old, type_new);
06975 else {
06976 for (i=ndims-1; i>dim; i--) stride *= (MPI_Aint)array_of_gsizes[i];
06977 MPI_Type_hvector(mysize, 1, stride, type_old, type_new);
06978 }
06979
06980 }
06981
06982 *st_offset = (MPI_Aint)blksize * (MPI_Aint)rank;
06983
06984 if (mysize == 0) *st_offset = 0;
06985
06986 return MPI_SUCCESS;
06987 }
06988
06989
06990
06991
06992
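// Helper for MPI_Type_create_darray: builds the datatype for one cyclically distributed
// dimension (with block size 'darg') and returns this rank's starting offset along it.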
06993 static int MPIOI_Type_cyclic(const int array_of_gsizes[], int dim, int ndims, int nprocs,
06994 int rank, int darg, int order, MPI_Aint orig_extent,
06995 MPI_Datatype type_old, MPI_Datatype *type_new,
06996 MPI_Aint *st_offset)
06997 {
06998
06999
07000 int blksize, i, blklens[3], st_index, end_index, local_size, rem, count;
07001 MPI_Aint stride, disps[3];
07002 MPI_Datatype type_tmp, types[3];
07003
07004 if (darg == MPI_DISTRIBUTE_DFLT_DARG) blksize = 1;
07005 else blksize = darg;
07006
07007
07008 if (blksize <= 0) {
07009 return MPI_ERR_ARG;
07010 }
07011
07012
07013 st_index = rank*blksize;
07014 end_index = array_of_gsizes[dim] - 1;
07015
07016 if (end_index < st_index) local_size = 0;
07017 else {
07018 local_size = ((end_index - st_index + 1)/(nprocs*blksize))*blksize;
07019 rem = (end_index - st_index + 1) % (nprocs*blksize);
07020 local_size += std::min(rem, blksize);
07021 }
07022
07023 count = local_size/blksize;
07024 rem = local_size % blksize;
07025
07026 stride = (MPI_Aint)nprocs*(MPI_Aint)blksize*orig_extent;
07027 if (order == MPI_ORDER_FORTRAN)
07028 for (i=0; i<dim; i++) stride *= (MPI_Aint)array_of_gsizes[i];
07029 else for (i=ndims-1; i>dim; i--) stride *= (MPI_Aint)array_of_gsizes[i];
07030
07031 MPI_Type_hvector(count, blksize, stride, type_old, type_new);
07032
07033 if (rem) {
07034
07035
07036
07037 types[0] = *type_new;
07038 types[1] = type_old;
07039 disps[0] = 0;
07040 disps[1] = (MPI_Aint)count*stride;
07041 blklens[0] = 1;
07042 blklens[1] = rem;
07043
07044 MPI_Type_struct(2, blklens, disps, types, &type_tmp);
07045
07046 MPI_Type_free(type_new);
07047 *type_new = type_tmp;
07048 }
07049
07050
07051
07052 if ( ((order == MPI_ORDER_FORTRAN) && (dim == 0)) ||
07053 ((order == MPI_ORDER_C) && (dim == ndims-1)) ) {
07054 types[0] = MPI_LB;
07055 disps[0] = 0;
07056 types[1] = *type_new;
07057 disps[1] = (MPI_Aint)rank * (MPI_Aint)blksize * orig_extent;
07058 types[2] = MPI_UB;
07059 disps[2] = orig_extent * (MPI_Aint)array_of_gsizes[dim];
07060 blklens[0] = blklens[1] = blklens[2] = 1;
07061 MPI_Type_struct(3, blklens, disps, types, &type_tmp);
07062 MPI_Type_free(type_new);
07063 *type_new = type_tmp;
07064
07065 *st_offset = 0;
07066
07067 }
07068 else {
07069 *st_offset = (MPI_Aint)rank * (MPI_Aint)blksize;
07070
07071
07072 }
07073
07074 if (local_size == 0) *st_offset = 0;
07075
07076 return MPI_SUCCESS;
07077 }
07078
07079
07080
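// Build the distributed-array datatype dimension by dimension, applying the requested
// block or cyclic distribution in storage order, then bracket the result with
// MPI_LB/MPI_UB so the type's extent spans the full global array.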
07081 AMPI_API_IMPL(int, MPI_Type_create_darray, int size, int rank, int ndims,
07082 const int array_of_gsizes[], const int array_of_distribs[],
07083 const int array_of_dargs[], const int array_of_psizes[],
07084 int order, MPI_Datatype oldtype,
07085 MPI_Datatype *newtype)
07086 {
07087
07088 AMPI_API("AMPI_Type_create_darray");
07089 MPI_Datatype type_old, type_new=MPI_DATATYPE_NULL, types[3];
07090 int procs, tmp_rank, i, tmp_size, blklens[3], *coords;
07091 MPI_Aint *st_offsets, orig_extent, disps[3];
07092
07093 MPI_Type_extent(oldtype, &orig_extent);
07094
07095
07096
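// Convert the linear rank into coordinates on the process grid described by
// array_of_psizes (row-major ordering).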
07097 coords = (int *) malloc(ndims*sizeof(int));
07098 procs = size;
07099 tmp_rank = rank;
07100 for (i=0; i<ndims; i++) {
07101 procs = procs/array_of_psizes[i];
07102 coords[i] = tmp_rank/procs;
07103 tmp_rank = tmp_rank % procs;
07104 }
07105
07106 st_offsets = (MPI_Aint *) malloc(ndims*sizeof(MPI_Aint));
07107 type_old = oldtype;
07108
07109 if (order == MPI_ORDER_FORTRAN) {
07110
07111 for (i=0; i<ndims; i++) {
07112 switch(array_of_distribs[i]) {
07113 case MPI_DISTRIBUTE_BLOCK:
07114 MPIOI_Type_block(array_of_gsizes, i, ndims,
07115 array_of_psizes[i],
07116 coords[i], array_of_dargs[i],
07117 order, orig_extent,
07118 type_old, &type_new,
07119 st_offsets+i);
07120 break;
07121 case MPI_DISTRIBUTE_CYCLIC:
07122 MPIOI_Type_cyclic(array_of_gsizes, i, ndims,
07123 array_of_psizes[i], coords[i],
07124 array_of_dargs[i], order,
07125 orig_extent, type_old,
07126 &type_new, st_offsets+i);
07127 break;
07128 case MPI_DISTRIBUTE_NONE:
07129
07130 MPIOI_Type_block(array_of_gsizes, i, ndims, 1, 0,
07131 MPI_DISTRIBUTE_DFLT_DARG, order,
07132 orig_extent,
07133 type_old, &type_new,
07134 st_offsets+i);
07135 break;
07136 }
07137 if (i) MPI_Type_free(&type_old);
07138 type_old = type_new;
07139 }
07140
07141
07142 disps[1] = st_offsets[0];
07143 tmp_size = 1;
07144 for (i=1; i<ndims; i++) {
07145 tmp_size *= array_of_gsizes[i-1];
07146 disps[1] += (MPI_Aint)tmp_size*st_offsets[i];
07147 }
07148
07149 }
07150
07151 else {
07152
07153 for (i=ndims-1; i>=0; i--) {
07154 switch(array_of_distribs[i]) {
07155 case MPI_DISTRIBUTE_BLOCK:
07156 MPIOI_Type_block(array_of_gsizes, i, ndims, array_of_psizes[i],
07157 coords[i], array_of_dargs[i], order,
07158 orig_extent, type_old, &type_new,
07159 st_offsets+i);
07160 break;
07161 case MPI_DISTRIBUTE_CYCLIC:
07162 MPIOI_Type_cyclic(array_of_gsizes, i, ndims,
07163 array_of_psizes[i], coords[i],
07164 array_of_dargs[i], order,
07165 orig_extent, type_old, &type_new,
07166 st_offsets+i);
07167 break;
07168 case MPI_DISTRIBUTE_NONE:
07169
07170 MPIOI_Type_block(array_of_gsizes, i, ndims, array_of_psizes[i],
07171 coords[i], MPI_DISTRIBUTE_DFLT_DARG, order, orig_extent,
07172 type_old, &type_new, st_offsets+i);
07173 break;
07174 }
07175 if (i != ndims-1) MPI_Type_free(&type_old);
07176 type_old = type_new;
07177 }
07178
07179
07180 disps[1] = st_offsets[ndims-1];
07181 tmp_size = 1;
07182 for (i=ndims-2; i>=0; i--) {
07183 tmp_size *= array_of_gsizes[i+1];
07184 disps[1] += (MPI_Aint)tmp_size*st_offsets[i];
07185 }
07186 }
07187
07188 disps[1] *= orig_extent;
07189
07190 disps[2] = orig_extent;
07191 for (i=0; i<ndims; i++) disps[2] *= (MPI_Aint)array_of_gsizes[i];
07192
07193 disps[0] = 0;
07194 blklens[0] = blklens[1] = blklens[2] = 1;
07195 types[0] = MPI_LB;
07196 types[1] = type_new;
07197 types[2] = MPI_UB;
07198
07199 MPI_Type_struct(3, blklens, disps, types, newtype);
07200
07201 MPI_Type_free(&type_new);
07202 free(st_offsets);
07203 free(coords);
07204 return MPI_SUCCESS;
07205 }
07206
07207
07208
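// Build the subarray type as nested (h)vectors over the full array, then use MPI_LB/MPI_UB
// to encode the starting offset and give the type the extent of the whole array.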
07209 AMPI_API_IMPL(int, MPI_Type_create_subarray, int ndims,
07210 const int array_of_sizes[], const int array_of_subsizes[],
07211 const int array_of_starts[], int order, MPI_Datatype oldtype,
07212 MPI_Datatype *newtype)
07213 {
07214
07215 AMPI_API("AMPI_Type_create_subarray");
07216 MPI_Aint extent, disps[3], size;
07217 int i, blklens[3];
07218 MPI_Datatype tmp1, tmp2, types[3];
07219
07220 MPI_Type_extent(oldtype, &extent);
07221
07222 if (order == MPI_ORDER_FORTRAN) {
07223
07224 if (ndims == 1) {
07225 MPI_Type_contiguous(array_of_subsizes[0], oldtype, &tmp1);
07226 }
07227 else {
07228 MPI_Type_vector(array_of_subsizes[1],
07229 array_of_subsizes[0],
07230 array_of_sizes[0], oldtype, &tmp1);
07231
07232 size = (MPI_Aint)array_of_sizes[0]*extent;
07233 for (i=2; i<ndims; i++) {
07234 size *= (MPI_Aint)array_of_sizes[i-1];
07235 MPI_Type_hvector(array_of_subsizes[i], 1, size, tmp1, &tmp2);
07236 MPI_Type_free(&tmp1);
07237 tmp1 = tmp2;
07238 }
07239 }
07240
07241
07242 disps[1] = array_of_starts[0];
07243 size = 1;
07244 for (i=1; i<ndims; i++) {
07245 size *= (MPI_Aint)array_of_sizes[i-1];
07246 disps[1] += size*(MPI_Aint)array_of_starts[i];
07247 }
07248
07249 }
07250
07251 else {
07252
07253 if (ndims == 1) {
07254 MPI_Type_contiguous(array_of_subsizes[0], oldtype, &tmp1);
07255 }
07256 else {
07257 MPI_Type_vector(array_of_subsizes[ndims-2],
07258 array_of_subsizes[ndims-1],
07259 array_of_sizes[ndims-1], oldtype, &tmp1);
07260
07261 size = (MPI_Aint)array_of_sizes[ndims-1]*extent;
07262 for (i=ndims-3; i>=0; i--) {
07263 size *= (MPI_Aint)array_of_sizes[i+1];
07264 MPI_Type_hvector(array_of_subsizes[i], 1, size, tmp1, &tmp2);
07265 MPI_Type_free(&tmp1);
07266 tmp1 = tmp2;
07267 }
07268 }
07269
07270
07271 disps[1] = array_of_starts[ndims-1];
07272 size = 1;
07273 for (i=ndims-2; i>=0; i--) {
07274 size *= (MPI_Aint)array_of_sizes[i+1];
07275 disps[1] += size*(MPI_Aint)array_of_starts[i];
07276 }
07277 }
07278
07279 disps[1] *= extent;
07280
07281 disps[2] = extent;
07282 for (i=0; i<ndims; i++) disps[2] *= (MPI_Aint)array_of_sizes[i];
07283
07284 disps[0] = 0;
07285 blklens[0] = blklens[1] = blklens[2] = 1;
07286 types[0] = MPI_LB;
07287 types[1] = tmp1;
07288 types[2] = MPI_UB;
07289
07290 MPI_Type_struct(3, blklens, disps, types, newtype);
07291
07292 MPI_Type_free(&tmp1);
07293
07294 return MPI_SUCCESS;
07295 }
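/* Illustrative usage sketch (not part of AMPI): a 4x4 interior block starting
 * at (row 2, column 3) of a 10x10 C-ordered array of doubles:
 *
 *   int sizes[2]    = {10, 10};
 *   int subsizes[2] = {4, 4};
 *   int starts[2]   = {2, 3};
 *   MPI_Datatype sub;
 *   MPI_Type_create_subarray(2, sizes, subsizes, starts, MPI_ORDER_C,
 *                            MPI_DOUBLE, &sub);
 *   MPI_Type_commit(&sub);
 */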
07296
07297
07298 AMPI_API_IMPL(int, MPI_Isend, const void *buf, int count, MPI_Datatype type, int dest,
07299 int tag, MPI_Comm comm, MPI_Request *request)
07300 {
07301 AMPI_API("AMPI_Isend");
07302
07303 handle_MPI_BOTTOM((void*&)buf, type);
07304
07305 #if AMPI_ERROR_CHECKING
07306 int ret = errorCheck("AMPI_Isend", comm, 1, count, 1, type, 1, tag, 1, dest, 1, buf, 1);
07307 if(ret != MPI_SUCCESS){
07308 *request = MPI_REQUEST_NULL;
07309 return ret;
07310 }
07311 #endif
07312
07313 #if AMPIMSGLOG
07314 ampiParent* pptr = getAmpiParent();
07315 if(msgLogRead){
07316 PUParray(*(pptr->fromPUPer), (char *)request, sizeof(MPI_Request));
07317 return MPI_SUCCESS;
07318 }
07319 #endif
07320
07321 USER_CALL_DEBUG("AMPI_Isend("<<type<<","<<dest<<","<<tag<<","<<comm<<")");
07322
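  // Post the send through ampi::send(); the I_SEND flag makes it return a
  // request handle, which is stored in *request for MPI_Wait/MPI_Test.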
07323 ampi *ptr = getAmpiInstance(comm);
07324 *request = ptr->send(tag, ptr->getRank(), buf, count, type, dest, comm, 0, I_SEND);
07325
07326 #if AMPIMSGLOG
07327 if(msgLogWrite && record_msglog(pptr->thisIndex)){
07328 PUParray(*(pptr->toPUPer), (char *)request, sizeof(MPI_Request));
07329 }
07330 #endif
07331
07332 return MPI_SUCCESS;
07333 }
07334
07335 AMPI_API_IMPL(int, MPI_Ibsend, const void *buf, int count, MPI_Datatype type, int dest,
07336 int tag, MPI_Comm comm, MPI_Request *request)
07337 {
07338 AMPI_API("AMPI_Ibsend");
07339 return MPI_Isend(buf, count, type, dest, tag, comm, request);
07340 }
07341
07342 AMPI_API_IMPL(int, MPI_Irsend, const void *buf, int count, MPI_Datatype type, int dest,
07343 int tag, MPI_Comm comm, MPI_Request *request)
07344 {
07345 AMPI_API("AMPI_Irsend");
07346 return MPI_Isend(buf, count, type, dest, tag, comm, request);
07347 }
07348
07349 void ampi::irecvBcast(void *buf, int count, MPI_Datatype type, int src,
07350 MPI_Comm comm, MPI_Request *request) noexcept
07351 {
07352 if (isInter()) {
07353 src = myComm.getIndexForRemoteRank(src);
07354 }
07355 AmpiRequestList& reqs = getReqs();
07356 IReq *newreq = parent->reqPool.newReq<IReq>(buf, count, type, src, MPI_BCAST_TAG, comm, getDDT());
07357 *request = reqs.insert(newreq);
07358
07359 AmpiMsg* msg = unexpectedBcastMsgs.get(MPI_BCAST_TAG, src);
07360
07361 if (msg) {
07362 newreq->receive(this, msg);
07363 }
07364 else {
07365 postedBcastReqs.put(newreq);
07366 }
07367 }
07368
07369 void ampi::irecv(void *buf, int count, MPI_Datatype type, int src,
07370 int tag, MPI_Comm comm, MPI_Request *request) noexcept
07371 {
07372 if (src==MPI_PROC_NULL) {
07373 *request = MPI_REQUEST_NULL;
07374 return;
07375 }
07376
07377 if (isInter()) {
07378 src = myComm.getIndexForRemoteRank(src);
07379 }
07380
07381 AmpiRequestList& reqs = getReqs();
07382 IReq *newreq = parent->reqPool.newReq<IReq>(buf, count, type, src, tag, comm, getDDT());
07383 *request = reqs.insert(newreq);
07384
07385 #if AMPIMSGLOG
07386 ampiParent* pptr = getAmpiParent();
07387 if(msgLogRead){
07388 PUParray(*(pptr->fromPUPer), (char *)request, sizeof(MPI_Request));
    return;
07390 }
07391 #endif
07392
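  // First look for an already-arrived (unexpected) message matching (tag, src);
  // if found, complete the new request immediately, otherwise post the request
  // so a future incoming message can match it.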
07393 AmpiMsg* msg = unexpectedMsgs.get(tag, src);
07394
07395 if (msg) {
07396 newreq->receive(this, msg);
07397 }
07398 else {
07399 postedReqs.put(newreq);
07400 }
07401
07402 #if AMPIMSGLOG
07403 if(msgLogWrite && record_msglog(pptr->thisIndex)){
07404 PUParray(*(pptr->toPUPer), (char *)request, sizeof(MPI_Request));
07405 }
07406 #endif
07407 }
07408
07409 AMPI_API_IMPL(int, MPI_Irecv, void *buf, int count, MPI_Datatype type, int src,
07410 int tag, MPI_Comm comm, MPI_Request *request)
07411 {
07412 AMPI_API("AMPI_Irecv");
07413
07414 handle_MPI_BOTTOM(buf, type);
07415
07416 #if AMPI_ERROR_CHECKING
07417 int ret = errorCheck("AMPI_Irecv", comm, 1, count, 1, type, 1, tag, 1, src, 1, buf, 1);
07418 if(ret != MPI_SUCCESS){
07419 *request = MPI_REQUEST_NULL;
07420 return ret;
07421 }
07422 #endif
07423
07424 USER_CALL_DEBUG("AMPI_Irecv("<<type<<","<<src<<","<<tag<<","<<comm<<")");
07425 ampi *ptr = getAmpiInstance(comm);
07426
07427 ptr->irecv(buf, count, type, src, tag, comm, request);
07428
07429 return MPI_SUCCESS;
07430 }
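/* Illustrative usage sketch (not part of AMPI; 'partner', 'N', 'sbuf' and
 * 'rbuf' are assumed to be set up by the caller): overlap a send and a
 * receive, then wait on both:
 *
 *   MPI_Request reqs[2];
 *   MPI_Irecv(rbuf, N, MPI_DOUBLE, partner, 0, MPI_COMM_WORLD, &reqs[0]);
 *   MPI_Isend(sbuf, N, MPI_DOUBLE, partner, 0, MPI_COMM_WORLD, &reqs[1]);
 *   MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);
 */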
07431
07432 AMPI_API_IMPL(int, MPI_Ireduce, const void *sendbuf, void *recvbuf, int count,
07433 MPI_Datatype type, MPI_Op op, int root,
07434 MPI_Comm comm, MPI_Request *request)
07435 {
07436 AMPI_API("AMPI_Ireduce");
07437
07438 handle_MPI_BOTTOM((void*&)sendbuf, type, recvbuf, type);
07439 handle_MPI_IN_PLACE((void*&)sendbuf, recvbuf);
07440
07441 #if AMPI_ERROR_CHECKING
07442 if(op == MPI_OP_NULL)
07443 return ampiErrhandler("AMPI_Ireduce", MPI_ERR_OP);
07444 int ret = errorCheck("AMPI_Ireduce", comm, 1, count, 1, type, 1, 0, 0, root, 1, sendbuf, 1,
07445 recvbuf, getAmpiInstance(comm)->getRank() == root);
07446 if(ret != MPI_SUCCESS){
07447 *request = MPI_REQUEST_NULL;
07448 return ret;
07449 }
07450 #endif
07451
07452 ampi *ptr = getAmpiInstance(comm);
07453 int rank = ptr->getRank();
07454 int size = ptr->getSize();
07455
07456 if(ptr->isInter())
07457 CkAbort("AMPI does not implement MPI_Ireduce for Inter-communicators!");
07458 if(size == 1){
07459 *request = ptr->postReq(new RednReq(recvbuf, count, type, comm, op, getDDT(), AMPI_REQ_COMPLETED));
07460 return copyDatatype(type,count,type,count,sendbuf,recvbuf);
07461 }
07462
07463 if (rank == root){
07464 *request = ptr->postReq(new RednReq(recvbuf,count,type,comm,op,getDDT()));
07465 }
07466 else {
07467 *request = ptr->postReq(new RednReq(recvbuf,count,type,comm,op,getDDT(),AMPI_REQ_COMPLETED));
07468 }
07469
07470 CkReductionMsg *msg=makeRednMsg(ptr->getDDT()->getType(type),sendbuf,count,type,rank,size,op);
07471 int rootIdx=ptr->comm2CommStruct(comm).getIndexForRank(root);
07472 CkCallback reduceCB(CkIndex_ampi::irednResult(0),CkArrayIndex1D(rootIdx),ptr->getProxy());
07473 msg->setCallback(reduceCB);
07474 ptr->contribute(msg);
07475
07476 return MPI_SUCCESS;
07477 }
07478
07479
07480 static CkReductionMsg *makeGatherMsg(const void *inbuf, int count, MPI_Datatype type, int rank, int size) noexcept
07481 {
07482 CkDDT_DataType* ddt = getDDT()->getType(type);
07483 int szdata = ddt->getSize(count);
07484 const int tupleSize = 2;
07485 CkReduction::tupleElement tupleRedn[tupleSize];
07486
07487
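  // Contribute the rank as an unsigned short when the communicator is small
  // enough, halving the per-contribution rank overhead in the tuple reduction.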
07488 unsigned short int ushortRank;
07489 if (size < std::numeric_limits<unsigned short int>::max()) {
07490 ushortRank = static_cast<unsigned short int>(rank);
07491 tupleRedn[0] = CkReduction::tupleElement(sizeof(unsigned short int), &ushortRank, CkReduction::concat);
07492 } else {
07493 tupleRedn[0] = CkReduction::tupleElement(sizeof(int), &rank, CkReduction::concat);
07494 }
07495
07496 vector<char> sbuf;
07497 if (ddt->isContig()) {
07498 tupleRedn[1] = CkReduction::tupleElement(szdata, (void*)inbuf, CkReduction::concat);
07499 } else {
07500 sbuf.resize(szdata);
07501 ddt->serialize((char*)inbuf, sbuf.data(), count, szdata, PACK);
07502 tupleRedn[1] = CkReduction::tupleElement(szdata, sbuf.data(), CkReduction::concat);
07503 }
07504
07505 return CkReductionMsg::buildFromTuple(tupleRedn, tupleSize);
07506 }
07507
07508
07509 static CkReductionMsg *makeGathervMsg(const void *inbuf, int count, MPI_Datatype type, int rank, int size) noexcept
07510 {
07511 CkDDT_DataType* ddt = getDDT()->getType(type);
07512 int szdata = ddt->getSize(count);
07513 const int tupleSize = 3;
07514 CkReduction::tupleElement tupleRedn[tupleSize];
07515
07516
07517 unsigned short int ushortRank;
07518 if (size < std::numeric_limits<unsigned short int>::max()) {
07519 ushortRank = static_cast<unsigned short int>(rank);
07520 tupleRedn[0] = CkReduction::tupleElement(sizeof(unsigned short int), &ushortRank, CkReduction::concat);
07521 } else {
07522 tupleRedn[0] = CkReduction::tupleElement(sizeof(int), &rank, CkReduction::concat);
07523 }
07524
07525 tupleRedn[1] = CkReduction::tupleElement(sizeof(int), &szdata, CkReduction::concat);
07526
07527 vector<char> sbuf;
07528 if (ddt->isContig()) {
07529 tupleRedn[2] = CkReduction::tupleElement(szdata, (void*)inbuf, CkReduction::concat);
07530 } else {
07531 sbuf.resize(szdata);
07532 ddt->serialize((char*)inbuf, sbuf.data(), count, szdata, PACK);
07533 tupleRedn[2] = CkReduction::tupleElement(szdata, sbuf.data(), CkReduction::concat);
07534 }
07535
07536 return CkReductionMsg::buildFromTuple(tupleRedn, tupleSize);
07537 }
07538
07539 AMPI_API_IMPL(int, MPI_Allgather, const void *sendbuf, int sendcount, MPI_Datatype sendtype,
07540 void *recvbuf, int recvcount, MPI_Datatype recvtype,
07541 MPI_Comm comm)
07542 {
07543 AMPI_API("AMPI_Allgather");
07544
07545 ampi *ptr = getAmpiInstance(comm);
07546 int rank = ptr->getRank();
07547 int size = ptr->getSize();
07548
07549 handle_MPI_BOTTOM((void*&)sendbuf, sendtype, recvbuf, recvtype);
07550 handle_MPI_IN_PLACE_gather((void*&)sendbuf, recvbuf, sendcount, sendtype,
07551 rank*recvcount, recvcount, recvtype);
07552
07553 #if AMPI_ERROR_CHECKING
07554 int ret;
07555 if (sendbuf != recvbuf) {
07556 ret = errorCheck("AMPI_Allgather", comm, 1, sendcount, 1, sendtype, 1, 0, 0, 0, 0, sendbuf, 1);
07557 if(ret != MPI_SUCCESS)
07558 return ret;
07559 }
07560 ret = errorCheck("AMPI_Allgather", comm, 1, recvcount, 1, recvtype, 1, 0, 0, 0, 0, recvbuf, 1);
07561 if(ret != MPI_SUCCESS)
07562 return ret;
07563 #endif
07564
07565 if(ptr->isInter())
07566 CkAbort("AMPI does not implement MPI_Allgather for Inter-communicators!");
07567 if(size == 1)
07568 return copyDatatype(sendtype,sendcount,recvtype,recvcount,sendbuf,recvbuf);
07569
07570 ptr->setBlockingReq(new GatherReq(recvbuf, recvcount, recvtype, comm, getDDT()));
07571
07572 CkReductionMsg* msg = makeGatherMsg(sendbuf, sendcount, sendtype, rank, size);
07573 CkCallback allgatherCB(CkIndex_ampi::rednResult(0), ptr->getProxy());
07574 msg->setCallback(allgatherCB);
07575 MSG_ORDER_DEBUG(CkPrintf("[%d] AMPI_Allgather called on comm %d\n", ptr->thisIndex, comm));
07576 ptr->contribute(msg);
07577
07578 ptr->blockOnColl();
07579
07580 return MPI_SUCCESS;
07581 }
07582
07583 AMPI_API_IMPL(int, MPI_Iallgather, const void *sendbuf, int sendcount, MPI_Datatype sendtype,
07584 void *recvbuf, int recvcount, MPI_Datatype recvtype,
07585 MPI_Comm comm, MPI_Request* request)
07586 {
07587 AMPI_API("AMPI_Iallgather");
07588
07589 ampi *ptr = getAmpiInstance(comm);
07590 int rank = ptr->getRank();
07591 int size = ptr->getSize();
07592
07593 handle_MPI_BOTTOM((void*&)sendbuf, sendtype, recvbuf, recvtype);
07594 handle_MPI_IN_PLACE_gather((void*&)sendbuf, recvbuf, sendcount, sendtype,
07595 rank*recvcount, recvcount, recvtype);
07596
07597 #if AMPI_ERROR_CHECKING
07598 int ret;
07599 if (sendbuf != recvbuf) {
07600 ret = errorCheck("AMPI_Iallgather", comm, 1, sendcount, 1, sendtype, 1, 0, 0, 0, 0, sendbuf, 1);
07601 if(ret != MPI_SUCCESS){
07602 *request = MPI_REQUEST_NULL;
07603 return ret;
07604 }
07605 }
07606 ret = errorCheck("AMPI_Iallgather", comm, 1, recvcount, 1, recvtype, 1, 0, 0, 0, 0, recvbuf, 1);
07607 if(ret != MPI_SUCCESS){
07608 *request = MPI_REQUEST_NULL;
07609 return ret;
07610 }
07611 #endif
07612
07613 if(ptr->isInter())
07614 CkAbort("AMPI does not implement MPI_Iallgather for Inter-communicators!");
07615 if(size == 1){
07616 *request = ptr->postReq(new GatherReq(recvbuf, recvcount, recvtype, comm, getDDT(), AMPI_REQ_COMPLETED));
07617 return copyDatatype(sendtype,sendcount,recvtype,recvcount,sendbuf,recvbuf);
07618 }
07619
07620 *request = ptr->postReq(new GatherReq(recvbuf, recvcount, recvtype, comm, getDDT()));
07621
07622 CkReductionMsg* msg = makeGatherMsg(sendbuf, sendcount, sendtype, rank, size);
07623 CkCallback allgatherCB(CkIndex_ampi::irednResult(0), ptr->getProxy());
07624 msg->setCallback(allgatherCB);
07625 MSG_ORDER_DEBUG(CkPrintf("[%d] AMPI_Iallgather called on comm %d\n", ptr->thisIndex, comm));
07626 ptr->contribute(msg);
07627
07628 return MPI_SUCCESS;
07629 }
07630
07631 AMPI_API_IMPL(int, MPI_Allgatherv, const void *sendbuf, int sendcount, MPI_Datatype sendtype,
07632 void *recvbuf, const int *recvcounts, const int *displs,
07633 MPI_Datatype recvtype, MPI_Comm comm)
07634 {
07635 AMPI_API("AMPI_Allgatherv");
07636
07637 ampi *ptr = getAmpiInstance(comm);
07638 int rank = ptr->getRank();
07639 int size = ptr->getSize();
07640
07641 handle_MPI_BOTTOM((void*&)sendbuf, sendtype, recvbuf, recvtype);
07642 handle_MPI_IN_PLACE_gatherv((void*&)sendbuf, recvbuf, sendcount, sendtype,
07643 displs, recvcounts, rank, recvtype);
07644
07645 #if AMPI_ERROR_CHECKING
07646 int ret;
07647 if (sendbuf != recvbuf) {
07648 ret = errorCheck("AMPI_Allgatherv", comm, 1, sendcount, 1, sendtype, 1, 0, 0, 0, 0, sendbuf, 1);
07649 if(ret != MPI_SUCCESS)
07650 return ret;
07651 }
07652 ret = errorCheck("AMPI_Allgatherv", comm, 1, recvcounts[0], 1, recvtype, 1, 0, 0, 0, 0, recvbuf, 1);
07653 if(ret != MPI_SUCCESS)
07654 return ret;
07655 #endif
07656
07657 if(ptr->isInter())
07658 CkAbort("AMPI does not implement MPI_Allgatherv for Inter-communicators!");
07659 if(size == 1)
07660 return copyDatatype(sendtype,sendcount,recvtype,recvcounts[0],sendbuf,recvbuf);
07661
07662 ptr->setBlockingReq(new GathervReq(recvbuf, size, recvtype, comm, recvcounts, displs, getDDT()));
07663
07664 CkReductionMsg* msg = makeGathervMsg(sendbuf, sendcount, sendtype, rank, size);
07665 CkCallback allgathervCB(CkIndex_ampi::rednResult(0), ptr->getProxy());
07666 msg->setCallback(allgathervCB);
07667 MSG_ORDER_DEBUG(CkPrintf("[%d] AMPI_Allgatherv called on comm %d\n", ptr->thisIndex, comm));
07668 ptr->contribute(msg);
07669
07670 ptr->blockOnColl();
07671
07672 return MPI_SUCCESS;
07673 }
07674
07675 AMPI_API_IMPL(int, MPI_Iallgatherv, const void *sendbuf, int sendcount, MPI_Datatype sendtype,
07676 void *recvbuf, const int *recvcounts, const int *displs,
07677 MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request)
07678 {
07679 AMPI_API("AMPI_Iallgatherv");
07680
07681 ampi *ptr = getAmpiInstance(comm);
07682 int rank = ptr->getRank();
07683 int size = ptr->getSize();
07684
07685 handle_MPI_BOTTOM((void*&)sendbuf, sendtype, recvbuf, recvtype);
07686 handle_MPI_IN_PLACE_gatherv((void*&)sendbuf, recvbuf, sendcount, sendtype,
07687 displs, recvcounts, rank, recvtype);
07688
07689 #if AMPI_ERROR_CHECKING
07690 int ret;
07691 if (sendbuf != recvbuf) {
07692 ret = errorCheck("AMPI_Iallgatherv", comm, 1, sendcount, 1, sendtype, 1, 0, 0, 0, 0, sendbuf, 1);
07693 if(ret != MPI_SUCCESS){
07694 *request = MPI_REQUEST_NULL;
07695 return ret;
07696 }
07697 }
07698 ret = errorCheck("AMPI_Iallgatherv", comm, 1, recvcounts[0], 1, recvtype, 1, 0, 0, 0, 0, recvbuf, 1);
07699 if(ret != MPI_SUCCESS){
07700 *request = MPI_REQUEST_NULL;
07701 return ret;
07702 }
07703 #endif
07704
07705 if(ptr->isInter())
07706 CkAbort("AMPI does not implement MPI_Iallgatherv for Inter-communicators!");
07707 if(size == 1){
07708 *request = ptr->postReq(new GathervReq(recvbuf, rank, recvtype, comm, recvcounts, displs,
07709 getDDT(), AMPI_REQ_COMPLETED));
07710 return copyDatatype(sendtype,sendcount,recvtype,recvcounts[0],sendbuf,recvbuf);
07711 }
07712
07713 *request = ptr->postReq(new GathervReq(recvbuf, size, recvtype, comm,
07714 recvcounts, displs, getDDT()));
07715
07716 CkReductionMsg* msg = makeGathervMsg(sendbuf, sendcount, sendtype, rank, size);
07717 CkCallback allgathervCB(CkIndex_ampi::irednResult(0), ptr->getProxy());
07718 msg->setCallback(allgathervCB);
07719 MSG_ORDER_DEBUG(CkPrintf("[%d] AMPI_Iallgatherv called on comm %d\n", ptr->thisIndex, comm));
07720 ptr->contribute(msg);
07721
07722 return MPI_SUCCESS;
07723 }
07724
07725 AMPI_API_IMPL(int, MPI_Gather, const void *sendbuf, int sendcount, MPI_Datatype sendtype,
07726 void *recvbuf, int recvcount, MPI_Datatype recvtype,
07727 int root, MPI_Comm comm)
07728 {
07729 AMPI_API("AMPI_Gather");
07730
07731 ampi *ptr = getAmpiInstance(comm);
07732 int rank = ptr->getRank();
07733 int size = ptr->getSize();
07734
07735 handle_MPI_BOTTOM((void*&)sendbuf, sendtype, recvbuf, recvtype);
07736 handle_MPI_IN_PLACE_gather((void*&)sendbuf, recvbuf, sendcount, sendtype,
07737 rank*recvcount, recvcount, recvtype);
07738
07739 #if AMPI_ERROR_CHECKING
07740 int ret;
07741 if (sendbuf != recvbuf) {
07742 ret = errorCheck("AMPI_Gather", comm, 1, sendcount, 1, sendtype, 1, 0, 0, 0, 0, sendbuf, 1);
07743 if(ret != MPI_SUCCESS)
07744 return ret;
07745 }
07746 if (getAmpiInstance(comm)->getRank() == root) {
07747 ret = errorCheck("AMPI_Gather", comm, 1, recvcount, 1, recvtype, 1, 0, 0, 0, 0, recvbuf, 1);
07748 if(ret != MPI_SUCCESS)
07749 return ret;
07750 }
07751 #endif
07752
07753 if(ptr->isInter())
07754 CkAbort("AMPI does not implement MPI_Gather for Inter-communicators!");
07755 if(size == 1)
07756 return copyDatatype(sendtype,sendcount,recvtype,recvcount,sendbuf,recvbuf);
07757
07758 #if AMPIMSGLOG
07759 ampiParent* pptr = getAmpiParent();
07760 if(msgLogRead){
07761 (*(pptr->fromPUPer))|(pptr->pupBytes);
07762 PUParray(*(pptr->fromPUPer), (char *)recvbuf, (pptr->pupBytes));
07763 return MPI_SUCCESS;
07764 }
07765 #endif
07766
07767 if (rank == root) {
07768 ptr->setBlockingReq(new GatherReq(recvbuf, recvcount, recvtype, comm, getDDT()));
07769 }
07770
07771 int rootIdx = ptr->comm2CommStruct(comm).getIndexForRank(root);
07772 CkReductionMsg* msg = makeGatherMsg(sendbuf, sendcount, sendtype, rank, size);
07773 CkCallback gatherCB(CkIndex_ampi::rednResult(0), CkArrayIndex1D(rootIdx), ptr->getProxy());
07774 msg->setCallback(gatherCB);
07775 MSG_ORDER_DEBUG(CkPrintf("[%d] AMPI_Gather called on comm %d root %d \n", ptr->thisIndex, comm, rootIdx));
07776 ptr->contribute(msg);
07777
07778 if (rank == root) {
07779 ptr->blockOnColl();
07780 }
07781
07782 #if AMPIMSGLOG
07783 if(msgLogWrite && record_msglog(pptr->thisIndex)){
07784 (pptr->pupBytes) = getDDT()->getSize(recvtype) * recvcount * size;
07785 (*(pptr->toPUPer))|(pptr->pupBytes);
07786 PUParray(*(pptr->toPUPer), (char *)recvbuf, (pptr->pupBytes));
07787 }
07788 #endif
07789
07790 return MPI_SUCCESS;
07791 }
07792
07793 AMPI_API_IMPL(int, MPI_Igather, const void *sendbuf, int sendcount, MPI_Datatype sendtype,
07794 void *recvbuf, int recvcount, MPI_Datatype recvtype,
07795 int root, MPI_Comm comm, MPI_Request *request)
07796 {
07797 AMPI_API("AMPI_Igather");
07798
07799 ampi *ptr = getAmpiInstance(comm);
07800 int rank = ptr->getRank();
07801 int size = ptr->getSize();
07802
07803 handle_MPI_BOTTOM((void*&)sendbuf, sendtype, recvbuf, recvtype);
07804 handle_MPI_IN_PLACE_gather((void*&)sendbuf, recvbuf, sendcount, sendtype,
07805 rank*recvcount, recvcount, recvtype);
07806
07807 #if AMPI_ERROR_CHECKING
07808 int ret;
07809 if (sendbuf != recvbuf) {
07810 ret = errorCheck("AMPI_Igather", comm, 1, sendcount, 1, sendtype, 1, 0, 0, 0, 0, sendbuf, 1);
07811 if(ret != MPI_SUCCESS){
07812 *request = MPI_REQUEST_NULL;
07813 return ret;
07814 }
07815 }
07816 if (getAmpiInstance(comm)->getRank() == root) {
07817 ret = errorCheck("AMPI_Igather", comm, 1, recvcount, 1, recvtype, 1, 0, 0, 0, 0, recvbuf, 1);
07818 if(ret != MPI_SUCCESS){
07819 *request = MPI_REQUEST_NULL;
07820 return ret;
07821 }
07822 }
07823 #endif
07824
07825 if(ptr->isInter())
07826 CkAbort("AMPI does not implement MPI_Igather for Inter-communicators!");
07827 if(size == 1){
07828 *request = ptr->postReq(new GatherReq(recvbuf, recvcount, recvtype, comm, getDDT(), AMPI_REQ_COMPLETED));
07829 return copyDatatype(sendtype,sendcount,recvtype,recvcount,sendbuf,recvbuf);
07830 }
07831
07832 #if AMPIMSGLOG
07833 ampiParent* pptr = getAmpiParent();
07834 if(msgLogRead){
07835 (*(pptr->fromPUPer))|(pptr->pupBytes);
07836 PUParray(*(pptr->fromPUPer), (char *)recvbuf, (pptr->pupBytes));
07837 return MPI_SUCCESS;
07838 }
07839 #endif
07840
07841 if (rank == root) {
07842 *request = ptr->postReq(new GatherReq(recvbuf, recvcount, recvtype, comm, getDDT()));
07843 }
07844 else {
07845 *request = ptr->postReq(new GatherReq(recvbuf, recvcount, recvtype, comm, getDDT(), AMPI_REQ_COMPLETED));
07846 }
07847
07848 int rootIdx = ptr->comm2CommStruct(comm).getIndexForRank(root);
07849 CkReductionMsg* msg = makeGatherMsg(sendbuf, sendcount, sendtype, rank, size);
07850 CkCallback gatherCB(CkIndex_ampi::irednResult(0), CkArrayIndex1D(rootIdx), ptr->getProxy());
07851 msg->setCallback(gatherCB);
07852 MSG_ORDER_DEBUG(CkPrintf("[%d] AMPI_Igather called on comm %d root %d \n", ptr->thisIndex, comm, rootIdx));
07853 ptr->contribute(msg);
07854
07855 #if AMPIMSGLOG
07856 if(msgLogWrite && record_msglog(pptr->thisIndex)){
07857 (pptr->pupBytes) = getDDT()->getSize(recvtype) * recvcount * size;
07858 (*(pptr->toPUPer))|(pptr->pupBytes);
07859 PUParray(*(pptr->toPUPer), (char *)recvbuf, (pptr->pupBytes));
07860 }
07861 #endif
07862
07863 return MPI_SUCCESS;
07864 }
07865
07866 AMPI_API_IMPL(int, MPI_Gatherv, const void *sendbuf, int sendcount, MPI_Datatype sendtype,
07867 void *recvbuf, const int *recvcounts, const int *displs,
07868 MPI_Datatype recvtype, int root, MPI_Comm comm)
07869 {
07870 AMPI_API("AMPI_Gatherv");
07871
07872 ampi *ptr = getAmpiInstance(comm);
07873 int rank = ptr->getRank();
07874 int size = ptr->getSize();
07875
07876 handle_MPI_BOTTOM((void*&)sendbuf, sendtype, recvbuf, recvtype);
07877 handle_MPI_IN_PLACE_gatherv((void*&)sendbuf, recvbuf, sendcount, sendtype,
07878 displs, recvcounts, rank, recvtype);
07879
07880 #if AMPI_ERROR_CHECKING
07881 int ret;
07882 if (sendbuf != recvbuf) {
07883 ret = errorCheck("AMPI_Gatherv", comm, 1, sendcount, 1, sendtype, 1, 0, 0, 0, 0, sendbuf, 1);
07884 if(ret != MPI_SUCCESS)
07885 return ret;
07886 }
07887 if (getAmpiInstance(comm)->getRank() == root) {
07888 ret = errorCheck("AMPI_Gatherv", comm, 1, recvcounts[0], 1, recvtype, 1, 0, 0, 0, 0, recvbuf, 1);
07889 if(ret != MPI_SUCCESS)
07890 return ret;
07891 }
07892 #endif
07893
07894 if(ptr->isInter())
07895 CkAbort("AMPI does not implement MPI_Gatherv for Inter-communicators!");
07896 if(size == 1)
07897 return copyDatatype(sendtype,sendcount,recvtype,recvcounts[0],sendbuf,recvbuf);
07898
07899 #if AMPIMSGLOG
07900 ampiParent* pptr = getAmpiParent();
07901 if(msgLogRead){
07902 int commsize;
07903 int itemsize = getDDT()->getSize(recvtype);
07904 (*(pptr->fromPUPer))|commsize;
07905 for(int i=0;i<commsize;i++){
07906 (*(pptr->fromPUPer))|(pptr->pupBytes);
07907 PUParray(*(pptr->fromPUPer), (char *)(((char*)recvbuf)+(itemsize*displs[i])), (pptr->pupBytes));
07908 }
07909 return MPI_SUCCESS;
07910 }
07911 #endif
07912
07913 if (rank == root) {
07914 ptr->setBlockingReq(new GathervReq(recvbuf, size, recvtype, comm, recvcounts, displs, getDDT()));
07915 }
07916
07917 int rootIdx = ptr->comm2CommStruct(comm).getIndexForRank(root);
07918 CkReductionMsg* msg = makeGathervMsg(sendbuf, sendcount, sendtype, rank, size);
07919 CkCallback gathervCB(CkIndex_ampi::rednResult(0), CkArrayIndex1D(rootIdx), ptr->getProxy());
07920 msg->setCallback(gathervCB);
07921 MSG_ORDER_DEBUG(CkPrintf("[%d] AMPI_Gatherv called on comm %d root %d \n", ptr->thisIndex, comm, rootIdx));
07922 ptr->contribute(msg);
07923
07924 if (rank == root) {
07925 ptr->blockOnColl();
07926 }
07927
07928 #if AMPIMSGLOG
  if(msgLogWrite && record_msglog(pptr->thisIndex)){
    int itemsize = getDDT()->getSize(recvtype);
    for(int i=0;i<size;i++){
07931 (pptr->pupBytes) = getDDT()->getSize(recvtype) * recvcounts[i];
07932 (*(pptr->toPUPer))|(pptr->pupBytes);
07933 PUParray(*(pptr->toPUPer), (char *)(((char*)recvbuf)+(itemsize*displs[i])), (pptr->pupBytes));
07934 }
07935 }
07936 #endif
07937
07938 return MPI_SUCCESS;
07939 }
07940
07941 AMPI_API_IMPL(int, MPI_Igatherv, const void *sendbuf, int sendcount, MPI_Datatype sendtype,
07942 void *recvbuf, const int *recvcounts, const int *displs,
07943 MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request *request)
07944 {
07945 AMPI_API("AMPI_Igatherv");
07946
07947 ampi *ptr = getAmpiInstance(comm);
07948 int rank = ptr->getRank();
07949 int size = ptr->getSize();
07950
07951 handle_MPI_BOTTOM((void*&)sendbuf, sendtype, recvbuf, recvtype);
07952 handle_MPI_IN_PLACE_gatherv((void*&)sendbuf, recvbuf, sendcount, sendtype,
07953 displs, recvcounts, rank, recvtype);
07954
07955 #if AMPI_ERROR_CHECKING
07956 int ret;
07957 if (sendbuf != recvbuf) {
07958 ret = errorCheck("AMPI_Igatherv", comm, 1, sendcount, 1, sendtype, 1, 0, 0, 0, 0, sendbuf, 1);
07959 if(ret != MPI_SUCCESS){
07960 *request = MPI_REQUEST_NULL;
07961 return ret;
07962 }
07963 }
07964 if (getAmpiInstance(comm)->getRank() == root) {
07965 ret = errorCheck("AMPI_Igatherv", comm, 1, recvcounts[0], 1, recvtype, 1, 0, 0, 0, 0, recvbuf, 1);
07966 if(ret != MPI_SUCCESS){
07967 *request = MPI_REQUEST_NULL;
07968 return ret;
07969 }
07970 }
07971 #endif
07972
07973 if(ptr->isInter())
07974 CkAbort("AMPI does not implement MPI_Igatherv for Inter-communicators!");
07975 if(size == 1){
07976 *request = ptr->postReq(new GathervReq(recvbuf, rank, recvtype, comm, recvcounts, displs,
07977 getDDT(), AMPI_REQ_COMPLETED));
07978 return copyDatatype(sendtype,sendcount,recvtype,recvcounts[0],sendbuf,recvbuf);
07979 }
07980
07981 #if AMPIMSGLOG
07982 ampiParent* pptr = getAmpiParent();
07983 if(msgLogRead){
07984 int commsize;
07985 int itemsize = getDDT()->getSize(recvtype);
07986 (*(pptr->fromPUPer))|commsize;
07987 for(int i=0;i<commsize;i++){
07988 (*(pptr->fromPUPer))|(pptr->pupBytes);
07989 PUParray(*(pptr->fromPUPer), (char *)(((char*)recvbuf)+(itemsize*displs[i])), (pptr->pupBytes));
07990 }
07991 return MPI_SUCCESS;
07992 }
07993 #endif
07994
07995 if (rank == root) {
07996 *request = ptr->postReq(new GathervReq(recvbuf, size, recvtype, comm,
07997 recvcounts, displs, getDDT()));
07998 }
07999 else {
08000 *request = ptr->postReq(new GathervReq(recvbuf, size, recvtype, comm,
08001 recvcounts, displs, getDDT(), AMPI_REQ_COMPLETED));
08002 }
08003
08004 int rootIdx = ptr->comm2CommStruct(comm).getIndexForRank(root);
08005 CkReductionMsg* msg = makeGathervMsg(sendbuf, sendcount, sendtype, rank, size);
08006 CkCallback gathervCB(CkIndex_ampi::irednResult(0), CkArrayIndex1D(rootIdx), ptr->getProxy());
08007 msg->setCallback(gathervCB);
08008 MSG_ORDER_DEBUG(CkPrintf("[%d] AMPI_Igatherv called on comm %d root %d \n", ptr->thisIndex, comm, rootIdx));
08009 ptr->contribute(msg);
08010
08011 #if AMPIMSGLOG
  if(msgLogWrite && record_msglog(pptr->thisIndex)){
    int itemsize = getDDT()->getSize(recvtype);
    for(int i=0;i<size;i++){
08014 (pptr->pupBytes) = getDDT()->getSize(recvtype) * recvcounts[i];
08015 (*(pptr->toPUPer))|(pptr->pupBytes);
08016 PUParray(*(pptr->toPUPer), (char *)(((char*)recvbuf)+(itemsize*displs[i])), (pptr->pupBytes));
08017 }
08018 }
08019 #endif
08020
08021 return MPI_SUCCESS;
08022 }
08023
08024 AMPI_API_IMPL(int, MPI_Scatter, const void *sendbuf, int sendcount, MPI_Datatype sendtype,
08025 void *recvbuf, int recvcount, MPI_Datatype recvtype,
08026 int root, MPI_Comm comm)
08027 {
08028 AMPI_API("AMPI_Scatter");
08029
08030 handle_MPI_BOTTOM((void*&)sendbuf, sendtype, recvbuf, recvtype);
08031 handle_MPI_IN_PLACE((void*&)sendbuf,recvbuf);
08032
08033 #if AMPI_ERROR_CHECKING
08034 int ret;
08035 if (getAmpiInstance(comm)->getRank() == root) {
08036 ret = errorCheck("AMPI_Scatter", comm, 1, sendcount, 1, sendtype, 1, 0, 0, 0, 0, sendbuf, 1);
08037 if(ret != MPI_SUCCESS)
08038 return ret;
08039 }
08040 if (sendbuf != recvbuf || getAmpiInstance(comm)->getRank() != root) {
08041 ret = errorCheck("AMPI_Scatter", comm, 1, recvcount, 1, recvtype, 1, 0, 0, 0, 0, recvbuf, 1);
08042 if(ret != MPI_SUCCESS)
08043 return ret;
08044 }
08045 #endif
08046
08047 ampi *ptr = getAmpiInstance(comm);
08048
08049 if(getAmpiParent()->isInter(comm)) {
08050 return ptr->intercomm_scatter(root,sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype,comm);
08051 }
08052 if(ptr->getSize() == 1)
08053 return copyDatatype(sendtype,sendcount,recvtype,recvcount,sendbuf,recvbuf);
08054
08055 #if AMPIMSGLOG
08056 ampiParent* pptr = getAmpiParent();
08057 if(msgLogRead){
08058 (*(pptr->fromPUPer))|(pptr->pupBytes);
08059 PUParray(*(pptr->fromPUPer), (char *)recvbuf, (pptr->pupBytes));
08060 return MPI_SUCCESS;
08061 }
08062 #endif
08063
08064 int size = ptr->getSize();
08065 int rank = ptr->getRank();
08066 int i;
08067
08068 if(rank==root) {
08069 CkDDT_DataType* dttype = ptr->getDDT()->getType(sendtype) ;
08070 int itemextent = dttype->getExtent() * sendcount;
08071 for(i=0;i<size;i++) {
08072 if (i != rank) {
08073 ptr->send(MPI_SCATTER_TAG, rank, ((char*)sendbuf)+(itemextent*i),
08074 sendcount, sendtype, i, comm);
08075 }
08076 }
08077 if (sendbuf != recvbuf) {
08078 copyDatatype(sendtype,sendcount,recvtype,recvcount,(char*)sendbuf+(itemextent*rank),recvbuf);
08079 }
08080 }
08081 else {
08082 if(-1==ptr->recv(MPI_SCATTER_TAG, root, recvbuf, recvcount, recvtype, comm))
08083 CkAbort("AMPI> Error in MPI_Scatter recv");
08084 }
08085
08086 #if AMPIMSGLOG
08087 if(msgLogWrite && record_msglog(pptr->thisIndex)){
08088 (pptr->pupBytes) = getDDT()->getSize(recvtype) * recvcount;
08089 (*(pptr->toPUPer))|(pptr->pupBytes);
08090 PUParray(*(pptr->toPUPer), (char *)recvbuf, (pptr->pupBytes));
08091 }
08092 #endif
08093
08094 return MPI_SUCCESS;
08095 }
08096
08097 AMPI_API_IMPL(int, MPI_Iscatter, const void *sendbuf, int sendcount, MPI_Datatype sendtype,
08098 void *recvbuf, int recvcount, MPI_Datatype recvtype,
08099 int root, MPI_Comm comm, MPI_Request *request)
08100 {
08101 AMPI_API("AMPI_Iscatter");
08102
08103 handle_MPI_BOTTOM((void*&)sendbuf, sendtype, recvbuf, recvtype);
08104 handle_MPI_IN_PLACE((void*&)sendbuf,recvbuf);
08105
08106 #if AMPI_ERROR_CHECKING
08107 int ret;
08108 if (getAmpiInstance(comm)->getRank() == root) {
08109 ret = errorCheck("AMPI_Iscatter", comm, 1, sendcount, 1, sendtype, 1, 0, 0, 0, 0, sendbuf, 1);
08110 if(ret != MPI_SUCCESS){
08111 *request = MPI_REQUEST_NULL;
08112 return ret;
08113 }
08114 }
08115 if (sendbuf != recvbuf || getAmpiInstance(comm)->getRank() != root) {
08116 ret = errorCheck("AMPI_Iscatter", comm, 1, recvcount, 1, recvtype, 1, 0, 0, 0, 0, recvbuf, 1);
08117 if(ret != MPI_SUCCESS){
08118 *request = MPI_REQUEST_NULL;
08119 return ret;
08120 }
08121 }
08122 #endif
08123
08124 ampi *ptr = getAmpiInstance(comm);
08125
08126 if(getAmpiParent()->isInter(comm)) {
08127 return ptr->intercomm_iscatter(root,sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype,comm,request);
08128 }
08129 if(ptr->getSize() == 1){
08130 *request = ptr->postReq(getAmpiParent()->reqPool.newReq<IReq>(recvbuf,recvcount,recvtype,root,MPI_SCATTER_TAG,comm,
08131 getDDT(), AMPI_REQ_COMPLETED));
08132 return copyDatatype(sendtype,sendcount,recvtype,recvcount,sendbuf,recvbuf);
08133 }
08134
08135 #if AMPIMSGLOG
08136 ampiParent* pptr = getAmpiParent();
08137 if(msgLogRead){
08138 (*(pptr->fromPUPer))|(pptr->pupBytes);
08139 PUParray(*(pptr->fromPUPer), (char *)recvbuf, (pptr->pupBytes));
08140 return MPI_SUCCESS;
08141 }
08142 #endif
08143
08144 int size = ptr->getSize();
08145 int rank = ptr->getRank();
08146 int i;
08147
08148 if(rank==root) {
08149 CkDDT_DataType* dttype = ptr->getDDT()->getType(sendtype) ;
08150 int itemextent = dttype->getExtent() * sendcount;
08151
08152 ATAReq *newreq = new ATAReq(size);
08153 for(i=0;i<size;i++) {
08154 if (i != rank) {
08155 newreq->reqs[i] = ptr->send(MPI_SCATTER_TAG, rank, (char*)sendbuf+(itemextent*i),
08156 sendcount, sendtype, i, comm, 0, I_SEND);
08157 }
08158 }
08159 newreq->reqs[rank] = MPI_REQUEST_NULL;
08160
08161 if (sendbuf != recvbuf) {
08162 copyDatatype(sendtype,sendcount,recvtype,recvcount,(char*)sendbuf+(itemextent*rank),recvbuf);
08163 }
08164 *request = ptr->postReq(newreq);
08165 }
08166 else {
08167 ptr->irecv(recvbuf,recvcount,recvtype,root,MPI_SCATTER_TAG,comm,request);
08168 }
08169
08170 #if AMPIMSGLOG
08171 if(msgLogWrite && record_msglog(pptr->thisIndex)){
08172 (pptr->pupBytes) = getDDT()->getSize(recvtype) * recvcount;
08173 (*(pptr->toPUPer))|(pptr->pupBytes);
08174 PUParray(*(pptr->toPUPer), (char *)recvbuf, (pptr->pupBytes));
08175 }
08176 #endif
08177
08178 return MPI_SUCCESS;
08179 }
08180
08181 AMPI_API_IMPL(int, MPI_Scatterv, const void *sendbuf, const int *sendcounts, const int *displs,
08182 MPI_Datatype sendtype, void *recvbuf, int recvcount,
08183 MPI_Datatype recvtype, int root, MPI_Comm comm)
08184 {
08185 AMPI_API("AMPI_Scatterv");
08186
08187 handle_MPI_BOTTOM((void*&)sendbuf, sendtype, recvbuf, recvtype);
08188 handle_MPI_IN_PLACE((void*&)sendbuf, recvbuf);
08189
08190 #if AMPI_ERROR_CHECKING
08191 int ret;
08192 if (getAmpiInstance(comm)->getRank() == root) {
08193 ret = errorCheck("AMPI_Scatterv", comm, 1, 0, 0, sendtype, 1, 0, 0, 0, 0, sendbuf, 1);
08194 if(ret != MPI_SUCCESS)
08195 return ret;
08196 }
08197 if (sendbuf != recvbuf || getAmpiInstance(comm)->getRank() != root) {
08198 ret = errorCheck("AMPI_Scatterv", comm, 1, recvcount, 1, recvtype, 1, 0, 0, 0, 0, recvbuf, 1);
08199 if(ret != MPI_SUCCESS)
08200 return ret;
08201 }
08202 #endif
08203
08204 ampi* ptr = getAmpiInstance(comm);
08205
08206 if (getAmpiParent()->isInter(comm)) {
08207 return ptr->intercomm_scatterv(root, sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, comm);
08208 }
08209 if(ptr->getSize() == 1)
08210 return copyDatatype(sendtype,sendcounts[0],recvtype,recvcount,sendbuf,recvbuf);
08211
08212 #if AMPIMSGLOG
08213 ampiParent* pptr = getAmpiParent();
08214 if(msgLogRead){
08215 (*(pptr->fromPUPer))|(pptr->pupBytes);
08216 PUParray(*(pptr->fromPUPer), (char *)recvbuf, (pptr->pupBytes));
08217 return MPI_SUCCESS;
08218 }
08219 #endif
08220
08221 int size = ptr->getSize();
08222 int rank = ptr->getRank();
08223 int i;
08224
08225 if(rank == root) {
08226 CkDDT_DataType* dttype = ptr->getDDT()->getType(sendtype) ;
08227 int itemextent = dttype->getExtent();
08228 for(i=0;i<size;i++) {
08229 if (i != rank) {
08230 ptr->send(MPI_SCATTER_TAG, rank, ((char*)sendbuf)+(itemextent*displs[i]),
08231 sendcounts[i], sendtype, i, comm);
08232 }
08233 }
08234 if (sendbuf != recvbuf) {
08235 copyDatatype(sendtype,sendcounts[rank],recvtype,recvcount,(char*)sendbuf+(itemextent*displs[rank]),recvbuf);
08236 }
08237 }
08238 else {
08239 if(-1==ptr->recv(MPI_SCATTER_TAG, root, recvbuf, recvcount, recvtype, comm))
08240 CkAbort("AMPI> Error in MPI_Scatterv recv");
08241 }
08242
08243 #if AMPIMSGLOG
08244 if(msgLogWrite && record_msglog(pptr->thisIndex)){
08245 (pptr->pupBytes) = getDDT()->getSize(recvtype) * recvcount;
08246 (*(pptr->toPUPer))|(pptr->pupBytes);
08247 PUParray(*(pptr->toPUPer), (char *)recvbuf, (pptr->pupBytes));
08248 }
08249 #endif
08250
08251 return MPI_SUCCESS;
08252 }
08253
08254 AMPI_API_IMPL(int, MPI_Iscatterv, const void *sendbuf, const int *sendcounts, const int *displs,
08255 MPI_Datatype sendtype, void *recvbuf, int recvcount,
08256 MPI_Datatype recvtype, int root, MPI_Comm comm,
08257 MPI_Request *request)
08258 {
08259 AMPI_API("AMPI_Iscatterv");
08260
08261 handle_MPI_BOTTOM((void*&)sendbuf, sendtype, recvbuf, recvtype);
08262 handle_MPI_IN_PLACE((void*&)sendbuf,recvbuf);
08263
08264 #if AMPI_ERROR_CHECKING
08265 int ret;
08266 if (getAmpiInstance(comm)->getRank() == root) {
08267 ret = errorCheck("AMPI_Iscatterv", comm, 1, 0, 0, sendtype, 1, 0, 0, 0, 0, sendbuf, 1);
08268 if(ret != MPI_SUCCESS){
08269 *request = MPI_REQUEST_NULL;
08270 return ret;
08271 }
08272 }
08273 if (sendbuf != recvbuf || getAmpiInstance(comm)->getRank() != root) {
08274 ret = errorCheck("AMPI_Iscatterv", comm, 1, recvcount, 1, recvtype, 1, 0, 0, 0, 0, recvbuf, 1);
08275 if(ret != MPI_SUCCESS){
08276 *request = MPI_REQUEST_NULL;
08277 return ret;
08278 }
08279 }
08280 #endif
08281
08282 ampi* ptr = getAmpiInstance(comm);
08283
08284 if (getAmpiParent()->isInter(comm)) {
08285 return ptr->intercomm_iscatterv(root, sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, comm, request);
08286 }
08287 if(ptr->getSize() == 1){
08288 *request = ptr->postReq(getAmpiParent()->reqPool.newReq<IReq>(recvbuf,recvcount,recvtype,root,MPI_SCATTER_TAG,comm,
08289 getDDT(), AMPI_REQ_COMPLETED));
08290 return copyDatatype(sendtype,sendcounts[0],recvtype,recvcount,sendbuf,recvbuf);
08291 }
08292
08293 #if AMPIMSGLOG
08294 ampiParent* pptr = getAmpiParent();
08295 if(msgLogRead){
08296 (*(pptr->fromPUPer))|(pptr->pupBytes);
08297 PUParray(*(pptr->fromPUPer), (char *)recvbuf, (pptr->pupBytes));
08298 return MPI_SUCCESS;
08299 }
08300 #endif
08301
08302 int size = ptr->getSize();
08303 int rank = ptr->getRank();
08304 int i;
08305
08306 if(rank == root) {
08307 CkDDT_DataType* dttype = ptr->getDDT()->getType(sendtype) ;
08308 int itemextent = dttype->getExtent();
08309
08310 ATAReq *newreq = new ATAReq(size);
08311 for(i=0;i<size;i++) {
08312 if (i != rank) {
08313 newreq->reqs[i] = ptr->send(MPI_SCATTER_TAG, rank, ((char*)sendbuf)+(itemextent*displs[i]),
08314 sendcounts[i], sendtype, i, comm, 0, I_SEND);
08315 }
08316 }
08317 newreq->reqs[rank] = MPI_REQUEST_NULL;
08318
08319 if (sendbuf != recvbuf) {
08320 copyDatatype(sendtype,sendcounts[rank],recvtype,recvcount,(char*)sendbuf+(itemextent*displs[rank]),recvbuf);
08321 }
08322 *request = ptr->postReq(newreq);
08323 }
08324 else {
08325
08326 ptr->irecv(recvbuf,recvcount,recvtype,root,MPI_SCATTER_TAG,comm,request);
08327 }
08328
08329 #if AMPIMSGLOG
08330 if(msgLogWrite && record_msglog(pptr->thisIndex)){
08331 (pptr->pupBytes) = getDDT()->getSize(recvtype) * recvcount;
08332 (*(pptr->toPUPer))|(pptr->pupBytes);
08333 PUParray(*(pptr->toPUPer), (char *)recvbuf, (pptr->pupBytes));
08334 }
08335 #endif
08336
08337 return MPI_SUCCESS;
08338 }
08339
08340 AMPI_API_IMPL(int, MPI_Alltoall, const void *sendbuf, int sendcount, MPI_Datatype sendtype,
08341 void *recvbuf, int recvcount, MPI_Datatype recvtype,
08342 MPI_Comm comm)
08343 {
08344 AMPI_API("AMPI_Alltoall");
08345
08346 handle_MPI_BOTTOM((void*&)sendbuf, sendtype, recvbuf, recvtype);
08347 handle_MPI_IN_PLACE_alltoall((void*&)sendbuf, recvbuf, sendcount, sendtype, recvcount, recvtype);
08348
08349 #if AMPI_ERROR_CHECKING
08350 int ret;
08351 if (sendbuf != recvbuf) {
08352 ret = errorCheck("AMPI_Alltoall", comm, 1, sendcount, 1, sendtype, 1, 0, 0, 0, 0, sendbuf, 1);
08353 if(ret != MPI_SUCCESS)
08354 return ret;
08355 }
08356 ret = errorCheck("AMPI_Alltoall", comm, 1, recvcount, 1, recvtype, 1, 0, 0, 0, 0, recvbuf, 1);
08357 if(ret != MPI_SUCCESS)
08358 return ret;
08359 #endif
08360
08361 ampi *ptr = getAmpiInstance(comm);
08362
08363 if(getAmpiParent()->isInter(comm))
08364 CkAbort("AMPI does not implement MPI_Alltoall for Inter-communicators!");
08365 if(ptr->getSize() == 1)
08366 return copyDatatype(sendtype,sendcount,recvtype,recvcount,sendbuf,recvbuf);
08367
08368 int itemsize = getDDT()->getSize(sendtype) * sendcount;
08369 int itemextent = getDDT()->getExtent(sendtype) * sendcount;
08370 int extent = getDDT()->getExtent(recvtype) * recvcount;
08371 int size = ptr->getSize();
08372 int rank = ptr->getRank();
08373
08374 #if CMK_BIGSIM_CHARM
08375 TRACE_BG_AMPI_LOG(MPI_ALLTOALL, itemextent);
08376 #endif
08377
08378
08379
08380
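  // Algorithm selection: in-place uses pairwise sendrecv_replace; short
  // messages on small communicators post all irecvs/isends at once; medium
  // messages are throttled in batches of AMPI_ALLTOALL_THROTTLE; large
  // messages fall back to a pairwise exchange (XOR schedule when the
  // communicator size is a power of two).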
08381 if (recvbuf == sendbuf) {
08382 for (int i=0; i<size; i++) {
08383 for (int j=i; j<size; j++) {
08384 if (rank == i) {
08385 ptr->sendrecv_replace(((char *)recvbuf + j*extent),
08386 recvcount, recvtype, j, MPI_ATA_TAG, j,
08387 MPI_ATA_TAG, comm, MPI_STATUS_IGNORE);
08388 }
08389 else if (rank == j) {
08390 ptr->sendrecv_replace(((char *)recvbuf + i*extent),
08391 recvcount, recvtype, i, MPI_ATA_TAG, i,
08392 MPI_ATA_TAG, comm, MPI_STATUS_IGNORE);
08393 }
08394 }
08395 }
08396 }
08397 else if (itemsize <= AMPI_ALLTOALL_SHORT_MSG && size <= AMPI_ALLTOALL_THROTTLE) {
08398 vector<MPI_Request> reqs(size*2);
08399 for (int i=0; i<size; i++) {
08400 int src = (rank+i) % size;
08401 ptr->irecv(((char*)recvbuf)+(extent*src), recvcount, recvtype,
08402 src, MPI_ATA_TAG, comm, &reqs[i]);
08403 }
08404 for (int i=0; i<size; i++) {
08405 int dst = (rank+i) % size;
08406 reqs[size+i] = ptr->send(MPI_ATA_TAG, rank, ((char*)sendbuf)+(itemextent*dst),
08407 sendcount, sendtype, dst, comm, 0, I_SEND);
08408 }
08409 MPI_Waitall(reqs.size(), reqs.data(), MPI_STATUSES_IGNORE);
08410 }
08411 else if (itemsize <= AMPI_ALLTOALL_LONG_MSG) {
08412
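    // Don't post all sends and recvs at once: issue at most
    // AMPI_ALLTOALL_THROTTLE recv/send pairs per batch, then wait on them.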
08413 vector<MPI_Request> reqs(AMPI_ALLTOALL_THROTTLE*2);
08414 for (int j=0; j<size; j+=AMPI_ALLTOALL_THROTTLE) {
08415 int blockSize = std::min(size - j, AMPI_ALLTOALL_THROTTLE);
08416 for (int i=0; i<blockSize; i++) {
08417 int src = (rank + j + i) % size;
08418 ptr->irecv(((char*)recvbuf)+(extent*src), recvcount, recvtype,
08419 src, MPI_ATA_TAG, comm, &reqs[i]);
08420 }
08421 for (int i=0; i<blockSize; i++) {
08422 int dst = (rank - j - i + size) % size;
        reqs[blockSize+i] = ptr->send(MPI_ATA_TAG, rank, ((char*)sendbuf)+(itemextent*dst),
                                      sendcount, sendtype, dst, comm, 0, I_SEND);
08425 }
08426 MPI_Waitall(blockSize*2, reqs.data(), MPI_STATUSES_IGNORE);
08427 }
08428 }
08429 else {
08430
08431
08432
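    // Large messages: do a pairwise exchange, one partner per iteration, to
    // bound the number of messages in flight. For power-of-two communicator
    // sizes the partner is chosen by XOR so each pair exchanges in the same step.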
08433 int src, dst;
08434
08435
08436 int pof2 = 1;
08437 while (pof2 < size)
08438 pof2 *= 2;
08439 bool isPof2 = (pof2 == size);
08440
08441
08442 for (int i=0; i<size; i++) {
08443 if (isPof2) {
08444
08445 src = dst = rank ^ i;
08446 }
08447 else {
08448 src = (rank - i + size) % size;
08449 dst = (rank + i) % size;
08450 }
08451
08452 ptr->sendrecv(((char *)sendbuf + dst*itemextent), sendcount, sendtype, dst, MPI_ATA_TAG,
08453 ((char *)recvbuf + src*extent), recvcount, recvtype, src, MPI_ATA_TAG,
08454 comm, MPI_STATUS_IGNORE);
08455 }
08456 }
08457
08458 return MPI_SUCCESS;
08459 }
08460
08461 AMPI_API_IMPL(int, MPI_Ialltoall, const void *sendbuf, int sendcount, MPI_Datatype sendtype,
08462 void *recvbuf, int recvcount, MPI_Datatype recvtype,
08463 MPI_Comm comm, MPI_Request *request)
08464 {
08465 AMPI_API("AMPI_Ialltoall");
08466
08467 handle_MPI_BOTTOM((void*&)sendbuf, sendtype, recvbuf, recvtype);
08468 handle_MPI_IN_PLACE_alltoall((void*&)sendbuf, recvbuf, sendcount, sendtype, recvcount, recvtype);
08469
08470 #if AMPI_ERROR_CHECKING
08471 int ret;
08472 if (sendbuf != recvbuf) {
08473 ret = errorCheck("AMPI_Ialltoall", comm, 1, sendcount, 1, sendtype, 1, 0, 0, 0, 0, sendbuf, 1);
08474 if(ret != MPI_SUCCESS){
08475 *request = MPI_REQUEST_NULL;
08476 return ret;
08477 }
08478 }
08479 ret = errorCheck("AMPI_Ialltoall", comm, 1, recvcount, 1, recvtype, 1, 0, 0, 0, 0, recvbuf, 1);
08480 if(ret != MPI_SUCCESS){
08481 *request = MPI_REQUEST_NULL;
08482 return ret;
08483 }
08484 #endif
08485
08486 ampi *ptr = getAmpiInstance(comm);
08487 int size = ptr->getSize();
08488
08489 if(getAmpiParent()->isInter(comm))
08490 CkAbort("AMPI does not implement MPI_Ialltoall for Inter-communicators!");
08491 if(size == 1){
08492 *request = ptr->postReq(getAmpiParent()->reqPool.newReq<IReq>(recvbuf,recvcount,recvtype,ptr->getRank(),MPI_ATA_TAG,comm,
08493 getDDT(), AMPI_REQ_COMPLETED));
08494 return copyDatatype(sendtype,sendcount,recvtype,recvcount,sendbuf,recvbuf);
08495 }
08496
08497 int rank = ptr->getRank();
08498 int itemsize = getDDT()->getSize(sendtype) * sendcount;
08499 int extent = getDDT()->getExtent(recvtype) * recvcount;
08500
08501
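  // Post all irecvs and isends and bundle them into one ATAReq: the first
  // 'size' slots hold the recv requests, the next 'size' slots the send
  // requests, and the returned request completes only when all of them do.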
08502 ATAReq *newreq = new ATAReq(size*2);
08503 for (int i=0; i<size; i++) {
08504 ptr->irecv((char*)recvbuf+(extent*i), recvcount, recvtype, i, MPI_ATA_TAG, comm, &newreq->reqs[i]);
08505 }
08506
08507 for (int i=0; i<size; i++) {
08508 int dst = (rank+i) % size;
08509 newreq->reqs[size+i] = ptr->send(MPI_ATA_TAG, rank, ((char*)sendbuf)+(itemsize*dst), sendcount,
08510 sendtype, dst, comm, 0, I_SEND);
08511 }
08512 *request = ptr->postReq(newreq);
08513
  AMPI_DEBUG("MPI_Ialltoall: request=%d\n", *request);
08515 return MPI_SUCCESS;
08516 }
08517
08518 AMPI_API_IMPL(int, MPI_Alltoallv, const void *sendbuf, const int *sendcounts, const int *sdispls,
08519 MPI_Datatype sendtype, void *recvbuf, const int *recvcounts,
08520 const int *rdispls, MPI_Datatype recvtype, MPI_Comm comm)
08521 {
08522 AMPI_API("AMPI_Alltoallv");
08523
08524 handle_MPI_BOTTOM((void*&)sendbuf, sendtype, recvbuf, recvtype);
08525 handle_MPI_IN_PLACE_alltoallv((void*&)sendbuf, recvbuf, (int*&)sendcounts, sendtype,
08526 (int*&)sdispls, recvcounts, recvtype, rdispls);
08527
08528 #if AMPI_ERROR_CHECKING
08529 int ret;
08530 if (sendbuf != recvbuf) {
08531 ret = errorCheck("AMPI_Alltoallv", comm, 1, 0, 0, sendtype, 1, 0, 0, 0, 0, sendbuf, 1);
08532 if(ret != MPI_SUCCESS)
08533 return ret;
08534 }
08535 ret = errorCheck("AMPI_Alltoallv", comm, 1, 0, 0, recvtype, 1, 0, 0, 0, 0, recvbuf, 1);
08536 if(ret != MPI_SUCCESS)
08537 return ret;
08538 #endif
08539
08540 ampi *ptr = getAmpiInstance(comm);
08541 int size = ptr->getSize();
08542
08543 if(getAmpiParent()->isInter(comm))
08544 CkAbort("AMPI does not implement MPI_Alltoallv for Inter-communicators!");
08545 if(size == 1)
08546 return copyDatatype(sendtype,sendcounts[0],recvtype,recvcounts[0],sendbuf,recvbuf);
08547
08548 int rank = ptr->getRank();
08549 int itemextent = getDDT()->getExtent(sendtype);
08550 int extent = getDDT()->getExtent(recvtype);
08551
08552 if (recvbuf == sendbuf) {
08553 for (int i=0; i<size; i++) {
08554 for (int j=i; j<size; j++) {
08555 if (rank == i) {
08556 ptr->sendrecv_replace(((char *)recvbuf + (extent*rdispls[j])),
08557 recvcounts[j], recvtype, j, MPI_ATA_TAG, j,
08558 MPI_ATA_TAG, comm, MPI_STATUS_IGNORE);
08559 }
08560 else if (rank == j) {
08561 ptr->sendrecv_replace(((char *)recvbuf + (extent*rdispls[i])),
08562 recvcounts[i], recvtype, i, MPI_ATA_TAG, i,
08563 MPI_ATA_TAG, comm, MPI_STATUS_IGNORE);
08564 }
08565 }
08566 }
08567 }
08568 else if (size <= AMPI_ALLTOALL_THROTTLE) {
08569 vector<MPI_Request> reqs(size*2);
08570 for (int i=0; i<size; i++) {
08571 int src = (rank+i) % size;
08572 ptr->irecv(((char*)recvbuf)+(extent*rdispls[src]), recvcounts[src], recvtype,
08573 src, MPI_ATA_TAG, comm, &reqs[i]);
08574 }
08575 for (int i=0; i<size; i++) {
08576 int dst = (rank+i) % size;
08577 reqs[size+i] = ptr->send(MPI_ATA_TAG, rank, ((char*)sendbuf)+(itemextent*sdispls[dst]),
08578 sendcounts[dst], sendtype, dst, comm, 0, I_SEND);
08579 }
08580 MPI_Waitall(size*2, reqs.data(), MPI_STATUSES_IGNORE);
08581 }
08582 else {
08583
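    // Throttled exchange: at most AMPI_ALLTOALL_THROTTLE recv/send pairs are
    // outstanding per batch.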
08584 vector<MPI_Request> reqs(AMPI_ALLTOALL_THROTTLE*2);
08585 for (int j=0; j<size; j+=AMPI_ALLTOALL_THROTTLE) {
08586 int blockSize = std::min(size - j, AMPI_ALLTOALL_THROTTLE);
08587 for (int i=0; i<blockSize; i++) {
08588 int src = (rank + j + i) % size;
08589 ptr->irecv(((char*)recvbuf)+(extent*rdispls[src]), recvcounts[src], recvtype,
08590 src, MPI_ATA_TAG, comm, &reqs[i]);
08591 }
08592 for (int i=0; i<blockSize; i++) {
08593 int dst = (rank - j - i + size) % size;
        reqs[blockSize+i] = ptr->send(MPI_ATA_TAG, rank, ((char*)sendbuf)+(itemextent*sdispls[dst]),
                                      sendcounts[dst], sendtype, dst, comm, 0, I_SEND);
08596 }
08597 MPI_Waitall(blockSize*2, reqs.data(), MPI_STATUSES_IGNORE);
08598 }
08599 }
08600
08601 return MPI_SUCCESS;
08602 }
08603
08604 AMPI_API_IMPL(int, MPI_Ialltoallv, void *sendbuf, int *sendcounts, int *sdispls, MPI_Datatype sendtype,
08605 void *recvbuf, int *recvcounts, int *rdispls, MPI_Datatype recvtype,
08606 MPI_Comm comm, MPI_Request *request)
08607 {
08608 AMPI_API("AMPI_Ialltoallv");
08609
08610 handle_MPI_BOTTOM(sendbuf, sendtype, recvbuf, recvtype);
08611 handle_MPI_IN_PLACE_alltoallv((void*&)sendbuf, recvbuf, (int*&)sendcounts, sendtype,
08612 (int*&)sdispls, recvcounts, recvtype, rdispls);
08613
08614 #if AMPI_ERROR_CHECKING
08615 int ret;
08616 if (sendbuf != recvbuf) {
08617 ret = errorCheck("AMPI_Ialltoallv", comm, 1, 0, 0, sendtype, 1, 0, 0, 0, 0, sendbuf, 1);
08618 if(ret != MPI_SUCCESS){
08619 *request = MPI_REQUEST_NULL;
08620 return ret;
08621 }
08622 }
08623 ret = errorCheck("AMPI_Ialltoallv", comm, 1, 0, 0, recvtype, 1, 0, 0, 0, 0, recvbuf, 1);
08624 if(ret != MPI_SUCCESS){
08625 *request = MPI_REQUEST_NULL;
08626 return ret;
08627 }
08628 #endif
08629
08630 ampi *ptr = getAmpiInstance(comm);
08631 int size = ptr->getSize();
08632
08633 if(getAmpiParent()->isInter(comm))
08634 CkAbort("AMPI does not implement MPI_Ialltoallv for Inter-communicators!");
08635 if(size == 1){
08636 *request = ptr->postReq(getAmpiParent()->reqPool.newReq<IReq>(recvbuf,recvcounts[0],recvtype,ptr->getRank(),MPI_ATA_TAG,comm,
08637 getDDT(), AMPI_REQ_COMPLETED));
08638 return copyDatatype(sendtype,sendcounts[0],recvtype,recvcounts[0],sendbuf,recvbuf);
08639 }
08640
08641 int rank = ptr->getRank();
08642 int itemextent = getDDT()->getExtent(sendtype);
08643 int extent = getDDT()->getExtent(recvtype);
08644
08645
08646 ATAReq *newreq = new ATAReq(size*2);
08647 for (int i=0; i<size; i++) {
08648 ptr->irecv((char*)recvbuf+(extent*rdispls[i]), recvcounts[i],
08649 recvtype, i, MPI_ATA_TAG, comm, &newreq->reqs[i]);
08650 }
08651
08652 for (int i=0; i<size; i++) {
08653 int dst = (rank+i) % size;
08654 newreq->reqs[size+i] = ptr->send(MPI_ATA_TAG, rank, ((char*)sendbuf)+(itemextent*sdispls[dst]),
08655 sendcounts[dst], sendtype, dst, comm, 0, I_SEND);
08656 }
08657 *request = ptr->postReq(newreq);
08658
  AMPI_DEBUG("MPI_Ialltoallv: request=%d\n", *request);
08660
08661 return MPI_SUCCESS;
08662 }
08663
08664 AMPI_API_IMPL(int, MPI_Alltoallw, const void *sendbuf, const int *sendcounts, const int *sdispls,
08665 const MPI_Datatype *sendtypes, void *recvbuf, const int *recvcounts,
08666 const int *rdispls, const MPI_Datatype *recvtypes, MPI_Comm comm)
08667 {
08668 AMPI_API("AMPI_Alltoallw");
08669
08670 if (sendbuf == MPI_IN_PLACE) {
08671 handle_MPI_BOTTOM(recvbuf, recvtypes[0]);
08672 } else {
08673 handle_MPI_BOTTOM((void*&)sendbuf, sendtypes[0], recvbuf, recvtypes[0]);
08674 }
08675 handle_MPI_IN_PLACE_alltoallw((void*&)sendbuf, recvbuf, (int*&)sendcounts,
08676 (MPI_Datatype*&)sendtypes, (int*&)sdispls,
08677 recvcounts, recvtypes, rdispls);
08678
08679 #if AMPI_ERROR_CHECKING
08680 int ret;
08681 if (sendbuf != recvbuf) {
08682 ret = errorCheck("AMPI_Alltoallw", comm, 1, 0, 0, sendtypes[0], 1, 0, 0, 0, 0, sendbuf, 1);
08683 if(ret != MPI_SUCCESS)
08684 return ret;
08685 }
08686 ret = errorCheck("AMPI_Alltoallw", comm, 1, 0, 0, recvtypes[0], 1, 0, 0, 0, 0, recvbuf, 1);
08687 if(ret != MPI_SUCCESS)
08688 return ret;
08689 #endif
08690
08691 ampi *ptr = getAmpiInstance(comm);
08692 int size = ptr->getSize();
08693 int rank = ptr->getRank();
08694
08695 if(getAmpiParent()->isInter(comm))
08696 CkAbort("AMPI does not implement MPI_Alltoallw for Inter-communicators!");
08697 if(size == 1)
08698 return copyDatatype(sendtypes[0],sendcounts[0],recvtypes[0],recvcounts[0],sendbuf,recvbuf);
08699
08700
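  // Note: for alltoallw the sdispls/rdispls arrays are byte displacements
  // (no scaling by type extent), since each block may use a different datatype.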
08701 if (recvbuf == sendbuf) {
08702 for (int i=0; i<size; i++) {
08703 for (int j=i; j<size; j++) {
08704 if (rank == i) {
08705 ptr->sendrecv_replace(((char *)recvbuf + rdispls[j]),
08706 recvcounts[j], recvtypes[j], j, MPI_ATA_TAG, j,
08707 MPI_ATA_TAG, comm, MPI_STATUS_IGNORE);
08708 }
08709 else if (rank == j) {
08710 ptr->sendrecv_replace(((char *)recvbuf + rdispls[i]),
08711 recvcounts[i], recvtypes[i], i, MPI_ATA_TAG, i,
08712 MPI_ATA_TAG, comm, MPI_STATUS_IGNORE);
08713 }
08714 }
08715 }
08716 }
08717 else if (size <= AMPI_ALLTOALL_THROTTLE) {
08718 vector<MPI_Request> reqs(size*2);
08719 for (int i=0; i<size; i++) {
08720 int src = (rank+i) % size;
08721 ptr->irecv(((char*)recvbuf)+rdispls[src], recvcounts[src], recvtypes[src],
08722 src, MPI_ATA_TAG, comm, &reqs[i]);
08723 }
08724 for (int i=0; i<size; i++) {
08725 int dst = (rank+i) % size;
08726 reqs[size+i] = ptr->send(MPI_ATA_TAG, rank, ((char*)sendbuf)+sdispls[dst],
08727 sendcounts[dst], sendtypes[dst], dst, comm, 0, I_SEND);
08728 }
08729 MPI_Waitall(size*2, reqs.data(), MPI_STATUSES_IGNORE);
08730 }
08731 else {
08732
08733 vector<MPI_Request> reqs(AMPI_ALLTOALL_THROTTLE*2);
08734 for (int j=0; j<size; j+=AMPI_ALLTOALL_THROTTLE) {
08735 int blockSize = std::min(size - j, AMPI_ALLTOALL_THROTTLE);
08736 for (int i=0; i<blockSize; i++) {
08737 int src = (rank + j + i) % size;
08738 ptr->irecv(((char*)recvbuf)+rdispls[src], recvcounts[src], recvtypes[src],
08739 src, MPI_ATA_TAG, comm, &reqs[i]);
08740 }
08741 for (int i=0; i<blockSize; i++) {
08742 int dst = (rank - j - i + size) % size;
        reqs[blockSize+i] = ptr->send(MPI_ATA_TAG, rank, ((char*)sendbuf)+sdispls[dst],
                                      sendcounts[dst], sendtypes[dst], dst, comm, 0, I_SEND);
08745 }
08746 MPI_Waitall(blockSize*2, reqs.data(), MPI_STATUSES_IGNORE);
08747 }
08748 }
08749
08750 return MPI_SUCCESS;
08751 }
08752
08753 AMPI_API_IMPL(int, MPI_Ialltoallw, const void *sendbuf, const int *sendcounts, const int *sdispls,
08754 const MPI_Datatype *sendtypes, void *recvbuf, const int *recvcounts,
08755 const int *rdispls, const MPI_Datatype *recvtypes, MPI_Comm comm,
08756 MPI_Request *request)
08757 {
08758 AMPI_API("AMPI_Ialltoallw");
08759
08760 if (sendbuf == MPI_IN_PLACE) {
08761 handle_MPI_BOTTOM(recvbuf, recvtypes[0]);
08762 } else {
08763 handle_MPI_BOTTOM((void*&)sendbuf, sendtypes[0], recvbuf, recvtypes[0]);
08764 }
08765 handle_MPI_IN_PLACE_alltoallw((void*&)sendbuf, recvbuf, (int*&)sendcounts,
08766 (MPI_Datatype*&)sendtypes, (int*&)sdispls,
08767 recvcounts, recvtypes, rdispls);
08768
08769 #if AMPI_ERROR_CHECKING
08770 int ret;
08771 if (sendbuf != recvbuf) {
08772 ret = errorCheck("AMPI_Ialltoallw", comm, 1, 0, 0, sendtypes[0], 1, 0, 0, 0, 0, sendbuf, 1);
08773 if(ret != MPI_SUCCESS){
08774 *request = MPI_REQUEST_NULL;
08775 return ret;
08776 }
08777 }
08778 ret = errorCheck("AMPI_Ialltoallw", comm, 1, 0, 0, recvtypes[0], 1, 0, 0, 0, 0, recvbuf, 1);
08779 if(ret != MPI_SUCCESS){
08780 *request = MPI_REQUEST_NULL;
08781 return ret;
08782 }
08783 #endif
08784
08785 ampi *ptr = getAmpiInstance(comm);
08786 int size = ptr->getSize();
08787 int rank = ptr->getRank();
08788
08789 if(getAmpiParent()->isInter(comm))
08790 CkAbort("AMPI does not implement MPI_Ialltoallw for Inter-communicators!");
08791 if(size == 1){
08792 *request = ptr->postReq(getAmpiParent()->reqPool.newReq<IReq>(recvbuf,recvcounts[0],recvtypes[0],ptr->getRank(),MPI_ATA_TAG,comm,
08793 getDDT(), AMPI_REQ_COMPLETED));
08794 return copyDatatype(sendtypes[0],sendcounts[0],recvtypes[0],recvcounts[0],sendbuf,recvbuf);
08795 }
08796
08797
08798
08799
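  // As in MPI_Ialltoall: recv requests occupy reqs[0..size-1] and send
  // requests reqs[size..2*size-1] of the ATAReq.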
08800 ATAReq *newreq = new ATAReq(size*2);
08801 for (int i=0; i<size; i++) {
08802 ptr->irecv((char*)recvbuf+rdispls[i], recvcounts[i], recvtypes[i],
08803 i, MPI_ATA_TAG, comm, &newreq->reqs[i]);
08804 }
08805
08806 for (int i=0; i<size; i++) {
08807 int dst = (rank+i) % size;
    newreq->reqs[size+i] = ptr->send(MPI_ATA_TAG, rank, (char*)sendbuf+sdispls[dst],
                                     sendcounts[dst], sendtypes[dst], dst, comm, 0, I_SEND);
08810 }
08811 *request = ptr->postReq(newreq);
08812
08813 return MPI_SUCCESS;
08814 }
08815
08816 AMPI_API_IMPL(int, MPI_Neighbor_alltoall, const void* sendbuf, int sendcount, MPI_Datatype sendtype,
08817 void* recvbuf, int recvcount, MPI_Datatype recvtype,
08818 MPI_Comm comm)
08819 {
08820 AMPI_API("AMPI_Neighbor_alltoall");
08821
08822 handle_MPI_BOTTOM((void*&)sendbuf, sendtype, recvbuf, recvtype);
08823
08824 #if AMPI_ERROR_CHECKING
08825 if (sendbuf == MPI_IN_PLACE || recvbuf == MPI_IN_PLACE)
08826 CkAbort("MPI_Neighbor_alltoall does not accept MPI_IN_PLACE!");
08827 if (getAmpiParent()->isInter(comm))
08828 CkAbort("MPI_Neighbor_alltoall is not defined for Inter-communicators!");
08829 int ret;
08830 ret = errorCheck("AMPI_Neighbor_alltoall", comm, 1, sendcount, 1, sendtype, 1, 0, 0, 0, 0, sendbuf, 1);
08831 if(ret != MPI_SUCCESS)
08832 return ret;
08833 ret = errorCheck("AMPI_Neighbor_alltoall", comm, 1, recvcount, 1, recvtype, 1, 0, 0, 0, 0, recvbuf, 1);
08834 if(ret != MPI_SUCCESS)
08835 return ret;
08836 #endif
08837
08838 ampi *ptr = getAmpiInstance(comm);
08839 int rank_in_comm = ptr->getRank();
08840
08841 if (ptr->getSize() == 1)
08842 return copyDatatype(sendtype, sendcount, recvtype, recvcount, sendbuf, recvbuf);
08843
08844 const vector<int>& neighbors = ptr->getNeighbors();
08845 int num_neighbors = neighbors.size();
08846 int itemsize = getDDT()->getSize(sendtype) * sendcount;
08847 int extent = getDDT()->getExtent(recvtype) * recvcount;
08848
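  // Exchange one block with each topology neighbor, matching sends and recvs
  // on MPI_NBOR_TAG.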
08849 vector<MPI_Request> reqs(num_neighbors*2);
08850 for (int j=0; j<num_neighbors; j++) {
08851 ptr->irecv(((char*)recvbuf)+(extent*j), recvcount, recvtype,
08852 neighbors[j], MPI_NBOR_TAG, comm, &reqs[j]);
08853 }
08854
08855 for (int i=0; i<num_neighbors; i++) {
08856 reqs[num_neighbors+i] = ptr->send(MPI_NBOR_TAG, rank_in_comm, (void*)((char*)sendbuf+(itemsize*i)),
08857 sendcount, sendtype, neighbors[i], comm, 0, I_SEND);
08858 }
08859
08860 MPI_Waitall(reqs.size(), reqs.data(), MPI_STATUSES_IGNORE);
08861
08862 return MPI_SUCCESS;
08863 }
08864
08865 AMPI_API_IMPL(int, MPI_Ineighbor_alltoall, const void* sendbuf, int sendcount, MPI_Datatype sendtype,
08866 void* recvbuf, int recvcount, MPI_Datatype recvtype,
08867 MPI_Comm comm, MPI_Request *request)
08868 {
08869 AMPI_API("AMPI_Ineighbor_alltoall");
08870
08871 handle_MPI_BOTTOM((void*&)sendbuf, sendtype, recvbuf, recvtype);
08872
08873 #if AMPI_ERROR_CHECKING
08874 if (sendbuf == MPI_IN_PLACE || recvbuf == MPI_IN_PLACE)
08875 CkAbort("MPI_Ineighbor_alltoall does not accept MPI_IN_PLACE!");
08876 if (getAmpiParent()->isInter(comm))
08877 CkAbort("MPI_Ineighbor_alltoall is not defined for Inter-communicators!");
08878 int ret;
08879 ret = errorCheck("AMPI_Ineighbor_alltoall", comm, 1, sendcount, 1, sendtype, 1, 0, 0, 0, 0, sendbuf, 1);
08880 if(ret != MPI_SUCCESS){
08881 *request = MPI_REQUEST_NULL;
08882 return ret;
08883 }
08884 ret = errorCheck("AMPI_Ineighbor_alltoall", comm, 1, recvcount, 1, recvtype, 1, 0, 0, 0, 0, recvbuf, 1);
08885 if(ret != MPI_SUCCESS){
08886 *request = MPI_REQUEST_NULL;
08887 return ret;
08888 }
08889 #endif
08890
08891 ampi *ptr = getAmpiInstance(comm);
08892 int rank_in_comm = ptr->getRank();
08893
08894 if (ptr->getSize() == 1) {
08895 *request = ptr->postReq(getAmpiParent()->reqPool.newReq<IReq>(recvbuf,recvcount,recvtype,rank_in_comm,MPI_NBOR_TAG,comm,
08896 getDDT(), AMPI_REQ_COMPLETED));
08897 return copyDatatype(sendtype, sendcount, recvtype, recvcount, sendbuf, recvbuf);
08898 }
08899
08900 const vector<int>& neighbors = ptr->getNeighbors();
08901 int num_neighbors = neighbors.size();
08902 int itemsize = getDDT()->getSize(sendtype) * sendcount;
08903 int extent = getDDT()->getExtent(recvtype) * recvcount;
08904
08905
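// Nonblocking variant: gather the per-neighbor receives in slots [0,n) and
// the sends in [n,2n) of a single ATAReq, which is handed back to the caller
// as one request.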
08906 ATAReq *newreq = new ATAReq(num_neighbors*2);
08907 for (int j=0; j<num_neighbors; j++) {
08908 ptr->irecv((char*)recvbuf+(extent*j), recvcount, recvtype,
08909 neighbors[j], MPI_NBOR_TAG, comm, &newreq->reqs[j]);
08910 }
08911
08912 for (int i=0; i<num_neighbors; i++) {
08913 newreq->reqs[num_neighbors+i] = ptr->send(MPI_NBOR_TAG, rank_in_comm, ((char*)sendbuf)+(i*itemsize),
08914 sendcount, sendtype, neighbors[i], comm, 0, I_SEND);
08915 }
08916 *request = ptr->postReq(newreq);
08917
08918 return MPI_SUCCESS;
08919 }
08920
08921 AMPI_API_IMPL(int, MPI_Neighbor_alltoallv, const void* sendbuf, const int *sendcounts, const int *sdispls,
08922 MPI_Datatype sendtype, void* recvbuf, const int *recvcounts,
08923 const int *rdispls, MPI_Datatype recvtype, MPI_Comm comm)
08924 {
08925 AMPI_API("AMPI_Neighbor_alltoallv");
08926
08927 handle_MPI_BOTTOM((void*&)sendbuf, sendtype, recvbuf, recvtype);
08928
08929 #if AMPI_ERROR_CHECKING
08930 if (sendbuf == MPI_IN_PLACE || recvbuf == MPI_IN_PLACE)
08931 CkAbort("MPI_Neighbor_alltoallv does not accept MPI_IN_PLACE!");
08932 if (getAmpiParent()->isInter(comm))
08933 CkAbort("MPI_Neighbor_alltoallv is not defined for Inter-communicators!");
08934 int ret;
08935 ret = errorCheck("AMPI_Neighbor_alltoallv", comm, 1, sendcounts[0], 1, sendtype, 1, 0, 0, 0, 0, sendbuf, 1);
08936 if(ret != MPI_SUCCESS)
08937 return ret;
08938 ret = errorCheck("AMPI_Neighbor_alltoallv", comm, 1, recvcounts[0], 1, recvtype, 1, 0, 0, 0, 0, recvbuf, 1);
08939 if(ret != MPI_SUCCESS)
08940 return ret;
08941 #endif
08942
08943 ampi *ptr = getAmpiInstance(comm);
08944 int rank_in_comm = ptr->getRank();
08945
08946 if (ptr->getSize() == 1)
08947 return copyDatatype(sendtype, sendcounts[0], recvtype, recvcounts[0], sendbuf, recvbuf);
08948
08949 const vector<int>& neighbors = ptr->getNeighbors();
08950 int num_neighbors = neighbors.size();
08951 int itemsize = getDDT()->getSize(sendtype);
08952 int extent = getDDT()->getExtent(recvtype);
08953
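// Displacements here are in elements, not bytes: receive offsets are scaled
// by the recvtype extent and send offsets by the sendtype size (these
// coincide for contiguous types).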
08954 vector<MPI_Request> reqs(num_neighbors*2);
08955 for (int j=0; j<num_neighbors; j++) {
08956 ptr->irecv(((char*)recvbuf)+(extent*rdispls[j]), recvcounts[j], recvtype,
08957 neighbors[j], MPI_NBOR_TAG, comm, &reqs[j]);
08958 }
08959
08960 for (int i=0; i<num_neighbors; i++) {
08961 reqs[num_neighbors+i] = ptr->send(MPI_NBOR_TAG, rank_in_comm, (void*)((char*)sendbuf+(itemsize*sdispls[i])),
08962 sendcounts[i], sendtype, neighbors[i], comm, 0, I_SEND);
08963 }
08964
08965 MPI_Waitall(reqs.size(), reqs.data(), MPI_STATUSES_IGNORE);
08966
08967 return MPI_SUCCESS;
08968 }
08969
08970 AMPI_API_IMPL(int, MPI_Ineighbor_alltoallv, const void* sendbuf, const int *sendcounts, const int *sdispls,
08971 MPI_Datatype sendtype, void* recvbuf, const int *recvcounts,
08972 const int *rdispls, MPI_Datatype recvtype, MPI_Comm comm,
08973 MPI_Request *request)
08974 {
08975 AMPI_API("AMPI_Ineighbor_alltoallv");
08976
08977 handle_MPI_BOTTOM((void*&)sendbuf, sendtype, recvbuf, recvtype);
08978
08979 #if AMPI_ERROR_CHECKING
08980 if (sendbuf == MPI_IN_PLACE || recvbuf == MPI_IN_PLACE)
08981 CkAbort("MPI_Ineighbor_alltoallv does not accept MPI_IN_PLACE!");
08982 if (getAmpiParent()->isInter(comm))
08983 CkAbort("MPI_Ineighbor_alltoallv is not defined for Inter-communicators!");
08984 int ret;
08985 ret = errorCheck("AMPI_Ineighbor_alltoallv", comm, 1, sendcounts[0], 1, sendtype, 1, 0, 0, 0, 0, sendbuf, 1);
08986 if(ret != MPI_SUCCESS){
08987 *request = MPI_REQUEST_NULL;
08988 return ret;
08989 }
08990 ret = errorCheck("AMPI_Ineighbor_alltoallv", comm, 1, recvcounts[0], 1, recvtype, 1, 0, 0, 0, 0, recvbuf, 1);
08991 if(ret != MPI_SUCCESS){
08992 *request = MPI_REQUEST_NULL;
08993 return ret;
08994 }
08995 #endif
08996
08997 ampi *ptr = getAmpiInstance(comm);
08998 int rank_in_comm = ptr->getRank();
08999
09000 if (ptr->getSize() == 1) {
09001 *request = ptr->postReq(getAmpiParent()->reqPool.newReq<IReq>(recvbuf,recvcounts[0],recvtype,rank_in_comm,MPI_NBOR_TAG,comm,
09002 getDDT(), AMPI_REQ_COMPLETED));
09003 return copyDatatype(sendtype, sendcounts[0], recvtype, recvcounts[0], sendbuf, recvbuf);
09004 }
09005
09006 const vector<int>& neighbors = ptr->getNeighbors();
09007 int num_neighbors = neighbors.size();
09008 int itemsize = getDDT()->getSize(sendtype);
09009 int extent = getDDT()->getExtent(recvtype);
09010
09011
09012 ATAReq *newreq = new ATAReq(num_neighbors*2);
09013 for (int j=0; j<num_neighbors; j++) {
09014 ptr->irecv((char*)recvbuf+(extent*rdispls[j]), recvcounts[j], recvtype,
09015 neighbors[j], MPI_NBOR_TAG, comm, &newreq->reqs[j]);
09016 }
09017
09018 for (int i=0; i<num_neighbors; i++) {
09019 newreq->reqs[num_neighbors+i] = ptr->send(MPI_NBOR_TAG, rank_in_comm, (char*)sendbuf+(itemsize*sdispls[i]),
09020 sendcounts[i], sendtype, neighbors[i], comm, 0, I_SEND);
09021 }
09022 *request = ptr->postReq(newreq);
09023
09024 return MPI_SUCCESS;
09025 }
09026
09027 AMPI_API_IMPL(int, MPI_Neighbor_alltoallw, const void* sendbuf, const int *sendcounts, const MPI_Aint *sdispls,
09028 const MPI_Datatype *sendtypes, void* recvbuf, const int *recvcounts,
09029 const MPI_Aint *rdispls, const MPI_Datatype *recvtypes, MPI_Comm comm)
09030 {
09031 AMPI_API("AMPI_Neighbor_alltoallw");
09032
09033 handle_MPI_BOTTOM((void*&)sendbuf, sendtypes[0], recvbuf, recvtypes[0]);
09034
09035 #if AMPI_ERROR_CHECKING
09036 if (sendbuf == MPI_IN_PLACE || recvbuf == MPI_IN_PLACE)
09037 CkAbort("MPI_Neighbor_alltoallw does not accept MPI_IN_PLACE!");
09038 if (getAmpiParent()->isInter(comm))
09039 CkAbort("MPI_Neighbor_alltoallw is not defined for Inter-communicators!");
09040 int ret;
09041 ret = errorCheck("AMPI_Neighbor_alltoallw", comm, 1, sendcounts[0], 1, sendtypes[0], 1, 0, 0, 0, 0, sendbuf, 1);
09042 if(ret != MPI_SUCCESS)
09043 return ret;
09044 ret = errorCheck("AMPI_Neighbor_alltoallw", comm, 1, recvcounts[0], 1, recvtypes[0], 1, 0, 0, 0, 0, recvbuf, 1);
09045 if(ret != MPI_SUCCESS)
09046 return ret;
09047 #endif
09048
09049 ampi *ptr = getAmpiInstance(comm);
09050 int rank_in_comm = ptr->getRank();
09051
09052 if (ptr->getSize() == 1)
09053 return copyDatatype(sendtypes[0], sendcounts[0], recvtypes[0], recvcounts[0], sendbuf, recvbuf);
09054
09055 const vector<int>& neighbors = ptr->getNeighbors();
09056 int num_neighbors = neighbors.size();
09057
09058 vector<MPI_Request> reqs(num_neighbors*2);
09059 for (int j=0; j<num_neighbors; j++) {
09060 ptr->irecv(((char*)recvbuf)+rdispls[j], recvcounts[j], recvtypes[j],
09061 neighbors[j], MPI_NBOR_TAG, comm, &reqs[j]);
09062 }
09063
09064 for (int i=0; i<num_neighbors; i++) {
09065 reqs[num_neighbors+i] = ptr->send(MPI_NBOR_TAG, rank_in_comm, (void*)((char*)sendbuf+sdispls[i]),
09066 sendcounts[i], sendtypes[i], neighbors[i], comm, 0, I_SEND);
09067 }
09068
09069 MPI_Waitall(reqs.size(), reqs.data(), MPI_STATUSES_IGNORE);
09070
09071 return MPI_SUCCESS;
09072 }
09073
09074 AMPI_API_IMPL(int, MPI_Ineighbor_alltoallw, const void* sendbuf, const int *sendcounts, const MPI_Aint *sdispls,
09075 const MPI_Datatype *sendtypes, void* recvbuf, const int *recvcounts,
09076 const MPI_Aint *rdispls, const MPI_Datatype *recvtypes, MPI_Comm comm,
09077 MPI_Request *request)
09078 {
09079 AMPI_API("AMPI_Ineighbor_alltoallw");
09080
09081 handle_MPI_BOTTOM((void*&)sendbuf, sendtypes[0], recvbuf, recvtypes[0]);
09082
09083 #if AMPI_ERROR_CHECKING
09084 if (sendbuf == MPI_IN_PLACE || recvbuf == MPI_IN_PLACE)
09085 CkAbort("MPI_Ineighbor_alltoallw does not accept MPI_IN_PLACE!");
09086 if (getAmpiParent()->isInter(comm))
09087 CkAbort("MPI_Ineighbor_alltoallw is not defined for Inter-communicators!");
09088 int ret;
09089 ret = errorCheck("AMPI_Ineighbor_alltoallw", comm, 1, sendcounts[0], 1, sendtypes[0], 1, 0, 0, 0, 0, sendbuf, 1);
09090 if(ret != MPI_SUCCESS){
09091 *request = MPI_REQUEST_NULL;
09092 return ret;
09093 }
09094 ret = errorCheck("AMPI_Ineighbor_alltoallw", comm, 1, recvcounts[0], 1, recvtypes[0], 1, 0, 0, 0, 0, recvbuf, 1);
09095 if(ret != MPI_SUCCESS){
09096 *request = MPI_REQUEST_NULL;
09097 return ret;
09098 }
09099 #endif
09100
09101 ampi *ptr = getAmpiInstance(comm);
09102 int rank_in_comm = ptr->getRank();
09103
09104 if (ptr->getSize() == 1) {
09105 *request = ptr->postReq(getAmpiParent()->reqPool.newReq<IReq>(recvbuf,recvcounts[0],recvtypes[0],rank_in_comm,MPI_NBOR_TAG,comm,
09106 getDDT(), AMPI_REQ_COMPLETED));
09107 return copyDatatype(sendtypes[0], sendcounts[0], recvtypes[0], recvcounts[0], sendbuf, recvbuf);
09108 }
09109
09110 const vector<int>& neighbors = ptr->getNeighbors();
09111 int num_neighbors = neighbors.size();
09112
09113
09114 ATAReq *newreq = new ATAReq(num_neighbors*2);
09115 for (int j=0; j<num_neighbors; j++) {
09116 ptr->irecv((char*)recvbuf+rdispls[j], recvcounts[j], recvtypes[j],
09117 neighbors[j], MPI_NBOR_TAG, comm, &newreq->reqs[j]);
09118 }
09119
09120 for (int i=0; i<num_neighbors; i++) {
09121 newreq->reqs[num_neighbors+i] = ptr->send(MPI_NBOR_TAG, rank_in_comm, (void*)((char*)sendbuf+sdispls[i]),
09122 sendcounts[i], sendtypes[i], neighbors[i], comm, 0, I_SEND);
09123 }
09124 *request = ptr->postReq(newreq);
09125
09126 return MPI_SUCCESS;
09127 }
09128
09129 AMPI_API_IMPL(int, MPI_Neighbor_allgather, const void* sendbuf, int sendcount, MPI_Datatype sendtype,
09130 void* recvbuf, int recvcount, MPI_Datatype recvtype,
09131 MPI_Comm comm)
09132 {
09133 AMPI_API("AMPI_Neighbor_allgather");
09134
09135 handle_MPI_BOTTOM((void*&)sendbuf, sendtype, recvbuf, recvtype);
09136
09137 #if AMPI_ERROR_CHECKING
09138 if (sendbuf == MPI_IN_PLACE || recvbuf == MPI_IN_PLACE)
09139 CkAbort("MPI_Neighbor_allgather does not accept MPI_IN_PLACE!");
09140 if (getAmpiParent()->isInter(comm))
09141 CkAbort("MPI_Neighbor_allgather is not defined for Inter-communicators!");
09142 int ret;
09143 ret = errorCheck("AMPI_Neighbor_allgather", comm, 1, sendcount, 1, sendtype, 1, 0, 0, 0, 0, sendbuf, 1);
09144 if(ret != MPI_SUCCESS)
09145 return ret;
09146 ret = errorCheck("AMPI_Neighbor_allgather", comm, 1, recvcount, 1, recvtype, 1, 0, 0, 0, 0, recvbuf, 1);
09147 if(ret != MPI_SUCCESS)
09148 return ret;
09149 #endif
09150
09151 ampi *ptr = getAmpiInstance(comm);
09152 int rank_in_comm = ptr->getRank();
09153
09154 if (ptr->getSize() == 1)
09155 return copyDatatype(sendtype, sendcount, recvtype, recvcount, sendbuf, recvbuf);
09156
09157 const vector<int>& neighbors = ptr->getNeighbors();
09158 int num_neighbors = neighbors.size();
09159
09160 int extent = getDDT()->getExtent(recvtype) * recvcount;
09161 vector<MPI_Request> reqs(num_neighbors*2);
09162 for (int j=0; j<num_neighbors; j++) {
09163 ptr->irecv(((char*)recvbuf)+(extent*j), recvcount, recvtype,
09164 neighbors[j], MPI_NBOR_TAG, comm, &reqs[j]);
09165 }
09166
09167 for (int i=0; i<num_neighbors; i++) {
09168 reqs[num_neighbors+i] = ptr->send(MPI_NBOR_TAG, rank_in_comm, sendbuf, sendcount,
09169 sendtype, neighbors[i], comm, 0, I_SEND);
09170 }
09171
09172 MPI_Waitall(reqs.size(), reqs.data(), MPI_STATUSES_IGNORE);
09173
09174 return MPI_SUCCESS;
09175 }
09176
09177 AMPI_API_IMPL(int, MPI_Ineighbor_allgather, const void* sendbuf, int sendcount, MPI_Datatype sendtype,
09178 void* recvbuf, int recvcount, MPI_Datatype recvtype,
09179 MPI_Comm comm, MPI_Request *request)
09180 {
09181 AMPI_API("AMPI_Ineighbor_allgather");
09182
09183 handle_MPI_BOTTOM((void*&)sendbuf, sendtype, recvbuf, recvtype);
09184
09185 #if AMPI_ERROR_CHECKING
09186 if (sendbuf == MPI_IN_PLACE || recvbuf == MPI_IN_PLACE)
09187 CkAbort("MPI_Ineighbor_allgather does not accept MPI_IN_PLACE!");
09188 if (getAmpiParent()->isInter(comm))
09189 CkAbort("MPI_Ineighbor_allgather is not defined for Inter-communicators!");
09190 int ret;
09191 ret = errorCheck("AMPI_Ineighbor_allgather", comm, 1, sendcount, 1, sendtype, 1, 0, 0, 0, 0, sendbuf, 1);
09192 if(ret != MPI_SUCCESS){
09193 *request = MPI_REQUEST_NULL;
09194 return ret;
09195 }
09196 ret = errorCheck("AMPI_Ineighbor_allgather", comm, 1, recvcount, 1, recvtype, 1, 0, 0, 0, 0, recvbuf, 1);
09197 if(ret != MPI_SUCCESS){
09198 *request = MPI_REQUEST_NULL;
09199 return ret;
09200 }
09201 #endif
09202
09203 ampi *ptr = getAmpiInstance(comm);
09204 int rank_in_comm = ptr->getRank();
09205
09206 if (ptr->getSize() == 1) {
09207 *request = ptr->postReq(getAmpiParent()->reqPool.newReq<IReq>(recvbuf,recvcount,recvtype,rank_in_comm,MPI_NBOR_TAG,comm,
09208 getDDT(), AMPI_REQ_COMPLETED));
09209 return copyDatatype(sendtype, sendcount, recvtype, recvcount, sendbuf, recvbuf);
09210 }
09211
09212 const vector<int>& neighbors = ptr->getNeighbors();
09213 int num_neighbors = neighbors.size();
09214
09215
09216 ATAReq *newreq = new ATAReq(num_neighbors*2);
09217 int extent = getDDT()->getExtent(recvtype) * recvcount;
09218 for (int j=0; j<num_neighbors; j++) {
09219 ptr->irecv((char*)recvbuf+(extent*j), recvcount, recvtype,
09220 neighbors[j], MPI_NBOR_TAG, comm, &newreq->reqs[j]);
09221 }
09222
09223 for (int i=0; i<num_neighbors; i++) {
09224 newreq->reqs[num_neighbors+i] = ptr->send(MPI_NBOR_TAG, rank_in_comm, sendbuf, sendcount,
09225 sendtype, neighbors[i], comm, 0, I_SEND);
09226 }
09227 *request = ptr->postReq(newreq);
09228
09229 return MPI_SUCCESS;
09230 }
09231
09232 AMPI_API_IMPL(int, MPI_Neighbor_allgatherv, const void* sendbuf, int sendcount, MPI_Datatype sendtype,
09233 void* recvbuf, const int *recvcounts, const int *displs,
09234 MPI_Datatype recvtype, MPI_Comm comm)
09235 {
09236 AMPI_API("AMPI_Neighbor_allgatherv");
09237
09238 handle_MPI_BOTTOM((void*&)sendbuf, sendtype, recvbuf, recvtype);
09239
09240 #if AMPI_ERROR_CHECKING
09241 if (sendbuf == MPI_IN_PLACE || recvbuf == MPI_IN_PLACE)
09242 CkAbort("MPI_Neighbor_allgatherv does not accept MPI_IN_PLACE!");
09243 if (getAmpiParent()->isInter(comm))
09244 CkAbort("MPI_Neighbor_allgatherv is not defined for Inter-communicators!");
09245 int ret;
09246 ret = errorCheck("AMPI_Neighbor_allgatherv", comm, 1, sendcount, 1, sendtype, 1, 0, 0, 0, 0, sendbuf, 1);
09247 if(ret != MPI_SUCCESS)
09248 return ret;
09249 ret = errorCheck("AMPI_Neighbor_allgatherv", comm, 1, recvcounts[0], 1, recvtype, 1, 0, 0, 0, 0, recvbuf, 1);
09250 if(ret != MPI_SUCCESS)
09251 return ret;
09252 #endif
09253
09254 ampi *ptr = getAmpiInstance(comm);
09255 int rank_in_comm = ptr->getRank();
09256
09257 if (ptr->getSize() == 1)
09258 return copyDatatype(sendtype, sendcount, recvtype, recvcounts[0], sendbuf, recvbuf);
09259
09260 const vector<int>& neighbors = ptr->getNeighbors();
09261 int num_neighbors = neighbors.size();
09262 int extent = getDDT()->getExtent(recvtype);
09263 vector<MPI_Request> reqs(num_neighbors*2);
09264 for (int j=0; j<num_neighbors; j++) {
09265 ptr->irecv(((char*)recvbuf)+(extent*displs[j]), recvcounts[j], recvtype,
09266 neighbors[j], MPI_NBOR_TAG, comm, &reqs[j]);
09267 }
09268 for (int i=0; i<num_neighbors; i++) {
09269 reqs[num_neighbors+i] = ptr->send(MPI_NBOR_TAG, rank_in_comm, sendbuf, sendcount,
09270 sendtype, neighbors[i], comm, 0, I_SEND);
09271 }
09272
09273 MPI_Waitall(reqs.size(), reqs.data(), MPI_STATUSES_IGNORE);
09274
09275 return MPI_SUCCESS;
09276 }
09277
09278 AMPI_API_IMPL(int, MPI_Ineighbor_allgatherv, const void* sendbuf, int sendcount, MPI_Datatype sendtype,
09279 void* recvbuf, const int* recvcounts, const int* displs,
09280 MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request)
09281 {
09282 AMPI_API("AMPI_Ineighbor_allgatherv");
09283
09284 handle_MPI_BOTTOM((void*&)sendbuf, sendtype, recvbuf, recvtype);
09285
09286 #if AMPI_ERROR_CHECKING
09287 if (sendbuf == MPI_IN_PLACE || recvbuf == MPI_IN_PLACE)
09288 CkAbort("MPI_Ineighbor_allgatherv does not accept MPI_IN_PLACE!");
09289 if (getAmpiParent()->isInter(comm))
09290 CkAbort("MPI_Ineighbor_allgatherv is not defined for Inter-communicators!");
09291 int ret;
09292 ret = errorCheck("AMPI_Ineighbor_allgatherv", comm, 1, sendcount, 1, sendtype, 1, 0, 0, 0, 0, sendbuf, 1);
09293 if(ret != MPI_SUCCESS){
09294 *request = MPI_REQUEST_NULL;
09295 return ret;
09296 }
09297 ret = errorCheck("AMPI_Ineighbor_allgatherv", comm, 1, recvcounts[0], 1, recvtype, 1, 0, 0, 0, 0, recvbuf, 1);
09298 if(ret != MPI_SUCCESS){
09299 *request = MPI_REQUEST_NULL;
09300 return ret;
09301 }
09302 #endif
09303
09304 ampi *ptr = getAmpiInstance(comm);
09305 int rank_in_comm = ptr->getRank();
09306
09307 if (ptr->getSize() == 1) {
09308 *request = ptr->postReq(getAmpiParent()->reqPool.newReq<IReq>(recvbuf,recvcounts[0],recvtype,rank_in_comm,MPI_NBOR_TAG,comm,
09309 getDDT(), AMPI_REQ_COMPLETED));
09310 return copyDatatype(sendtype, sendcount, recvtype, recvcounts[0], sendbuf, recvbuf);
09311 }
09312
09313 const vector<int>& neighbors = ptr->getNeighbors();
09314 int num_neighbors = neighbors.size();
09315
09316
09317 ATAReq *newreq = new ATAReq(num_neighbors*2);
09318 int extent = getDDT()->getExtent(recvtype);
09319 for (int j=0; j<num_neighbors; j++) {
09320 ptr->irecv((char*)recvbuf+(extent*displs[j]), recvcounts[j], recvtype,
09321 neighbors[j], MPI_NBOR_TAG, comm, &newreq->reqs[j]);
09322 }
09323
09324 for (int i=0; i<num_neighbors; i++) {
09325 newreq->reqs[num_neighbors+i] = ptr->send(MPI_NBOR_TAG, rank_in_comm, sendbuf, sendcount,
09326 sendtype, neighbors[i], comm, 0, I_SEND);
09327 }
09328 *request = ptr->postReq(newreq);
09329
09330 return MPI_SUCCESS;
09331 }
09332
09333 AMPI_API_IMPL(int, MPI_Comm_dup, MPI_Comm comm, MPI_Comm *newcomm)
09334 {
09335 AMPI_API("AMPI_Comm_dup");
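// Duplicate the communicator together with any virtual topology attached to
// it, copy the user-defined keyvals, and barrier so that every rank holds
// the new communicator before any rank starts using it.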
09336 ampi *ptr = getAmpiInstance(comm);
09337 int topoType, rank = ptr->getRank();
09338 MPI_Topo_test(comm, &topoType);
09339 ptr->topoDup(topoType, rank, comm, newcomm);
09340 int ret = getAmpiParent()->dupUserKeyvals(comm, *newcomm);
09341 ptr->barrier();
09342
09343 #if AMPIMSGLOG
09344 ampiParent* pptr = getAmpiParent();
09345 if(msgLogRead){
09346 PUParray(*(pptr->fromPUPer), (char *)newcomm, sizeof(int));
09347 return MPI_SUCCESS;
09348 }
09349 else if(msgLogWrite && record_msglog(pptr->thisIndex)){
09350 PUParray(*(pptr->toPUPer), (char *)newcomm, sizeof(int));
09351 }
09352 #endif
09353 return ampiErrhandler("AMPI_Comm_dup", ret);
09354 }
09355
09356 AMPI_API_IMPL(int, MPI_Comm_idup, MPI_Comm comm, MPI_Comm *newcomm, MPI_Request *request)
09357 {
09358 AMPI_API("AMPI_Comm_idup");
09359
09360 *request = MPI_REQUEST_NULL;
09361 return MPI_Comm_dup(comm, newcomm);
09362 }
09363
09364 AMPI_API_IMPL(int, MPI_Comm_dup_with_info, MPI_Comm comm, MPI_Info info, MPI_Comm *dest)
09365 {
09366 AMPI_API("AMPI_Comm_dup_with_info");
09367 MPI_Comm_dup(comm, dest);
09368 MPI_Comm_set_info(*dest, info);
09369 return MPI_SUCCESS;
09370 }
09371
09372 AMPI_API_IMPL(int, MPI_Comm_idup_with_info, MPI_Comm comm, MPI_Info info, MPI_Comm *dest, MPI_Request *request)
09373 {
09374 AMPI_API("AMPI_Comm_idup_with_info");
09375
09376 *request = MPI_REQUEST_NULL;
09377 return MPI_Comm_dup_with_info(comm, info, dest);
09378 }
09379
09380 AMPI_API_IMPL(int, MPI_Comm_split, MPI_Comm src, int color, int key, MPI_Comm *dest)
09381 {
09382 AMPI_API("AMPI_Comm_split");
09383 {
09384 ampi *ptr = getAmpiInstance(src);
09385 if (getAmpiParent()->isInter(src)) {
09386 ptr->split(color, key, dest, MPI_INTER);
09387 }
09388 else if (getAmpiParent()->isCart(src)) {
09389 ptr->split(color, key, dest, MPI_CART);
09390 }
09391 else if (getAmpiParent()->isGraph(src)) {
09392 ptr->split(color, key, dest, MPI_GRAPH);
09393 }
09394 else if (getAmpiParent()->isDistGraph(src)) {
09395 ptr->split(color, key, dest, MPI_DIST_GRAPH);
09396 }
09397 else {
09398 ptr->split(color, key, dest, MPI_UNDEFINED);
09399 }
09400 }
09401 if (color == MPI_UNDEFINED) *dest = MPI_COMM_NULL;
09402
09403 #if AMPIMSGLOG
09404 ampiParent* pptr = getAmpiParent();
09405 if(msgLogRead){
09406 PUParray(*(pptr->fromPUPer), (char *)dest, sizeof(int));
09407 return MPI_SUCCESS;
09408 }
09409 else if(msgLogWrite && record_msglog(pptr->thisIndex)){
09410 PUParray(*(pptr->toPUPer), (char *)dest, sizeof(int));
09411 }
09412 #endif
09413
09414 return MPI_SUCCESS;
09415 }
09416
09417 AMPI_API_IMPL(int, MPI_Comm_split_type, MPI_Comm src, int split_type, int key,
09418 MPI_Info info, MPI_Comm *dest)
09419 {
09420 AMPI_API("AMPI_Comm_split_type");
09421
09422 if (src == MPI_COMM_SELF && split_type == MPI_UNDEFINED) {
09423 *dest = MPI_COMM_NULL;
09424 return MPI_SUCCESS;
09425 }
09426
09427 int color = MPI_UNDEFINED;
09428
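// Map the split type onto Charm++ locality: same physical host, same OS
// process, or same worker thread (PE). Unrecognized types leave color as
// MPI_UNDEFINED, so those ranks end up with MPI_COMM_NULL.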
09429 if (split_type == MPI_COMM_TYPE_SHARED || split_type == AMPI_COMM_TYPE_HOST) {
09430 color = CmiPhysicalNodeID(CkMyPe());
09431 }
09432 else if (split_type == AMPI_COMM_TYPE_PROCESS) {
09433 color = CkMyNode();
09434 }
09435 else if (split_type == AMPI_COMM_TYPE_WTH) {
09436 color = CkMyPe();
09437 }
09438
09439 return MPI_Comm_split(src, color, key, dest);
09440 }
09441
09442 AMPI_API_IMPL(int, MPI_Comm_free, MPI_Comm *comm)
09443 {
09444 AMPI_API("AMPI_Comm_free");
09445 ampiParent* parent = getAmpiParent();
09446 int ret = MPI_SUCCESS;
09447 if (*comm != MPI_COMM_NULL) {
09448
09449
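// Built-in communicators are never destroyed. For user communicators, all
// members synchronize and rank 0 destroys the backing chare array.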
09450 if (*comm != MPI_COMM_WORLD && *comm != MPI_COMM_SELF) {
09451 ampi* ptr = getAmpiInstance(*comm);
09452 ptr->barrier();
09453 if (ptr->getRank() == 0) {
09454 CProxy_CkArray(ptr->ckGetArrayID()).ckDestroy();
09455 }
09456 }
09457 *comm = MPI_COMM_NULL;
09458 }
09459 return ampiErrhandler("AMPI_Comm_free", ret);
09460 }
09461
09462 AMPI_API_IMPL(int, MPI_Comm_test_inter, MPI_Comm comm, int *flag)
09463 {
09464 AMPI_API("AMPI_Comm_test_inter");
09465 *flag = getAmpiParent()->isInter(comm);
09466 return MPI_SUCCESS;
09467 }
09468
09469 AMPI_API_IMPL(int, MPI_Comm_remote_size, MPI_Comm comm, int *size)
09470 {
09471 AMPI_API("AMPI_Comm_remote_size");
09472 *size = getAmpiParent()->getRemoteSize(comm);
09473 return MPI_SUCCESS;
09474 }
09475
09476 AMPI_API_IMPL(int, MPI_Comm_remote_group, MPI_Comm comm, MPI_Group *group)
09477 {
09478 AMPI_API("AMPI_Comm_remote_group");
09479 *group = getAmpiParent()->getRemoteGroup(comm);
09480 return MPI_SUCCESS;
09481 }
09482
09483 AMPI_API_IMPL(int, MPI_Intercomm_create, MPI_Comm localComm, int localLeader, MPI_Comm peerComm,
09484 int remoteLeader, int tag, MPI_Comm *newintercomm)
09485 {
09486 AMPI_API("AMPI_Intercomm_create");
09487
09488 #if AMPI_ERROR_CHECKING
09489 if (getAmpiParent()->isInter(localComm) || getAmpiParent()->isInter(peerComm))
09490 return ampiErrhandler("AMPI_Intercomm_create", MPI_ERR_COMM);
09491 #endif
09492
09493 ampi *localPtr = getAmpiInstance(localComm);
09494 ampi *peerPtr = getAmpiInstance(peerComm);
09495 int rootIndex = localPtr->getIndexForRank(localLeader);
09496 int localSize, localRank;
09497
09498 localSize = localPtr->getSize();
09499 localRank = localPtr->getRank();
09500
09501 vector<int> remoteVec;
09502
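// The two leaders exchange their groups' global index lists over peerComm
// using 'tag'; all local ranks then build the inter-communicator, with only
// the leader supplying the remote index list.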
09503 if (localRank == localLeader) {
09504 int remoteSize;
09505 MPI_Status sts;
09506 vector<int> localVec;
09507 localVec = localPtr->getIndices();
09508
09509 peerPtr->send(tag, peerPtr->getRank(), localVec.data(), localVec.size(), MPI_INT, remoteLeader, peerComm);
09510 peerPtr->probe(tag, remoteLeader, peerComm, &sts);
09511 MPI_Get_count(&sts, MPI_INT, &remoteSize);
09512 remoteVec.resize(remoteSize);
09513 if (-1==peerPtr->recv(tag, remoteLeader, remoteVec.data(), remoteSize, MPI_INT, peerComm))
09514 CkAbort("AMPI> Error in MPI_Intercomm_create");
09515
09516 if (remoteSize==0) {
09517 AMPI_DEBUG("AMPI> In MPI_Intercomm_create, creating an empty communicator\n");
09518 *newintercomm = MPI_COMM_NULL;
09519 return MPI_SUCCESS;
09520 }
09521 }
09522
09523 localPtr->intercommCreate(remoteVec,rootIndex,localComm,newintercomm);
09524
09525 return MPI_SUCCESS;
09526 }
09527
09528 AMPI_API_IMPL(int, MPI_Intercomm_merge, MPI_Comm intercomm, int high, MPI_Comm *newintracomm)
09529 {
09530 AMPI_API("AMPI_Intercomm_merge");
09531
09532 #if AMPI_ERROR_CHECKING
09533 if (!getAmpiParent()->isInter(intercomm))
09534 return ampiErrhandler("AMPI_Intercomm_merge", MPI_ERR_COMM);
09535 #endif
09536
09537 ampi *ptr = getAmpiInstance(intercomm);
09538 int lroot, rroot, lrank, lhigh, rhigh, first;
09539 lroot = ptr->getIndexForRank(0);
09540 rroot = ptr->getIndexForRemoteRank(0);
09541 lhigh = high;
09542 lrank = ptr->getRank();
09543 first = 0;
09544
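// The rank 0s of both sides exchange their 'high' flags. If the flags
// differ, the side that passed high=false is ordered first; if they match,
// the tie is broken by comparing the two leaders' global indices.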
09545 if(lrank==0){
09546 MPI_Request req = ptr->send(MPI_ATA_TAG, ptr->getRank(), &lhigh, 1, MPI_INT, 0, intercomm, 0, I_SEND);
09547 if(-1==ptr->recv(MPI_ATA_TAG,0,&rhigh,1,MPI_INT,intercomm))
09548 CkAbort("AMPI> Error in MPI_Intercomm_merge");
09549 MPI_Wait(&req, MPI_STATUS_IGNORE);
09550
09551 if((lhigh && rhigh) || (!lhigh && !rhigh)){
09552 first = (lroot < rroot);
09553 }else{
09554 first = (lhigh == false);
09555 }
09556 }
09557
09558 ptr->intercommMerge(first, newintracomm);
09559 return MPI_SUCCESS;
09560 }
09561
09562 AMPI_API_IMPL(int, MPI_Abort, MPI_Comm comm, int errorcode)
09563 {
09564 AMPI_API_INIT("AMPI_Abort");
09565 CkAbort("AMPI: Application called MPI_Abort()!\n");
09566 return errorcode;
09567 }
09568
09569 AMPI_API_IMPL(int, MPI_Get_count, const MPI_Status *sts, MPI_Datatype dtype, int *count)
09570 {
09571 AMPI_API("AMPI_Get_count");
09572 CkDDT_DataType* dttype = getDDT()->getType(dtype);
09573 int itemsize = dttype->getSize() ;
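// Report how many whole elements of dtype fit in the received byte length;
// if the length is not a multiple of the element size, the count is
// MPI_UNDEFINED.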
09574 if (itemsize == 0) {
09575 *count = 0;
09576 } else {
09577 if (sts->MPI_LENGTH%itemsize == 0) {
09578 *count = sts->MPI_LENGTH/itemsize;
09579 } else {
09580 *count = MPI_UNDEFINED;
09581 }
09582 }
09583 return MPI_SUCCESS;
09584 }
09585
09586 AMPI_API_IMPL(int, MPI_Type_lb, MPI_Datatype dtype, MPI_Aint* displacement)
09587 {
09588 AMPI_API("AMPI_Type_lb");
09589
09590 #if AMPI_ERROR_CHECKING
09591 int ret = checkData("AMPI_Type_lb", dtype);
09592 if (ret!=MPI_SUCCESS)
09593 return ret;
09594 #endif
09595
09596 *displacement = getDDT()->getLB(dtype);
09597 return MPI_SUCCESS;
09598 }
09599
09600 AMPI_API_IMPL(int, MPI_Type_ub, MPI_Datatype dtype, MPI_Aint* displacement)
09601 {
09602 AMPI_API("AMPI_Type_ub");
09603
09604 #if AMPI_ERROR_CHECKING
09605 int ret = checkData("AMPI_Type_ub", dtype);
09606 if (ret!=MPI_SUCCESS)
09607 return ret;
09608 #endif
09609
09610 *displacement = getDDT()->getUB(dtype);
09611 return MPI_SUCCESS;
09612 }
09613
09614 AMPI_API_IMPL(int, MPI_Get_address, const void* location, MPI_Aint *address)
09615 {
09616 AMPI_API("AMPI_Get_address");
09617 *address = (MPI_Aint)location;
09618 return MPI_SUCCESS;
09619 }
09620
09621 AMPI_API_IMPL(int, MPI_Address, void* location, MPI_Aint *address)
09622 {
09623 AMPI_API("AMPI_Address");
09624 return MPI_Get_address(location, address);
09625 }
09626
09627 AMPI_API_IMPL(int, MPI_Status_set_elements, MPI_Status *sts, MPI_Datatype dtype, int count)
09628 {
09629 AMPI_API("AMPI_Status_set_elements");
09630 if(sts == MPI_STATUS_IGNORE || sts == MPI_STATUSES_IGNORE)
09631 return MPI_SUCCESS;
09632
09633 #if AMPI_ERROR_CHECKING
09634 int ret = checkData("AMPI_Status_set_elements", dtype);
09635 if (ret!=MPI_SUCCESS)
09636 return(ret);
09637 #endif
09638
09639 CkDDT_DataType* dttype = getDDT()->getType(dtype);
09640 int basesize = dttype->getBaseSize();
09641 if(basesize==0) basesize = dttype->getSize();
09642 sts->MPI_LENGTH = basesize * count;
09643 return MPI_SUCCESS;
09644 }
09645
09646 AMPI_API_IMPL(int, MPI_Status_set_elements_x, MPI_Status *sts, MPI_Datatype dtype, MPI_Count count)
09647 {
09648 AMPI_API("AMPI_Status_set_elements_x");
09649 if(sts == MPI_STATUS_IGNORE || sts == MPI_STATUSES_IGNORE)
09650 return MPI_SUCCESS;
09651
09652 #if AMPI_ERROR_CHECKING
09653 int ret = checkData("AMPI_Status_set_elements_x", dtype);
09654 if (ret!=MPI_SUCCESS)
09655 return(ret);
09656 #endif
09657
09658 CkDDT_DataType* dttype = getDDT()->getType(dtype);
09659 int basesize = dttype->getBaseSize();
09660 if(basesize==0) basesize = dttype->getSize();
09661 sts->MPI_LENGTH = basesize * count;
09662 return MPI_SUCCESS;
09663 }
09664
09665 AMPI_API_IMPL(int, MPI_Get_elements, const MPI_Status *sts, MPI_Datatype dtype, int *count)
09666 {
09667 AMPI_API("AMPI_Get_elements");
09668
09669 #if AMPI_ERROR_CHECKING
09670 int ret = checkData("AMPI_Get_elements", dtype);
09671 if (ret!=MPI_SUCCESS)
09672 return ret;
09673 #endif
09674
09675 *count = getDDT()->getType(dtype)->getNumBasicElements(sts->MPI_LENGTH);
09676 return MPI_SUCCESS;
09677 }
09678
09679 AMPI_API_IMPL(int, MPI_Get_elements_x, const MPI_Status *sts, MPI_Datatype dtype, MPI_Count *count)
09680 {
09681 AMPI_API("AMPI_Get_elements_x");
09682 *count = getDDT()->getType(dtype)->getNumBasicElements(sts->MPI_LENGTH);
09683 return MPI_SUCCESS;
09684 }
09685
09686 AMPI_API_IMPL(int, MPI_Pack, const void *inbuf, int incount, MPI_Datatype dtype,
09687 void *outbuf, int outsize, int *position, MPI_Comm comm)
09688 {
09689 AMPI_API("AMPI_Pack");
09690 CkDDT_DataType* dttype = getDDT()->getType(dtype) ;
09691 int itemsize = dttype->getSize();
09692 dttype->serialize((char*)inbuf, ((char*)outbuf)+(*position), incount, outsize, PACK);
09693 *position += (itemsize*incount);
09694 return MPI_SUCCESS;
09695 }
09696
09697 AMPI_API_IMPL(int, MPI_Unpack, const void *inbuf, int insize, int *position, void *outbuf,
09698 int outcount, MPI_Datatype dtype, MPI_Comm comm)
09699 {
09700 AMPI_API("AMPI_Unpack");
09701 CkDDT_DataType* dttype = getDDT()->getType(dtype) ;
09702 int itemsize = dttype->getSize();
09703 dttype->serialize((char*)outbuf, ((char*)inbuf+(*position)), outcount, insize, UNPACK);
09704 *position += (itemsize*outcount);
09705 return MPI_SUCCESS;
09706 }
09707
09708 AMPI_API_IMPL(int, MPI_Pack_size, int incount, MPI_Datatype datatype, MPI_Comm comm, int *sz)
09709 {
09710 AMPI_API("AMPI_Pack_size");
09711 CkDDT_DataType* dttype = getDDT()->getType(datatype) ;
09712 *sz = incount*dttype->getSize() ;
09713 return MPI_SUCCESS;
09714 }
09715
09716 AMPI_API_IMPL(int, MPI_Get_version, int *version, int *subversion)
09717 {
09718 AMPI_API_INIT("AMPI_Get_version");
09719 *version = MPI_VERSION;
09720 *subversion = MPI_SUBVERSION;
09721 return MPI_SUCCESS;
09722 }
09723
09724 AMPI_API_IMPL(int, MPI_Get_library_version, char *version, int *resultlen)
09725 {
09726 AMPI_API_INIT("AMPI_Get_library_version");
09727 const char *ampiNameStr = "Adaptive MPI ";
09728 strncpy(version, ampiNameStr, MPI_MAX_LIBRARY_VERSION_STRING);
09729 strncat(version, CmiCommitID, MPI_MAX_LIBRARY_VERSION_STRING - strlen(version));
09730 *resultlen = strlen(version);
09731 return MPI_SUCCESS;
09732 }
09733
09734 AMPI_API_IMPL(int, MPI_Get_processor_name, char *name, int *resultlen)
09735 {
09736 AMPI_API_INIT("AMPI_Get_processor_name");
09737 ampiParent *ptr = getAmpiParent();
09738 sprintf(name,"AMPI_RANK[%d]_WTH[%d]",ptr->thisIndex,ptr->getMyPe());
09739 *resultlen = strlen(name);
09740 return MPI_SUCCESS;
09741 }
09742
09743
09744 #if defined(USE_STDARG)
09745 void error_handler(MPI_Comm *, int *, ...);
09746 #else
09747 void error_handler ( MPI_Comm *, int * );
09748 #endif
09749
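// User-defined error handlers are not currently supported: the calls below
// are accepted for portability but do not install or invoke any handler.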
09750 AMPI_API_IMPL(int, MPI_Comm_call_errhandler, MPI_Comm comm, int errorcode)
09751 {
09752 AMPI_API("AMPI_Comm_call_errhandler");
09753 return MPI_SUCCESS;
09754 }
09755
09756 AMPI_API_IMPL(int, MPI_Comm_create_errhandler, MPI_Comm_errhandler_fn *function, MPI_Errhandler *errhandler)
09757 {
09758 AMPI_API("AMPI_Comm_create_errhandler");
09759 return MPI_SUCCESS;
09760 }
09761
09762 AMPI_API_IMPL(int, MPI_Comm_set_errhandler, MPI_Comm comm, MPI_Errhandler errhandler)
09763 {
09764 AMPI_API("AMPI_Comm_set_errhandler");
09765 return MPI_SUCCESS;
09766 }
09767
09768 AMPI_API_IMPL(int, MPI_Comm_get_errhandler, MPI_Comm comm, MPI_Errhandler *errhandler)
09769 {
09770 AMPI_API("AMPI_Comm_get_errhandler");
09771 return MPI_SUCCESS;
09772 }
09773
09774 AMPI_API_IMPL(int, MPI_Comm_free_errhandler, MPI_Errhandler *errhandler)
09775 {
09776 AMPI_API("AMPI_Comm_free_errhandler");
09777 *errhandler = MPI_ERRHANDLER_NULL;
09778 return MPI_SUCCESS;
09779 }
09780
09781 AMPI_API_IMPL(int, MPI_Errhandler_create, MPI_Handler_function *function, MPI_Errhandler *errhandler)
09782 {
09783 AMPI_API("AMPI_Errhandler_create");
09784 return MPI_Comm_create_errhandler(function, errhandler);
09785 }
09786
09787 AMPI_API_IMPL(int, MPI_Errhandler_set, MPI_Comm comm, MPI_Errhandler errhandler)
09788 {
09789 AMPI_API("AMPI_Errhandler_set");
09790 return MPI_Comm_set_errhandler(comm, errhandler);
09791 }
09792
09793 AMPI_API_IMPL(int, MPI_Errhandler_get, MPI_Comm comm, MPI_Errhandler *errhandler)
09794 {
09795 AMPI_API("AMPI_Errhandler_get");
09796 return MPI_Comm_get_errhandler(comm, errhandler);
09797 }
09798
09799 AMPI_API_IMPL(int, MPI_Errhandler_free, MPI_Errhandler *errhandler)
09800 {
09801 AMPI_API("AMPI_Errhandler_free");
09802 return MPI_Comm_free_errhandler(errhandler);
09803 }
09804
09805 AMPI_API_IMPL(int, MPI_Add_error_code, int errorclass, int *errorcode)
09806 {
09807 AMPI_API("AMPI_Add_error_code");
09808 return MPI_SUCCESS;
09809 }
09810
09811 AMPI_API_IMPL(int, MPI_Add_error_class, int *errorclass)
09812 {
09813 AMPI_API("AMPI_Add_error_class");
09814 return MPI_SUCCESS;
09815 }
09816
09817 AMPI_API_IMPL(int, MPI_Add_error_string, int errorcode, const char *errorstring)
09818 {
09819 AMPI_API("AMPI_Add_error_string");
09820 return MPI_SUCCESS;
09821 }
09822
09823 AMPI_API_IMPL(int, MPI_Error_class, int errorcode, int *errorclass)
09824 {
09825 AMPI_API("AMPI_Error_class");
09826 *errorclass = errorcode;
09827 return MPI_SUCCESS;
09828 }
09829
09830 AMPI_API_IMPL(int, MPI_Error_string, int errorcode, char *errorstring, int *resultlen)
09831 {
09832 AMPI_API("AMPI_Error_string");
09833 const char *r="";
09834 switch(errorcode) {
09835 case MPI_SUCCESS:
09836 r="MPI_SUCCESS: no errors"; break;
09837 case MPI_ERR_BUFFER:
09838 r="MPI_ERR_BUFFER: invalid buffer pointer"; break;
09839 case MPI_ERR_COUNT:
09840 r="MPI_ERR_COUNT: invalid count argument"; break;
09841 case MPI_ERR_TYPE:
09842 r="MPI_ERR_TYPE: invalid datatype"; break;
09843 case MPI_ERR_TAG:
09844 r="MPI_ERR_TAG: invalid tag"; break;
09845 case MPI_ERR_COMM:
09846 r="MPI_ERR_COMM: invalid communicator"; break;
09847 case MPI_ERR_RANK:
09848 r="MPI_ERR_RANK: invalid rank"; break;
09849 case MPI_ERR_REQUEST:
09850 r="MPI_ERR_REQUEST: invalid request (handle)"; break;
09851 case MPI_ERR_ROOT:
09852 r="MPI_ERR_ROOT: invalid root"; break;
09853 case MPI_ERR_GROUP:
09854 r="MPI_ERR_GROUP: invalid group"; break;
09855 case MPI_ERR_OP:
09856 r="MPI_ERR_OP: invalid operation"; break;
09857 case MPI_ERR_TOPOLOGY:
09858 r="MPI_ERR_TOPOLOGY: invalid communicator topology"; break;
09859 case MPI_ERR_DIMS:
09860 r="MPI_ERR_DIMS: invalid dimension argument"; break;
09861 case MPI_ERR_ARG:
09862 r="MPI_ERR_ARG: invalid argument of some other kind"; break;
09863 case MPI_ERR_TRUNCATE:
09864 r="MPI_ERR_TRUNCATE: message truncated in receive"; break;
09865 case MPI_ERR_OTHER:
09866 r="MPI_ERR_OTHER: known error not in this list"; break;
09867 case MPI_ERR_INTERN:
09868 r="MPI_ERR_INTERN: internal MPI (implementation) error"; break;
09869 case MPI_ERR_IN_STATUS:
09870 r="MPI_ERR_IN_STATUS: error code in status"; break;
09871 case MPI_ERR_PENDING:
09872 r="MPI_ERR_PENDING: pending request"; break;
09873 case MPI_ERR_ACCESS:
09874 r="MPI_ERR_ACCESS: invalid access mode"; break;
09875 case MPI_ERR_AMODE:
09876 r="MPI_ERR_AMODE: invalid amode argument"; break;
09877 case MPI_ERR_ASSERT:
09878 r="MPI_ERR_ASSERT: invalid assert argument"; break;
09879 case MPI_ERR_BAD_FILE:
09880 r="MPI_ERR_BAD_FILE: bad file"; break;
09881 case MPI_ERR_BASE:
09882 r="MPI_ERR_BASE: invalid base"; break;
09883 case MPI_ERR_CONVERSION:
09884 r="MPI_ERR_CONVERSION: error in data conversion"; break;
09885 case MPI_ERR_DISP:
09886 r="MPI_ERR_DISP: invalid displacement"; break;
09887 case MPI_ERR_DUP_DATAREP:
09888 r="MPI_ERR_DUP_DATAREP: error duplicating data representation"; break;
09889 case MPI_ERR_FILE_EXISTS:
09890 r="MPI_ERR_FILE_EXISTS: file exists already"; break;
09891 case MPI_ERR_FILE_IN_USE:
09892 r="MPI_ERR_FILE_IN_USE: file in use already"; break;
09893 case MPI_ERR_FILE:
09894 r="MPI_ERR_FILE: invalid file"; break;
09895 case MPI_ERR_INFO_KEY:
09896 r="MPI_ERR_INFO_KEY: invalid key argument for info object"; break;
09897 case MPI_ERR_INFO_NOKEY:
09898 r="MPI_ERR_INFO_NOKEY: unknown key for info object"; break;
09899 case MPI_ERR_INFO_VALUE:
09900 r="MPI_ERR_INFO_VALUE: invalid value argument for info object"; break;
09901 case MPI_ERR_INFO:
09902 r="MPI_ERR_INFO: invalid info object"; break;
09903 case MPI_ERR_IO:
09904 r="MPI_ERR_IO: input/output error"; break;
09905 case MPI_ERR_KEYVAL:
09906 r="MPI_ERR_KEYVAL: invalid keyval"; break;
09907 case MPI_ERR_LOCKTYPE:
09908 r="MPI_ERR_LOCKTYPE: invalid locktype argument"; break;
09909 case MPI_ERR_NAME:
09910 r="MPI_ERR_NAME: invalid name argument"; break;
09911 case MPI_ERR_NO_MEM:
09912 r="MPI_ERR_NO_MEM: out of memory"; break;
09913 case MPI_ERR_NOT_SAME:
09914 r="MPI_ERR_NOT_SAME: objects are not identical"; break;
09915 case MPI_ERR_NO_SPACE:
09916 r="MPI_ERR_NO_SPACE: no space left on device"; break;
09917 case MPI_ERR_NO_SUCH_FILE:
09918 r="MPI_ERR_NO_SUCH_FILE: no such file or directory"; break;
09919 case MPI_ERR_PORT:
09920 r="MPI_ERR_PORT: invalid port"; break;
09921 case MPI_ERR_QUOTA:
09922 r="MPI_ERR_QUOTA: out of quota"; break;
09923 case MPI_ERR_READ_ONLY:
09924 r="MPI_ERR_READ_ONLY: file is read only"; break;
09925 case MPI_ERR_RMA_CONFLICT:
09926 r="MPI_ERR_RMA_CONFLICT: rma conflict during operation"; break;
09927 case MPI_ERR_RMA_SYNC:
09928 r="MPI_ERR_RMA_SYNC: error executing rma sync"; break;
09929 case MPI_ERR_SERVICE:
09930 r="MPI_ERR_SERVICE: unknown service name"; break;
09931 case MPI_ERR_SIZE:
09932 r="MPI_ERR_SIZE: invalid size argument"; break;
09933 case MPI_ERR_SPAWN:
09934 r="MPI_ERR_SPAWN: error in spawning processes"; break;
09935 case MPI_ERR_UNSUPPORTED_DATAREP:
09936 r="MPI_ERR_UNSUPPORTED_DATAREP: data representation not supported"; break;
09937 case MPI_ERR_UNSUPPORTED_OPERATION:
09938 r="MPI_ERR_UNSUPPORTED_OPERATION: operation not supported"; break;
09939 case MPI_ERR_WIN:
09940 r="MPI_ERR_WIN: invalid win argument"; break;
09941 default:
09942 r="unknown error";
09943 *resultlen=strlen(r);
09944 strcpy(errorstring,r);
09945 return MPI_ERR_UNKNOWN;
09946 };
09947 *resultlen=strlen(r);
09948 strcpy(errorstring,r);
09949 return MPI_SUCCESS;
09950 }
09951
09952
09953 AMPI_API_IMPL(int, MPI_Comm_group, MPI_Comm comm, MPI_Group *group)
09954 {
09955 AMPI_API("AMPI_Comm_group");
09956 *group = getAmpiParent()->comm2group(comm);
09957 return MPI_SUCCESS;
09958 }
09959
09960 AMPI_API_IMPL(int, MPI_Group_union, MPI_Group group1, MPI_Group group2, MPI_Group *newgroup)
09961 {
09962 AMPI_API("AMPI_Group_union");
09963 ampiParent *ptr = getAmpiParent();
09964 vector<int> vec1 = ptr->group2vec(group1);
09965 vector<int> vec2 = ptr->group2vec(group2);
09966 vector<int> newvec = unionOp(vec1,vec2);
09967 *newgroup = ptr->saveGroupStruct(newvec);
09968 return MPI_SUCCESS;
09969 }
09970
09971 AMPI_API_IMPL(int, MPI_Group_intersection, MPI_Group group1, MPI_Group group2, MPI_Group *newgroup)
09972 {
09973 AMPI_API("AMPI_Group_intersection");
09974 ampiParent *ptr = getAmpiParent();
09975 vector<int> vec1 = ptr->group2vec(group1);
09976 vector<int> vec2 = ptr->group2vec(group2);
09977 vector<int> newvec = intersectOp(vec1,vec2);
09978 *newgroup = ptr->saveGroupStruct(newvec);
09979 return MPI_SUCCESS;
09980 }
09981
09982 AMPI_API_IMPL(int, MPI_Group_difference, MPI_Group group1, MPI_Group group2, MPI_Group *newgroup)
09983 {
09984 AMPI_API("AMPI_Group_difference");
09985 ampiParent *ptr = getAmpiParent();
09986 vector<int> vec1 = ptr->group2vec(group1);
09987 vector<int> vec2 = ptr->group2vec(group2);
09988 vector<int> newvec = diffOp(vec1,vec2);
09989 *newgroup = ptr->saveGroupStruct(newvec);
09990 return MPI_SUCCESS;
09991 }
09992
09993 AMPI_API_IMPL(int, MPI_Group_size, MPI_Group group, int *size)
09994 {
09995 AMPI_API("AMPI_Group_size");
09996 *size = (getAmpiParent()->group2vec(group)).size();
09997 return MPI_SUCCESS;
09998 }
09999
10000 AMPI_API_IMPL(int, MPI_Group_rank, MPI_Group group, int *rank)
10001 {
10002 AMPI_API("AMPI_Group_rank");
10003 *rank = getAmpiParent()->getRank(group);
10004 return MPI_SUCCESS;
10005 }
10006
10007 AMPI_API_IMPL(int, MPI_Group_translate_ranks, MPI_Group group1, int n, const int *ranks1,
10008 MPI_Group group2, int *ranks2)
10009 {
10010 AMPI_API("AMPI_Group_translate_ranks");
10011 ampiParent *ptr = getAmpiParent();
10012 vector<int> vec1 = ptr->group2vec(group1);
10013 vector<int> vec2 = ptr->group2vec(group2);
10014 translateRanksOp(n, vec1, ranks1, vec2, ranks2);
10015 return MPI_SUCCESS;
10016 }
10017
10018 AMPI_API_IMPL(int, MPI_Group_compare, MPI_Group group1,MPI_Group group2, int *result)
10019 {
10020 AMPI_API("AMPI_Group_compare");
10021 ampiParent *ptr = getAmpiParent();
10022 vector<int> vec1 = ptr->group2vec(group1);
10023 vector<int> vec2 = ptr->group2vec(group2);
10024 *result = compareVecOp(vec1, vec2);
10025 return MPI_SUCCESS;
10026 }
10027
10028 AMPI_API_IMPL(int, MPI_Group_incl, MPI_Group group, int n, const int *ranks, MPI_Group *newgroup)
10029 {
10030 AMPI_API("AMPI_Group_incl");
10031 ampiParent *ptr = getAmpiParent();
10032 vector<int> vec = ptr->group2vec(group);
10033 vector<int> newvec = inclOp(n,ranks,vec);
10034 *newgroup = ptr->saveGroupStruct(newvec);
10035 return MPI_SUCCESS;
10036 }
10037
10038 AMPI_API_IMPL(int, MPI_Group_excl, MPI_Group group, int n, const int *ranks, MPI_Group *newgroup)
10039 {
10040 AMPI_API("AMPI_Group_excl");
10041 ampiParent *ptr = getAmpiParent();
10042 vector<int> vec = ptr->group2vec(group);
10043 vector<int> newvec = exclOp(n,ranks,vec);
10044 *newgroup = ptr->saveGroupStruct(newvec);
10045 return MPI_SUCCESS;
10046 }
10047
10048 AMPI_API_IMPL(int, MPI_Group_range_incl, MPI_Group group, int n, int ranges[][3], MPI_Group *newgroup)
10049 {
10050 AMPI_API("AMPI_Group_range_incl");
10051 int ret;
10052 ampiParent *ptr = getAmpiParent();
10053 vector<int> vec = ptr->group2vec(group);
10054 vector<int> newvec = rangeInclOp(n,ranges,vec,&ret);
10055 if(ret != MPI_SUCCESS){
10056 *newgroup = MPI_GROUP_EMPTY;
10057 return ampiErrhandler("AMPI_Group_range_incl", ret);
10058 }else{
10059 *newgroup = ptr->saveGroupStruct(newvec);
10060 return MPI_SUCCESS;
10061 }
10062 }
10063
10064 AMPI_API_IMPL(int, MPI_Group_range_excl, MPI_Group group, int n, int ranges[][3], MPI_Group *newgroup)
10065 {
10066 AMPI_API("AMPI_Group_range_excl");
10067 int ret;
10068 ampiParent *ptr = getAmpiParent();
10069 vector<int> vec = ptr->group2vec(group);
10070 vector<int> newvec = rangeExclOp(n,ranges,vec,&ret);
10071 if(ret != MPI_SUCCESS){
10072 *newgroup = MPI_GROUP_EMPTY;
10073 return ampiErrhandler("AMPI_Group_range_excl", ret);
10074 }else{
10075 *newgroup = ptr->saveGroupStruct(newvec);
10076 return MPI_SUCCESS;
10077 }
10078 }
10079
10080 AMPI_API_IMPL(int, MPI_Group_free, MPI_Group *group)
10081 {
10082 AMPI_API("AMPI_Group_free");
10083 return MPI_SUCCESS;
10084 }
10085
10086 AMPI_API_IMPL(int, MPI_Comm_create, MPI_Comm comm, MPI_Group group, MPI_Comm* newcomm)
10087 {
10088 AMPI_API("AMPI_Comm_create");
10089 int rank_in_group, key, color, zero;
10090 MPI_Group group_of_comm;
10091
10092 vector<int> vec = getAmpiParent()->group2vec(group);
10093 if(vec.size()==0){
10094 AMPI_DEBUG("AMPI> In MPI_Comm_create, creating an empty communicator");
10095 *newcomm = MPI_COMM_NULL;
10096 return MPI_SUCCESS;
10097 }
10098
10099 if(getAmpiParent()->isInter(comm)){
10100
10101 ampi *ptr = getAmpiInstance(comm);
10102 ptr->commCreate(vec, newcomm);
10103 ptr->barrier();
10104 }
10105 else{
10106
10107
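// Intra-communicator case: implement Comm_create via Comm_split. Ranks in
// 'group' use the comm rank of the group's rank 0 as the color and their own
// group rank as the key; ranks outside the group pass MPI_UNDEFINED and get
// MPI_COMM_NULL.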
10108 MPI_Group_rank(group, &rank_in_group);
10109 if(rank_in_group == MPI_UNDEFINED){
10110 color = MPI_UNDEFINED;
10111 key = 0;
10112 }
10113 else{
10114
10115
10116 MPI_Comm_group(comm, &group_of_comm);
10117 zero = 0;
10118 MPI_Group_translate_ranks(group, 1, &zero, group_of_comm, &color);
10119 key = rank_in_group;
10120 }
10121 return MPI_Comm_split(comm, color, key, newcomm);
10122 }
10123 return MPI_SUCCESS;
10124 }
10125
10126 AMPI_API_IMPL(int, MPI_Comm_create_group, MPI_Comm comm, MPI_Group group, int tag, MPI_Comm *newcomm)
10127 {
10128 AMPI_API("AMPI_Comm_create_group");
10129
10130 if (group == MPI_GROUP_NULL) {
10131 *newcomm = MPI_COMM_NULL;
10132 return MPI_SUCCESS;
10133 }
10134
10135 #if AMPI_ERROR_CHECKING
10136 if (!getAmpiParent()->isIntra(comm)) {
10137 *newcomm = MPI_COMM_NULL;
10138 return ampiErrhandler("AMPI_Comm_create_group", MPI_ERR_COMM);
10139 }
10140 int ret = checkTag("AMPI_Comm_create_group", tag);
10141 if (ret != MPI_SUCCESS) {
10142 *newcomm = MPI_COMM_NULL;
10143 return ampiErrhandler("AMPI_Comm_create_group", ret);
10144 }
10145 #endif
10146
10147 int rank, groupRank, groupSize;
10148 MPI_Group parentGroup;
10149 MPI_Comm_rank(comm, &rank);
10150 MPI_Group_rank(group, &groupRank);
10151 MPI_Group_size(group, &groupSize);
10152 if (groupRank == MPI_UNDEFINED) {
10153 *newcomm = MPI_COMM_NULL;
10154 return MPI_SUCCESS;
10155 }
10156 MPI_Comm_dup(MPI_COMM_SELF, newcomm);
10157
10158 vector<int> groupPids(groupSize), pids(groupSize, 0);
10159 std::iota(groupPids.begin(), groupPids.end(), 0);
10160 MPI_Comm_group(comm, &parentGroup);
10161 MPI_Group_translate_ranks(group, groupSize, groupPids.data(), parentGroup, pids.data());
10162 MPI_Group_free(&parentGroup);
10163
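// Build the new communicator by recursive doubling: starting from a dup of
// MPI_COMM_SELF, adjacent subgroups of size i are joined pairwise with
// MPI_Intercomm_create + MPI_Intercomm_merge until the whole group is
// covered; 'tag' is used for each leader exchange.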
10164 MPI_Comm commOld, tmpInter;
10165 for (int i=1; i<groupSize; i*=2) {
10166 int groupId = groupRank/i;
10167 commOld = *newcomm;
10168
10169 if (groupId % 2 == 0) {
10170 if ((groupId+1)*i < groupSize) {
10171 MPI_Intercomm_create(*newcomm, 0, comm, pids[(groupId+1)*i], tag, &tmpInter);
10172 MPI_Intercomm_merge(tmpInter, 0, newcomm);
10173 }
10174 }
10175 else {
10176 MPI_Intercomm_create(*newcomm, 0, comm, pids[(groupId+1)*i], tag, &tmpInter);
10177 MPI_Intercomm_merge(tmpInter, 1, newcomm);
10178 }
10179
10180 if (*newcomm != commOld) {
10181 MPI_Comm_free(&tmpInter);
10182 MPI_Comm_free(&commOld);
10183 }
10184 }
10185
10186 return MPI_SUCCESS;
10187 }
10188
10189 AMPI_API_IMPL(int, MPI_Comm_set_name, MPI_Comm comm, const char *comm_name)
10190 {
10191 AMPI_API("AMPI_Comm_set_name");
10192 getAmpiInstance(comm)->setCommName(comm_name);
10193 return MPI_SUCCESS;
10194 }
10195
10196 AMPI_API_IMPL(int, MPI_Comm_get_name, MPI_Comm comm, char *comm_name, int *resultlen)
10197 {
10198 AMPI_API("AMPI_Comm_get_name");
10199 getAmpiInstance(comm)->getCommName(comm_name, resultlen);
10200 return MPI_SUCCESS;
10201 }
10202
10203 AMPI_API_IMPL(int, MPI_Comm_set_info, MPI_Comm comm, MPI_Info info)
10204 {
10205 AMPI_API("AMPI_Comm_set_info");
10206
10207 return MPI_SUCCESS;
10208 }
10209
10210 AMPI_API_IMPL(int, MPI_Comm_get_info, MPI_Comm comm, MPI_Info *info)
10211 {
10212 AMPI_API("AMPI_Comm_get_info");
10213
10214 *info = MPI_INFO_NULL;
10215 return MPI_SUCCESS;
10216 }
10217
10218 AMPI_API_IMPL(int, MPI_Comm_create_keyval, MPI_Comm_copy_attr_function *copy_fn,
10219 MPI_Comm_delete_attr_function *delete_fn,
10220 int *keyval, void* extra_state)
10221 {
10222 AMPI_API("AMPI_Comm_create_keyval");
10223 int ret = getAmpiParent()->createKeyval(copy_fn,delete_fn,keyval,extra_state);
10224 return ampiErrhandler("AMPI_Comm_create_keyval", ret);
10225 }
10226
10227 AMPI_API_IMPL(int, MPI_Comm_free_keyval, int *keyval)
10228 {
10229 AMPI_API("AMPI_Comm_free_keyval");
10230 vector<int>& keyvals = getAmpiParent()->getKeyvals(MPI_COMM_WORLD);
10231 int ret = getAmpiParent()->freeUserKeyval(MPI_COMM_WORLD, keyvals, keyval);
10232 return ampiErrhandler("AMPI_Comm_free_keyval", ret);
10233 }
10234
10235 AMPI_API_IMPL(int, MPI_Comm_set_attr, MPI_Comm comm, int keyval, void* attribute_val)
10236 {
10237 AMPI_API("AMPI_Comm_set_attr");
10238 ampiParent *parent = getAmpiParent();
10239 ampiCommStruct &cs = const_cast<ampiCommStruct &>(parent->comm2CommStruct(comm));
10240 vector<int>& keyvals = cs.getKeyvals();
10241 int ret = parent->setAttr(comm, keyvals, keyval, attribute_val);
10242 return ampiErrhandler("AMPI_Comm_set_attr", ret);
10243 }
10244
10245 AMPI_API_IMPL(int, MPI_Comm_get_attr, MPI_Comm comm, int keyval, void *attribute_val, int *flag)
10246 {
10247 AMPI_API("AMPI_Comm_get_attr");
10248 ampiParent *parent = getAmpiParent();
10249 ampiCommStruct &cs = const_cast<ampiCommStruct &>(parent->comm2CommStruct(comm));
10250 vector<int>& keyvals = cs.getKeyvals();
10251 int ret = parent->getAttr(comm, keyvals, keyval, attribute_val, flag);
10252 return ampiErrhandler("AMPI_Comm_get_attr", ret);
10253 }
10254
10255 AMPI_API_IMPL(int, MPI_Comm_delete_attr, MPI_Comm comm, int keyval)
10256 {
10257 AMPI_API("AMPI_Comm_delete_attr");
10258 ampiParent *parent = getAmpiParent();
10259 ampiCommStruct &cs = const_cast<ampiCommStruct &>(parent->comm2CommStruct(comm));
10260 vector<int>& keyvals = cs.getKeyvals();
10261 int ret = parent->deleteAttr(comm, keyvals, keyval);
10262 return ampiErrhandler("AMPI_Comm_delete_attr", ret);
10263 }
10264
10265 AMPI_API_IMPL(int, MPI_Keyval_create, MPI_Copy_function *copy_fn, MPI_Delete_function *delete_fn,
10266 int *keyval, void* extra_state)
10267 {
10268 AMPI_API("AMPI_Keyval_create");
10269 return MPI_Comm_create_keyval(copy_fn, delete_fn, keyval, extra_state);
10270 }
10271
10272 AMPI_API_IMPL(int, MPI_Keyval_free, int *keyval)
10273 {
10274 AMPI_API("AMPI_Keyval_free");
10275 return MPI_Comm_free_keyval(keyval);
10276 }
10277
10278 AMPI_API_IMPL(int, MPI_Attr_put, MPI_Comm comm, int keyval, void* attribute_val)
10279 {
10280 AMPI_API("AMPI_Attr_put");
10281 return MPI_Comm_set_attr(comm, keyval, attribute_val);
10282 }
10283
10284 AMPI_API_IMPL(int, MPI_Attr_get, MPI_Comm comm, int keyval, void *attribute_val, int *flag)
10285 {
10286 AMPI_API("AMPI_Attr_get");
10287 return MPI_Comm_get_attr(comm, keyval, attribute_val, flag);
10288 }
10289
10290 AMPI_API_IMPL(int, MPI_Attr_delete, MPI_Comm comm, int keyval)
10291 {
10292 AMPI_API("AMPI_Attr_delete");
10293 return MPI_Comm_delete_attr(comm, keyval);
10294 }
10295
10296 AMPI_API_IMPL(int, MPI_Cart_map, MPI_Comm comm, int ndims, const int *dims,
10297 const int *periods, int *newrank)
10298 {
10299 AMPI_API("AMPI_Cart_map");
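// AMPI keeps the identity mapping for topologies: the new rank equals the
// rank in comm if it fits within the requested grid, else MPI_UNDEFINED.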
10300
10301 ampi* ptr = getAmpiInstance(comm);
10302 int nranks;
10303
10304 if (ndims == 0) {
10305 nranks = 1;
10306 } else {
10307 nranks = dims[0];
10308 for (int i=1; i<ndims; i++) {
10309 nranks *= dims[i];
10310 }
10311 }
10312
10313 int rank = ptr->getRank();
10314 if (rank < nranks) {
10315 *newrank = rank;
10316 } else {
10317 *newrank = MPI_UNDEFINED;
10318 }
10319 return MPI_SUCCESS;
10320 }
10321
10322 AMPI_API_IMPL(int, MPI_Graph_map, MPI_Comm comm, int nnodes, const int *index,
10323 const int *edges, int *newrank)
10324 {
10325 AMPI_API("AMPI_Graph_map");
10326
10327 ampi* ptr = getAmpiInstance(comm);
10328
10329 if (ptr->getRank() < nnodes) {
10330 *newrank = ptr->getRank();
10331 } else {
10332 *newrank = MPI_UNDEFINED;
10333 }
10334 return MPI_SUCCESS;
10335 }
10336
10337 AMPI_API_IMPL(int, MPI_Cart_create, MPI_Comm comm_old, int ndims, const int *dims,
10338 const int *periods, int reorder, MPI_Comm *comm_cart)
10339 {
10340 AMPI_API("AMPI_Cart_create");
10341
10342
10343
10344
10345
10346
10347
10348
10349
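// Create a communicator over the same ranks as comm_old and attach the
// Cartesian topology information (dims, periods, neighbor list) to it. The
// 'reorder' argument is not used; ranks keep the mapping from MPI_Cart_map.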
10350 int newrank;
10351 MPI_Cart_map(comm_old, ndims, dims, periods, &newrank);
10352
10353 ampiParent *ptr = getAmpiParent();
10354 vector<int> vec = ptr->group2vec(ptr->comm2group(comm_old));
10355 *comm_cart = getAmpiInstance(comm_old)->cartCreate(vec, ndims, dims);
10356
10357 if (*comm_cart != MPI_COMM_NULL) {
10358 ampiCommStruct &c = getAmpiParent()->getCart(*comm_cart);
10359 ampiTopology *topo = c.getTopology();
10360 topo->setndims(ndims);
10361 vector<int> dimsv(dims, dims+ndims), periodsv(periods, periods+ndims), nborsv;
10362 topo->setdims(dimsv);
10363 topo->setperiods(periodsv);
10364 getAmpiInstance(*comm_cart)->findNeighbors(*comm_cart, newrank, nborsv);
10365 topo->setnbors(nborsv);
10366 }
10367
10368 return MPI_SUCCESS;
10369 }
10370
10371 AMPI_API_IMPL(int, MPI_Graph_create, MPI_Comm comm_old, int nnodes, const int *index,
10372 const int *edges, int reorder, MPI_Comm *comm_graph)
10373 {
10374 AMPI_API("AMPI_Graph_create");
10375
10376 if (nnodes == 0) {
10377 *comm_graph = MPI_COMM_NULL;
10378 return MPI_SUCCESS;
10379 }
10380
10381
10382 int newrank;
10383 MPI_Graph_map(comm_old, nnodes, index, edges, &newrank);
10384
10385 ampiParent *ptr = getAmpiParent();
10386 vector<int> vec = ptr->group2vec(ptr->comm2group(comm_old));
10387 getAmpiInstance(comm_old)->graphCreate(vec, comm_graph);
10388 ampiTopology &topo = *ptr->getGraph(*comm_graph).getTopology();
10389
10390 vector<int> index_(index, index+nnodes), edges_, nborsv;
10391 topo.setnvertices(nnodes);
10392 topo.setindex(index_);
10393
10394 for (int i = 0; i < index[nnodes - 1]; i++)
10395 edges_.push_back(edges[i]);
10396 topo.setedges(edges_);
10397
10398 getAmpiInstance(*comm_graph)->findNeighbors(*comm_graph, newrank, nborsv);
10399 topo.setnbors(nborsv);
10400
10401 return MPI_SUCCESS;
10402 }
10403
10404 AMPI_API_IMPL(int, MPI_Dist_graph_create_adjacent, MPI_Comm comm_old, int indegree, const int sources[],
10405 const int sourceweights[], int outdegree,
10406 const int destinations[], const int destweights[],
10407 MPI_Info info, int reorder, MPI_Comm *comm_dist_graph)
10408 {
10409 AMPI_API("AMPI_Dist_graph_create_adjacent");
10410
10411 #if AMPI_ERROR_CHECKING
10412 if (indegree < 0 || outdegree < 0) {
10413 return ampiErrhandler("AMPI_Dist_graph_create_adjacent", MPI_ERR_TOPOLOGY);
10414 }
10415 for (int i=0; i<indegree; i++) {
10416 if (sources[i] < 0) {
10417 return ampiErrhandler("AMPI_Dist_graph_create_adjacent", MPI_ERR_TOPOLOGY);
10418 }
10419 }
10420 for (int i=0; i<outdegree; i++) {
10421 if (destinations[i] < 0) {
10422 return ampiErrhandler("AMPI_Dist_graph_create_adjacent", MPI_ERR_TOPOLOGY);
10423 }
10424 }
10425 #endif
10426
10427 ampiParent *ptr = getAmpiParent();
10428 vector<int> vec = ptr->group2vec(ptr->comm2group(comm_old));
10429 getAmpiInstance(comm_old)->distGraphCreate(vec,comm_dist_graph);
10430 ampiCommStruct &c = ptr->getDistGraph(*comm_dist_graph);
10431 ampiTopology *topo = c.getTopology();
10432
10433 topo->setInDegree(indegree);
10434 topo->setOutDegree(outdegree);
10435
10436 topo->setAreSourcesWeighted(sourceweights != MPI_UNWEIGHTED);
10437 if (topo->areSourcesWeighted()) {
10438 vector<int> tmpSourceWeights(sourceweights, sourceweights+indegree);
10439 topo->setSourceWeights(tmpSourceWeights);
10440 }
10441
10442 topo->setAreDestsWeighted(destweights != MPI_UNWEIGHTED);
10443 if (topo->areDestsWeighted()) {
10444 vector<int> tmpDestWeights(destweights, destweights+outdegree);
10445 topo->setDestWeights(tmpDestWeights);
10446 }
10447
10448 vector<int> tmpSources(sources, sources+indegree);
10449 topo->setSources(tmpSources);
10450
10451 vector<int> tmpDestinations(destinations, destinations+outdegree);
10452 topo->setDestinations(tmpDestinations);
10453
10454 return MPI_SUCCESS;
10455 }
10456
10457 AMPI_API_IMPL(int, MPI_Dist_graph_create, MPI_Comm comm_old, int n, const int sources[], const int degrees[],
10458 const int destinations[], const int weights[], MPI_Info info,
10459 int reorder, MPI_Comm *comm_dist_graph)
10460 {
10461 AMPI_API("AMPI_Dist_graph_create");
10462
10463 #if AMPI_ERROR_CHECKING
10464 if (n < 0) {
10465 return ampiErrhandler("AMPI_Dist_graph_create", MPI_ERR_TOPOLOGY);
10466 }
10467 int counter = 0;
10468 for (int i=0; i<n; i++) {
10469 if ((sources[i] < 0) || (degrees[i] < 0)) {
10470 return ampiErrhandler("AMPI_Dist_graph_create", MPI_ERR_TOPOLOGY);
10471 }
10472 for (int j=0; j<degrees[i]; j++) {
10473 if ((destinations[counter] < 0) || (weights != MPI_UNWEIGHTED && weights[counter] < 0)) {
10474 return ampiErrhandler("AMPI_Dist_graph_create", MPI_ERR_TOPOLOGY);
10475 }
10476 counter++;
10477 }
10478 }
10479 #endif
10480
10481 ampiParent *ptr = getAmpiParent();
10482 vector<int> vec = ptr->group2vec(ptr->comm2group(comm_old));
10483 getAmpiInstance(comm_old)->distGraphCreate(vec,comm_dist_graph);
10484 ampiCommStruct &c = ptr->getDistGraph(*comm_dist_graph);
10485 ampiTopology *topo = c.getTopology();
10486
10487 int p = c.getSize();
10488
10489 vector<int> edgeListIn(p, 0);
10490 vector<int> edgeListOut(p, 0);
10491 vector<vector<int> > edgeMatrixIn(p);
10492 vector<vector<int> > edgeMatrixOut(p);
10493
10494 for (int i=0; i<p; i++) {
10495 vector<int> tmpVector(p, 0);
10496 edgeMatrixIn[i] = tmpVector;
10497 edgeMatrixOut[i] = tmpVector;
10498 }
10499
10500 int index = 0;
10501 for (int i=0; i<n; i++) {
10502 for (int j=0; j<degrees[i]; j++) {
10503 edgeMatrixOut[ sources[i] ][ edgeListOut[sources[i]]++ ] = destinations[index];
10504 edgeMatrixIn[ destinations[index] ][ edgeListIn[destinations[index]]++ ] = sources[i];
10505 index++;
10506 }
10507 }
10508
10509 vector<int> edgeCount(2*p);
10510 vector<int> totalcount(2);
10511 int sends = 0;
10512 for (int i=0; i<p; i++) {
10513 if (edgeListIn[i] > 0) {
10514 edgeCount[2*i] = 1;
10515 sends++;
10516 }
10517 else {
10518 edgeCount[2*i] = 0;
10519 }
10520 if (edgeListOut[i] > 0) {
10521 edgeCount[2*i+1] = 1;
10522 sends++;
10523 }
10524 else {
10525 edgeCount[2*i+1] = 0;
10526 }
10527 }
10528
10529
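  // Each rank learns how many in-edge lists (totalcount[0]) and how many
  // out-edge lists (totalcount[1]) it should expect to receive.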
10530 MPI_Reduce_scatter_block(edgeCount.data(), totalcount.data(), 2, MPI_INT, MPI_SUM, comm_old);
10531
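  // Send rank i its in-edge and out-edge lists. The last element of each
  // message is a sentinel: +1 marks a list of sources (in-edges), -1 a list
  // of destinations (out-edges).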
10532 vector<MPI_Request> requests(sends, MPI_REQUEST_NULL);
10533 int count = 0;
10534 for (int i=0; i<p; i++) {
10535 if (edgeListIn[i] > 0) {
10536 if (edgeListIn[i] == p) {
10537 edgeMatrixIn[i].push_back(1);
10538 }
10539 else {
10540 edgeMatrixIn[i][edgeListIn[i]] = 1;
10541 }
10542 MPI_Isend(edgeMatrixIn[i].data(), edgeListIn[i]+1, MPI_INT, i, 0, comm_old, &requests[count++]);
10543 }
10544 if (edgeListOut[i] > 0) {
10545 if (edgeListOut[i] == p) {
10546 edgeMatrixOut[i].push_back(-1);
10547 }
10548 else {
10549 edgeMatrixOut[i][edgeListOut[i]] = -1;
10550 }
10551 MPI_Isend(edgeMatrixOut[i].data(), edgeListOut[i]+1, MPI_INT, i, 0, comm_old, &requests[count++]);
10552 }
10553 }
10554
10555
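  // Receive the expected lists and split them by sentinel: a positive last
  // element means the payload holds this rank's sources, a negative one
  // means it holds its destinations.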
10556 int numEdges;
10557 MPI_Status status;
10558 vector<int> saveSources, saveDestinations;
10559 for (int i=0; i<2; i++) {
10560 for (int j=0; j<totalcount[i]; j++) {
10561 MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, comm_old, &status);
10562 MPI_Get_count(&status, MPI_INT, &numEdges);
10563 vector<int> saveEdges(numEdges);
10564 MPI_Recv(saveEdges.data(), numEdges, MPI_INT, status.MPI_SOURCE, 0, comm_old, MPI_STATUS_IGNORE);
10565
10566 if (saveEdges[numEdges-1] > 0) {
10567 for (int k=0; k<numEdges-1; k++) {
10568 saveSources.push_back(saveEdges[k]);
10569 }
10570 }
10571 else {
10572 for (int k=0; k<numEdges-1; k++) {
10573 saveDestinations.push_back(saveEdges[k]);
10574 }
10575 }
10576 }
10577 }
10578
10579 topo->setDestinations(saveDestinations);
10580 topo->setSources(saveSources);
10581 topo->setOutDegree(saveDestinations.size());
10582 topo->setInDegree(saveSources.size());
10583
10584 topo->setAreSourcesWeighted(weights != MPI_UNWEIGHTED);
10585 topo->setAreDestsWeighted(weights != MPI_UNWEIGHTED);
10586 if (topo->areSourcesWeighted()) {
10587 vector<int> tmpWeights(weights, weights+n);
10588 topo->setSourceWeights(tmpWeights);
10589 topo->setDestWeights(tmpWeights);
10590 }
10591
10592 MPI_Waitall(sends, requests.data(), MPI_STATUSES_IGNORE);
10593
10594 return MPI_SUCCESS;
10595 }
10596
10597 AMPI_API_IMPL(int, MPI_Topo_test, MPI_Comm comm, int *status)
10598 {
10599 AMPI_API("AMPI_Topo_test");
10600
10601 ampiParent *ptr = getAmpiParent();
10602
10603 if (ptr->isCart(comm))
10604 *status = MPI_CART;
10605 else if (ptr->isGraph(comm))
10606 *status = MPI_GRAPH;
10607 else if (ptr->isDistGraph(comm))
10608 *status = MPI_DIST_GRAPH;
10609 else *status = MPI_UNDEFINED;
10610
10611 return MPI_SUCCESS;
10612 }
10613
10614 AMPI_API_IMPL(int, MPI_Cartdim_get, MPI_Comm comm, int *ndims)
10615 {
10616 AMPI_API("AMPI_Cartdim_get");
10617
10618 #if AMPI_ERROR_CHECKING
10619 if (!getAmpiParent()->isCart(comm))
10620 return ampiErrhandler("AMPI_Cartdim_get", MPI_ERR_TOPOLOGY);
10621 #endif
10622
10623 *ndims = getAmpiParent()->getCart(comm).getTopology()->getndims();
10624
10625 return MPI_SUCCESS;
10626 }
10627
10628 AMPI_API_IMPL(int, MPI_Cart_get, MPI_Comm comm, int maxdims, int *dims, int *periods, int *coords)
10629 {
10630 int i, ndims;
10631
10632 AMPI_API("AMPI_Cart_get");
10633
10634 #if AMPI_ERROR_CHECKING
10635 if (!getAmpiParent()->isCart(comm))
10636 return ampiErrhandler("AMPI_Cart_get", MPI_ERR_TOPOLOGY);
10637 #endif
10638
10639 ampiCommStruct &c = getAmpiParent()->getCart(comm);
10640 ampiTopology *topo = c.getTopology();
10641 ndims = topo->getndims();
10642 int rank = getAmpiInstance(comm)->getRank();
10643
10644 const vector<int> &dims_ = topo->getdims();
10645 const vector<int> &periods_ = topo->getperiods();
10646
10647 for (i = 0; i < maxdims; i++) {
10648 dims[i] = dims_[i];
10649 periods[i] = periods_[i];
10650 }
10651
10652 for (i = ndims - 1; i >= 0; i--) {
10653 if (i < maxdims)
10654 coords[i] = rank % dims_[i];
10655 rank = (int) (rank / dims_[i]);
10656 }
10657
10658 return MPI_SUCCESS;
10659 }
10660
10661 AMPI_API_IMPL(int, MPI_Cart_rank, MPI_Comm comm, const int *coords, int *rank)
10662 {
10663 AMPI_API("AMPI_Cart_rank");
10664
10665 #if AMPI_ERROR_CHECKING
10666 if (!getAmpiParent()->isCart(comm))
10667 return ampiErrhandler("AMPI_Cart_rank", MPI_ERR_TOPOLOGY);
10668 #endif
10669
10670 ampiCommStruct &c = getAmpiParent()->getCart(comm);
10671 ampiTopology *topo = c.getTopology();
10672 int ndims = topo->getndims();
10673 const vector<int> &dims = topo->getdims();
10674 const vector<int> &periods = topo->getperiods();
10675
10676
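  // Work on a copy of the coordinates so that out-of-range values along
  // periodic dimensions can be wrapped into [0, dims[i]) without modifying
  // the caller's array.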
10677 vector<int> ncoords(coords, coords+ndims);
10678
10679 int prod = 1;
10680 int r = 0;
10681
10682 for (int i = ndims - 1; i >= 0; i--) {
10683 if ((ncoords[i] < 0) || (ncoords[i] >= dims[i])) {
10684 if (periods[i] != 0) {
10685 if (ncoords[i] > 0) {
10686 ncoords[i] %= dims[i];
10687 } else {
10688 while (ncoords[i] < 0) ncoords[i]+=dims[i];
10689 }
10690 }
10691 }
10692 r += prod * ncoords[i];
10693 prod *= dims[i];
10694 }
10695
10696 *rank = r;
10697
10698 return MPI_SUCCESS;
10699 }
10700
10701 AMPI_API_IMPL(int, MPI_Cart_coords, MPI_Comm comm, int rank, int maxdims, int *coords)
10702 {
10703 AMPI_API("AMPI_Cart_coords");
10704
10705 #if AMPI_ERROR_CHECKING
10706 if (!getAmpiParent()->isCart(comm))
10707     return ampiErrhandler("AMPI_Cart_coords", MPI_ERR_TOPOLOGY);
10708 #endif
10709
10710 ampiCommStruct &c = getAmpiParent()->getCart(comm);
10711 ampiTopology *topo = c.getTopology();
10712 int ndims = topo->getndims();
10713 const vector<int> &dims = topo->getdims();
10714
10715 for (int i = ndims - 1; i >= 0; i--) {
10716 if (i < maxdims)
10717 coords[i] = rank % dims[i];
10718 rank = (int) (rank / dims[i]);
10719 }
10720
10721 return MPI_SUCCESS;
10722 }
10723
10724
10725
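// Helper for MPI_Cart_shift: offsets coords[direction] by displacement,
// wrapping around if that dimension is periodic. Returns MPI_PROC_NULL in
// *rank_out if the shifted coordinate falls off a non-periodic grid,
// otherwise the rank at the shifted position. coords is restored on exit.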
10726 static void cart_clamp_coord(MPI_Comm comm, const vector<int> &dims,
10727 const vector<int> &periodicity, int *coords,
10728 int direction, int displacement, int *rank_out)
10729 {
10730 int base_coord = coords[direction];
10731 coords[direction] += displacement;
10732
10733 if (periodicity[direction] != 0) {
10734 while (coords[direction] < 0)
10735 coords[direction] += dims[direction];
10736 while (coords[direction] >= dims[direction])
10737 coords[direction] -= dims[direction];
10738 }
10739
10740   if (coords[direction] < 0 || coords[direction] >= dims[direction])
10741 *rank_out = MPI_PROC_NULL;
10742 else
10743 MPI_Cart_rank(comm, coords, rank_out);
10744
10745 coords[direction] = base_coord;
10746 }
10747
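// Illustrative use (assumes a 1-D periodic Cartesian communicator 'cart' and
// buffers sendbuf/recvbuf of n doubles):
//   int left, right;
//   MPI_Cart_shift(cart, 0, 1, &left, &right);
//   MPI_Sendrecv(sendbuf, n, MPI_DOUBLE, right, 0,
//                recvbuf, n, MPI_DOUBLE, left,  0, cart, MPI_STATUS_IGNORE);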
10748 AMPI_API_IMPL(int, MPI_Cart_shift, MPI_Comm comm, int direction, int disp,
10749 int *rank_source, int *rank_dest)
10750 {
10751 AMPI_API("AMPI_Cart_shift");
10752
10753 #if AMPI_ERROR_CHECKING
10754 if (!getAmpiParent()->isCart(comm))
10755 return ampiErrhandler("AMPI_Cart_shift", MPI_ERR_TOPOLOGY);
10756 #endif
10757
10758 ampiCommStruct &c = getAmpiParent()->getCart(comm);
10759 ampiTopology *topo = c.getTopology();
10760 int ndims = topo->getndims();
10761
10762 #if AMPI_ERROR_CHECKING
10763 if ((direction < 0) || (direction >= ndims))
10764 return ampiErrhandler("AMPI_Cart_shift", MPI_ERR_DIMS);
10765 #endif
10766
10767 const vector<int> &dims = topo->getdims();
10768 const vector<int> &periods = topo->getperiods();
10769 vector<int> coords(ndims);
10770
10771 int mype = getAmpiInstance(comm)->getRank();
10772 MPI_Cart_coords(comm, mype, ndims, &coords[0]);
10773
10774 cart_clamp_coord(comm, dims, periods, &coords[0], direction, disp, rank_dest);
10775 cart_clamp_coord(comm, dims, periods, &coords[0], direction, -disp, rank_source);
10776
10777 return MPI_SUCCESS;
10778 }
10779
10780 AMPI_API_IMPL(int, MPI_Graphdims_get, MPI_Comm comm, int *nnodes, int *nedges)
10781 {
10782   AMPI_API("AMPI_Graphdims_get");
10783
10784 ampiCommStruct &c = getAmpiParent()->getGraph(comm);
10785 ampiTopology *topo = c.getTopology();
10786 *nnodes = topo->getnvertices();
10787 const vector<int> &index = topo->getindex();
10788 *nedges = index[(*nnodes) - 1];
10789
10790 return MPI_SUCCESS;
10791 }
10792
10793 AMPI_API_IMPL(int, MPI_Graph_get, MPI_Comm comm, int maxindex, int maxedges, int *index, int *edges)
10794 {
10795 AMPI_API("AMPI_Graph_get");
10796
10797 #if AMPI_ERROR_CHECKING
10798 if (!getAmpiParent()->isGraph(comm))
10799 return ampiErrhandler("AMPI_Graph_get", MPI_ERR_TOPOLOGY);
10800 #endif
10801
10802 ampiCommStruct &c = getAmpiParent()->getGraph(comm);
10803 ampiTopology *topo = c.getTopology();
10804 const vector<int> &index_ = topo->getindex();
10805 const vector<int> &edges_ = topo->getedges();
10806
10807 if (maxindex > index_.size())
10808 maxindex = index_.size();
10809
10810 int i;
10811 for (i = 0; i < maxindex; i++)
10812 index[i] = index_[i];
10813
  if (maxedges > edges_.size())
    maxedges = edges_.size();

10814 for (i = 0; i < maxedges; i++)
10815 edges[i] = edges_[i];
10816
10817 return MPI_SUCCESS;
10818 }
10819
10820 AMPI_API_IMPL(int, MPI_Graph_neighbors_count, MPI_Comm comm, int rank, int *nneighbors)
10821 {
10822 AMPI_API("AMPI_Graph_neighbors_count");
10823
10824 #if AMPI_ERROR_CHECKING
10825 if (!getAmpiParent()->isGraph(comm))
10826 return ampiErrhandler("AMPI_Graph_neighbors_count", MPI_ERR_TOPOLOGY);
10827 #endif
10828
10829 ampiCommStruct &c = getAmpiParent()->getGraph(comm);
10830 ampiTopology *topo = c.getTopology();
10831 const vector<int> &index = topo->getindex();
10832
10833 #if AMPI_ERROR_CHECKING
10834 if ((rank >= index.size()) || (rank < 0))
10835 return ampiErrhandler("AMPI_Graph_neighbors_count", MPI_ERR_RANK);
10836 #endif
10837
10838 if (rank == 0)
10839 *nneighbors = index[rank];
10840 else
10841 *nneighbors = index[rank] - index[rank - 1];
10842
10843 return MPI_SUCCESS;
10844 }
10845
10846 AMPI_API_IMPL(int, MPI_Graph_neighbors, MPI_Comm comm, int rank, int maxneighbors, int *neighbors)
10847 {
10848 AMPI_API("AMPI_Graph_neighbors");
10849
10850 #if AMPI_ERROR_CHECKING
10851 if (!getAmpiParent()->isGraph(comm))
10852 return ampiErrhandler("AMPI_Graph_neighbors", MPI_ERR_TOPOLOGY);
10853 #endif
10854
10855 ampiCommStruct &c = getAmpiParent()->getGraph(comm);
10856 ampiTopology *topo = c.getTopology();
10857 const vector<int> &index = topo->getindex();
10858 const vector<int> &edges = topo->getedges();
10859
10860 int numneighbors = (rank == 0) ? index[rank] : index[rank] - index[rank - 1];
10861 if (maxneighbors > numneighbors)
10862 maxneighbors = numneighbors;
10863
10864 #if AMPI_ERROR_CHECKING
10865 if (maxneighbors < 0)
10866 return ampiErrhandler("AMPI_Graph_neighbors", MPI_ERR_ARG);
10867 if ((rank >= index.size()) || (rank < 0))
10868 return ampiErrhandler("AMPI_Graph_neighbors", MPI_ERR_RANK);
10869 #endif
10870
10871 if (rank == 0) {
10872 for (int i = 0; i < maxneighbors; i++)
10873 neighbors[i] = edges[i];
10874 } else {
10875 for (int i = 0; i < maxneighbors; i++)
10876 neighbors[i] = edges[index[rank - 1] + i];
10877 }
10878 return MPI_SUCCESS;
10879 }
10880
10881 AMPI_API_IMPL(int, MPI_Dist_graph_neighbors_count, MPI_Comm comm, int *indegree, int *outdegree, int *weighted)
10882 {
10883 AMPI_API("AMPI_Dist_graph_neighbors_count");
10884
10885 #if AMPI_ERROR_CHECKING
10886 if (!getAmpiParent()->isDistGraph(comm)) {
10887 return ampiErrhandler("AMPI_Dist_graph_neighbors_count", MPI_ERR_TOPOLOGY);
10888 }
10889 #endif
10890
10891 ampiParent *ptr = getAmpiParent();
10892 ampiCommStruct &c = ptr->getDistGraph(comm);
10893 ampiTopology *topo = c.getTopology();
10894 *indegree = topo->getInDegree();
10895 *outdegree = topo->getOutDegree();
10896 *weighted = topo->areSourcesWeighted() ? 1 : 0;
10897
10898 return MPI_SUCCESS;
10899 }
10900
10901 AMPI_API_IMPL(int, MPI_Dist_graph_neighbors, MPI_Comm comm, int maxindegree, int sources[], int sourceweights[],
10902 int maxoutdegree, int destinations[], int destweights[])
10903 {
10904 AMPI_API("AMPI_Dist_graph_neighbors");
10905
10906 #if AMPI_ERROR_CHECKING
10907 if (!getAmpiParent()->isDistGraph(comm)) {
10908 return ampiErrhandler("AMPI_Dist_graph_neighbors", MPI_ERR_TOPOLOGY);
10909 }
10910 if ((maxindegree < 0) || (maxoutdegree < 0)) {
10911 return ampiErrhandler("AMPI_Dist_graph_neighbors", MPI_ERR_TOPOLOGY);
10912 }
10913 #endif
10914
10915 ampiParent *ptr = getAmpiParent();
10916 ampiCommStruct &c = ptr->getDistGraph(comm);
10917 ampiTopology *topo = c.getTopology();
10918
10919 const vector<int> &tmpSources = topo->getSources();
10920 const vector<int> &tmpSourceWeights = topo->getSourceWeights();
10921 const vector<int> &tmpDestinations = topo->getDestinations();
10922 const vector<int> &tmpDestWeights = topo->getDestWeights();
10923
10924 maxindegree = std::min(maxindegree, static_cast<int>(tmpSources.size()));
10925 maxoutdegree = std::min(maxoutdegree, static_cast<int>(tmpDestinations.size()));
10926
10927 for (int i=0; i<maxindegree; i++) {
10928 sources[i] = tmpSources[i];
10929 }
10930 for (int i=0; i<maxoutdegree; i++) {
10931 destinations[i] = tmpDestinations[i];
10932 }
10933
10934 if (topo->areSourcesWeighted()) {
10935 for (int i=0; i<maxindegree; i++) {
10936 sourceweights[i] = tmpSourceWeights[i];
10937 }
10938 for (int i=0; i<maxoutdegree; i++) {
10939 destweights[i] = tmpDestWeights[i];
10940 }
10941 }
10942 else {
10943 sourceweights = NULL;
10944 destweights = NULL;
10945 }
10946
10947 return MPI_SUCCESS;
10948 }
10949
10950
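// Collect the neighbor ranks of 'rank' in comm: the adjacency list for a
// graph communicator, or the (at most 2*ndims) existing shift neighbors for
// a Cartesian one. Used to cache neighbor lists in the ampiTopology when a
// topology communicator is created.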
10951 void ampi::findNeighbors(MPI_Comm comm, int rank, vector<int>& neighbors) const noexcept {
10952 int max_neighbors = 0;
10953 ampiParent *ptr = getAmpiParent();
10954 if (ptr->isGraph(comm)) {
10955 MPI_Graph_neighbors_count(comm, rank, &max_neighbors);
10956 neighbors.resize(max_neighbors);
10957 MPI_Graph_neighbors(comm, rank, max_neighbors, &neighbors[0]);
10958 }
10959 else if (ptr->isCart(comm)) {
10960 int num_dims;
10961 MPI_Cartdim_get(comm, &num_dims);
10962 max_neighbors = 2*num_dims;
10963 for (int i=0; i<max_neighbors; i++) {
10964 int src, dest;
10965 MPI_Cart_shift(comm, i/2, (i%2==0)?1:-1, &src, &dest);
10966 if (dest != MPI_PROC_NULL)
10967 neighbors.push_back(dest);
10968 }
10969 }
10970 }
10971
10972
10973
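// Floor of the d-th root of n; the small epsilon guards against pow()
// returning a value just below an exact integer root.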
10979 int integerRoot(int n,int d) noexcept {
10980 double epsilon=0.001;
10981 return (int)floor(pow(n+epsilon,1.0/d));
10982 }
10983
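// Recursively factor n into d factors, stored in dims[0..d-1] in
// non-decreasing order with every factor at least m; returns false if no
// such factorization exists. For example (illustrative), factors(12, 2,
// dims, 1) fills dims with {3, 4}.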
10992 bool factors(int n, int d, int *dims, int m) noexcept {
10993 if (d==1)
10994 {
10995 if (n>=m) {
10996 dims[0]=n;
10997 return true;
10998 }
10999 }
11000 else {
11001 int k_up=integerRoot(n,d);
11002 for (int k=k_up;k>=m;k--) {
11003 if (n%k==0) {
11004 dims[0]=k;
11005 if (factors(n/k,d-1,&dims[1],k))
11006 return true;
11007 }
11008 }
11009 }
11010
11011 return false;
11012 }
11013
11014 AMPI_API_IMPL(int, MPI_Dims_create, int nnodes, int ndims, int *dims)
11015 {
11016 AMPI_API("AMPI_Dims_create");
11017
11018 int i, n, d;
11019
11020 n = nnodes;
11021 d = ndims;
11022
11023 for (i = 0; i < ndims; i++) {
11024 if (dims[i] != 0) {
11025 if (n % dims[i] != 0) {
11026 return ampiErrhandler("AMPI_Dims_create", MPI_ERR_DIMS);
11027 } else {
11028 n = n / dims[i];
11029 d--;
11030 }
11031 }
11032 }
11033
11034 if(d > 0) {
11035 vector<int> pdims(d);
11036
11037 if (!factors(n, d, &pdims[0], 1))
11038 CkAbort("MPI_Dims_create: factorization failed!\n");
11039
11040 int j = 0;
11041 for (i = 0; i < ndims; i++) {
11042 if (dims[i] == 0) {
11043 dims[i] = pdims[j];
11044 j++;
11045 }
11046 }
11047
11048
11049
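    // Sort the first d entries of dims into non-increasing order (a simple
    // bubble sort; ndims is small). E.g. (illustrative): MPI_Dims_create(12,
    // 2, dims) with dims = {0, 0} yields dims = {4, 3}.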
11050 for (int i=0; i<d-1; i++) {
11051 for (int j=i+1; j<d; j++) {
11052 if (dims[j] > dims[i]) {
11053 int tmp = dims[i];
11054 dims[i] = dims[j];
11055 dims[j] = tmp;
11056 }
11057 }
11058 }
11059 }
11060
11061 return MPI_SUCCESS;
11062 }
11063
11064
11065
11066
11067
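// MPI_Cart_sub keeps the dimensions flagged in remain_dims and drops the
// rest. Ranks that share coordinates along all dropped dimensions get the
// same 'color' and land in the same sub-communicator, while the 'key' is
// built from the kept coordinates so ranks retain their relative order.
// If every dimension is dropped, a zero-dimensional communicator is
// returned instead (cartCreate0D).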
11068 AMPI_API_IMPL(int, MPI_Cart_sub, MPI_Comm comm, const int *remain_dims, MPI_Comm *newcomm)
11069 {
11070 AMPI_API("AMPI_Cart_sub");
11071
11072 int i, ndims;
11073 int color = 1, key = 1;
11074
11075 #if AMPI_ERROR_CHECKING
11076 if (!getAmpiParent()->isCart(comm))
11077 return ampiErrhandler("AMPI_Cart_sub", MPI_ERR_TOPOLOGY);
11078 #endif
11079
11080 int rank = getAmpiInstance(comm)->getRank();
11081 ampiCommStruct &c = getAmpiParent()->getCart(comm);
11082 ampiTopology *topo = c.getTopology();
11083 ndims = topo->getndims();
11084 const vector<int> &dims = topo->getdims();
11085 int num_remain_dims = 0;
11086
11087 vector<int> coords(ndims);
11088 MPI_Cart_coords(comm, rank, ndims, coords.data());
11089
11090 for (i = 0; i < ndims; i++) {
11091 if (remain_dims[i]) {
11092
11093 key = key * dims[i] + coords[i];
11094 num_remain_dims++;
11095 }
11096 else {
11097
11098 color = color * dims[i] + coords[i];
11099 }
11100 }
11101
11102 if (num_remain_dims == 0) {
11103 *newcomm = getAmpiInstance(comm)->cartCreate0D();
11104 return MPI_SUCCESS;
11105 }
11106
11107 getAmpiInstance(comm)->split(color, key, newcomm, MPI_CART);
11108
11109 ampiCommStruct &newc = getAmpiParent()->getCart(*newcomm);
11110 ampiTopology *newtopo = newc.getTopology();
11111 newtopo->setndims(num_remain_dims);
11112 vector<int> dimsv;
11113 const vector<int> &periods = topo->getperiods();
11114 vector<int> periodsv;
11115
11116 for (i = 0; i < ndims; i++) {
11117 if (remain_dims[i]) {
11118 dimsv.push_back(dims[i]);
11119 periodsv.push_back(periods[i]);
11120 }
11121 }
11122 newtopo->setdims(dimsv);
11123 newtopo->setperiods(periodsv);
11124
11125 vector<int> nborsv;
11126 getAmpiInstance(*newcomm)->findNeighbors(*newcomm, getAmpiParent()->getRank(*newcomm), nborsv);
11127 newtopo->setnbors(nborsv);
11128
11129 return MPI_SUCCESS;
11130 }
11131
11132 AMPI_API_IMPL(int, MPI_Type_get_envelope, MPI_Datatype datatype, int *ni, int *na,
11133 int *nd, int *combiner)
11134 {
11135 AMPI_API("AMPI_Type_get_envelope");
11136
11137 #if AMPI_ERROR_CHECKING
11138 int ret = checkData("AMPI_Type_get_envelope", datatype);
11139 if (ret!=MPI_SUCCESS)
11140 return ret;
11141 #endif
11142
11143 return getDDT()->getEnvelope(datatype,ni,na,nd,combiner);
11144 }
11145
11146 AMPI_API_IMPL(int, MPI_Type_get_contents, MPI_Datatype datatype, int ni, int na, int nd,
11147 int i[], MPI_Aint a[], MPI_Datatype d[])
11148 {
11149 AMPI_API("AMPI_Type_get_contents");
11150
11151 #if AMPI_ERROR_CHECKING
11152 int ret = checkData("AMPI_Type_get_contents", datatype);
11153 if (ret!=MPI_SUCCESS)
11154 return ret;
11155 #endif
11156
11157 return getDDT()->getContents(datatype,ni,na,nd,i,a,d);
11158 }
11159
11160 AMPI_API_IMPL(int, MPI_Pcontrol, const int level, ...)
11161 {
11162
11163
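  // AMPI does not act on profiling levels, so MPI_Pcontrol is a no-op.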
11164 return MPI_SUCCESS;
11165 }
11166
11167
11168
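// The routines below are AMPI extensions to the MPI standard.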
11169 CLINKAGE int AMPI_Init_universe(int * unicomm)
11170 {
11171 AMPI_API("AMPI_Init_universe");
11172 for(int i=0; i<_mpi_nworlds; i++) {
11173 unicomm[i] = MPI_COMM_UNIVERSE[i];
11174 }
11175 return MPI_SUCCESS;
11176 }
11177
11178 CLINKAGE char ** AMPI_Get_argv()
11179 {
11180 return CkGetArgv();
11181 }
11182
11183 CLINKAGE int AMPI_Get_argc()
11184 {
11185 return CkGetArgc();
11186 }
11187
11188 CLINKAGE int AMPI_Migrate(MPI_Info hints)
11189 {
11190 AMPI_API("AMPI_Migrate");
11191 int nkeys, exists;
11192 char key[MPI_MAX_INFO_KEY], value[MPI_MAX_INFO_VAL];
11193
11194 MPI_Info_get_nkeys(hints, &nkeys);
11195
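  // Recognized keys: "ampi_load_balance" (values "sync", "async", "false")
  // and "ampi_checkpoint" (values "to_file=<dir>", "in_memory",
  // "message_logging", "false"; "true" is rejected). Unknown keys or values
  // only produce a warning.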
11196 for (int i=0; i<nkeys; i++) {
11197 MPI_Info_get_nthkey(hints, i, key);
11198 MPI_Info_get(hints, key, MPI_MAX_INFO_VAL, value, &exists);
11199 if (!exists) {
11200 continue;
11201 }
11202 else if (strncmp(key, "ampi_load_balance", MPI_MAX_INFO_KEY) == 0) {
11203
11204 if (strncmp(value, "sync", MPI_MAX_INFO_VAL) == 0) {
11205 TCHARM_Migrate();
11206 }
11207 else if (strncmp(value, "async", MPI_MAX_INFO_VAL) == 0) {
11208 TCHARM_Async_Migrate();
11209 }
11210 else if (strncmp(value, "false", MPI_MAX_INFO_VAL) == 0) {
11211
11212 }
11213 else {
11214 CkPrintf("WARNING: Unknown MPI_Info value (%s) given to AMPI_Migrate for key: %s\n", value, key);
11215 }
11216 }
11217 else if (strncmp(key, "ampi_checkpoint", MPI_MAX_INFO_KEY) == 0) {
11218
11219 if (strncmp(value, "true", MPI_MAX_INFO_VAL) == 0) {
11220 CkAbort("AMPI> Error: Value \"true\" is not supported for AMPI_Migrate key \"ampi_checkpoint\"!\n");
11221 }
11222 else if (strncmp(value, "to_file=", strlen("to_file=")) == 0) {
11223 int offset = strlen("to_file=");
11224 int restart_dir_name_len = 0;
11225 MPI_Info_get_valuelen(hints, key, &restart_dir_name_len, &exists);
11226 if (restart_dir_name_len > offset) {
11227 value[restart_dir_name_len] = '\0';
11228 }
11229 else {
11230 CkAbort("AMPI> Error: No checkpoint directory name given to AMPI_Migrate\n");
11231 }
11232 getAmpiInstance(MPI_COMM_WORLD)->barrier();
11233 getAmpiParent()->startCheckpoint(&value[offset]);
11234 }
11235 else if (strncmp(value, "in_memory", MPI_MAX_INFO_VAL) == 0) {
11236 #if CMK_MEM_CHECKPOINT
11237 getAmpiInstance(MPI_COMM_WORLD)->barrier();
11238 getAmpiParent()->startCheckpoint("");
11239 #else
11240 CkPrintf("AMPI> Error: In-memory checkpoint/restart is not enabled!\n");
11241 CkAbort("AMPI> Error: Recompile Charm++/AMPI with CMK_MEM_CHECKPOINT.\n");
11242 #endif
11243 }
11244 else if (strncmp(value, "message_logging", MPI_MAX_INFO_VAL) == 0) {
11245 #if CMK_MESSAGE_LOGGING
11246 TCHARM_Migrate();
11247 #else
11248 CkPrintf("AMPI> Error: Message logging is not enabled!\n");
11249 CkAbort("AMPI> Error: Recompile Charm++/AMPI with CMK_MESSAGE_LOGGING.\n");
11250 #endif
11251 }
11252 else if (strncmp(value, "false", MPI_MAX_INFO_VAL) == 0) {
11253
11254 }
11255 else {
11256 CkPrintf("WARNING: Unknown MPI_Info value (%s) given to AMPI_Migrate for key: %s\n", value, key);
11257 }
11258 }
11259 else {
11260 CkPrintf("WARNING: Unknown MPI_Info key given to AMPI_Migrate: %s\n", key);
11261 }
11262 }
11263
11264 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
11265 ampi *currentAmpi = getAmpiInstance(MPI_COMM_WORLD);
11266 CpvAccess(_currentObj) = currentAmpi;
11267 #endif
11268
11269 #if CMK_BIGSIM_CHARM
11270 TRACE_BG_ADD_TAG("AMPI_MIGRATE");
11271 #endif
11272 return MPI_SUCCESS;
11273 }
11274
11275 #if CMK_FAULT_EVAC
11276 CLINKAGE
11277 int AMPI_Evacuate(void)
11278 {
11279
11280 TCHARM_Evacuate();
11281 return MPI_SUCCESS;
11282 }
11283 #endif
11284
11285 CLINKAGE
11286 int AMPI_Migrate_to_pe(int dest)
11287 {
11288 AMPI_API("AMPI_Migrate_to_pe");
11289 TCHARM_Migrate_to(dest);
11290 #if CMK_BIGSIM_CHARM
11291 TRACE_BG_ADD_TAG("AMPI_MIGRATE_TO_PE");
11292 #endif
11293 return MPI_SUCCESS;
11294 }
11295
11296 CLINKAGE
11297 int AMPI_Set_migratable(int mig)
11298 {
11299 AMPI_API("AMPI_Set_migratable");
11300 #if CMK_LBDB_ON
11301 getAmpiParent()->setMigratable((mig!=0));
11302 #else
11303 CkPrintf("WARNING: MPI_Set_migratable is not supported in this build of Charm++/AMPI.\n");
11304 #endif
11305 return MPI_SUCCESS;
11306 }
11307
11308 CLINKAGE
11309 int AMPI_Load_start_measure(void)
11310 {
11311 AMPI_API("AMPI_Load_start_measure");
11312 LBTurnInstrumentOn();
11313 return MPI_SUCCESS;
11314 }
11315
11316 CLINKAGE
11317 int AMPI_Load_stop_measure(void)
11318 {
11319 AMPI_API("AMPI_Load_stop_measure");
11320 LBTurnInstrumentOff();
11321 return MPI_SUCCESS;
11322 }
11323
11324 CLINKAGE
11325 int AMPI_Load_reset_measure(void)
11326 {
11327 AMPI_API("AMPI_Load_reset_measure");
11328 LBClearLoads();
11329 return MPI_SUCCESS;
11330 }
11331
11332 CLINKAGE
11333 int AMPI_Load_set_value(double value)
11334 {
11335 AMPI_API("AMPI_Load_set_value");
11336 ampiParent *ptr = getAmpiParent();
11337 ptr->setObjTime(value);
11338 return MPI_SUCCESS;
11339 }
11340
11341 void _registerampif(void) {
11342 _registerampi();
11343 }
11344
11345 CLINKAGE
11346 int AMPI_Register_main(MPI_MainFn mainFn,const char *name)
11347 {
11348 AMPI_API("AMPI_Register_main");
11349 if (TCHARM_Element()==0)
11350 {
11351 ampiCreateMain(mainFn,name,strlen(name));
11352 }
11353 return MPI_SUCCESS;
11354 }
11355
11356 FLINKAGE
11357 void FTN_NAME(MPI_REGISTER_MAIN,mpi_register_main)
11358 (MPI_MainFn mainFn,const char *name,int nameLen)
11359 {
11360 AMPI_API("AMPI_register_main");
11361 if (TCHARM_Element()==0)
11362 {
11363 ampiCreateMain(mainFn,name,nameLen);
11364 }
11365 }
11366
11367 CLINKAGE
11368 int AMPI_Register_pup(MPI_PupFn fn, void *data, int *idx)
11369 {
11370 AMPI_API("AMPI_Register_pup");
11371 *idx = TCHARM_Register(data, fn);
11372 return MPI_SUCCESS;
11373 }
11374
11375 CLINKAGE
11376 int AMPI_Register_about_to_migrate(MPI_MigrateFn fn)
11377 {
11378 AMPI_API("AMPI_Register_about_to_migrate");
11379 ampiParent *thisParent = getAmpiParent();
11380 thisParent->setUserAboutToMigrateFn(fn);
11381 return MPI_SUCCESS;
11382 }
11383
11384 CLINKAGE
11385 int AMPI_Register_just_migrated(MPI_MigrateFn fn)
11386 {
11387 AMPI_API("AMPI_Register_just_migrated");
11388 ampiParent *thisParent = getAmpiParent();
11389 thisParent->setUserJustMigratedFn(fn);
11390 return MPI_SUCCESS;
11391 }
11392
11393 CLINKAGE
11394 int AMPI_Get_pup_data(int idx, void *data)
11395 {
11396 AMPI_API("AMPI_Get_pup_data");
11397 data = TCHARM_Get_userdata(idx);
11398 return MPI_SUCCESS;
11399 }
11400
11401 CLINKAGE
11402 int AMPI_Type_is_contiguous(MPI_Datatype datatype, int *flag)
11403 {
11404 AMPI_API("AMPI_Type_is_contiguous");
11405 *flag = getDDT()->isContig(datatype);
11406 return MPI_SUCCESS;
11407 }
11408
11409 CLINKAGE
11410 int AMPI_Print(const char *str)
11411 {
11412 AMPI_API("AMPI_Print");
11413 ampiParent *ptr = getAmpiParent();
11414 CkPrintf("[%d] %s\n", ptr->thisIndex, str);
11415 return MPI_SUCCESS;
11416 }
11417
11418 CLINKAGE
11419 int AMPI_Suspend(void)
11420 {
11421 AMPI_API("AMPI_Suspend");
11422 getAmpiParent()->block();
11423 return MPI_SUCCESS;
11424 }
11425
11426 CLINKAGE
11427 int AMPI_Yield(void)
11428 {
11429 AMPI_API("AMPI_Yield");
11430 getAmpiParent()->yield();
11431 return MPI_SUCCESS;
11432 }
11433
11434 CLINKAGE
11435 int AMPI_Resume(int dest, MPI_Comm comm)
11436 {
11437 AMPI_API("AMPI_Resume");
11438 getAmpiInstance(comm)->getProxy()[dest].unblock();
11439 return MPI_SUCCESS;
11440 }
11441
11442 CLINKAGE
11443 int AMPI_System(const char *cmd)
11444 {
11445 return TCHARM_System(cmd);
11446 }
11447
11448 CLINKAGE
11449 int AMPI_Trace_begin(void)
11450 {
11451 traceBegin();
11452 return MPI_SUCCESS;
11453 }
11454
11455 CLINKAGE
11456 int AMPI_Trace_end(void)
11457 {
11458 traceEnd();
11459 return MPI_SUCCESS;
11460 }
11461
11462 int AMPI_Install_idle_timer(void)
11463 {
11464 #if AMPI_PRINT_IDLE
11465 beginHandle = CcdCallOnConditionKeep(CcdPROCESSOR_BEGIN_IDLE,(CcdVoidFn)BeginIdle,NULL);
11466 endHandle = CcdCallOnConditionKeep(CcdPROCESSOR_END_IDLE,(CcdVoidFn)EndIdle,NULL);
11467 #endif
11468 return MPI_SUCCESS;
11469 }
11470
11471 int AMPI_Uninstall_idle_timer(void)
11472 {
11473 #if AMPI_PRINT_IDLE
11474 CcdCancelCallOnConditionKeep(CcdPROCESSOR_BEGIN_IDLE,beginHandle);
11475   CcdCancelCallOnConditionKeep(CcdPROCESSOR_END_IDLE,endHandle);
11476 #endif
11477 return MPI_SUCCESS;
11478 }
11479
11480 #if CMK_BIGSIM_CHARM
11481 extern "C" void startCFnCall(void *param,void *msg)
11482 {
11483 BgSetStartEvent();
11484 ampi *ptr = (ampi*)param;
11485 ampi::bcastraw(NULL, 0, ptr->getProxy());
11486 delete (CkReductionMsg*)msg;
11487 }
11488
11489 CLINKAGE
11490 int AMPI_Set_start_event(MPI_Comm comm)
11491 {
11492 AMPI_API("AMPI_Set_start_event");
11493 CkAssert(comm == MPI_COMM_WORLD);
11494
11495 ampi *ptr = getAmpiInstance(comm);
11496 int rank = ptr->getRank();
11497 int size = ptr->getSize();
11498
11499 CkDDT_DataType *ddt_type = ptr->getDDT()->getType(MPI_INT);
11500
11501 CkReductionMsg *msg=makeRednMsg(ddt_type, NULL, 0, MPI_INT, rank, size, MPI_SUM);
11502 if (CkMyPe() == 0) {
11503 CkCallback allreduceCB(startCFnCall, ptr);
11504 msg->setCallback(allreduceCB);
11505 }
11506 ptr->contribute(msg);
11507
11508
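  // Block on the matching broadcast until the reduction contributed above
  // has completed on every rank; this effectively acts as a barrier.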
11509 if(-1==ptr->recv(MPI_BCAST_TAG, -1, NULL, 0, MPI_INT, MPI_COMM_WORLD))
11510 CkAbort("AMPI> MPI_Allreduce called with different values on different processors!");
11511
11512 return MPI_SUCCESS;
11513 }
11514
11515 CLINKAGE
11516 int AMPI_Set_end_event(void)
11517 {
11518 AMPI_API("AMPI_Set_end_event");
11519 return MPI_SUCCESS;
11520 }
11521 #endif // CMK_BIGSIM_CHARM
11522
11523 #if CMK_CUDA
11524 GPUReq::GPUReq() noexcept
11525 {
11526 comm = MPI_COMM_SELF;
11527 ampi* ptr = getAmpiInstance(comm);
11528 src = ptr->getRank();
11529 buf = ptr;
11530 }
11531
11532 bool GPUReq::test(MPI_Status *sts) noexcept
11533 {
11534 return complete;
11535 }
11536
11537 int GPUReq::wait(MPI_Status *sts) noexcept
11538 {
11539 (void)sts;
11540 while (!complete) {
11541 getAmpiParent()->block();
11542 }
11543 return 0;
11544 }
11545
11546 void GPUReq::receive(ampi *ptr, AmpiMsg *msg, bool deleteMsg) noexcept
11547 {
11548 CkAbort("GPUReq::receive should never be called");
11549 }
11550
11551 void GPUReq::receive(ampi *ptr, CkReductionMsg *msg) noexcept
11552 {
11553 CkAbort("GPUReq::receive should never be called");
11554 }
11555
11556 void GPUReq::setComplete() noexcept
11557 {
11558 complete = true;
11559 }
11560
11561 void GPUReq::print() const noexcept {
11562 AmpiRequest::print();
11563 }
11564
11565 void AMPI_GPU_complete(void *request, void* dummy) noexcept
11566 {
11567 GPUReq *req = static_cast<GPUReq *>(request);
11568 req->setComplete();
11569 ampi *ptr = static_cast<ampi *>(req->buf);
11570 ptr->unblock();
11571 }
11572
11573
11574 CLINKAGE
11575 int AMPI_GPU_Iinvoke_wr(hapiWorkRequest *to_call, MPI_Request *request)
11576 {
11577   AMPI_API("AMPI_GPU_Iinvoke_wr");
11578
11579 ampi* ptr = getAmpiInstance(MPI_COMM_WORLD);
11580 GPUReq* newreq = new GPUReq();
11581 *request = ptr->postReq(newreq);
11582
11583
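  // Completion callback: marks the GPUReq complete and unblocks this rank
  // once the enqueued work request finishes.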
11584   CkCallback *cb = new CkCallback(&AMPI_GPU_complete, newreq);
11585 to_call->setCallback(cb);
11586
11587 hapiEnqueue(to_call);
  return MPI_SUCCESS;
11588 }
11589
11590
11591
11592 CLINKAGE
11593 int AMPI_GPU_Iinvoke(cudaStream_t stream, MPI_Request *request)
11594 {
11595 AMPI_API("AMPI_GPU_Iinvoke");
11596
11597 ampi* ptr = getAmpiInstance(MPI_COMM_WORLD);
11598 GPUReq* newreq = new GPUReq();
11599 *request = ptr->postReq(newreq);
11600
11601
11602   CkCallback *cb = new CkCallback(&AMPI_GPU_complete, newreq);
11603
11604 hapiAddCallback(stream, cb, NULL);
  return MPI_SUCCESS;
11605 }
11606
11607 CLINKAGE
11608 int AMPI_GPU_Invoke_wr(hapiWorkRequest *to_call)
11609 {
11610 AMPI_API("AMPI_GPU_Invoke");
11611
11612 MPI_Request req;
11613 AMPI_GPU_Iinvoke_wr(to_call, &req);
11614 MPI_Wait(&req, MPI_STATUS_IGNORE);
11615
11616 return MPI_SUCCESS;
11617 }
11618
11619 CLINKAGE
11620 int AMPI_GPU_Invoke(cudaStream_t stream)
11621 {
11622 AMPI_API("AMPI_GPU_Invoke");
11623
11624 MPI_Request req;
11625 AMPI_GPU_Iinvoke(stream, &req);
11626 MPI_Wait(&req, MPI_STATUS_IGNORE);
11627
11628 return MPI_SUCCESS;
11629 }
11630 #endif // CMK_CUDA
11631
11632 #include "ampi.def.h"
11633