00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #ifndef _GNU_SOURCE
00014 #define _GNU_SOURCE
00015 #endif
00016
00017 #include "converse.h"
00018 #include "sockRoutines.h"
00019 #include "charm-api.h"
00020 #include "hwloc.h"
00021
00022 #if CMK_USE_IBVERBS
00023 #include <infiniband/verbs.h>
00024 #include <hwloc/openfabrics-verbs.h>
00025 #endif
00026
00027 #define DEBUGP(x)
00028 CpvDeclare(int, myCPUAffToCore);
00029 #if CMK_OS_IS_LINUX
00030
00031
00032
00033
00034
00035 CpvDeclare(void *, myProcStatFP);
00036 #endif
00037
00038 CmiHwlocTopology CmiHwlocTopologyLocal;
00039
00040 void CmiInitHwlocTopology(void)
00041 {
00042 hwloc_topology_t topology;
00043 int depth;
00044
00045
00046 cmi_hwloc_topology_init(&topology);
00047
00048 cmi_hwloc_topology_load(topology);
00049
00050
00051 depth = cmi_hwloc_get_type_depth(topology, HWLOC_OBJ_PACKAGE);
00052 CmiHwlocTopologyLocal.num_sockets = depth != HWLOC_TYPE_DEPTH_UNKNOWN ? cmi_hwloc_get_nbobjs_by_depth(topology, depth) : 1;
00053 #if CMK_BLUEGENEQ
00054
00055 if (CmiHwlocTopologyLocal.num_sockets == 17)
00056 CmiHwlocTopologyLocal.num_sockets = 16;
00057 #endif
00058
00059
00060 depth = cmi_hwloc_get_type_depth(topology, HWLOC_OBJ_CORE);
00061 CmiHwlocTopologyLocal.num_cores = depth != HWLOC_TYPE_DEPTH_UNKNOWN ? cmi_hwloc_get_nbobjs_by_depth(topology, depth) : 1;
00062
00063
00064 depth = cmi_hwloc_get_type_depth(topology, HWLOC_OBJ_PU);
00065 CmiHwlocTopologyLocal.num_pus = depth != HWLOC_TYPE_DEPTH_UNKNOWN ? cmi_hwloc_get_nbobjs_by_depth(topology, depth) : 1;
00066
00067 cmi_hwloc_topology_destroy(topology);
00068 }
00069
00070 #if CMK_HAS_SETAFFINITY || defined (_WIN32) || CMK_HAS_BINDPROCESSOR
00071
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
00076
00077 #ifdef _WIN32
00078 #include <windows.h>
00079 #include <winbase.h>
00080 #else
00081 #include <sched.h>
00082
00083
00084 #endif
00085
00086 #if CMK_OS_IS_LINUX
00087 #include <sys/syscall.h>
00088 #endif
00089
00090 #if defined(__APPLE__)
00091 #include <Carbon/Carbon.h>
00092 #endif
00093
/* Capacity of the +excludecore list. */
#define MAX_EXCLUDE 64
/* Cores named by +excludecore; only the first `excludecount` entries are
 * meaningful (the initialiser sets entry 0 to -1, the rest to 0). */
static int excludecore[MAX_EXCLUDE] = {-1};
static int excludecount = 0;

/* Incremented under affLock by each PE once it has applied its binding;
 * comm threads poll it in CmiInitCPUAffinity(). */
static int affinity_doneflag = 0;

#ifndef _WIN32
/* Number of affinity masks merged into core_usage so far; starts at 1
 * because PE 0 contributes its own mask directly in CmiCheckAffinity(). */
static int affMsgsRecvd = 1;
/* Union of the affinity masks collected by CmiCheckAffinity(). */
static cpu_set_t core_usage;
/* Copy of the "affinity requested" flag taken in CmiInitCPUAffinity(). */
static int aff_is_set = 0;
#endif
00105
00106 static int in_exclude(int core)
00107 {
00108 int i;
00109 for (i=0; i<excludecount; i++) if (core == excludecore[i]) return 1;
00110 return 0;
00111 }
00112
00113 static void add_exclude(int core)
00114 {
00115 if (in_exclude(core)) return;
00116 CmiAssert(excludecount < MAX_EXCLUDE);
00117 excludecore[excludecount++] = core;
00118 }
00119
00120 #if CMK_HAS_BINDPROCESSOR
00121 #include <sys/processor.h>
00122 #endif
00123
00124 static int set_process_affinity(hwloc_topology_t topology, hwloc_cpuset_t cpuset)
00125 {
00126 #ifdef _WIN32
00127 HANDLE process = GetCurrentProcess();
00128 #else
00129 pid_t process = getpid();
00130 #endif
00131
00132 if (cmi_hwloc_set_proc_cpubind(topology, process, cpuset, HWLOC_CPUBIND_PROCESS|HWLOC_CPUBIND_STRICT))
00133 {
00134 char *str;
00135 int error = errno;
00136 cmi_hwloc_bitmap_asprintf(&str, cpuset);
00137 CmiPrintf("HWLOC> Couldn't bind to cpuset %s: %s\n", str, strerror(error));
00138 free(str);
00139 return -1;
00140 }
00141
00142 #if CMK_CHARMDEBUG
00143 if (CmiPhysicalNodeID(CmiMyPe()) == 0)
00144 {
00145 char *str;
00146 cmi_hwloc_bitmap_asprintf(&str, cpuset);
00147 CmiPrintf("HWLOC> [%d] Process %p bound to cpuset: %s\n", CmiMyPe(), process, str);
00148 free(str);
00149 }
00150 #endif
00151
00152 return 0;
00153 }
00154
00155 #if CMK_SMP
00156 static int set_thread_affinity(hwloc_topology_t topology, hwloc_cpuset_t cpuset)
00157 {
00158 #ifdef _WIN32
00159 HANDLE thread = GetCurrentThread();
00160 #else
00161 pthread_t thread = pthread_self();
00162 #endif
00163
00164 if (cmi_hwloc_set_thread_cpubind(topology, thread, cpuset, HWLOC_CPUBIND_THREAD|HWLOC_CPUBIND_STRICT))
00165 {
00166 char *str;
00167 int error = errno;
00168 cmi_hwloc_bitmap_asprintf(&str, cpuset);
00169 CmiPrintf("HWLOC> Couldn't bind to cpuset %s: %s\n", str, strerror(error));
00170 free(str);
00171 return -1;
00172 }
00173
00174 #if CMK_CHARMDEBUG
00175 if (CmiPhysicalNodeID(CmiMyPe()) == 0)
00176 {
00177 char *str;
00178 cmi_hwloc_bitmap_asprintf(&str, cpuset);
00179 CmiPrintf("HWLOC> [%d] Thread %p bound to cpuset: %s\n", CmiMyPe(), thread, str);
00180 free(str);
00181 }
00182 #endif
00183
00184 return 0;
00185 }
00186 #endif
00187
00188
00189 int CmiSetCPUAffinity(int mycore)
00190 {
00191 int core = mycore;
00192 if (core < 0) {
00193 core = CmiNumCores() + core;
00194 }
00195 if (core < 0) {
00196 CmiError("Error: Invalid cpu affinity core number: %d\n", mycore);
00197 CmiAbort("CmiSetCPUAffinity failed");
00198 }
00199
00200 CpvAccess(myCPUAffToCore) = core;
00201
00202 hwloc_topology_t topology;
00203
00204 cmi_hwloc_topology_init(&topology);
00205 cmi_hwloc_topology_load(topology);
00206
00207 hwloc_cpuset_t cpuset = cmi_hwloc_bitmap_alloc();
00208 cmi_hwloc_bitmap_set(cpuset, core);
00209
00210 #if CMK_SMP
00211 set_thread_affinity(topology, cpuset);
00212 #else
00213 set_process_affinity(topology, cpuset);
00214 #endif
00215
00216 cmi_hwloc_bitmap_free(cpuset);
00217
00218 cmi_hwloc_topology_destroy(topology);
00219 return 0;
00220 }
00221
00222
00223
00224
00225 int print_cpu_affinity(void) {
00226 hwloc_topology_t topology;
00227
00228 cmi_hwloc_topology_init(&topology);
00229
00230 cmi_hwloc_topology_load(topology);
00231
00232 hwloc_cpuset_t cpuset = cmi_hwloc_bitmap_alloc();
00233
00234 if (cmi_hwloc_get_cpubind(topology, cpuset, 0)) {
00235 int error = errno;
00236 CmiPrintf("[%d] CPU affinity mask is unknown %s\n", CmiMyPe(), strerror(error));
00237 cmi_hwloc_bitmap_free(cpuset);
00238 cmi_hwloc_topology_destroy(topology);
00239 return -1;
00240 }
00241
00242 char *str;
00243 cmi_hwloc_bitmap_asprintf(&str, cpuset);
00244 CmiPrintf("[%d] CPU affinity mask is %s\n", CmiMyPe(), str);
00245 free(str);
00246 cmi_hwloc_bitmap_free(cpuset);
00247 cmi_hwloc_topology_destroy(topology);
00248 return 0;
00249 }
00250
00251 #if CMK_SMP
00252 int print_thread_affinity(void) {
00253 hwloc_topology_t topology;
00254
00255 cmi_hwloc_topology_init(&topology);
00256
00257 cmi_hwloc_topology_load(topology);
00258
00259 #ifdef _WIN32
00260 HANDLE thread = GetCurrentThread();
00261 #else
00262 pthread_t thread = pthread_self();
00263 #endif
00264
00265 hwloc_cpuset_t cpuset = cmi_hwloc_bitmap_alloc();
00266
00267
00268 if (cmi_hwloc_get_cpubind(topology, cpuset, HWLOC_CPUBIND_THREAD) == -1) {
00269 int error = errno;
00270 CmiPrintf("[%d] thread CPU affinity mask is unknown %s\n", CmiMyPe(), strerror(error));
00271 cmi_hwloc_bitmap_free(cpuset);
00272 cmi_hwloc_topology_destroy(topology);
00273 return -1;
00274 }
00275
00276 char *str;
00277 cmi_hwloc_bitmap_asprintf(&str, cpuset);
00278 CmiPrintf("[%d] thread CPU affinity mask is %s\n", CmiMyPe(), str);
00279 free(str);
00280 cmi_hwloc_bitmap_free(cpuset);
00281 cmi_hwloc_topology_destroy(topology);
00282 return 0;
00283
00284 }
00285 #endif
00286
/*
 * Print this PE's CPU binding: the thread binding in SMP builds, the
 * process binding otherwise. Returns the printer's status (0 or -1).
 */
int CmiPrintCPUAffinity(void)
{
  int status;

#if CMK_SMP
  status = print_thread_affinity();
#else
  status = print_cpu_affinity();
#endif
  return status;
}
00295
00296 #ifndef _WIN32
/*
 * Fetch the calling process's CPU affinity mask into *cpuset.
 *
 * Returns 0 on success; on failure reports via perror() and returns -1.
 * The size passed to sched_getaffinity() is the size of the mask object,
 * not of the pointer to it.
 */
int get_cpu_affinity(cpu_set_t *cpuset) {
  CPU_ZERO(cpuset);
  if (sched_getaffinity(0, sizeof(*cpuset), cpuset) < 0) {
    perror("sched_getaffinity");
    return -1;
  }
  return 0;
}
00305
00306 #if CMK_SMP
/*
 * Fetch the calling thread's CPU affinity mask into *cpuset.
 *
 * Returns 0 on success. Returns -1 when the query fails (after perror()) or
 * when the build has no pthread affinity support.
 */
int get_thread_affinity(cpu_set_t *cpuset) {
#if CMK_HAS_PTHREAD_SETAFFINITY
  int rc;

  CPU_ZERO(cpuset);
  rc = pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), cpuset);
  /* pthread calls return the error code; mirror it into errno for perror(). */
  errno = rc;
  if (rc != 0) {
    perror("pthread_getaffinity");
    return -1;
  }
  return 0;
#else
  return -1;
#endif
}
00319 #endif
00320
/*
 * Fetch the affinity mask relevant to this PE into *cpuset: the thread's
 * mask in SMP builds, the process's mask otherwise. Returns 0 or -1.
 */
int get_affinity(cpu_set_t *cpuset) {
  int status;

#if CMK_SMP
  status = get_thread_affinity(cpuset);
#else
  status = get_cpu_affinity(cpuset);
#endif
  return status;
}
00328 #endif
00329
/*
 * Report the CPU number found in this task's /proc stat file.
 *
 * On Linux, rewinds the per-PE stat stream, reads TASK_CPU_POS
 * whitespace-separated fields and returns the last one as an integer.
 * Returns -1 (with a warning) when the stream was never opened or on other
 * platforms; aborts if a field cannot be read.
 */
int CmiOnCore(void) {
#if CMK_OS_IS_LINUX
  /* 1-based position of the field returned by this function. */
#define TASK_CPU_POS (39)
  char field[128];
  int remaining;
  FILE *statfp = (FILE *)CpvAccess(myProcStatFP);

  if (!statfp) {
    printf("WARNING: CmiOnCore IS NOT SUPPORTED ON THIS PLATFORM\n");
    return -1;
  }

  fseek(statfp, 0, SEEK_SET);
  /* Each pass overwrites `field`; after the loop it holds field #TASK_CPU_POS. */
  for (remaining = TASK_CPU_POS; remaining > 0; --remaining) {
    if (fscanf(statfp, "%127s", field) != 1) {
      CmiAbort("CPU affinity> reading from /proc/<PID>/[task/<TID>]/stat failed!");
    }
  }
  return atoi(field);
#else
  printf("WARNING: CmiOnCore IS NOT SUPPORTED ON THIS PLATFORM\n");
  return -1;
#endif
}
00360
00361
00362 static int cpuAffinityHandlerIdx;
00363 static int cpuAffinityRecvHandlerIdx;
00364 static int cpuPhyNodeAffinityRecvHandlerIdx;
00365
00366 typedef struct _hostnameMsg {
00367 char core[CmiMsgHeaderSizeBytes];
00368 int pe;
00369 skt_ip_t ip;
00370 int ncores;
00371 int rank;
00372 int seq;
00373 } hostnameMsg;
00374
00375 typedef struct _rankMsg {
00376 char core[CmiMsgHeaderSizeBytes];
00377 int *ranks;
00378 int *nodes;
00379 } rankMsg;
00380
00381 typedef struct _affMsg {
00382 char core[CmiMsgHeaderSizeBytes];
00383 #ifndef _WIN32
00384 cpu_set_t affinity;
00385 #endif
00386 } affMsg;
00387
00388 static rankMsg *rankmsg = NULL;
00389 static CmmTable hostTable;
00390 static CmiNodeLock affLock = 0;
00391
00392
00393 static void cpuAffinityHandler(void *m)
00394 {
00395 static int count = 0;
00396 static int nodecount = 0;
00397 hostnameMsg *rec;
00398 hostnameMsg *msg = (hostnameMsg *)m;
00399 void *tmpm;
00400 int tag, tag1, pe, myrank;
00401 int npes = CmiNumPes();
00402
00403
00404
00405
00406
00407
00408 CmiAssert(CmiMyPe()==0 && rankmsg != NULL);
00409 tag = *(int*)&msg->ip;
00410 pe = msg->pe;
00411 if ((rec = (hostnameMsg *)CmmProbe(hostTable, 1, &tag, &tag1)) != NULL) {
00412 CmiFree(msg);
00413 }
00414 else {
00415 rec = msg;
00416 rec->seq = nodecount;
00417 nodecount++;
00418 CmmPut(hostTable, 1, &tag, msg);
00419 }
00420 myrank = rec->rank%rec->ncores;
00421 while (in_exclude(myrank)) {
00422 myrank = (myrank+1)%rec->ncores;
00423 rec->rank ++;
00424 }
00425 rankmsg->ranks[pe] = myrank;
00426 rankmsg->nodes[pe] = rec->seq;
00427 rec->rank ++;
00428 count ++;
00429 if (count == CmiNumPes()) {
00430 DEBUGP(("Cpuaffinity> %d unique compute nodes detected! \n", CmmEntries(hostTable)));
00431 tag = CmmWildCard;
00432 while ((tmpm = CmmGet(hostTable, 1, &tag, &tag1))) CmiFree(tmpm);
00433 CmmFree(hostTable);
00434 #if 1
00435
00436 {
00437 int i,j;
00438 for (i=0; i<npes-1; i++)
00439 for(j=i+1; j<npes; j++) {
00440 if (rankmsg->nodes[i] == rankmsg->nodes[j] &&
00441 rankmsg->ranks[i] > rankmsg->ranks[j])
00442 {
00443 int tmp = rankmsg->ranks[i];
00444 rankmsg->ranks[i] = rankmsg->ranks[j];
00445 rankmsg->ranks[j] = tmp;
00446 }
00447 }
00448 }
00449 #endif
00450 CmiSyncBroadcastAllAndFree(sizeof(rankMsg)+CmiNumPes()*sizeof(int)*2, (void *)rankmsg);
00451 }
00452 }
00453
00454
00455 static void cpuAffinityRecvHandler(void *msg)
00456 {
00457 int myrank, mynode;
00458 rankMsg *m = (rankMsg *)msg;
00459 m->ranks = (int *)((char*)m + sizeof(rankMsg));
00460 m->nodes = (int *)((char*)m + sizeof(rankMsg) + CmiNumPes()*sizeof(int));
00461 myrank = m->ranks[CmiMyPe()];
00462 mynode = m->nodes[CmiMyPe()];
00463
00464 DEBUGP(("[%d %d] set to core #: %d\n", CmiMyNode(), CmiMyPe(), myrank));
00465
00466 if (-1 != CmiSetCPUAffinity(myrank)) {
00467 DEBUGP(("Processor %d is bound to core #%d on node #%d\n", CmiMyPe(), myrank, mynode));
00468 }
00469 else{
00470 CmiPrintf("Processor %d set affinity failed!\n", CmiMyPe());
00471 CmiAbort("set cpu affinity abort!\n");
00472 }
00473 CmiFree(m);
00474 }
00475
00476
00477 static void cpuPhyNodeAffinityRecvHandler(void *msg)
00478 {
00479 affMsg *m = (affMsg *)msg;
00480 #if !defined(_WIN32) && defined(CPU_OR)
00481 CPU_OR(&core_usage, &core_usage, &m->affinity);
00482 affMsgsRecvd++;
00483 #endif
00484 CmiFree(m);
00485 }
00486
00487 #if defined(_WIN32)
00488
00489 #define strtok_r(x,y,z) strtok(x,y)
00490 #endif
00491
/*
 * Expand a PE-to-core map string and return the core assigned to `pe`.
 *
 * The map is a comma-separated list of items. As parsed below, each item is
 *   [ITERx|ITERX] START[-END[:STRIDE[.BLOCK]]] [+OFFSET]...
 * and expands, ITER times, to the cores START..END taken BLOCK at a time
 * every STRIDE, each followed by the same core plus every OFFSET.
 * Expansion stops after CmiNumPesGlobal() entries; the result is the entry
 * at index pe modulo the number of entries produced.
 *
 * Malformed items produce a warning. Items whose leading number (or range)
 * cannot be read are skipped instead of being used with indeterminate
 * values. A non-positive stride is replaced by 1. The program aborts if the
 * map yields no entry at all or if memory cannot be allocated.
 *
 * pecoremap is not modified; parsing works on a private copy.
 */
static int search_pemap(char *pecoremap, int pe)
{
  enum { MAX_PLUS = 128 };           /* slot 0 is the implicit "+0" */
  const int npes = CmiNumPesGlobal();
  int plusarr[MAX_PLUS];
  char *ptr = NULL;
  char *str;
  int count = 0;
  int result;

  int *map = (int *)malloc((size_t)(npes > 0 ? npes : 1) * sizeof(int));
  char *mapstr = (char*)malloc(strlen(pecoremap)+1);
  if (map == NULL || mapstr == NULL) {
    free(map);
    free(mapstr);
    CmiAbort("Charm++> out of memory while parsing the PE-core map.\n");
  }
  strcpy(mapstr, pecoremap);

  for (str = strtok_r(mapstr, ",", &ptr);
       str != NULL && count < npes;
       str = strtok_r(NULL, ",", &ptr))
  {
    int hasdash=0, hascolon=0, hasdot=0, hasstar1=0, hasstar2=0, numplus=0;
    int start=0, end=0, stride=1, block=1;
    int iter=1;
    int nparsed, nwanted;
    int h, i, j, k;
    const size_t len = strlen(str);
    size_t c;

    plusarr[0] = 0;

    /* First pass: note which separators occur and collect "+N" offsets. */
    for (c = 0; c < len; c++) {
      switch (str[c]) {
      case '-': if (c != 0) hasdash = 1; break;   /* a leading '-' is a sign */
      case ':': hascolon = 1; break;
      case '.': hasdot = 1; break;
      case 'x': hasstar1 = 1; break;
      case 'X': hasstar2 = 1; break;
      case '+':
        if (str[c+1] == '+' || str[c+1] == '-') {
          printf("Warning: Check the format of \"%s\".\n", str);
        } else if (numplus + 1 >= MAX_PLUS) {
          printf("Warning: too many '+' offsets in \"%s\", extra ones ignored.\n", str);
        } else if (sscanf(&str[c], "+%d", &plusarr[numplus + 1]) == 1) {
          numplus++;
        } else {
          printf("Warning: Check the format of \"%s\".\n", str);
        }
        break;
      default:
        break;
      }
    }

    /* Optional repeat count; afterwards continue parsing past the 'x'/'X'. */
    if (hasstar1 || hasstar2) {
      if (hasstar1) (void)sscanf(str, "%dx", &iter);
      if (hasstar2) (void)sscanf(str, "%dX", &iter);
      while (*str!='x' && *str!='X') str++;
      str++;
    }

    if (hasdash) {
      if (hascolon && hasdot) {
        nwanted = 4;
        nparsed = sscanf(str, "%d-%d:%d.%d", &start, &end, &stride, &block);
      }
      else if (hascolon) {
        nwanted = 3;
        nparsed = sscanf(str, "%d-%d:%d", &start, &end, &stride);
      }
      else {
        nwanted = 2;
        nparsed = sscanf(str, "%d-%d", &start, &end);
      }
      if (nparsed != nwanted)
        printf("Warning: Check the format of \"%s\".\n", str);
      /* Without both range ends there is nothing meaningful to expand. */
      if (nparsed < 2) continue;
    }
    else {
      nparsed = sscanf(str, "%d", &start);
      if (nparsed != 1) {
        printf("Warning: Check the format of \"%s\".\n", str);
        continue;
      }
      end = start;
    }

    if (stride < 1) {
      /* A stride of zero or less would never reach `end`. */
      printf("Warning: invalid stride in \"%s\" ignored.\n", str);
      stride = 1;
    }
    if (block > stride) {
      printf("Warning: invalid block size in \"%s\" ignored.\n", str);
      block=1;
    }

    /* Every loop level also stops as soon as the map is full. */
    for (k = 0; k < iter && count < npes; k++)
      for (i = start; i <= end && count < npes; i += stride)
        for (j = 0; j < block && i+j <= end && count < npes; j++)
          for (h = 0; h <= numplus && count < npes; h++)
            map[count++] = i+j+plusarr[h];
  }

  if (count == 0) {
    free(map);
    free(mapstr);
    CmiAbort("Charm++> PE-core map does not contain any valid core number.\n");
  }
  result = map[pe % count];

  free(map);
  free(mapstr);
  return result;
}
00579
00580 #if CMK_CRAYXE || CMK_CRAYXC
00581 CLINKAGE int getXTNodeID(int mpirank, int nummpiranks);
00582 #endif
00583
/*
 * Detect PEs on physical node 0 that share a core.
 *
 * Active only in non-Windows SMP builds with pthread affinity and CPU_OR,
 * and only when CPU topology support is enabled. Every other worker PE on
 * physical node 0 sends its affinity mask to PE 0. PE 0 ORs the masks
 * together and, if fewer distinct CPUs than PEs are covered, aborts (when
 * aff_is_set is zero) or prints a warning (otherwise).
 */
void CmiCheckAffinity(void)
{
#if !defined(_WIN32) && CMK_SMP && CMK_HAS_PTHREAD_SETAFFINITY && defined(CPU_OR)

  if (!CmiCpuTopologyEnabled()) return;

  if (CmiMyPe() != 0) {
    /* Worker PEs on physical node 0 report their mask to PE 0. */
    if ((CmiMyPe() < CmiNumPes()) && (CmiPhysicalNodeID(CmiMyPe()) == 0)) {
      affMsg *report = (affMsg*)CmiAlloc(sizeof(affMsg));
      CmiSetHandler((char *)report, cpuPhyNodeAffinityRecvHandlerIdx);
      if (get_affinity(&report->affinity) == -1) {
        CmiFree(report);
        CmiAbort("get_affinity failed\n");
      }
      CmiSyncSendAndFree(0, sizeof(affMsg), (void *)report);
    }
    return;
  }

  /* PE 0: merge the local mask, then wait for one report per remaining PE. */
  {
    cpu_set_t own_mask;
    int pes_on_node;

    if (get_affinity(&own_mask) == -1) CmiAbort("get_affinity failed\n");
    CPU_OR(&core_usage, &core_usage, &own_mask);

    pes_on_node = CmiNumPesOnPhysicalNode(0);
    while (affMsgsRecvd < pes_on_node)
      CmiDeliverSpecificMsg(cpuPhyNodeAffinityRecvHandlerIdx);

    /* At least as many distinct CPUs as PEs: nothing to report. */
    if (CPU_COUNT(&core_usage) >= pes_on_node) return;

    if (aff_is_set) {
      CmiPrintf("WARNING: Multiple PEs assigned to same core, recommend "
                "adjusting processor affinity or passing +CmiSleepOnIdle to reduce "
                "interference.\n");
    } else {
      CmiAbort("Multiple PEs assigned to same core. Set affinity "
               "options to correct or lower the number of threads, or pass +setcpuaffinity to ignore.\n");
    }
  }
#endif
}
00632
00633 extern int CmiMyLocalRank;
00634
00635 static void bind_process_only(hwloc_obj_type_t process_unit)
00636 {
00637 hwloc_topology_t topology;
00638 hwloc_cpuset_t cpuset;
00639 cmi_hwloc_topology_init(&topology);
00640 cmi_hwloc_topology_load(topology);
00641
00642
00643 int process_unitcount = cmi_hwloc_get_nbobjs_by_type(topology, process_unit);
00644 #if CMK_BLUEGENEQ
00645
00646 if (process_unit == HWLOC_OBJ_PACKAGE && process_unitcount == 17)
00647 process_unitcount = 16;
00648 #endif
00649
00650 int process_assignment = CmiMyLocalRank % process_unitcount;
00651
00652 hwloc_obj_t process_obj = cmi_hwloc_get_obj_by_type(topology, process_unit, process_assignment);
00653 set_process_affinity(topology, process_obj->cpuset);
00654
00655
00656 cmi_hwloc_topology_destroy(topology);
00657 }
00658
00659 #if CMK_SMP
00660 static void bind_threads_only(hwloc_obj_type_t thread_unit)
00661 {
00662 hwloc_topology_t topology;
00663 hwloc_cpuset_t cpuset;
00664 cmi_hwloc_topology_init(&topology);
00665 cmi_hwloc_topology_load(topology);
00666
00667
00668 int thread_unitcount = cmi_hwloc_get_nbobjs_by_type(topology, thread_unit);
00669 #if CMK_BLUEGENEQ
00670
00671 if (thread_unit == HWLOC_OBJ_PACKAGE && thread_unitcount == 17)
00672 thread_unitcount = 16;
00673 #endif
00674
00675 int thread_assignment = CmiMyRank() % thread_unitcount;
00676
00677 hwloc_obj_t thread_obj = cmi_hwloc_get_obj_by_type(topology, thread_unit, thread_assignment);
00678 hwloc_cpuset_t thread_cpuset = cmi_hwloc_bitmap_dup(thread_obj->cpuset);
00679 cmi_hwloc_bitmap_singlify(thread_cpuset);
00680 set_thread_affinity(topology, thread_cpuset);
00681 cmi_hwloc_bitmap_free(thread_cpuset);
00682
00683
00684 cmi_hwloc_topology_destroy(topology);
00685 }
00686
00687 static void bind_process_and_threads(hwloc_obj_type_t process_unit, hwloc_obj_type_t thread_unit)
00688 {
00689 hwloc_topology_t topology;
00690 hwloc_cpuset_t cpuset;
00691 cmi_hwloc_topology_init(&topology);
00692 cmi_hwloc_topology_load(topology);
00693
00694
00695 int process_unitcount = cmi_hwloc_get_nbobjs_by_type(topology, process_unit);
00696
00697 int process_assignment = CmiMyLocalRank % process_unitcount;
00698
00699 hwloc_obj_t process_obj = cmi_hwloc_get_obj_by_type(topology, process_unit, process_assignment);
00700 set_process_affinity(topology, process_obj->cpuset);
00701
00702 int thread_unitcount = cmi_hwloc_get_nbobjs_inside_cpuset_by_type(topology, process_obj->cpuset, thread_unit);
00703
00704 int thread_assignment = CmiMyRank() % thread_unitcount;
00705
00706 hwloc_obj_t thread_obj = cmi_hwloc_get_obj_inside_cpuset_by_type(topology, process_obj->cpuset, thread_unit, thread_assignment);
00707 hwloc_cpuset_t thread_cpuset = cmi_hwloc_bitmap_dup(thread_obj->cpuset);
00708 cmi_hwloc_bitmap_singlify(thread_cpuset);
00709 set_thread_affinity(topology, thread_cpuset);
00710 cmi_hwloc_bitmap_free(thread_cpuset);
00711
00712
00713 cmi_hwloc_topology_destroy(topology);
00714 }
00715 #endif
00716
00717 static int set_default_affinity(void)
00718 {
00719 char *s;
00720 int n = -1;
00721
00722 if ((s = getenv("CmiProcessPerSocket")))
00723 {
00724 n = atoi(s);
00725 #if CMK_SMP
00726 if (getenv("CmiOneWthPerCore"))
00727 bind_process_and_threads(HWLOC_OBJ_PACKAGE, HWLOC_OBJ_CORE);
00728 else if (getenv("CmiOneWthPerPU"))
00729 bind_process_and_threads(HWLOC_OBJ_PACKAGE, HWLOC_OBJ_PU);
00730 else
00731 #endif
00732 bind_process_only(HWLOC_OBJ_PACKAGE);
00733 }
00734 else if ((s = getenv("CmiProcessPerCore")))
00735 {
00736 n = atoi(s);
00737 #if CMK_SMP
00738 if (getenv("CmiOneWthPerPU"))
00739 bind_process_and_threads(HWLOC_OBJ_CORE, HWLOC_OBJ_PU);
00740 else
00741 #endif
00742 bind_process_only(HWLOC_OBJ_CORE);
00743 }
00744 else if ((s = getenv("CmiProcessPerPU")))
00745 {
00746 n = atoi(s);
00747 bind_process_only(HWLOC_OBJ_PU);
00748 }
00749 else
00750 {
00751 #if CMK_SMP
00752 if (getenv("CmiOneWthPerSocket"))
00753 {
00754 n = 0;
00755 bind_threads_only(HWLOC_OBJ_PACKAGE);
00756 }
00757 else if (getenv("CmiOneWthPerCore"))
00758 {
00759 n = 0;
00760 bind_threads_only(HWLOC_OBJ_CORE);
00761 }
00762 else if (getenv("CmiOneWthPerPU"))
00763 {
00764 n = 0;
00765 bind_threads_only(HWLOC_OBJ_PU);
00766 }
00767 #endif
00768 }
00769
00770 return n != -1;
00771 }
00772
00773 void CmiInitCPUAffinity(char **argv)
00774 {
00775 static skt_ip_t myip;
00776 int ret, i, exclude;
00777 hostnameMsg *msg;
00778 char *pemap = NULL;
00779 char *commap = NULL;
00780 char *pemapfile = NULL;
00781
00782 int show_affinity_flag;
00783
00784 int affinity_flag = CmiGetArgFlagDesc(argv,"+setcpuaffinity",
00785 "set cpu affinity");
00786
00787 while (CmiGetArgIntDesc(argv,"+excludecore", &exclude, "avoid core when setting cpuaffinity")) {
00788 if (CmiMyRank() == 0) add_exclude(exclude);
00789 affinity_flag = 1;
00790 }
00791
00792 if (CmiGetArgStringDesc(argv, "+pemapfile", &pemapfile, "define pe to core mapping file")) {
00793 FILE *fp;
00794 char buf[128];
00795 pemap = (char*)malloc(1024);
00796 fp = fopen(pemapfile, "r");
00797 if (fp == NULL) CmiAbort("pemapfile does not exist");
00798 while (!feof(fp)) {
00799 if (fgets(buf, 128, fp)) {
00800 if (buf[strlen(buf)-1] == '\n') buf[strlen(buf)-1] = 0;
00801 strcat(pemap, buf);
00802 }
00803 }
00804 fclose(fp);
00805 if (CmiMyPe()==0) CmiPrintf("Charm++> read from pemap file '%s': %s\n", pemapfile, pemap);
00806 }
00807
00808 CmiGetArgStringDesc(argv, "+pemap", &pemap, "define pe to core mapping");
00809 if (pemap!=NULL && excludecount>0)
00810 CmiAbort("Charm++> +pemap can not be used with +excludecore.\n");
00811
00812 CmiGetArgStringDesc(argv, "+commap", &commap, "define comm threads to core mapping");
00813
00814 if (pemap!=NULL || commap!=NULL) affinity_flag = 1;
00815
00816 show_affinity_flag = CmiGetArgFlagDesc(argv,"+showcpuaffinity", "print cpu affinity");
00817
00818 CmiAssignOnce(&cpuAffinityHandlerIdx, CmiRegisterHandler((CmiHandler)cpuAffinityHandler));
00819 CmiAssignOnce(&cpuAffinityRecvHandlerIdx, CmiRegisterHandler((CmiHandler)cpuAffinityRecvHandler));
00820 CmiAssignOnce(&cpuPhyNodeAffinityRecvHandlerIdx, CmiRegisterHandler((CmiHandler)cpuPhyNodeAffinityRecvHandler));
00821
00822
00823 {
00824 int done = 0;
00825 CmiNodeAllBarrier();
00826
00827
00828
00829 if (CmiMyRank() == 0) {
00830 done = set_default_affinity();
00831 }
00832
00833 CmiNodeAllBarrier();
00834
00835 if (CmiMyRank() != 0) {
00836 done = set_default_affinity();
00837 }
00838
00839 if (done) {
00840 if (show_affinity_flag) CmiPrintCPUAffinity();
00841 return;
00842 }
00843 }
00844
00845 if (CmiMyRank() ==0) {
00846 affLock = CmiCreateLock();
00847 #ifndef _WIN32
00848 aff_is_set = affinity_flag;
00849 CPU_ZERO(&core_usage);
00850 #endif
00851 }
00852
00853 #if CMK_BLUEGENEQ
00854 if(affinity_flag){
00855 affinity_flag = 0;
00856 if(CmiMyPe()==0) CmiPrintf("Charm++> cpu affinity setting is not needed on Blue Gene/Q, thus ignored.\n");
00857 }
00858 if(show_affinity_flag){
00859 show_affinity_flag = 0;
00860 if(CmiMyPe()==0) CmiPrintf("Charm++> printing cpu affinity is not supported on Blue Gene/Q.\n");
00861 }
00862 #endif
00863
00864 if (!affinity_flag) {
00865 if (show_affinity_flag) {
00866 CmiPrintCPUAffinity();
00867 CmiPrintf("Charm++> cpu affinity NOT enabled.\n");
00868 }
00869 return;
00870 }
00871
00872 if (CmiMyPe() == 0) {
00873 CmiPrintf("Charm++> cpu affinity enabled. \n");
00874 if (excludecount > 0) {
00875 CmiPrintf("Charm++> cpuaffinity excludes core: %d", excludecore[0]);
00876 for (i=1; i<excludecount; i++) CmiPrintf(" %d", excludecore[i]);
00877 CmiPrintf(".\n");
00878 }
00879 if (pemap!=NULL)
00880 CmiPrintf("Charm++> cpuaffinity PE-core map : %s\n", pemap);
00881 }
00882
00883 if (CmiMyPe() >= CmiNumPes()) {
00884
00885
00886 CmiNodeAllBarrier();
00887 if (commap != NULL) {
00888 int mycore = search_pemap(commap, CmiMyPeGlobal()-CmiNumPesGlobal());
00889 if (CmiPhysicalNodeID(CmiMyPe()) == 0) CmiPrintf("Charm++> set comm %d on node %d to core #%d\n", CmiMyPe()-CmiNumPes(), CmiMyNode(), mycore);
00890 if (-1 == CmiSetCPUAffinity(mycore))
00891 CmiAbort("set_cpu_affinity abort!");
00892 CmiNodeAllBarrier();
00893 if (show_affinity_flag) CmiPrintCPUAffinity();
00894 return;
00895 }
00896 else {
00897
00898 #if !CMK_CRAYXE && !CMK_CRAYXC && !CMK_BLUEGENEQ && !CMK_PAMI_LINUX_PPC8
00899 if (pemap == NULL) {
00900 #if CMK_MACHINE_PROGRESS_DEFINED
00901 while (affinity_doneflag < CmiMyNodeSize()) CmiNetworkProgress();
00902 #else
00903 #if CMK_SMP
00904 #error "Machine progress call needs to be implemented for cpu affinity!"
00905 #endif
00906 #endif
00907 }
00908 #endif
00909 #if CMK_CRAYXE || CMK_CRAYXC
00910
00911 if (pemap != NULL)
00912 #endif
00913 {
00914 CmiNodeAllBarrier();
00915 if (show_affinity_flag) CmiPrintCPUAffinity();
00916 return;
00917 }
00918 }
00919 }
00920
00921 if (pemap != NULL && CmiMyPe()<CmiNumPes()) {
00922 int mycore = search_pemap(pemap, CmiMyPeGlobal());
00923 if(show_affinity_flag) CmiPrintf("Charm++> set PE %d on node %d to core #%d\n", CmiMyPe(), CmiMyNode(), mycore);
00924 if (mycore >= CmiNumCores()) {
00925 CmiPrintf("Error> Invalid core number %d, only have %d cores (0-%d) on the node. \n", mycore, CmiNumCores(), CmiNumCores()-1);
00926 CmiAbort("Invalid core number");
00927 }
00928 if (CmiSetCPUAffinity(mycore) == -1) CmiAbort("set_cpu_affinity abort!");
00929 CmiNodeAllBarrier();
00930 CmiNodeAllBarrier();
00931
00932 return;
00933 }
00934
00935 #if CMK_CRAYXE || CMK_CRAYXC
00936 {
00937 int numCores = CmiNumCores();
00938
00939 int myid = getXTNodeID(CmiMyNodeGlobal(), CmiNumNodesGlobal());
00940 int myrank;
00941 int pe, mype = CmiMyPeGlobal();
00942 int node = CmiMyNodeGlobal();
00943 int nnodes = 0;
00944 #if CMK_SMP
00945 if (CmiMyPe() >= CmiNumPes()) {
00946 int node = CmiMyPe() - CmiNumPes();
00947 mype = CmiGetPeGlobal(CmiNodeFirst(node) + CmiMyNodeSize() - 1, CmiMyPartition());
00948 node = CmiGetNodeGlobal(node, CmiMyPartition());
00949 }
00950 #endif
00951 pe = mype - 1;
00952 while (pe >= 0) {
00953 int n = CmiNodeOf(pe);
00954 if (n != node) { nnodes++; node = n; }
00955 if (getXTNodeID(n, CmiNumNodesGlobal()) != myid) break;
00956 pe --;
00957 }
00958 CmiAssert(numCores > 0);
00959 myrank = (mype - pe - 1 + nnodes)%numCores;
00960 #if CMK_SMP
00961 if (CmiMyPe() >= CmiNumPes())
00962 myrank = (myrank + 1)%numCores;
00963 #endif
00964
00965 if (-1 != CmiSetCPUAffinity(myrank)) {
00966 DEBUGP(("Processor %d is bound to core #%d on node #%d\n", CmiMyPe(), myrank, mynode));
00967 }
00968 else{
00969 CmiPrintf("Processor %d set affinity failed!\n", CmiMyPe());
00970 CmiAbort("set cpu affinity abort!\n");
00971 }
00972 }
00973 if (CmiMyPe() < CmiNumPes())
00974 CmiNodeAllBarrier();
00975 CmiNodeAllBarrier();
00976 #else
00977
00978 if (CmiMyRank() == 0)
00979 {
00980 #if CMK_HAS_GETHOSTNAME
00981 myip = skt_my_ip();
00982 #else
00983 CmiAbort("Can not get unique name for the compute nodes. \n");
00984 #endif
00985 }
00986 CmiNodeAllBarrier();
00987
00988
00989 msg = (hostnameMsg *)CmiAlloc(sizeof(hostnameMsg));
00990 CmiSetHandler((char *)msg, cpuAffinityHandlerIdx);
00991 msg->pe = CmiMyPe();
00992 msg->ip = myip;
00993 msg->ncores = CmiNumCores();
00994 DEBUGP(("PE %d's node has %d number of cores. \n", CmiMyPe(), msg->ncores));
00995 msg->rank = 0;
00996 CmiSyncSendAndFree(0, sizeof(hostnameMsg), (void *)msg);
00997
00998 if (CmiMyPe() == 0) {
00999 int i;
01000 hostTable = CmmNew();
01001 rankmsg = (rankMsg *)CmiAlloc(sizeof(rankMsg)+CmiNumPes()*sizeof(int)*2);
01002 CmiSetHandler((char *)rankmsg, cpuAffinityRecvHandlerIdx);
01003 rankmsg->ranks = (int *)((char*)rankmsg + sizeof(rankMsg));
01004 rankmsg->nodes = (int *)((char*)rankmsg + sizeof(rankMsg) + CmiNumPes()*sizeof(int));
01005 for (i=0; i<CmiNumPes(); i++) {
01006 rankmsg->ranks[i] = 0;
01007 rankmsg->nodes[i] = -1;
01008 }
01009
01010 for (i=0; i<CmiNumPes(); i++) CmiDeliverSpecificMsg(cpuAffinityHandlerIdx);
01011 }
01012
01013
01014 CmiDeliverSpecificMsg(cpuAffinityRecvHandlerIdx);
01015 CmiLock(affLock);
01016 affinity_doneflag++;
01017 CmiUnlock(affLock);
01018 CmiNodeAllBarrier();
01019 #endif
01020
01021 if (show_affinity_flag) CmiPrintCPUAffinity();
01022 }
01023
01024
01025 void CmiInitCPUAffinityUtil(void){
01026 char fname[64];
01027 CpvInitialize(int, myCPUAffToCore);
01028 CpvAccess(myCPUAffToCore) = -1;
01029 #if CMK_OS_IS_LINUX
01030 CpvInitialize(void *, myProcStatFP);
01031 CmiLock(_smp_mutex);
01032 #if CMK_SMP
01033 sprintf(fname, "/proc/%d/task/%ld/stat", getpid(), syscall(SYS_gettid));
01034 #else
01035 sprintf(fname, "/proc/%d/stat", getpid());
01036 #endif
01037 CpvAccess(myProcStatFP) = (void *)fopen(fname, "r");
01038 CmiUnlock(_smp_mutex);
01039
01040
01041
01042
01043
01044 #endif
01045 }
01046
01047 #else
01048
/* Fallback for platforms without affinity support: always reports failure. */
int CmiSetCPUAffinity(int mycore)
{
  (void)mycore;   /* unused on this platform */
  return -1;
}
01053
/* Fallback for platforms without affinity support: warns and returns -1. */
int CmiPrintCPUAffinity(void)
{
  const int unsupported = -1;

  CmiPrintf("Warning: CmiPrintCPUAffinity not supported.\n");
  return unsupported;
}
01059
/* Fallback for platforms without affinity support: nothing to check. */
void CmiCheckAffinity(void)
{
  /* intentionally empty */
}
01062
01063 void CmiInitCPUAffinity(char **argv)
01064 {
01065 char *pemap = NULL;
01066 char *pemapfile = NULL;
01067 char *commap = NULL;
01068 int excludecore = -1;
01069 int affinity_flag = CmiGetArgFlagDesc(argv,"+setcpuaffinity",
01070 "set cpu affinity");
01071 while (CmiGetArgIntDesc(argv,"+excludecore",&excludecore, "avoid core when setting cpuaffinity"));
01072 CmiGetArgStringDesc(argv, "+pemap", &pemap, "define pe to core mapping");
01073 CmiGetArgStringDesc(argv, "+pemapfile", &pemapfile, "define pe to core mapping file");
01074 CmiGetArgStringDesc(argv, "+commap", &commap, "define comm threads to core mapping");
01075 CmiGetArgFlagDesc(argv,"+showcpuaffinity", "print cpu affinity");
01076 if (affinity_flag && CmiMyPe()==0)
01077 CmiPrintf("sched_setaffinity() is not supported, +setcpuaffinity disabled.\n");
01078 if (excludecore != -1 && CmiMyPe()==0)
01079 CmiPrintf("sched_setaffinity() is not supported, +excludecore disabled.\n");
01080 if (pemap && CmiMyPe()==0)
01081 CmiPrintf("sched_setaffinity() is not supported, +pemap disabled.\n");
01082 if (pemapfile && CmiMyPe()==0)
01083 CmiPrintf("sched_setaffinity() is not supported, +pemapfile disabled.\n");
01084 if (commap && CmiMyPe()==0)
01085 CmiPrintf("sched_setaffinity() is not supported, +commap disabled.\n");
01086 }
01087
01088
01089 void CmiInitCPUAffinityUtil(void){
01090 CpvInitialize(int, myCPUAffToCore);
01091 CpvAccess(myCPUAffToCore) = -1;
01092 #if CMK_OS_IS_LINUX
01093 CpvInitialize(void *, myProcStatFP);
01094 CpvAccess(myProcStatFP) = NULL;
01095 #endif
01096 }
01097
/* Fallback for platforms without affinity support: warns on stdout and
 * returns -1. */
int CmiOnCore(void)
{
  fputs("WARNING: CmiOnCore IS NOT SUPPORTED ON THIS PLATFORM\n", stdout);
  return -1;
}
01102 #endif