00001
00002
00003
00004
00005
00006
00007
00008
00009 #include "GridHybridLB.decl.h"
00010
00011 #include "GridHybridLB.h"
00012 #include "manager.h"
00013
// Defined outside this file. The options constructor tests it and skips the
// "GridHybridLB created" banner when it is nonzero.
00014 extern int quietModeRequested;
00015
// NOTE(review): CreateLBFunc_Def is a macro defined elsewhere; it presumably
// generates the creation/registration hook for this balancer and attaches the
// description string -- confirm against the load balancing framework headers.
00016 CreateLBFunc_Def (GridHybridLB, "Grid load balancer that uses hybrid technique to optimize communication graph")
00017
00018
00019
00020
00021
00022
00023 GridHybridLB::GridHybridLB (const CkLBOptions &opt) : CBase_GridHybridLB (opt)
00024 {
00025 char *value;
00026
00027
00028 lbname = (char *) "GridHybridLB";
00029
00030 if (CkMyPe() == 0 && !quietModeRequested) {
00031 CkPrintf ("CharmLB> GridHybridLB created.\n");
00032 }
00033
00034 if ((value = getenv ("CK_LDB_GRIDHYBRIDLB_MODE"))) {
00035 CK_LDB_GridHybridLB_Mode = atoi (value);
00036 } else {
00037 CK_LDB_GridHybridLB_Mode = CK_LDB_GRIDHYBRIDLB_MODE;
00038 }
00039
00040 if ((value = getenv ("CK_LDB_GRIDHYBRIDLB_BACKGROUND_LOAD"))) {
00041 CK_LDB_GridHybridLB_Background_Load = atoi (value);
00042 } else {
00043 CK_LDB_GridHybridLB_Background_Load = CK_LDB_GRIDHYBRIDLB_BACKGROUND_LOAD;
00044 }
00045
00046 if ((value = getenv ("CK_LDB_GRIDHYBRIDLB_LOAD_TOLERANCE"))) {
00047 CK_LDB_GridHybridLB_Load_Tolerance = atof (value);
00048 } else {
00049 CK_LDB_GridHybridLB_Load_Tolerance = CK_LDB_GRIDHYBRIDLB_LOAD_TOLERANCE;
00050 }
00051
00052 manager_init ();
00053 }
00054
00055
00056
00057
00058
00059
00060 GridHybridLB::GridHybridLB (CkMigrateMessage *msg) : CBase_GridHybridLB (msg)
00061 {
00062 char *value;
00063
00064
00065 lbname = (char *) "GridHybridLB";
00066
00067 if ((value = getenv ("CK_LDB_GRIDHYBRIDLB_MODE"))) {
00068 CK_LDB_GridHybridLB_Mode = atoi (value);
00069 } else {
00070 CK_LDB_GridHybridLB_Mode = CK_LDB_GRIDHYBRIDLB_MODE;
00071 }
00072
00073 if ((value = getenv ("CK_LDB_GRIDHYBRIDLB_BACKGROUND_LOAD"))) {
00074 CK_LDB_GridHybridLB_Background_Load = atoi (value);
00075 } else {
00076 CK_LDB_GridHybridLB_Background_Load = CK_LDB_GRIDHYBRIDLB_BACKGROUND_LOAD;
00077 }
00078
00079 if ((value = getenv ("CK_LDB_GRIDHYBRIDLB_LOAD_TOLERANCE"))) {
00080 CK_LDB_GridHybridLB_Load_Tolerance = atof (value);
00081 } else {
00082 CK_LDB_GridHybridLB_Load_Tolerance = CK_LDB_GRIDHYBRIDLB_LOAD_TOLERANCE;
00083 }
00084
00085 manager_init ();
00086 }
00087
00088
00089
00090
00091
00092
00093
00094 bool GridHybridLB::QueryBalanceNow (int step)
00095 {
00096 if (_lb_args.debug() > 2) {
00097 CkPrintf ("[%d] GridHybridLB is balancing on step %d.\n", CkMyPe(), step);
00098 }
00099
00100 return (true);
00101 }
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116 int GridHybridLB::Get_Cluster (int pe)
00117 {
00118 return (0);
00119 }
00120
00121
00122
00123
00124
00125
00126 void GridHybridLB::Initialize_PE_Data (CentralLB::LDStats *stats)
00127 {
00128 int min_speed;
00129 int i;
00130
00131
00132 PE_Data = new PE_Data_T[Num_PEs];
00133
00134 min_speed = MAXINT;
00135 for (i = 0; i < Num_PEs; i++) {
00136 (&PE_Data[i])->available = stats->procs[i].available;
00137 (&PE_Data[i])->cluster = Get_Cluster (i);
00138 (&PE_Data[i])->num_objs = 0;
00139 (&PE_Data[i])->num_lan_objs = 0;
00140 (&PE_Data[i])->num_lan_msgs = 0;
00141 (&PE_Data[i])->num_wan_objs = 0;
00142 (&PE_Data[i])->num_wan_msgs = 0;
00143 (&PE_Data[i])->relative_speed = 0.0;
00144 (&PE_Data[i])->scaled_load = 0.0;
00145
00146 if (stats->procs[i].pe_speed < min_speed) {
00147 min_speed = stats->procs[i].pe_speed;
00148 }
00149 }
00150
00151
00152
00153 for (i = 0; i < Num_PEs; i++) {
00154 (&PE_Data[i])->relative_speed = (double) (stats->procs[i].pe_speed / min_speed);
00155 if (CK_LDB_GridHybridLB_Background_Load) {
00156 (&PE_Data[i])->scaled_load += stats->procs[i].bg_walltime;
00157 }
00158 }
00159 }
00160
00161
00162
00163
00164
00165
00166 int GridHybridLB::Available_PE_Count ()
00167 {
00168 int available_pe_count;
00169 int i;
00170
00171
00172 available_pe_count = 0;
00173 for (i = 0; i < Num_PEs; i++) {
00174 if ((&PE_Data[i])->available) {
00175 available_pe_count += 1;
00176 }
00177 }
00178 return (available_pe_count);
00179 }
00180
00181
00182
00183
00184
00185
00186 int GridHybridLB::Compute_Number_Of_Clusters ()
00187 {
00188 int max_cluster;
00189 int i;
00190
00191
00192 max_cluster = 0;
00193 for (i = 0; i < Num_PEs; i++) {
00194 if ((&PE_Data[i])->cluster < 0) {
00195 return (-1);
00196 }
00197
00198 if ((&PE_Data[i])->cluster > max_cluster) {
00199 max_cluster = (&PE_Data[i])->cluster;
00200 }
00201 }
00202 return (max_cluster + 1);
00203 }
00204
00205
00206
00207
00208
00209
00210 void GridHybridLB::Initialize_Object_Data (CentralLB::LDStats *stats)
00211 {
00212 int i;
00213
00214
00215 Object_Data = new Object_Data_T[Num_Objects];
00216
00217 for (i = 0; i < Num_Objects; i++) {
00218 (&Object_Data[i])->migratable = (&stats->objData[i])->migratable;
00219 (&Object_Data[i])->from_pe = stats->from_proc[i];
00220 (&Object_Data[i])->num_lan_msgs = 0;
00221 (&Object_Data[i])->num_wan_msgs = 0;
00222 (&Object_Data[i])->load = (&stats->objData[i])->wallTime;
00223
00224 if ((&Object_Data[i])->migratable) {
00225 (&Object_Data[i])->to_pe = -1;
00226 (&Object_Data[i])->cluster = -1;
00227 } else {
00228 (&Object_Data[i])->to_pe = (&Object_Data[i])->from_pe;
00229 (&Object_Data[i])->cluster = Get_Cluster ((&Object_Data[i])->from_pe);
00230 if (_lb_args.debug() > 1) {
00231 CkPrintf ("[%d] GridHybridLB identifies object %d as non-migratable.\n", CkMyPe(), i);
00232 }
00233 }
00234 }
00235 }
00236
00237
00238
00239
00240
00241
00242 void GridHybridLB::Initialize_Cluster_Data ()
00243 {
00244 int cluster;
00245 double min_total_cpu_power;
00246 int i;
00247
00248
00249 Cluster_Data = new Cluster_Data_T[Num_Clusters];
00250
00251 for (i = 0; i < Num_Clusters; i++) {
00252 (&Cluster_Data[i])->num_pes = 0;
00253 (&Cluster_Data[i])->total_cpu_power = 0.0;
00254 (&Cluster_Data[i])->scaled_cpu_power = 0.0;
00255 }
00256
00257
00258 for (i = 0; i < Num_PEs; i++) {
00259 cluster = (&PE_Data[i])->cluster;
00260
00261 (&Cluster_Data[cluster])->num_pes += 1;
00262 (&Cluster_Data[cluster])->total_cpu_power += (&PE_Data[i])->relative_speed;
00263 }
00264
00265 min_total_cpu_power = MAXDOUBLE;
00266 for (i = 0; i < Num_Clusters; i++) {
00267 if ((&Cluster_Data[i])->total_cpu_power < min_total_cpu_power) {
00268 min_total_cpu_power = (&Cluster_Data[i])->total_cpu_power;
00269 }
00270 }
00271
00272 for (i = 0; i < Num_Clusters; i++) {
00273 (&Cluster_Data[i])->scaled_cpu_power = (double) ((&Cluster_Data[i])->total_cpu_power / min_total_cpu_power);
00274 }
00275 }
00276
00277
00278
00279
00280
00281
00282 void GridHybridLB::Partition_Objects_Into_Clusters (CentralLB::LDStats *stats)
00283 {
00284 int num_migratable_objects;
00285 int *migratable_objects;
00286 int index;
00287 int num_partitions;
00288 int *partition_to_cluster_map;
00289 int cluster;
00290 int partition;
00291 int partition_count;
00292 int *vertex_weights;
00293 int vertex;
00294 int **communication_matrix;
00295 LDCommData *com_data;
00296 int send_object;
00297 int recv_object;
00298 int send_index;
00299 int recv_index;
00300 const LDObjKey *recv_objects;
00301 int num_objects;
00302 int *xadj;
00303 int num_edges;
00304 int *adjncy;
00305 int *edge_weights;
00306 int count;
00307 int weight_flag;
00308 int numbering_flag;
00309 int options[5];
00310 int edgecut;
00311 int *newmap;
00312 int i;
00313 int j;
00314
00315
00316 if (Num_Clusters == 1) {
00317 for (i = 0; i < Num_Objects; i++) {
00318 (&Object_Data[i])->cluster = 0;
00319 }
00320
00321 return;
00322 }
00323
00324 for (i = 0; i < Num_Objects; i++) {
00325 (&Object_Data[i])->secondary_index = -1;
00326 }
00327
00328
00329
00330
00331 num_migratable_objects = 0;
00332 for (i = 0; i < Num_Objects; i++) {
00333 if ((&Object_Data[i])->migratable) {
00334 num_migratable_objects += 1;
00335 }
00336 }
00337
00338 migratable_objects = new int[num_migratable_objects];
00339
00340 index = 0;
00341 for (i = 0; i < Num_Objects; i++) {
00342 if ((&Object_Data[i])->migratable) {
00343 (&Object_Data[i])->secondary_index = index;
00344 migratable_objects[index] = i;
00345 index += 1;
00346 }
00347 }
00348
00349
00350
00351 num_partitions = 0;
00352 for (i = 0; i < Num_Clusters; i++) {
00353 num_partitions += (int) ceil ((&Cluster_Data[i])->scaled_cpu_power);
00354 }
00355
00356 partition_to_cluster_map = new int[num_partitions];
00357
00358 cluster = 0;
00359 partition = 0;
00360 while (partition < num_partitions) {
00361 partition_count = (int) ceil ((&Cluster_Data[cluster])->scaled_cpu_power);
00362
00363 for (i = partition; i < (partition + partition_count); i++) {
00364 partition_to_cluster_map[i] = cluster;
00365 }
00366
00367 partition += partition_count;
00368 cluster += 1;
00369 }
00370
00371 if ((CK_LDB_GridHybridLB_Mode == 1) || (CK_LDB_GridHybridLB_Mode == 3)) {
00372 vertex_weights = new int[num_migratable_objects];
00373 vertex = 0;
00374 for (i = 0; i < Num_Objects; i++) {
00375 if ((&Object_Data[i])->migratable) {
00376 vertex_weights[vertex] = (int) ceil ((&Object_Data[i])->load * 10000);
00377 vertex += 1;
00378 }
00379 }
00380 }
00381
00382
00383 communication_matrix = new int *[num_migratable_objects];
00384 for (i = 0; i < num_migratable_objects; i++) {
00385 communication_matrix[i] = new int[num_migratable_objects];
00386 for (j = 0; j < num_migratable_objects; j++) {
00387 communication_matrix[i][j] = 0;
00388 }
00389 }
00390
00391 for (i = 0; i < stats->n_comm; i++) {
00392 com_data = &(stats->commData[i]);
00393 if ((!com_data->from_proc()) && (com_data->recv_type() == LD_OBJ_MSG)) {
00394 send_object = stats->getHash (com_data->sender);
00395 recv_object = stats->getHash (com_data->receiver.get_destObj());
00396
00397
00398 if ((send_object < 0) || (send_object > Num_Objects) || (recv_object < 0) || (recv_object > Num_Objects)) {
00399 continue;
00400 }
00401
00402 if ((!(&Object_Data[send_object])->migratable) || (!(&Object_Data[recv_object])->migratable)) {
00403 continue;
00404 }
00405
00406 send_index = (&Object_Data[send_object])->secondary_index;
00407 recv_index = (&Object_Data[recv_object])->secondary_index;
00408
00409 communication_matrix[send_index][recv_index] += com_data->messages;
00410 communication_matrix[recv_index][send_index] += com_data->messages;
00411 } else if (com_data->receiver.get_type() == LD_OBJLIST_MSG) {
00412 send_object = stats->getHash (com_data->sender);
00413
00414 if ((send_object < 0) || (send_object > Num_Objects)) {
00415 continue;
00416 }
00417
00418 if (!(&Object_Data[send_object])->migratable) {
00419 continue;
00420 }
00421
00422 recv_objects = com_data->receiver.get_destObjs (num_objects);
00423
00424 for (j = 0; j < num_objects; j++) {
00425 recv_object = stats->getHash (recv_objects[j]);
00426
00427
00428 if ((recv_object < 0) || (recv_object > Num_Objects)) {
00429 continue;
00430 }
00431
00432 if (!(&Object_Data[recv_object])->migratable) {
00433 continue;
00434 }
00435
00436 send_index = (&Object_Data[send_object])->secondary_index;
00437 recv_index = (&Object_Data[recv_object])->secondary_index;
00438
00439 communication_matrix[send_index][recv_index] += com_data->messages;
00440 communication_matrix[recv_index][send_index] += com_data->messages;
00441 }
00442 }
00443 }
00444
00445 for (i = 0; i < num_migratable_objects; i++) {
00446 communication_matrix[i][i] = 0;
00447 }
00448
00449
00450 xadj = new int[num_migratable_objects + 1];
00451 num_edges = 0;
00452 for (i = 0; i < num_migratable_objects; i++) {
00453 for (j = 0; j < num_migratable_objects; j++) {
00454 if (communication_matrix[i][j] > 0) {
00455 num_edges += 1;
00456 }
00457 }
00458 }
00459 adjncy = new int[num_edges];
00460 edge_weights = new int[num_edges];
00461 count = 0;
00462 xadj[0] = 0;
00463 for (i = 0; i < num_migratable_objects; i++) {
00464 for (j = 0; j < num_migratable_objects; j++) {
00465 if (communication_matrix[i][j] > 0) {
00466 adjncy[count] = j;
00467 edge_weights[count] = communication_matrix[i][j];
00468 count += 1;
00469 }
00470 }
00471 xadj[i+1] = count;
00472 }
00473
00474 if ((CK_LDB_GridHybridLB_Mode == 0) || (CK_LDB_GridHybridLB_Mode == 2)) {
00475
00476 weight_flag = 1;
00477 numbering_flag = 0;
00478 options[0] = 0;
00479 newmap = new int[num_migratable_objects];
00480
00481 METIS_PartGraphRecursive (&num_migratable_objects, xadj, adjncy, NULL, edge_weights, &weight_flag, &numbering_flag, &num_partitions, options, &edgecut, newmap);
00482 } else if ((CK_LDB_GridHybridLB_Mode == 1) || (CK_LDB_GridHybridLB_Mode == 3)) {
00483
00484 weight_flag = 3;
00485 numbering_flag = 0;
00486 options[0] = 0;
00487 newmap = new int[num_migratable_objects];
00488
00489 METIS_PartGraphRecursive (&num_migratable_objects, xadj, adjncy, vertex_weights, edge_weights, &weight_flag, &numbering_flag, &num_partitions, options, &edgecut, newmap);
00490 } else {
00491 if (_lb_args.debug() > 0) {
00492 CkPrintf ("[%d] GridHybridLB was told to use bad mode (%d).\n", CkMyPe(), CK_LDB_GridHybridLB_Mode);
00493 }
00494 }
00495
00496
00497 for (i = 0; i < num_migratable_objects; i++) {
00498 partition = newmap[i];
00499 cluster = partition_to_cluster_map[partition];
00500
00501 index = migratable_objects[i];
00502
00503 (&Object_Data[index])->cluster = cluster;
00504 }
00505
00506
00507 delete [] newmap;
00508 delete [] edge_weights;
00509 delete [] adjncy;
00510 delete [] xadj;
00511 for (i = 0; i < num_migratable_objects; i++) {
00512 delete [] communication_matrix[i];
00513 }
00514 delete [] communication_matrix;
00515 if ((CK_LDB_GridHybridLB_Mode == 1) || (CK_LDB_GridHybridLB_Mode == 3)) {
00516 delete [] vertex_weights;
00517 }
00518 delete [] partition_to_cluster_map;
00519 delete [] migratable_objects;
00520 }
00521
00522
00523
00524
00525
00526
00527 void GridHybridLB::Examine_InterObject_Messages (CentralLB::LDStats *stats)
00528 {
00529 int i;
00530 int j;
00531 LDCommData *com_data;
00532 int send_object;
00533 int send_cluster;
00534 int recv_object;
00535 int recv_cluster;
00536 LDObjKey *recv_objects;
00537 int num_objects;
00538
00539
00540 for (i = 0; i < stats->n_comm; i++) {
00541 com_data = &(stats->commData[i]);
00542 if ((!com_data->from_proc()) && (com_data->recv_type() == LD_OBJ_MSG)) {
00543 send_object = stats->getHash (com_data->sender);
00544 recv_object = stats->getHash (com_data->receiver.get_destObj());
00545
00546 if ((send_object < 0) || (send_object > Num_Objects) || (recv_object < 0) || (recv_object > Num_Objects)) {
00547 continue;
00548 }
00549
00550 send_cluster = (&Object_Data[send_object])->cluster;
00551 recv_cluster = (&Object_Data[recv_object])->cluster;
00552
00553 if (send_cluster == recv_cluster) {
00554 (&Object_Data[send_object])->num_lan_msgs += com_data->messages;
00555 } else {
00556 (&Object_Data[send_object])->num_wan_msgs += com_data->messages;
00557 }
00558 } else if (com_data->receiver.get_type() == LD_OBJLIST_MSG) {
00559 send_object = stats->getHash (com_data->sender);
00560
00561 if ((send_object < 0) || (send_object > Num_Objects)) {
00562 continue;
00563 }
00564
00565 send_cluster = (&Object_Data[send_object])->cluster;
00566
00567 recv_objects = com_data->receiver.get_destObjs (num_objects);
00568
00569 for (j = 0; j < num_objects; j++) {
00570 recv_object = stats->getHash (recv_objects[j]);
00571
00572 if ((recv_object < 0) || (recv_object > Num_Objects)) {
00573 continue;
00574 }
00575
00576 recv_cluster = (&Object_Data[recv_object])->cluster;
00577
00578 if (send_cluster == recv_cluster) {
00579 (&Object_Data[send_object])->num_lan_msgs += com_data->messages;
00580 } else {
00581 (&Object_Data[send_object])->num_wan_msgs += com_data->messages;
00582 }
00583 }
00584 }
00585 }
00586 }
00587
00588
00589
00590
00591
00592
00593 void GridHybridLB::Map_NonMigratable_Objects_To_PEs ()
00594 {
00595 int i;
00596
00597
00598 for (i = 0; i < Num_Objects; i++) {
00599 if (!((&Object_Data[i])->migratable)) {
00600 if (_lb_args.debug() > 1) {
00601 CkPrintf ("[%d] GridHybridLB identifies object %d as non-migratable.\n", CkMyPe(), i);
00602 }
00603
00604 Assign_Object_To_PE (i, (&Object_Data[i])->from_pe);
00605 }
00606 }
00607 }
00608
00609
00610
00611
00612
00613
00614 void GridHybridLB::Map_Migratable_Objects_To_PEs (int cluster)
00615 {
00616 int target_object;
00617 int target_pe;
00618
00619
00620 while (1) {
00621 target_object = Find_Maximum_Object (cluster);
00622 target_pe = Find_Minimum_PE (cluster);
00623
00624 if ((target_object == -1) || (target_pe == -1)) {
00625 break;
00626 }
00627
00628 Assign_Object_To_PE (target_object, target_pe);
00629 }
00630 }
00631
00632
00633
00634
00635
00636
00637
00638
00639
00640
00641
00642 int GridHybridLB::Find_Maximum_Object (int cluster)
00643 {
00644 int max_index;
00645 int max_load_index;
00646 double max_load;
00647 int max_wan_msgs_index;
00648 int max_wan_msgs;
00649 double load_tolerance;
00650 int i;
00651
00652
00653 max_index = -1;
00654
00655 max_load_index = -1;
00656 max_load = -1.0;
00657
00658 max_wan_msgs_index = -1;
00659 max_wan_msgs = -1;
00660
00661 for (i = 0; i < Num_Objects; i++) {
00662 if (((&Object_Data[i])->cluster == cluster) && ((&Object_Data[i])->to_pe == -1)) {
00663 if ((&Object_Data[i])->load > max_load) {
00664 max_load_index = i;
00665 max_load = (&Object_Data[i])->load;
00666 }
00667 if ((&Object_Data[i])->num_wan_msgs > max_wan_msgs) {
00668 max_wan_msgs_index = i;
00669 max_wan_msgs = (&Object_Data[i])->num_wan_msgs;
00670 }
00671 }
00672 }
00673
00674 if (max_load_index < 0) {
00675 return (max_load_index);
00676 }
00677
00678 if ((&Object_Data[max_load_index])->num_wan_msgs >= (&Object_Data[max_wan_msgs_index])->num_wan_msgs) {
00679 return (max_load_index);
00680 }
00681
00682 load_tolerance = (&Object_Data[max_load_index])->load * CK_LDB_GridHybridLB_Load_Tolerance;
00683
00684 max_index = max_load_index;
00685
00686 for (i = 0; i < Num_Objects; i++) {
00687 if (((&Object_Data[i])->cluster == cluster) && ((&Object_Data[i])->to_pe == -1)) {
00688 if (i != max_load_index) {
00689 if (fabs ((&Object_Data[max_load_index])->load - (&Object_Data[i])->load) <= load_tolerance) {
00690 if ((&Object_Data[i])->num_wan_msgs > (&Object_Data[max_index])->num_wan_msgs) {
00691 max_index = i;
00692 }
00693 }
00694 }
00695 }
00696 }
00697
00698 return (max_index);
00699 }
00700
00701
00702
00703
00704
00705
00706
00707
00708
00709
00710
00711
00712
00713
00714
00715
00716
00717 int GridHybridLB::Find_Minimum_PE (int cluster)
00718 {
00719 if ((CK_LDB_GridHybridLB_Mode == 0) || (CK_LDB_GridHybridLB_Mode == 1)) {
00720 int min_index;
00721 int min_objs;
00722 int i;
00723
00724
00725 min_index = -1;
00726 min_objs = MAXINT;
00727
00728 for (i = 0; i < Num_PEs; i++) {
00729 if (((&PE_Data[i])->available) && ((&PE_Data[i])->cluster == cluster)) {
00730 if ((&PE_Data[i])->num_objs < min_objs) {
00731 min_index = i;
00732 min_objs = (&PE_Data[i])->num_objs;
00733 } else if (((&PE_Data[i])->num_objs == min_objs) &&
00734 ((&PE_Data[i])->num_wan_objs < (&PE_Data[min_index])->num_wan_objs)) {
00735 min_index = i;
00736 } else if (((&PE_Data[i])->num_objs == min_objs) &&
00737 ((&PE_Data[i])->num_wan_objs == (&PE_Data[min_index])->num_wan_objs) &&
00738 ((&PE_Data[i])->num_wan_msgs < (&PE_Data[min_index])->num_wan_msgs)) {
00739 min_index = i;
00740 } else if (((&PE_Data[i])->num_objs == min_objs) &&
00741 ((&PE_Data[i])->num_wan_objs == (&PE_Data[min_index])->num_wan_objs) &&
00742 ((&PE_Data[i])->num_wan_msgs == (&PE_Data[min_index])->num_wan_msgs) &&
00743 ((&PE_Data[i])->scaled_load < (&PE_Data[min_index])->scaled_load)) {
00744 min_index = i;
00745 }
00746 }
00747 }
00748
00749 return (min_index);
00750 } else if ((CK_LDB_GridHybridLB_Mode == 2) || (CK_LDB_GridHybridLB_Mode == 3)) {
00751 int min_index;
00752 int min_load_index;
00753 double min_scaled_load;
00754 int min_wan_msgs_index;
00755 int min_wan_msgs;
00756 double load_tolerance;
00757 int i;
00758
00759
00760 min_index = -1;
00761
00762 min_load_index = -1;
00763 min_scaled_load = MAXDOUBLE;
00764
00765 min_wan_msgs_index = -1;
00766 min_wan_msgs = MAXINT;
00767
00768 for (i = 0; i < Num_PEs; i++) {
00769 if (((&PE_Data[i])->available) && ((&PE_Data[i])->cluster == cluster)) {
00770 if ((&PE_Data[i])->scaled_load < min_scaled_load) {
00771 min_load_index = i;
00772 min_scaled_load = (&PE_Data[i])->scaled_load;
00773 }
00774 if ((&PE_Data[i])->num_wan_msgs < min_wan_msgs) {
00775 min_wan_msgs_index = i;
00776 min_wan_msgs = (&PE_Data[i])->num_wan_msgs;
00777 }
00778 }
00779 }
00780
00781
00782 if (min_load_index < 0) {
00783 return (min_load_index);
00784 }
00785
00786
00787
00788
00789 if ((&PE_Data[min_load_index])->num_wan_msgs <= (&PE_Data[min_wan_msgs_index])->num_wan_msgs) {
00790 return (min_load_index);
00791 }
00792
00793
00794
00795
00796
00797 load_tolerance = (&PE_Data[min_load_index])->scaled_load * CK_LDB_GridHybridLB_Load_Tolerance;
00798
00799 min_index = min_load_index;
00800
00801 for (i = 0; i < Num_PEs; i++) {
00802 if (((&PE_Data[i])->available) && ((&PE_Data[i])->cluster == cluster)) {
00803 if (i != min_load_index) {
00804 if (fabs ((&PE_Data[i])->scaled_load - (&PE_Data[min_load_index])->scaled_load) <= load_tolerance) {
00805 if ((&PE_Data[i])->num_wan_msgs < (&PE_Data[min_index])->num_wan_msgs) {
00806 min_index = i;
00807 }
00808 }
00809 }
00810 }
00811 }
00812
00813 return (min_index);
00814 } else {
00815 if (_lb_args.debug() > 0) {
00816 CkPrintf ("[%d] GridHybridLB was told to use bad mode (%d).\n", CkMyPe(), CK_LDB_GridHybridLB_Mode);
00817 }
00818 return (-1);
00819 }
00820 }
00821
00822
00823
00824
00825
00826
00827
00828
00829
00830 void GridHybridLB::Assign_Object_To_PE (int target_object, int target_pe)
00831 {
00832 (&Object_Data[target_object])->to_pe = target_pe;
00833
00834 (&PE_Data[target_pe])->num_objs += 1;
00835
00836 if ((&Object_Data[target_object])->num_lan_msgs > 0) {
00837 (&PE_Data[target_pe])->num_lan_objs += 1;
00838 (&PE_Data[target_pe])->num_lan_msgs += (&Object_Data[target_object])->num_lan_msgs;
00839 }
00840
00841 if ((&Object_Data[target_object])->num_wan_msgs > 0) {
00842 (&PE_Data[target_pe])->num_wan_objs += 1;
00843 (&PE_Data[target_pe])->num_wan_msgs += (&Object_Data[target_object])->num_wan_msgs;
00844 }
00845
00846 (&PE_Data[target_pe])->scaled_load += (&Object_Data[target_object])->load / (&PE_Data[target_pe])->relative_speed;
00847 }
00848
00849
00850
00851
00852
00853
00854
00855 void GridHybridLB::work (LDStats *stats)
00856 {
00857 int i;
00858
00859
00860 if (_lb_args.debug() > 0) {
00861 CkPrintf ("[%d] GridHybridLB is working (mode=%d, background load=%d, load tolerance=%f).\n", CkMyPe(), CK_LDB_GridHybridLB_Mode, CK_LDB_GridHybridLB_Background_Load, CK_LDB_GridHybridLB_Load_Tolerance);
00862 }
00863
00864
00865 stats->makeCommHash ();
00866
00867
00868 Num_PEs = stats->nprocs();
00869 Num_Objects = stats->n_objs;
00870
00871 if (_lb_args.debug() > 0) {
00872 CkPrintf ("[%d] GridHybridLB is examining %d PEs and %d objects.\n", CkMyPe(), Num_PEs, Num_Objects);
00873 }
00874
00875
00876 Initialize_PE_Data (stats);
00877
00878
00879 if (Available_PE_Count() < 1) {
00880 if (_lb_args.debug() > 0) {
00881 CkPrintf ("[%d] GridHybridLB finds no available PEs -- no balancing done.\n", CkMyPe());
00882 }
00883
00884 delete [] PE_Data;
00885
00886 return;
00887 }
00888
00889
00890
00891 Num_Clusters = Compute_Number_Of_Clusters ();
00892 if (Num_Clusters < 1) {
00893 if (_lb_args.debug() > 0) {
00894 CkPrintf ("[%d] GridHybridLB finds incomplete PE cluster map -- no balancing done.\n", CkMyPe());
00895 }
00896
00897 delete [] PE_Data;
00898
00899 return;
00900 }
00901
00902 if (_lb_args.debug() > 0) {
00903 CkPrintf ("[%d] GridHybridLB finds %d clusters.\n", CkMyPe(), Num_Clusters);
00904 }
00905
00906
00907 Initialize_Object_Data (stats);
00908
00909
00910 Initialize_Cluster_Data ();
00911
00912
00913 Partition_Objects_Into_Clusters (stats);
00914
00915
00916 Examine_InterObject_Messages (stats);
00917
00918
00919 Map_NonMigratable_Objects_To_PEs ();
00920
00921
00922 for (i = 0; i < Num_Clusters; i++) {
00923 Map_Migratable_Objects_To_PEs (i);
00924 }
00925
00926
00927 for (i = 0; i < Num_Objects; i++) {
00928 stats->to_proc[i] = (&Object_Data[i])->to_pe;
00929
00930 if (_lb_args.debug() > 2) {
00931 CkPrintf ("[%d] GridHybridLB migrates object %d from PE %d to PE %d.\n", CkMyPe(), i, stats->from_proc[i], stats->to_proc[i]);
00932 } else if (_lb_args.debug() > 1) {
00933 if (stats->to_proc[i] != stats->from_proc[i]) {
00934 CkPrintf ("[%d] GridHybridLB migrates object %d from PE %d to PE %d.\n", CkMyPe(), i, stats->from_proc[i], stats->to_proc[i]);
00935 }
00936 }
00937 }
00938
00939
00940 delete [] Cluster_Data;
00941 delete [] Object_Data;
00942 delete [] PE_Data;
00943 }
00944
00945 #include "GridHybridLB.def.h"