00001
00002
00003
00004
00005
00006
00007
00008
00009 #include "GridHybridSeedLB.decl.h"
00010
00011 #include "GridHybridSeedLB.h"
00012 #include "manager.h"
00013
00014 extern int quietModeRequested;
00015
00016 CreateLBFunc_Def (GridHybridSeedLB, "Grid load balancer that uses hybrid seed technique to optimize communication graph")
00017
00018
00019
00020
00021
00022
00023 GridHybridSeedLB::GridHybridSeedLB (const CkLBOptions &opt) : CBase_GridHybridSeedLB (opt)
00024 {
00025 char *value;
00026
00027
00028 lbname = (char *) "GridHybridSeedLB";
00029
00030 if (CkMyPe() == 0 && !quietModeRequested) {
00031 CkPrintf ("CharmLB> GridHybridSeedLB created.\n");
00032 }
00033
00034 if ((value = getenv ("CK_LDB_GRIDHYBRIDSEEDLB_MODE"))) {
00035 CK_LDB_GridHybridSeedLB_Mode = atoi (value);
00036 } else {
00037 CK_LDB_GridHybridSeedLB_Mode = CK_LDB_GRIDHYBRIDSEEDLB_MODE;
00038 }
00039
00040 if ((value = getenv ("CK_LDB_GRIDHYBRIDSEEDLB_BACKGROUND_LOAD"))) {
00041 CK_LDB_GridHybridSeedLB_Background_Load = atoi (value);
00042 } else {
00043 CK_LDB_GridHybridSeedLB_Background_Load = CK_LDB_GRIDHYBRIDSEEDLB_BACKGROUND_LOAD;
00044 }
00045
00046 if ((value = getenv ("CK_LDB_GRIDHYBRIDSEEDLB_LOAD_TOLERANCE"))) {
00047 CK_LDB_GridHybridSeedLB_Load_Tolerance = atof (value);
00048 } else {
00049 CK_LDB_GridHybridSeedLB_Load_Tolerance = CK_LDB_GRIDHYBRIDSEEDLB_LOAD_TOLERANCE;
00050 }
00051
00052 manager_init ();
00053 }
00054
00055
00056
00057
00058
00059
00060 GridHybridSeedLB::GridHybridSeedLB (CkMigrateMessage *msg) : CBase_GridHybridSeedLB (msg)
00061 {
00062 char *value;
00063
00064
00065 lbname = (char *) "GridHybridSeedLB";
00066
00067 if ((value = getenv ("CK_LDB_GRIDHYBRIDSEEDLB_MODE"))) {
00068 CK_LDB_GridHybridSeedLB_Mode = atoi (value);
00069 } else {
00070 CK_LDB_GridHybridSeedLB_Mode = CK_LDB_GRIDHYBRIDSEEDLB_MODE;
00071 }
00072
00073 if ((value = getenv ("CK_LDB_GRIDHYBRIDSEEDLB_BACKGROUND_LOAD"))) {
00074 CK_LDB_GridHybridSeedLB_Background_Load = atoi (value);
00075 } else {
00076 CK_LDB_GridHybridSeedLB_Background_Load = CK_LDB_GRIDHYBRIDSEEDLB_BACKGROUND_LOAD;
00077 }
00078
00079 if ((value = getenv ("CK_LDB_GRIDHYBRIDSEEDLB_LOAD_TOLERANCE"))) {
00080 CK_LDB_GridHybridSeedLB_Load_Tolerance = atof (value);
00081 } else {
00082 CK_LDB_GridHybridSeedLB_Load_Tolerance = CK_LDB_GRIDHYBRIDSEEDLB_LOAD_TOLERANCE;
00083 }
00084
00085 manager_init ();
00086 }
00087
00088
00089
00090
00091
00092
00093
00094 bool GridHybridSeedLB::QueryBalanceNow (int step)
00095 {
00096 if (_lb_args.debug() > 2) {
00097 CkPrintf ("[%d] GridHybridSeedLB is balancing on step %d.\n", CkMyPe(), step);
00098 }
00099
00100 return (true);
00101 }
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116 int GridHybridSeedLB::Get_Cluster (int pe)
00117 {
00118 return (0);
00119 }
00120
00121
00122
00123
00124
00125
00126 void GridHybridSeedLB::Initialize_PE_Data (CentralLB::LDStats *stats)
00127 {
00128 int min_speed;
00129
00130
00131 PE_Data = new PE_Data_T[Num_PEs];
00132
00133 min_speed = MAXINT;
00134 for (int i = 0; i < Num_PEs; i++) {
00135 (&PE_Data[i])->available = stats->procs[i].available;
00136 (&PE_Data[i])->cluster = Get_Cluster (i);
00137 (&PE_Data[i])->num_objs = 0;
00138 (&PE_Data[i])->num_lan_objs = 0;
00139 (&PE_Data[i])->num_lan_msgs = 0;
00140 (&PE_Data[i])->num_wan_objs = 0;
00141 (&PE_Data[i])->num_wan_msgs = 0;
00142 (&PE_Data[i])->relative_speed = 0.0;
00143 (&PE_Data[i])->scaled_load = 0.0;
00144
00145 if (stats->procs[i].pe_speed < min_speed) {
00146 min_speed = stats->procs[i].pe_speed;
00147 }
00148 }
00149
00150
00151
00152 for (int i = 0; i < Num_PEs; i++) {
00153 (&PE_Data[i])->relative_speed = (double) (stats->procs[i].pe_speed / min_speed);
00154 if (CK_LDB_GridHybridSeedLB_Background_Load) {
00155 (&PE_Data[i])->scaled_load += stats->procs[i].bg_walltime;
00156 }
00157 }
00158 }
00159
00160
00161
00162
00163
00164
00165 int GridHybridSeedLB::Available_PE_Count ()
00166 {
00167 int available_pe_count;
00168
00169
00170 available_pe_count = 0;
00171 for (int i = 0; i < Num_PEs; i++) {
00172 if ((&PE_Data[i])->available) {
00173 available_pe_count += 1;
00174 }
00175 }
00176
00177 return (available_pe_count);
00178 }
00179
00180
00181
00182
00183
00184
00185 int GridHybridSeedLB::Compute_Number_Of_Clusters ()
00186 {
00187 int max_cluster;
00188
00189
00190 max_cluster = 0;
00191 for (int i = 0; i < Num_PEs; i++) {
00192 if ((&PE_Data[i])->cluster < 0) {
00193 return (-1);
00194 }
00195
00196 if ((&PE_Data[i])->cluster > max_cluster) {
00197 max_cluster = (&PE_Data[i])->cluster;
00198 }
00199 }
00200
00201 return (max_cluster + 1);
00202 }
00203
00204
00205
00206
00207
00208
00209 void GridHybridSeedLB::Initialize_Object_Data (CentralLB::LDStats *stats)
00210 {
00211 Object_Data = new Object_Data_T[Num_Objects];
00212
00213 for (int i = 0; i < Num_Objects; i++) {
00214 (&Object_Data[i])->migratable = (&stats->objData[i])->migratable;
00215 (&Object_Data[i])->from_pe = stats->from_proc[i];
00216 (&Object_Data[i])->num_lan_msgs = 0;
00217 (&Object_Data[i])->num_wan_msgs = 0;
00218 (&Object_Data[i])->load = (&stats->objData[i])->wallTime;
00219 (&Object_Data[i])->secondary_index = -1;
00220
00221 if ((&Object_Data[i])->migratable) {
00222 (&Object_Data[i])->to_pe = -1;
00223 (&Object_Data[i])->cluster = -1;
00224 } else {
00225 (&Object_Data[i])->to_pe = (&Object_Data[i])->from_pe;
00226 (&Object_Data[i])->cluster = Get_Cluster ((&Object_Data[i])->from_pe);
00227 if (_lb_args.debug() > 1) {
00228 CkPrintf ("[%d] GridHybridSeedLB identifies object %d as non-migratable.\n", CkMyPe(), i);
00229 }
00230 }
00231 }
00232 }
00233
00234
00235
00236
00237
00238
00239 int GridHybridSeedLB::Compute_Migratable_Object_Count ()
00240 {
00241 int count;
00242
00243
00244 count = 0;
00245
00246 for (int i = 0; i < Num_Objects; i++) {
00247 if ((&Object_Data[i])->migratable) {
00248 count += 1;
00249 }
00250 }
00251
00252 return (count);
00253 }
00254
00255
00256
00257
00258
00259
00260 void GridHybridSeedLB::Initialize_Cluster_Data ()
00261 {
00262 int cluster;
00263 double min_total_cpu_power;
00264
00265
00266 Cluster_Data = new Cluster_Data_T[Num_Clusters];
00267
00268 for (int i = 0; i < Num_Clusters; i++) {
00269 (&Cluster_Data[i])->num_pes = 0;
00270 (&Cluster_Data[i])->total_cpu_power = 0.0;
00271 (&Cluster_Data[i])->scaled_cpu_power = 0.0;
00272 }
00273
00274
00275 for (int i = 0; i < Num_PEs; i++) {
00276 cluster = (&PE_Data[i])->cluster;
00277
00278 (&Cluster_Data[cluster])->num_pes += 1;
00279 (&Cluster_Data[cluster])->total_cpu_power += (&PE_Data[i])->relative_speed;
00280 }
00281
00282 min_total_cpu_power = MAXDOUBLE;
00283 for (int i = 0; i < Num_Clusters; i++) {
00284 if ((&Cluster_Data[i])->total_cpu_power < min_total_cpu_power) {
00285 min_total_cpu_power = (&Cluster_Data[i])->total_cpu_power;
00286 }
00287 }
00288
00289 for (int i = 0; i < Num_Clusters; i++) {
00290 (&Cluster_Data[i])->scaled_cpu_power = (double) ((&Cluster_Data[i])->total_cpu_power / min_total_cpu_power);
00291 }
00292 }
00293
00294
00295
00296
00297
00298
00299 void GridHybridSeedLB::Initialize_Communication_Matrix (CentralLB::LDStats *stats)
00300 {
00301 LDCommData *com_data;
00302 int send_object;
00303 int recv_object;
00304 int send_index;
00305 int recv_index;
00306 int num_objects;
00307 const LDObjKey *recv_objects;
00308 int index;
00309
00310
00311 Migratable_Objects = new int[Num_Migratable_Objects];
00312
00313 index = 0;
00314 for (int i = 0; i < Num_Objects; i++) {
00315 if ((&Object_Data[i])->migratable) {
00316 (&Object_Data[i])->secondary_index = index;
00317 Migratable_Objects[index] = i;
00318 index += 1;
00319 }
00320 }
00321
00322
00323 Communication_Matrix = new int *[Num_Migratable_Objects];
00324 for (int i = 0; i < Num_Migratable_Objects; i++) {
00325 Communication_Matrix[i] = new int[Num_Migratable_Objects];
00326 for (int j = 0; j < Num_Migratable_Objects; j++) {
00327 Communication_Matrix[i][j] = 0;
00328 }
00329 }
00330
00331 for (int i = 0; i < stats->n_comm; i++) {
00332 com_data = &(stats->commData[i]);
00333 if ((!com_data->from_proc()) && (com_data->recv_type() == LD_OBJ_MSG)) {
00334 send_object = stats->getHash (com_data->sender);
00335 recv_object = stats->getHash (com_data->receiver.get_destObj());
00336
00337 if ((send_object < 0) || (send_object > Num_Objects) || (recv_object < 0) || (recv_object > Num_Objects)) {
00338 continue;
00339 }
00340
00341 if ((!(&Object_Data[send_object])->migratable) || (!(&Object_Data[recv_object])->migratable)) {
00342 continue;
00343 }
00344
00345 send_index = (&Object_Data[send_object])->secondary_index;
00346 recv_index = (&Object_Data[recv_object])->secondary_index;
00347
00348 Communication_Matrix[send_index][recv_index] += com_data->messages;
00349 Communication_Matrix[recv_index][send_index] += com_data->messages;
00350 } else if (com_data->receiver.get_type() == LD_OBJLIST_MSG) {
00351 send_object = stats->getHash (com_data->sender);
00352
00353 if ((send_object < 0) || (send_object > Num_Objects)) {
00354 continue;
00355 }
00356
00357 if (!(&Object_Data[send_object])->migratable) {
00358 continue;
00359 }
00360
00361 recv_objects = com_data->receiver.get_destObjs (num_objects);
00362
00363 for (int j = 0; j < num_objects; j++) {
00364 recv_object = stats->getHash (recv_objects[j]);
00365
00366 if ((recv_object < 0) || (recv_object > Num_Objects)) {
00367 continue;
00368 }
00369
00370 if (!(&Object_Data[recv_object])->migratable) {
00371 continue;
00372 }
00373
00374 send_index = (&Object_Data[send_object])->secondary_index;
00375 recv_index = (&Object_Data[recv_object])->secondary_index;
00376
00377 Communication_Matrix[send_index][recv_index] += com_data->messages;
00378 Communication_Matrix[recv_index][send_index] += com_data->messages;
00379 }
00380 }
00381 }
00382
00383 for (int i = 0; i < Num_Migratable_Objects; i++) {
00384 Communication_Matrix[i][i] = 0;
00385 }
00386 }
00387
00388
00389
00390
00391
00392
00393 void GridHybridSeedLB::Partition_Objects_Into_Clusters (CentralLB::LDStats *stats)
00394 {
00395 int index;
00396 int num_partitions;
00397 int *partition_to_cluster_map;
00398 int cluster;
00399 int partition;
00400 int partition_count;
00401 int *vertex_weights;
00402 int vertex;
00403 int *xadj;
00404 int num_edges;
00405 int *adjncy;
00406 int *edge_weights;
00407 int count;
00408 int weight_flag;
00409 int numbering_flag;
00410 int options[5];
00411 int edgecut;
00412 int *newmap;
00413
00414
00415 if (Num_Clusters == 1) {
00416 for (int i = 0; i < Num_Objects; i++) {
00417 (&Object_Data[i])->cluster = 0;
00418 }
00419
00420 return;
00421 }
00422
00423
00424
00425 num_partitions = 0;
00426 for (int i = 0; i < Num_Clusters; i++) {
00427 num_partitions += (int) ceil ((&Cluster_Data[i])->scaled_cpu_power);
00428 }
00429
00430 partition_to_cluster_map = new int[num_partitions];
00431
00432 cluster = 0;
00433 partition = 0;
00434 while (partition < num_partitions) {
00435 partition_count = (int) ceil ((&Cluster_Data[cluster])->scaled_cpu_power);
00436
00437 for (int i = partition; i < (partition + partition_count); i++) {
00438 partition_to_cluster_map[i] = cluster;
00439 }
00440
00441 partition += partition_count;
00442 cluster += 1;
00443 }
00444
00445 if ((CK_LDB_GridHybridSeedLB_Mode == 1) || (CK_LDB_GridHybridSeedLB_Mode == 3)) {
00446 vertex_weights = new int[Num_Migratable_Objects];
00447 vertex = 0;
00448 for (int i = 0; i < Num_Objects; i++) {
00449 if ((&Object_Data[i])->migratable) {
00450 vertex_weights[vertex] = (int) ceil ((&Object_Data[i])->load * 10000);
00451 vertex += 1;
00452 }
00453 }
00454 }
00455
00456
00457 xadj = new int[Num_Migratable_Objects + 1];
00458 num_edges = 0;
00459 for (int i = 0; i < Num_Migratable_Objects; i++) {
00460 for (int j = 0; j < Num_Migratable_Objects; j++) {
00461 if (Communication_Matrix[i][j] > 0) {
00462 num_edges += 1;
00463 }
00464 }
00465 }
00466 adjncy = new int[num_edges];
00467 edge_weights = new int[num_edges];
00468 count = 0;
00469 xadj[0] = 0;
00470 for (int i = 0; i < Num_Migratable_Objects; i++) {
00471 for (int j = 0; j < Num_Migratable_Objects; j++) {
00472 if (Communication_Matrix[i][j] > 0) {
00473 adjncy[count] = j;
00474 edge_weights[count] = Communication_Matrix[i][j];
00475 count += 1;
00476 }
00477 }
00478 xadj[i+1] = count;
00479 }
00480
00481 if ((CK_LDB_GridHybridSeedLB_Mode == 0) || (CK_LDB_GridHybridSeedLB_Mode == 2)) {
00482
00483 weight_flag = 1;
00484 numbering_flag = 0;
00485 options[0] = 0;
00486 newmap = new int[Num_Migratable_Objects];
00487
00488 METIS_PartGraphRecursive (&Num_Migratable_Objects, xadj, adjncy, NULL, edge_weights, &weight_flag, &numbering_flag, &num_partitions, options, &edgecut, newmap);
00489 } else if ((CK_LDB_GridHybridSeedLB_Mode == 1) || (CK_LDB_GridHybridSeedLB_Mode == 3)) {
00490
00491 weight_flag = 3;
00492 numbering_flag = 0;
00493 options[0] = 0;
00494 newmap = new int[Num_Migratable_Objects];
00495
00496 METIS_PartGraphRecursive (&Num_Migratable_Objects, xadj, adjncy, vertex_weights, edge_weights, &weight_flag, &numbering_flag, &num_partitions, options, &edgecut, newmap);
00497 } else {
00498 if (_lb_args.debug() > 0) {
00499 CkPrintf ("[%d] GridHybridSeedLB was told to use bad mode (%d).\n", CkMyPe(), CK_LDB_GridHybridSeedLB_Mode);
00500 }
00501 }
00502
00503
00504 for (int i = 0; i < Num_Migratable_Objects; i++) {
00505 partition = newmap[i];
00506 cluster = partition_to_cluster_map[partition];
00507
00508 index = Migratable_Objects[i];
00509
00510 (&Object_Data[index])->cluster = cluster;
00511 }
00512
00513
00514 delete [] newmap;
00515 delete [] edge_weights;
00516 delete [] adjncy;
00517 delete [] xadj;
00518 if ((CK_LDB_GridHybridSeedLB_Mode == 1) || (CK_LDB_GridHybridSeedLB_Mode == 3)) {
00519 delete [] vertex_weights;
00520 }
00521 delete [] partition_to_cluster_map;
00522 }
00523
00524
00525
00526
00527
00528
00529 void GridHybridSeedLB::Examine_InterObject_Messages (CentralLB::LDStats *stats)
00530 {
00531 LDCommData *com_data;
00532 int send_object;
00533 int send_cluster;
00534 int recv_object;
00535 int recv_cluster;
00536 LDObjKey *recv_objects;
00537 int num_objects;
00538
00539
00540 for (int i = 0; i < stats->n_comm; i++) {
00541 com_data = &(stats->commData[i]);
00542 if ((!com_data->from_proc()) && (com_data->recv_type() == LD_OBJ_MSG)) {
00543 send_object = stats->getHash (com_data->sender);
00544 recv_object = stats->getHash (com_data->receiver.get_destObj());
00545
00546 if ((send_object < 0) || (send_object > Num_Objects) || (recv_object < 0) || (recv_object > Num_Objects)) {
00547 continue;
00548 }
00549
00550 send_cluster = (&Object_Data[send_object])->cluster;
00551 recv_cluster = (&Object_Data[recv_object])->cluster;
00552
00553 if (send_cluster == recv_cluster) {
00554 (&Object_Data[send_object])->num_lan_msgs += com_data->messages;
00555 } else {
00556 (&Object_Data[send_object])->num_wan_msgs += com_data->messages;
00557 }
00558 } else if (com_data->receiver.get_type() == LD_OBJLIST_MSG) {
00559 send_object = stats->getHash (com_data->sender);
00560
00561 if ((send_object < 0) || (send_object > Num_Objects)) {
00562 continue;
00563 }
00564
00565 send_cluster = (&Object_Data[send_object])->cluster;
00566
00567 recv_objects = com_data->receiver.get_destObjs (num_objects);
00568
00569 for (int j = 0; j < num_objects; j++) {
00570 recv_object = stats->getHash (recv_objects[j]);
00571
00572 if ((recv_object < 0) || (recv_object > Num_Objects)) {
00573 continue;
00574 }
00575
00576 recv_cluster = (&Object_Data[recv_object])->cluster;
00577
00578 if (send_cluster == recv_cluster) {
00579 (&Object_Data[send_object])->num_lan_msgs += com_data->messages;
00580 } else {
00581 (&Object_Data[send_object])->num_wan_msgs += com_data->messages;
00582 }
00583 }
00584 }
00585 }
00586 }
00587
00588
00589
00590
00591
00592
00593 void GridHybridSeedLB::Map_NonMigratable_Objects_To_PEs ()
00594 {
00595 for (int i = 0; i < Num_Objects; i++) {
00596 if (!((&Object_Data[i])->migratable)) {
00597 if (_lb_args.debug() > 1) {
00598 CkPrintf ("[%d] GridHybridSeedLB identifies object %d as non-migratable.\n", CkMyPe(), i);
00599 }
00600
00601 Assign_Object_To_PE (i, (&Object_Data[i])->from_pe);
00602 }
00603 }
00604 }
00605
00606
00607
00608
00609
00610
00611
00612
00613
00614
00615
00616 int GridHybridSeedLB::Find_Maximum_Object (int cluster)
00617 {
00618 int max_index;
00619 double max_load;
00620
00621
00622 max_index = -1;
00623 max_load = -1.0;
00624
00625 for (int i = 0; i < Num_Objects; i++) {
00626 if ((((&Object_Data[i])->cluster == cluster) && ((&Object_Data[i])->to_pe == -1)) && ((&Object_Data[i])->load > max_load)) {
00627 max_index = i;
00628 max_load = (&Object_Data[i])->load;
00629 }
00630 }
00631
00632 return (max_index);
00633 }
00634
00635
00636
00637
00638
00639
00640
00641
00642
00643
00644
00645 int GridHybridSeedLB::Find_Maximum_Border_Object (int cluster)
00646 {
00647 int max_index;
00648 int max_load_index;
00649 double max_load;
00650 int max_wan_msgs_index;
00651 int max_wan_msgs;
00652 double load_tolerance;
00653
00654
00655 max_index = -1;
00656
00657 max_load_index = -1;
00658 max_load = -1.0;
00659
00660 max_wan_msgs_index = -1;
00661 max_wan_msgs = -1;
00662
00663 for (int i = 0; i < Num_Objects; i++) {
00664 if (((&Object_Data[i])->cluster == cluster) && ((&Object_Data[i])->to_pe == -1) && ((&Object_Data[i])->num_wan_msgs > 0)) {
00665 if ((&Object_Data[i])->load > max_load) {
00666 max_load_index = i;
00667 max_load = (&Object_Data[i])->load;
00668 }
00669 if ((&Object_Data[i])->num_wan_msgs > max_wan_msgs) {
00670 max_wan_msgs_index = i;
00671 max_wan_msgs = (&Object_Data[i])->num_wan_msgs;
00672 }
00673 }
00674 }
00675
00676 if (max_load_index < 0) {
00677 return (max_load_index);
00678 }
00679
00680 if ((&Object_Data[max_load_index])->num_wan_msgs >= (&Object_Data[max_wan_msgs_index])->num_wan_msgs) {
00681 return (max_load_index);
00682 }
00683
00684 if (CK_LDB_GridHybridSeedLB_Load_Tolerance <= 0.0) {
00685 return (max_load_index);
00686 }
00687
00688 load_tolerance = (&Object_Data[max_load_index])->load * CK_LDB_GridHybridSeedLB_Load_Tolerance;
00689
00690 max_index = max_load_index;
00691
00692 for (int i = 0; i < Num_Objects; i++) {
00693 if (((&Object_Data[i])->cluster == cluster) && ((&Object_Data[i])->to_pe == -1) && ((&Object_Data[i])->num_wan_msgs > 0) && ((&Object_Data[i])->num_wan_msgs > (&Object_Data[max_index])->num_wan_msgs) && (fabs ((&Object_Data[max_load_index])->load - (&Object_Data[i])->load) <= load_tolerance)) {
00694 max_index = i;
00695 }
00696 }
00697
00698 return (max_index);
00699 }
00700
00701
00702
00703
00704
00705
00706 int GridHybridSeedLB::Find_Maximum_Object_From_Seeds (int pe)
00707 {
00708 int cluster;
00709 int max_index;
00710 int max_comm_events;
00711 int max_load_index;
00712 double max_load;
00713 double load_tolerance;
00714 int comm_events;
00715
00716
00717 max_index = -1;
00718
00719 max_comm_events = 0;
00720
00721 max_load_index = -1;
00722 max_load = -1.0;
00723
00724 cluster = (&PE_Data[pe])->cluster;
00725
00726 for (int i = 0; i < Num_Objects; i++) {
00727 if (((&Object_Data[i])->cluster == cluster) && ((&Object_Data[i])->to_pe == -1) && ((&Object_Data[i])->load > max_load)) {
00728 max_load_index = i;
00729 max_load = (&Object_Data[i])->load;
00730 }
00731 }
00732
00733 if (max_load_index < 0) {
00734 return (max_load_index);
00735 }
00736
00737 if (CK_LDB_GridHybridSeedLB_Load_Tolerance <= 0.0) {
00738 return (max_load_index);
00739 }
00740
00741 load_tolerance = (&Object_Data[max_load_index])->load * CK_LDB_GridHybridSeedLB_Load_Tolerance;
00742
00743 max_index = max_load_index;
00744
00745 for (int i = 0; i < Num_Objects; i++) {
00746 if ((&Object_Data[i])->to_pe == pe) {
00747 for (int j = 0; j < Num_Objects; j++) {
00748 if (((&Object_Data[j])->cluster == cluster) && ((&Object_Data[j])->to_pe == -1) && (fabs ((&Object_Data[max_load_index])->load - (&Object_Data[j])->load) <= load_tolerance)) {
00749 comm_events = Compute_Communication_Events (i, j);
00750 if (comm_events > max_comm_events) {
00751 max_index = j;
00752 max_comm_events = comm_events;
00753 }
00754 }
00755 }
00756 }
00757 }
00758
00759 return (max_index);
00760 }
00761
00762
00763
00764
00765
00766
00767 int GridHybridSeedLB::Find_Maximum_Border_Object_From_Seeds (int pe)
00768 {
00769 int cluster;
00770 int max_index;
00771 int max_comm_events;
00772 int max_load_index;
00773 double max_load;
00774 double load_tolerance;
00775 int comm_events;
00776
00777
00778 max_index = -1;
00779
00780 max_comm_events = 0;
00781
00782 max_load_index = -1;
00783 max_load = -1.0;
00784
00785 cluster = (&PE_Data[pe])->cluster;
00786
00787 for (int i = 0; i < Num_Objects; i++) {
00788 if (((&Object_Data[i])->cluster == cluster) && ((&Object_Data[i])->to_pe == -1) && ((&Object_Data[i])->num_wan_msgs > 0) && ((&Object_Data[i])->load > max_load)) {
00789 max_load_index = i;
00790 max_load = (&Object_Data[i])->load;
00791 }
00792 }
00793
00794 if (max_load_index < 0) {
00795 return (max_load_index);
00796 }
00797
00798 if (CK_LDB_GridHybridSeedLB_Load_Tolerance <= 0.0) {
00799 return (max_load_index);
00800 }
00801
00802 load_tolerance = (&Object_Data[max_load_index])->load * CK_LDB_GridHybridSeedLB_Load_Tolerance;
00803
00804 max_index = max_load_index;
00805
00806 for (int i = 0; i < Num_Objects; i++) {
00807 if ((&Object_Data[i])->to_pe == pe) {
00808 for (int j = 0; j < Num_Objects; j++) {
00809 if (((&Object_Data[j])->cluster == cluster) && ((&Object_Data[j])->to_pe == -1) && ((&Object_Data[j])->num_wan_msgs > 0) && (fabs ((&Object_Data[max_load_index])->load - (&Object_Data[j])->load) <= load_tolerance)) {
00810 comm_events = Compute_Communication_Events (i, j);
00811 if (comm_events > max_comm_events) {
00812 max_index = j;
00813 max_comm_events = comm_events;
00814 }
00815 }
00816 }
00817 }
00818 }
00819
00820 return (max_index);
00821 }
00822
00823
00824
00825
00826
00827
00828 int GridHybridSeedLB::Compute_Communication_Events (int obj1, int obj2)
00829 {
00830 int send_index;
00831 int recv_index;
00832
00833
00834 send_index = (&Object_Data[obj1])->secondary_index;
00835 recv_index = (&Object_Data[obj2])->secondary_index;
00836
00837 return (Communication_Matrix[send_index][recv_index]);
00838 }
00839
00840
00841
00842
00843
00844
00845
00846
00847
00848
00849
00850
00851
00852
00853
00854
00855
00856 int GridHybridSeedLB::Find_Minimum_PE (int cluster)
00857 {
00858 if ((CK_LDB_GridHybridSeedLB_Mode == 0) || (CK_LDB_GridHybridSeedLB_Mode == 1)) {
00859 int min_index;
00860 int min_objs;
00861
00862
00863 min_index = -1;
00864 min_objs = MAXINT;
00865
00866 for (int i = 0; i < Num_PEs; i++) {
00867 if (((&PE_Data[i])->available) && ((&PE_Data[i])->cluster == cluster)) {
00868 if ((&PE_Data[i])->num_objs < min_objs) {
00869 min_index = i;
00870 min_objs = (&PE_Data[i])->num_objs;
00871 } else if (((&PE_Data[i])->num_objs == min_objs) &&
00872 ((&PE_Data[i])->num_wan_objs < (&PE_Data[min_index])->num_wan_objs)) {
00873 min_index = i;
00874 } else if (((&PE_Data[i])->num_objs == min_objs) &&
00875 ((&PE_Data[i])->num_wan_objs == (&PE_Data[min_index])->num_wan_objs) &&
00876 ((&PE_Data[i])->num_wan_msgs < (&PE_Data[min_index])->num_wan_msgs)) {
00877 min_index = i;
00878 } else if (((&PE_Data[i])->num_objs == min_objs) &&
00879 ((&PE_Data[i])->num_wan_objs == (&PE_Data[min_index])->num_wan_objs) &&
00880 ((&PE_Data[i])->num_wan_msgs == (&PE_Data[min_index])->num_wan_msgs) &&
00881 ((&PE_Data[i])->scaled_load < (&PE_Data[min_index])->scaled_load)) {
00882 min_index = i;
00883 }
00884 }
00885 }
00886
00887 return (min_index);
00888 } else if ((CK_LDB_GridHybridSeedLB_Mode == 2) || (CK_LDB_GridHybridSeedLB_Mode == 3)) {
00889 int min_index;
00890 int min_load_index;
00891 double min_scaled_load;
00892 int min_wan_msgs_index;
00893 int min_wan_msgs;
00894 double load_tolerance;
00895
00896
00897 min_index = -1;
00898
00899 min_load_index = -1;
00900 min_scaled_load = MAXDOUBLE;
00901
00902 min_wan_msgs_index = -1;
00903 min_wan_msgs = MAXINT;
00904
00905 for (int i = 0; i < Num_PEs; i++) {
00906 if (((&PE_Data[i])->available) && ((&PE_Data[i])->cluster == cluster)) {
00907 if ((&PE_Data[i])->scaled_load < min_scaled_load) {
00908 min_load_index = i;
00909 min_scaled_load = (&PE_Data[i])->scaled_load;
00910 }
00911 if ((&PE_Data[i])->num_wan_msgs < min_wan_msgs) {
00912 min_wan_msgs_index = i;
00913 min_wan_msgs = (&PE_Data[i])->num_wan_msgs;
00914 }
00915 }
00916 }
00917
00918
00919 if (min_load_index < 0) {
00920 return (min_load_index);
00921 }
00922
00923
00924
00925
00926 if ((&PE_Data[min_load_index])->num_wan_msgs <= (&PE_Data[min_wan_msgs_index])->num_wan_msgs) {
00927 return (min_load_index);
00928 }
00929
00930
00931
00932
00933
00934 load_tolerance = (&PE_Data[min_load_index])->scaled_load * CK_LDB_GridHybridSeedLB_Load_Tolerance;
00935
00936 min_index = min_load_index;
00937
00938 for (int i = 0; i < Num_PEs; i++) {
00939 if (((&PE_Data[i])->available) && ((&PE_Data[i])->cluster == cluster)) {
00940 if (i != min_load_index) {
00941 if (fabs ((&PE_Data[i])->scaled_load - (&PE_Data[min_load_index])->scaled_load) <= load_tolerance) {
00942 if ((&PE_Data[i])->num_wan_msgs < (&PE_Data[min_index])->num_wan_msgs) {
00943 min_index = i;
00944 }
00945 }
00946 }
00947 }
00948 }
00949
00950 return (min_index);
00951 } else {
00952 if (_lb_args.debug() > 0) {
00953 CkPrintf ("[%d] GridHybridSeedLB was told to use bad mode (%d).\n", CkMyPe(), CK_LDB_GridHybridSeedLB_Mode);
00954 }
00955 return (-1);
00956 }
00957 }
00958
00959
00960
00961
00962
00963
00964
00965
00966
00967 void GridHybridSeedLB::Assign_Object_To_PE (int target_object, int target_pe)
00968 {
00969 (&Object_Data[target_object])->to_pe = target_pe;
00970
00971 (&PE_Data[target_pe])->num_objs += 1;
00972
00973 if ((&Object_Data[target_object])->num_lan_msgs > 0) {
00974 (&PE_Data[target_pe])->num_lan_objs += 1;
00975 (&PE_Data[target_pe])->num_lan_msgs += (&Object_Data[target_object])->num_lan_msgs;
00976 }
00977
00978 if ((&Object_Data[target_object])->num_wan_msgs > 0) {
00979 (&PE_Data[target_pe])->num_wan_objs += 1;
00980 (&PE_Data[target_pe])->num_wan_msgs += (&Object_Data[target_object])->num_wan_msgs;
00981 }
00982
00983 (&PE_Data[target_pe])->scaled_load += (&Object_Data[target_object])->load / (&PE_Data[target_pe])->relative_speed;
00984 }
00985
00986
00987
00988
00989
00990
00991
00992 void GridHybridSeedLB::work (LDStats *stats)
00993 {
00994 int target_pe;
00995 int target_object;
00996
00997
00998 if (_lb_args.debug() > 0) {
00999 CkPrintf ("[%d] GridHybridSeedLB is working (mode=%d, background load=%d, load tolerance=%f).\n", CkMyPe(), CK_LDB_GridHybridSeedLB_Mode, CK_LDB_GridHybridSeedLB_Background_Load, CK_LDB_GridHybridSeedLB_Load_Tolerance);
01000 }
01001
01002
01003 stats->makeCommHash ();
01004
01005
01006 Num_PEs = stats->nprocs();
01007 Num_Objects = stats->n_objs;
01008
01009 if (_lb_args.debug() > 0) {
01010 CkPrintf ("[%d] GridHybridSeedLB is examining %d PEs and %d objects.\n", CkMyPe(), Num_PEs, Num_Objects);
01011 }
01012
01013
01014 Initialize_PE_Data (stats);
01015
01016
01017 if (Available_PE_Count() < 1) {
01018 if (_lb_args.debug() > 0) {
01019 CkPrintf ("[%d] GridHybridSeedLB finds no available PEs -- no balancing done.\n", CkMyPe());
01020 }
01021
01022 delete [] PE_Data;
01023
01024 return;
01025 }
01026
01027
01028
01029 Num_Clusters = Compute_Number_Of_Clusters ();
01030 if (Num_Clusters < 1) {
01031 if (_lb_args.debug() > 0) {
01032 CkPrintf ("[%d] GridHybridSeedLB finds incomplete PE cluster map -- no balancing done.\n", CkMyPe());
01033 }
01034
01035 delete [] PE_Data;
01036
01037 return;
01038 }
01039
01040 if (_lb_args.debug() > 0) {
01041 CkPrintf ("[%d] GridHybridSeedLB finds %d clusters.\n", CkMyPe(), Num_Clusters);
01042 }
01043
01044
01045 Initialize_Object_Data (stats);
01046
01047
01048 Num_Migratable_Objects = Compute_Migratable_Object_Count ();
01049
01050
01051 Initialize_Cluster_Data ();
01052
01053
01054 Initialize_Communication_Matrix (stats);
01055
01056
01057 Partition_Objects_Into_Clusters (stats);
01058
01059
01060 Examine_InterObject_Messages (stats);
01061
01062
01063 Map_NonMigratable_Objects_To_PEs ();
01064
01065
01066 for (int i = 0; i < Num_Clusters; i++) {
01067
01068 while (1) {
01069 target_pe = Find_Minimum_PE (i);
01070
01071 if ((&PE_Data[target_pe])->num_objs == 0) {
01072 target_object = Find_Maximum_Border_Object (i);
01073 } else {
01074 target_object = Find_Maximum_Border_Object_From_Seeds (target_pe);
01075 if (target_object == -1) {
01076 target_object = Find_Maximum_Border_Object (i);
01077 }
01078 }
01079
01080 if ((target_object == -1) || (target_pe == -1)) {
01081 break;
01082 }
01083
01084 Assign_Object_To_PE (target_object, target_pe);
01085 }
01086
01087 while (1) {
01088 target_pe = Find_Minimum_PE (i);
01089
01090 target_object = Find_Maximum_Object_From_Seeds (target_pe);
01091 if (target_object == -1) {
01092 target_object = Find_Maximum_Object (i);
01093 }
01094
01095 if ((target_object == -1) || (target_pe == -1)) {
01096 break;
01097 }
01098
01099 Assign_Object_To_PE (target_object, target_pe);
01100 }
01101 }
01102
01103
01104 for (int i = 0; i < Num_Objects; i++) {
01105 stats->to_proc[i] = (&Object_Data[i])->to_pe;
01106
01107 if (_lb_args.debug() > 2) {
01108 CkPrintf ("[%d] GridHybridSeedLB migrates object %d from PE %d to PE %d.\n", CkMyPe(), i, stats->from_proc[i], stats->to_proc[i]);
01109 } else if (_lb_args.debug() > 1) {
01110 if (stats->to_proc[i] != stats->from_proc[i]) {
01111 CkPrintf ("[%d] GridHybridSeedLB migrates object %d from PE %d to PE %d.\n", CkMyPe(), i, stats->from_proc[i], stats->to_proc[i]);
01112 }
01113 }
01114 }
01115
01116
01117 delete [] Migratable_Objects;
01118 for (int i = 0; i < Num_Migratable_Objects; i++) {
01119 delete [] Communication_Matrix[i];
01120 }
01121 delete [] Communication_Matrix;
01122 delete [] Cluster_Data;
01123 delete [] Object_Data;
01124 delete [] PE_Data;
01125 }
01126
01127 #include "GridHybridSeedLB.def.h"