00001
00002
00003
00004
00005
00006
00007
00008
00009 #include "GridHybridSeedLB.decl.h"
00010
00011 #include "GridHybridSeedLB.h"
00012 #include "manager.h"
00013
00014 CreateLBFunc_Def (GridHybridSeedLB, "Grid load balancer that uses hybrid seed technique to optimize communication graph")
00015
00016
00017
00018
00019
00020
00021 GridHybridSeedLB::GridHybridSeedLB (const CkLBOptions &opt) : CentralLB (opt)
00022 {
00023 char *value;
00024
00025
00026 lbname = (char *) "GridHybridSeedLB";
00027
00028 if (CkMyPe() == 0) {
00029 CkPrintf ("[%d] GridHybridSeedLB created.\n", CkMyPe());
00030 }
00031
00032 if (value = getenv ("CK_LDB_GRIDHYBRIDSEEDLB_MODE")) {
00033 CK_LDB_GridHybridSeedLB_Mode = atoi (value);
00034 } else {
00035 CK_LDB_GridHybridSeedLB_Mode = CK_LDB_GRIDHYBRIDSEEDLB_MODE;
00036 }
00037
00038 if (value = getenv ("CK_LDB_GRIDHYBRIDSEEDLB_BACKGROUND_LOAD")) {
00039 CK_LDB_GridHybridSeedLB_Background_Load = atoi (value);
00040 } else {
00041 CK_LDB_GridHybridSeedLB_Background_Load = CK_LDB_GRIDHYBRIDSEEDLB_BACKGROUND_LOAD;
00042 }
00043
00044 if (value = getenv ("CK_LDB_GRIDHYBRIDSEEDLB_LOAD_TOLERANCE")) {
00045 CK_LDB_GridHybridSeedLB_Load_Tolerance = atof (value);
00046 } else {
00047 CK_LDB_GridHybridSeedLB_Load_Tolerance = CK_LDB_GRIDHYBRIDSEEDLB_LOAD_TOLERANCE;
00048 }
00049
00050 manager_init ();
00051 }
00052
00053
00054
00055
00056
00057
00058 GridHybridSeedLB::GridHybridSeedLB (CkMigrateMessage *msg) : CentralLB (msg)
00059 {
00060 char *value;
00061
00062
00063 lbname = (char *) "GridHybridSeedLB";
00064
00065 if (value = getenv ("CK_LDB_GRIDHYBRIDSEEDLB_MODE")) {
00066 CK_LDB_GridHybridSeedLB_Mode = atoi (value);
00067 } else {
00068 CK_LDB_GridHybridSeedLB_Mode = CK_LDB_GRIDHYBRIDSEEDLB_MODE;
00069 }
00070
00071 if (value = getenv ("CK_LDB_GRIDHYBRIDSEEDLB_BACKGROUND_LOAD")) {
00072 CK_LDB_GridHybridSeedLB_Background_Load = atoi (value);
00073 } else {
00074 CK_LDB_GridHybridSeedLB_Background_Load = CK_LDB_GRIDHYBRIDSEEDLB_BACKGROUND_LOAD;
00075 }
00076
00077 if (value = getenv ("CK_LDB_GRIDHYBRIDSEEDLB_LOAD_TOLERANCE")) {
00078 CK_LDB_GridHybridSeedLB_Load_Tolerance = atof (value);
00079 } else {
00080 CK_LDB_GridHybridSeedLB_Load_Tolerance = CK_LDB_GRIDHYBRIDSEEDLB_LOAD_TOLERANCE;
00081 }
00082
00083 manager_init ();
00084 }
00085
00086
00087
00088
00089
00090
00091
00092 CmiBool GridHybridSeedLB::QueryBalanceNow (int step)
00093 {
00094 if (_lb_args.debug() > 2) {
00095 CkPrintf ("[%d] GridHybridSeedLB is balancing on step %d.\n", CkMyPe(), step);
00096 }
00097
00098 return (CmiTrue);
00099 }
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114 int GridHybridSeedLB::Get_Cluster (int pe)
00115 {
00116 #if CONVERSE_VERSION_VMI
00117 return (CmiGetCluster (pe));
00118 #else
00119 return (0);
00120 #endif
00121 }
00122
00123
00124
00125
00126
00127
00128 void GridHybridSeedLB::Initialize_PE_Data (CentralLB::LDStats *stats)
00129 {
00130 int min_speed;
00131
00132
00133 PE_Data = new PE_Data_T[Num_PEs];
00134
00135 min_speed = MAXINT;
00136 for (int i = 0; i < Num_PEs; i++) {
00137 (&PE_Data[i])->available = stats->procs[i].available;
00138 (&PE_Data[i])->cluster = Get_Cluster (i);
00139 (&PE_Data[i])->num_objs = 0;
00140 (&PE_Data[i])->num_lan_objs = 0;
00141 (&PE_Data[i])->num_lan_msgs = 0;
00142 (&PE_Data[i])->num_wan_objs = 0;
00143 (&PE_Data[i])->num_wan_msgs = 0;
00144 (&PE_Data[i])->relative_speed = 0.0;
00145 (&PE_Data[i])->scaled_load = 0.0;
00146
00147 if (stats->procs[i].pe_speed < min_speed) {
00148 min_speed = stats->procs[i].pe_speed;
00149 }
00150 }
00151
00152
00153
00154 for (int i = 0; i < Num_PEs; i++) {
00155 (&PE_Data[i])->relative_speed = (double) (stats->procs[i].pe_speed / min_speed);
00156 if (CK_LDB_GridHybridSeedLB_Background_Load) {
00157 (&PE_Data[i])->scaled_load += stats->procs[i].bg_walltime;
00158 }
00159 }
00160 }
00161
00162
00163
00164
00165
00166
00167 int GridHybridSeedLB::Available_PE_Count ()
00168 {
00169 int available_pe_count;
00170
00171
00172 available_pe_count = 0;
00173 for (int i = 0; i < Num_PEs; i++) {
00174 if ((&PE_Data[i])->available) {
00175 available_pe_count += 1;
00176 }
00177 }
00178
00179 return (available_pe_count);
00180 }
00181
00182
00183
00184
00185
00186
00187 int GridHybridSeedLB::Compute_Number_Of_Clusters ()
00188 {
00189 int max_cluster;
00190
00191
00192 max_cluster = 0;
00193 for (int i = 0; i < Num_PEs; i++) {
00194 if ((&PE_Data[i])->cluster < 0) {
00195 return (-1);
00196 }
00197
00198 if ((&PE_Data[i])->cluster > max_cluster) {
00199 max_cluster = (&PE_Data[i])->cluster;
00200 }
00201 }
00202
00203 return (max_cluster + 1);
00204 }
00205
00206
00207
00208
00209
00210
00211 void GridHybridSeedLB::Initialize_Object_Data (CentralLB::LDStats *stats)
00212 {
00213 Object_Data = new Object_Data_T[Num_Objects];
00214
00215 for (int i = 0; i < Num_Objects; i++) {
00216 (&Object_Data[i])->migratable = (&stats->objData[i])->migratable;
00217 (&Object_Data[i])->from_pe = stats->from_proc[i];
00218 (&Object_Data[i])->num_lan_msgs = 0;
00219 (&Object_Data[i])->num_wan_msgs = 0;
00220 (&Object_Data[i])->load = (&stats->objData[i])->wallTime;
00221 (&Object_Data[i])->secondary_index = -1;
00222
00223 if ((&Object_Data[i])->migratable) {
00224 (&Object_Data[i])->to_pe = -1;
00225 (&Object_Data[i])->cluster = -1;
00226 } else {
00227 (&Object_Data[i])->to_pe = (&Object_Data[i])->from_pe;
00228 (&Object_Data[i])->cluster = Get_Cluster ((&Object_Data[i])->from_pe);
00229 if (_lb_args.debug() > 1) {
00230 CkPrintf ("[%d] GridHybridSeedLB identifies object %d as non-migratable.\n", CkMyPe(), i);
00231 }
00232 }
00233 }
00234 }
00235
00236
00237
00238
00239
00240
00241 int GridHybridSeedLB::Compute_Migratable_Object_Count ()
00242 {
00243 int count;
00244
00245
00246 count = 0;
00247
00248 for (int i = 0; i < Num_Objects; i++) {
00249 if ((&Object_Data[i])->migratable) {
00250 count += 1;
00251 }
00252 }
00253
00254 return (count);
00255 }
00256
00257
00258
00259
00260
00261
00262 void GridHybridSeedLB::Initialize_Cluster_Data ()
00263 {
00264 int cluster;
00265 double min_total_cpu_power;
00266
00267
00268 Cluster_Data = new Cluster_Data_T[Num_Clusters];
00269
00270 for (int i = 0; i < Num_Clusters; i++) {
00271 (&Cluster_Data[i])->num_pes = 0;
00272 (&Cluster_Data[i])->total_cpu_power = 0.0;
00273 (&Cluster_Data[i])->scaled_cpu_power = 0.0;
00274 }
00275
00276
00277 for (int i = 0; i < Num_PEs; i++) {
00278 cluster = (&PE_Data[i])->cluster;
00279
00280 (&Cluster_Data[cluster])->num_pes += 1;
00281 (&Cluster_Data[cluster])->total_cpu_power += (&PE_Data[i])->relative_speed;
00282 }
00283
00284 min_total_cpu_power = MAXDOUBLE;
00285 for (int i = 0; i < Num_Clusters; i++) {
00286 if ((&Cluster_Data[i])->total_cpu_power < min_total_cpu_power) {
00287 min_total_cpu_power = (&Cluster_Data[i])->total_cpu_power;
00288 }
00289 }
00290
00291 for (int i = 0; i < Num_Clusters; i++) {
00292 (&Cluster_Data[i])->scaled_cpu_power = (double) ((&Cluster_Data[i])->total_cpu_power / min_total_cpu_power);
00293 }
00294 }
00295
00296
00297
00298
00299
00300
00301 void GridHybridSeedLB::Initialize_Communication_Matrix (CentralLB::LDStats *stats)
00302 {
00303 LDCommData *com_data;
00304 int send_object;
00305 int recv_object;
00306 int send_index;
00307 int recv_index;
00308 int num_objects;
00309 LDObjKey *recv_objects;
00310 int index;
00311
00312
00313 Migratable_Objects = new int[Num_Migratable_Objects];
00314
00315 index = 0;
00316 for (int i = 0; i < Num_Objects; i++) {
00317 if ((&Object_Data[i])->migratable) {
00318 (&Object_Data[i])->secondary_index = index;
00319 Migratable_Objects[index] = i;
00320 index += 1;
00321 }
00322 }
00323
00324
00325 Communication_Matrix = new int *[Num_Migratable_Objects];
00326 for (int i = 0; i < Num_Migratable_Objects; i++) {
00327 Communication_Matrix[i] = new int[Num_Migratable_Objects];
00328 for (int j = 0; j < Num_Migratable_Objects; j++) {
00329 Communication_Matrix[i][j] = 0;
00330 }
00331 }
00332
00333 for (int i = 0; i < stats->n_comm; i++) {
00334 com_data = &(stats->commData[i]);
00335 if ((!com_data->from_proc()) && (com_data->recv_type() == LD_OBJ_MSG)) {
00336 send_object = stats->getHash (com_data->sender);
00337 recv_object = stats->getHash (com_data->receiver.get_destObj());
00338
00339 if ((send_object < 0) || (send_object > Num_Objects) || (recv_object < 0) || (recv_object > Num_Objects)) {
00340 continue;
00341 }
00342
00343 if ((!(&Object_Data[send_object])->migratable) || (!(&Object_Data[recv_object])->migratable)) {
00344 continue;
00345 }
00346
00347 send_index = (&Object_Data[send_object])->secondary_index;
00348 recv_index = (&Object_Data[recv_object])->secondary_index;
00349
00350 Communication_Matrix[send_index][recv_index] += com_data->messages;
00351 Communication_Matrix[recv_index][send_index] += com_data->messages;
00352 } else if (com_data->receiver.get_type() == LD_OBJLIST_MSG) {
00353 send_object = stats->getHash (com_data->sender);
00354
00355 if ((send_object < 0) || (send_object > Num_Objects)) {
00356 continue;
00357 }
00358
00359 if (!(&Object_Data[send_object])->migratable) {
00360 continue;
00361 }
00362
00363 recv_objects = com_data->receiver.get_destObjs (num_objects);
00364
00365 for (int j = 0; j < num_objects; j++) {
00366 recv_object = stats->getHash (recv_objects[j]);
00367
00368 if ((recv_object < 0) || (recv_object > Num_Objects)) {
00369 continue;
00370 }
00371
00372 if (!(&Object_Data[recv_object])->migratable) {
00373 continue;
00374 }
00375
00376 send_index = (&Object_Data[send_object])->secondary_index;
00377 recv_index = (&Object_Data[recv_object])->secondary_index;
00378
00379 Communication_Matrix[send_index][recv_index] += com_data->messages;
00380 Communication_Matrix[recv_index][send_index] += com_data->messages;
00381 }
00382 }
00383 }
00384
00385 for (int i = 0; i < Num_Migratable_Objects; i++) {
00386 Communication_Matrix[i][i] = 0;
00387 }
00388 }
00389
00390
00391
00392
00393
00394
00395 void GridHybridSeedLB::Partition_Objects_Into_Clusters (CentralLB::LDStats *stats)
00396 {
00397 int index;
00398 int num_partitions;
00399 int *partition_to_cluster_map;
00400 int cluster;
00401 int partition;
00402 int partition_count;
00403 int *vertex_weights;
00404 int vertex;
00405 int *xadj;
00406 int num_edges;
00407 int *adjncy;
00408 int *edge_weights;
00409 int count;
00410 int weight_flag;
00411 int numbering_flag;
00412 int options[5];
00413 int edgecut;
00414 int *newmap;
00415
00416
00417 if (Num_Clusters == 1) {
00418 for (int i = 0; i < Num_Objects; i++) {
00419 (&Object_Data[i])->cluster = 0;
00420 }
00421
00422 return;
00423 }
00424
00425
00426
00427 num_partitions = 0;
00428 for (int i = 0; i < Num_Clusters; i++) {
00429 num_partitions += (int) ceil ((&Cluster_Data[i])->scaled_cpu_power);
00430 }
00431
00432 partition_to_cluster_map = new int[num_partitions];
00433
00434 cluster = 0;
00435 partition = 0;
00436 while (partition < num_partitions) {
00437 partition_count = (int) ceil ((&Cluster_Data[cluster])->scaled_cpu_power);
00438
00439 for (int i = partition; i < (partition + partition_count); i++) {
00440 partition_to_cluster_map[i] = cluster;
00441 }
00442
00443 partition += partition_count;
00444 cluster += 1;
00445 }
00446
00447 if ((CK_LDB_GridHybridSeedLB_Mode == 1) || (CK_LDB_GridHybridSeedLB_Mode == 3)) {
00448 vertex_weights = new int[Num_Migratable_Objects];
00449 vertex = 0;
00450 for (int i = 0; i < Num_Objects; i++) {
00451 if ((&Object_Data[i])->migratable) {
00452 vertex_weights[vertex] = (int) ceil ((&Object_Data[i])->load * 10000);
00453 vertex += 1;
00454 }
00455 }
00456 }
00457
00458
00459 xadj = new int[Num_Migratable_Objects + 1];
00460 num_edges = 0;
00461 for (int i = 0; i < Num_Migratable_Objects; i++) {
00462 for (int j = 0; j < Num_Migratable_Objects; j++) {
00463 if (Communication_Matrix[i][j] > 0) {
00464 num_edges += 1;
00465 }
00466 }
00467 }
00468 adjncy = new int[num_edges];
00469 edge_weights = new int[num_edges];
00470 count = 0;
00471 xadj[0] = 0;
00472 for (int i = 0; i < Num_Migratable_Objects; i++) {
00473 for (int j = 0; j < Num_Migratable_Objects; j++) {
00474 if (Communication_Matrix[i][j] > 0) {
00475 adjncy[count] = j;
00476 edge_weights[count] = Communication_Matrix[i][j];
00477 count += 1;
00478 }
00479 }
00480 xadj[i+1] = count;
00481 }
00482
00483 if ((CK_LDB_GridHybridSeedLB_Mode == 0) || (CK_LDB_GridHybridSeedLB_Mode == 2)) {
00484
00485 weight_flag = 1;
00486 numbering_flag = 0;
00487 options[0] = 0;
00488 newmap = new int[Num_Migratable_Objects];
00489
00490 METIS_PartGraphRecursive (&Num_Migratable_Objects, xadj, adjncy, NULL, edge_weights, &weight_flag, &numbering_flag, &num_partitions, options, &edgecut, newmap);
00491 } else if ((CK_LDB_GridHybridSeedLB_Mode == 1) || (CK_LDB_GridHybridSeedLB_Mode == 3)) {
00492
00493 weight_flag = 3;
00494 numbering_flag = 0;
00495 options[0] = 0;
00496 newmap = new int[Num_Migratable_Objects];
00497
00498 METIS_PartGraphRecursive (&Num_Migratable_Objects, xadj, adjncy, vertex_weights, edge_weights, &weight_flag, &numbering_flag, &num_partitions, options, &edgecut, newmap);
00499 } else {
00500 if (_lb_args.debug() > 0) {
00501 CkPrintf ("[%d] GridHybridSeedLB was told to use bad mode (%d).\n", CkMyPe(), CK_LDB_GridHybridSeedLB_Mode);
00502 }
00503 }
00504
00505
00506 for (int i = 0; i < Num_Migratable_Objects; i++) {
00507 partition = newmap[i];
00508 cluster = partition_to_cluster_map[partition];
00509
00510 index = Migratable_Objects[i];
00511
00512 (&Object_Data[index])->cluster = cluster;
00513 }
00514
00515
00516 delete [] newmap;
00517 delete [] edge_weights;
00518 delete [] adjncy;
00519 delete [] xadj;
00520 if ((CK_LDB_GridHybridSeedLB_Mode == 1) || (CK_LDB_GridHybridSeedLB_Mode == 3)) {
00521 delete [] vertex_weights;
00522 }
00523 delete [] partition_to_cluster_map;
00524 }
00525
00526
00527
00528
00529
00530
00531 void GridHybridSeedLB::Examine_InterObject_Messages (CentralLB::LDStats *stats)
00532 {
00533 LDCommData *com_data;
00534 int send_object;
00535 int send_cluster;
00536 int recv_object;
00537 int recv_cluster;
00538 LDObjKey *recv_objects;
00539 int num_objects;
00540
00541
00542 for (int i = 0; i < stats->n_comm; i++) {
00543 com_data = &(stats->commData[i]);
00544 if ((!com_data->from_proc()) && (com_data->recv_type() == LD_OBJ_MSG)) {
00545 send_object = stats->getHash (com_data->sender);
00546 recv_object = stats->getHash (com_data->receiver.get_destObj());
00547
00548 if ((send_object < 0) || (send_object > Num_Objects) || (recv_object < 0) || (recv_object > Num_Objects)) {
00549 continue;
00550 }
00551
00552 send_cluster = (&Object_Data[send_object])->cluster;
00553 recv_cluster = (&Object_Data[recv_object])->cluster;
00554
00555 if (send_cluster == recv_cluster) {
00556 (&Object_Data[send_object])->num_lan_msgs += com_data->messages;
00557 } else {
00558 (&Object_Data[send_object])->num_wan_msgs += com_data->messages;
00559 }
00560 } else if (com_data->receiver.get_type() == LD_OBJLIST_MSG) {
00561 send_object = stats->getHash (com_data->sender);
00562
00563 if ((send_object < 0) || (send_object > Num_Objects)) {
00564 continue;
00565 }
00566
00567 send_cluster = (&Object_Data[send_object])->cluster;
00568
00569 recv_objects = com_data->receiver.get_destObjs (num_objects);
00570
00571 for (int j = 0; j < num_objects; j++) {
00572 recv_object = stats->getHash (recv_objects[j]);
00573
00574 if ((recv_object < 0) || (recv_object > Num_Objects)) {
00575 continue;
00576 }
00577
00578 recv_cluster = (&Object_Data[recv_object])->cluster;
00579
00580 if (send_cluster == recv_cluster) {
00581 (&Object_Data[send_object])->num_lan_msgs += com_data->messages;
00582 } else {
00583 (&Object_Data[send_object])->num_wan_msgs += com_data->messages;
00584 }
00585 }
00586 }
00587 }
00588 }
00589
00590
00591
00592
00593
00594
00595 void GridHybridSeedLB::Map_NonMigratable_Objects_To_PEs ()
00596 {
00597 for (int i = 0; i < Num_Objects; i++) {
00598 if (!((&Object_Data[i])->migratable)) {
00599 if (_lb_args.debug() > 1) {
00600 CkPrintf ("[%d] GridHybridSeedLB identifies object %d as non-migratable.\n", CkMyPe(), i);
00601 }
00602
00603 Assign_Object_To_PE (i, (&Object_Data[i])->from_pe);
00604 }
00605 }
00606 }
00607
00608
00609
00610
00611
00612
00613
00614
00615
00616
00617
00618 int GridHybridSeedLB::Find_Maximum_Object (int cluster)
00619 {
00620 int max_index;
00621 double max_load;
00622
00623
00624 max_index = -1;
00625 max_load = -1.0;
00626
00627 for (int i = 0; i < Num_Objects; i++) {
00628 if ((((&Object_Data[i])->cluster == cluster) && ((&Object_Data[i])->to_pe == -1)) && ((&Object_Data[i])->load > max_load)) {
00629 max_index = i;
00630 max_load = (&Object_Data[i])->load;
00631 }
00632 }
00633
00634 return (max_index);
00635 }
00636
00637
00638
00639
00640
00641
00642
00643
00644
00645
00646
00647 int GridHybridSeedLB::Find_Maximum_Border_Object (int cluster)
00648 {
00649 int max_index;
00650 int max_load_index;
00651 double max_load;
00652 int max_wan_msgs_index;
00653 int max_wan_msgs;
00654 double load_tolerance;
00655
00656
00657 max_index = -1;
00658
00659 max_load_index = -1;
00660 max_load = -1.0;
00661
00662 max_wan_msgs_index = -1;
00663 max_wan_msgs = -1;
00664
00665 for (int i = 0; i < Num_Objects; i++) {
00666 if (((&Object_Data[i])->cluster == cluster) && ((&Object_Data[i])->to_pe == -1) && ((&Object_Data[i])->num_wan_msgs > 0)) {
00667 if ((&Object_Data[i])->load > max_load) {
00668 max_load_index = i;
00669 max_load = (&Object_Data[i])->load;
00670 }
00671 if ((&Object_Data[i])->num_wan_msgs > max_wan_msgs) {
00672 max_wan_msgs_index = i;
00673 max_wan_msgs = (&Object_Data[i])->num_wan_msgs;
00674 }
00675 }
00676 }
00677
00678 if (max_load_index < 0) {
00679 return (max_load_index);
00680 }
00681
00682 if ((&Object_Data[max_load_index])->num_wan_msgs >= (&Object_Data[max_wan_msgs_index])->num_wan_msgs) {
00683 return (max_load_index);
00684 }
00685
00686 if (CK_LDB_GridHybridSeedLB_Load_Tolerance <= 0.0) {
00687 return (max_load_index);
00688 }
00689
00690 load_tolerance = (&Object_Data[max_load_index])->load * CK_LDB_GridHybridSeedLB_Load_Tolerance;
00691
00692 max_index = max_load_index;
00693
00694 for (int i = 0; i < Num_Objects; i++) {
00695 if (((&Object_Data[i])->cluster == cluster) && ((&Object_Data[i])->to_pe == -1) && ((&Object_Data[i])->num_wan_msgs > 0) && ((&Object_Data[i])->num_wan_msgs > (&Object_Data[max_index])->num_wan_msgs) && (fabs ((&Object_Data[max_load_index])->load - (&Object_Data[i])->load) <= load_tolerance)) {
00696 max_index = i;
00697 }
00698 }
00699
00700 return (max_index);
00701 }
00702
00703
00704
00705
00706
00707
00708 int GridHybridSeedLB::Find_Maximum_Object_From_Seeds (int pe)
00709 {
00710 int cluster;
00711 int max_index;
00712 int max_comm_events;
00713 int max_load_index;
00714 double max_load;
00715 double load_tolerance;
00716 int comm_events;
00717
00718
00719 max_index = -1;
00720
00721 max_comm_events = 0;
00722
00723 max_load_index = -1;
00724 max_load = -1.0;
00725
00726 cluster = (&PE_Data[pe])->cluster;
00727
00728 for (int i = 0; i < Num_Objects; i++) {
00729 if (((&Object_Data[i])->cluster == cluster) && ((&Object_Data[i])->to_pe == -1) && ((&Object_Data[i])->load > max_load)) {
00730 max_load_index = i;
00731 max_load = (&Object_Data[i])->load;
00732 }
00733 }
00734
00735 if (max_load_index < 0) {
00736 return (max_load_index);
00737 }
00738
00739 if (CK_LDB_GridHybridSeedLB_Load_Tolerance <= 0.0) {
00740 return (max_load_index);
00741 }
00742
00743 load_tolerance = (&Object_Data[max_load_index])->load * CK_LDB_GridHybridSeedLB_Load_Tolerance;
00744
00745 max_index = max_load_index;
00746
00747 for (int i = 0; i < Num_Objects; i++) {
00748 if ((&Object_Data[i])->to_pe == pe) {
00749 for (int j = 0; j < Num_Objects; j++) {
00750 if (((&Object_Data[j])->cluster == cluster) && ((&Object_Data[j])->to_pe == -1) && (fabs ((&Object_Data[max_load_index])->load - (&Object_Data[j])->load) <= load_tolerance)) {
00751 comm_events = Compute_Communication_Events (i, j);
00752 if (comm_events > max_comm_events) {
00753 max_index = j;
00754 max_comm_events = comm_events;
00755 }
00756 }
00757 }
00758 }
00759 }
00760
00761 return (max_index);
00762 }
00763
00764
00765
00766
00767
00768
00769 int GridHybridSeedLB::Find_Maximum_Border_Object_From_Seeds (int pe)
00770 {
00771 int cluster;
00772 int max_index;
00773 int max_comm_events;
00774 int max_load_index;
00775 double max_load;
00776 double load_tolerance;
00777 int comm_events;
00778
00779
00780 max_index = -1;
00781
00782 max_comm_events = 0;
00783
00784 max_load_index = -1;
00785 max_load = -1.0;
00786
00787 cluster = (&PE_Data[pe])->cluster;
00788
00789 for (int i = 0; i < Num_Objects; i++) {
00790 if (((&Object_Data[i])->cluster == cluster) && ((&Object_Data[i])->to_pe == -1) && ((&Object_Data[i])->num_wan_msgs > 0) && ((&Object_Data[i])->load > max_load)) {
00791 max_load_index = i;
00792 max_load = (&Object_Data[i])->load;
00793 }
00794 }
00795
00796 if (max_load_index < 0) {
00797 return (max_load_index);
00798 }
00799
00800 if (CK_LDB_GridHybridSeedLB_Load_Tolerance <= 0.0) {
00801 return (max_load_index);
00802 }
00803
00804 load_tolerance = (&Object_Data[max_load_index])->load * CK_LDB_GridHybridSeedLB_Load_Tolerance;
00805
00806 max_index = max_load_index;
00807
00808 for (int i = 0; i < Num_Objects; i++) {
00809 if ((&Object_Data[i])->to_pe == pe) {
00810 for (int j = 0; j < Num_Objects; j++) {
00811 if (((&Object_Data[j])->cluster == cluster) && ((&Object_Data[j])->to_pe == -1) && ((&Object_Data[j])->num_wan_msgs > 0) && (fabs ((&Object_Data[max_load_index])->load - (&Object_Data[j])->load) <= load_tolerance)) {
00812 comm_events = Compute_Communication_Events (i, j);
00813 if (comm_events > max_comm_events) {
00814 max_index = j;
00815 max_comm_events = comm_events;
00816 }
00817 }
00818 }
00819 }
00820 }
00821
00822 return (max_index);
00823 }
00824
00825
00826
00827
00828
00829
00830 int GridHybridSeedLB::Compute_Communication_Events (int obj1, int obj2)
00831 {
00832 int send_index;
00833 int recv_index;
00834
00835
00836 send_index = (&Object_Data[obj1])->secondary_index;
00837 recv_index = (&Object_Data[obj2])->secondary_index;
00838
00839 return (Communication_Matrix[send_index][recv_index]);
00840 }
00841
00842
00843
00844
00845
00846
00847
00848
00849
00850
00851
00852
00853
00854
00855
00856
00857
00858 int GridHybridSeedLB::Find_Minimum_PE (int cluster)
00859 {
00860 if ((CK_LDB_GridHybridSeedLB_Mode == 0) || (CK_LDB_GridHybridSeedLB_Mode == 1)) {
00861 int min_index;
00862 int min_objs;
00863
00864
00865 min_index = -1;
00866 min_objs = MAXINT;
00867
00868 for (int i = 0; i < Num_PEs; i++) {
00869 if (((&PE_Data[i])->available) && ((&PE_Data[i])->cluster == cluster)) {
00870 if ((&PE_Data[i])->num_objs < min_objs) {
00871 min_index = i;
00872 min_objs = (&PE_Data[i])->num_objs;
00873 } else if (((&PE_Data[i])->num_objs == min_objs) &&
00874 ((&PE_Data[i])->num_wan_objs < (&PE_Data[min_index])->num_wan_objs)) {
00875 min_index = i;
00876 } else if (((&PE_Data[i])->num_objs == min_objs) &&
00877 ((&PE_Data[i])->num_wan_objs == (&PE_Data[min_index])->num_wan_objs) &&
00878 ((&PE_Data[i])->num_wan_msgs < (&PE_Data[min_index])->num_wan_msgs)) {
00879 min_index = i;
00880 } else if (((&PE_Data[i])->num_objs == min_objs) &&
00881 ((&PE_Data[i])->num_wan_objs == (&PE_Data[min_index])->num_wan_objs) &&
00882 ((&PE_Data[i])->num_wan_msgs == (&PE_Data[min_index])->num_wan_msgs) &&
00883 ((&PE_Data[i])->scaled_load < (&PE_Data[min_index])->scaled_load)) {
00884 min_index = i;
00885 }
00886 }
00887 }
00888
00889 return (min_index);
00890 } else if ((CK_LDB_GridHybridSeedLB_Mode == 2) || (CK_LDB_GridHybridSeedLB_Mode == 3)) {
00891 int min_index;
00892 int min_load_index;
00893 double min_scaled_load;
00894 int min_wan_msgs_index;
00895 int min_wan_msgs;
00896 double load_tolerance;
00897
00898
00899 min_index = -1;
00900
00901 min_load_index = -1;
00902 min_scaled_load = MAXDOUBLE;
00903
00904 min_wan_msgs_index = -1;
00905 min_wan_msgs = MAXINT;
00906
00907 for (int i = 0; i < Num_PEs; i++) {
00908 if (((&PE_Data[i])->available) && ((&PE_Data[i])->cluster == cluster)) {
00909 if ((&PE_Data[i])->scaled_load < min_scaled_load) {
00910 min_load_index = i;
00911 min_scaled_load = (&PE_Data[i])->scaled_load;
00912 }
00913 if ((&PE_Data[i])->num_wan_msgs < min_wan_msgs) {
00914 min_wan_msgs_index = i;
00915 min_wan_msgs = (&PE_Data[i])->num_wan_msgs;
00916 }
00917 }
00918 }
00919
00920
00921 if (min_load_index < 0) {
00922 return (min_load_index);
00923 }
00924
00925
00926
00927
00928 if ((&PE_Data[min_load_index])->num_wan_msgs <= (&PE_Data[min_wan_msgs_index])->num_wan_msgs) {
00929 return (min_load_index);
00930 }
00931
00932
00933
00934
00935
00936 load_tolerance = (&PE_Data[min_load_index])->scaled_load * CK_LDB_GridHybridSeedLB_Load_Tolerance;
00937
00938 min_index = min_load_index;
00939
00940 for (int i = 0; i < Num_PEs; i++) {
00941 if (((&PE_Data[i])->available) && ((&PE_Data[i])->cluster == cluster)) {
00942 if (i != min_load_index) {
00943 if (fabs ((&PE_Data[i])->scaled_load - (&PE_Data[min_load_index])->scaled_load) <= load_tolerance) {
00944 if ((&PE_Data[i])->num_wan_msgs < (&PE_Data[min_index])->num_wan_msgs) {
00945 min_index = i;
00946 }
00947 }
00948 }
00949 }
00950 }
00951
00952 return (min_index);
00953 } else {
00954 if (_lb_args.debug() > 0) {
00955 CkPrintf ("[%d] GridHybridSeedLB was told to use bad mode (%d).\n", CkMyPe(), CK_LDB_GridHybridSeedLB_Mode);
00956 }
00957 return (-1);
00958 }
00959 }
00960
00961
00962
00963
00964
00965
00966
00967
00968
00969 void GridHybridSeedLB::Assign_Object_To_PE (int target_object, int target_pe)
00970 {
00971 (&Object_Data[target_object])->to_pe = target_pe;
00972
00973 (&PE_Data[target_pe])->num_objs += 1;
00974
00975 if ((&Object_Data[target_object])->num_lan_msgs > 0) {
00976 (&PE_Data[target_pe])->num_lan_objs += 1;
00977 (&PE_Data[target_pe])->num_lan_msgs += (&Object_Data[target_object])->num_lan_msgs;
00978 }
00979
00980 if ((&Object_Data[target_object])->num_wan_msgs > 0) {
00981 (&PE_Data[target_pe])->num_wan_objs += 1;
00982 (&PE_Data[target_pe])->num_wan_msgs += (&Object_Data[target_object])->num_wan_msgs;
00983 }
00984
00985 (&PE_Data[target_pe])->scaled_load += (&Object_Data[target_object])->load / (&PE_Data[target_pe])->relative_speed;
00986 }
00987
00988
00989
00990
00991
00992
00993
00994 void GridHybridSeedLB::work (LDStats *stats)
00995 {
00996 int target_pe;
00997 int target_object;
00998
00999
01000 if (_lb_args.debug() > 0) {
01001 CkPrintf ("[%d] GridHybridSeedLB is working (mode=%d, background load=%d, load tolerance=%f).\n", CkMyPe(), CK_LDB_GridHybridSeedLB_Mode, CK_LDB_GridHybridSeedLB_Background_Load, CK_LDB_GridHybridSeedLB_Load_Tolerance);
01002 }
01003
01004
01005 stats->makeCommHash ();
01006
01007
01008 Num_PEs = stats->nprocs();
01009 Num_Objects = stats->n_objs;
01010
01011 if (_lb_args.debug() > 0) {
01012 CkPrintf ("[%d] GridHybridSeedLB is examining %d PEs and %d objects.\n", CkMyPe(), Num_PEs, Num_Objects);
01013 }
01014
01015
01016 Initialize_PE_Data (stats);
01017
01018
01019 if (Available_PE_Count() < 1) {
01020 if (_lb_args.debug() > 0) {
01021 CkPrintf ("[%d] GridHybridSeedLB finds no available PEs -- no balancing done.\n", CkMyPe());
01022 }
01023
01024 delete [] PE_Data;
01025
01026 return;
01027 }
01028
01029
01030
01031 Num_Clusters = Compute_Number_Of_Clusters ();
01032 if (Num_Clusters < 1) {
01033 if (_lb_args.debug() > 0) {
01034 CkPrintf ("[%d] GridHybridSeedLB finds incomplete PE cluster map -- no balancing done.\n", CkMyPe());
01035 }
01036
01037 delete [] PE_Data;
01038
01039 return;
01040 }
01041
01042 if (_lb_args.debug() > 0) {
01043 CkPrintf ("[%d] GridHybridSeedLB finds %d clusters.\n", CkMyPe(), Num_Clusters);
01044 }
01045
01046
01047 Initialize_Object_Data (stats);
01048
01049
01050 Num_Migratable_Objects = Compute_Migratable_Object_Count ();
01051
01052
01053 Initialize_Cluster_Data ();
01054
01055
01056 Initialize_Communication_Matrix (stats);
01057
01058
01059 Partition_Objects_Into_Clusters (stats);
01060
01061
01062 Examine_InterObject_Messages (stats);
01063
01064
01065 Map_NonMigratable_Objects_To_PEs ();
01066
01067
01068 for (int i = 0; i < Num_Clusters; i++) {
01069
01070 while (1) {
01071 target_pe = Find_Minimum_PE (i);
01072
01073 if ((&PE_Data[target_pe])->num_objs == 0) {
01074 target_object = Find_Maximum_Border_Object (i);
01075 } else {
01076 target_object = Find_Maximum_Border_Object_From_Seeds (target_pe);
01077 if (target_object == -1) {
01078 target_object = Find_Maximum_Border_Object (i);
01079 }
01080 }
01081
01082 if ((target_object == -1) || (target_pe == -1)) {
01083 break;
01084 }
01085
01086 Assign_Object_To_PE (target_object, target_pe);
01087 }
01088
01089 while (1) {
01090 target_pe = Find_Minimum_PE (i);
01091
01092 target_object = Find_Maximum_Object_From_Seeds (target_pe);
01093 if (target_object == -1) {
01094 target_object = Find_Maximum_Object (i);
01095 }
01096
01097 if ((target_object == -1) || (target_pe == -1)) {
01098 break;
01099 }
01100
01101 Assign_Object_To_PE (target_object, target_pe);
01102 }
01103 }
01104
01105
01106 for (int i = 0; i < Num_Objects; i++) {
01107 stats->to_proc[i] = (&Object_Data[i])->to_pe;
01108
01109 if (_lb_args.debug() > 2) {
01110 CkPrintf ("[%d] GridHybridSeedLB migrates object %d from PE %d to PE %d.\n", CkMyPe(), i, stats->from_proc[i], stats->to_proc[i]);
01111 } else if (_lb_args.debug() > 1) {
01112 if (stats->to_proc[i] != stats->from_proc[i]) {
01113 CkPrintf ("[%d] GridHybridSeedLB migrates object %d from PE %d to PE %d.\n", CkMyPe(), i, stats->from_proc[i], stats->to_proc[i]);
01114 }
01115 }
01116 }
01117
01118
01119 delete [] Migratable_Objects;
01120 for (int i = 0; i < Num_Migratable_Objects; i++) {
01121 delete [] Communication_Matrix[i];
01122 }
01123 delete [] Communication_Matrix;
01124 delete [] Cluster_Data;
01125 delete [] Object_Data;
01126 delete [] PE_Data;
01127 }
01128
01129 #include "GridHybridSeedLB.def.h"