00001
00002
00003
00004
00005
00006
00007
00008 #include <math.h>
00009 #include <stdlib.h>
00010 #include "RefineTopoLB.decl.h"
00011
00012 #include "RefineTopoLB.h"
00013
00014 #define alpha PER_MESSAGE_SEND_OVERHEAD_DEFAULT
00015 #define beta PER_BYTE_SEND_OVERHEAD_DEFAULT
00016 #define DEG_THRES 0.50
00017 #define EPSILON -0.001
00018
00019 #define _lb_debug_on 0
00020 #define _lb_debug2_on 0
00021 #define _make_new_grouping_ 0
00022 #define _USE_MAX_HOPBYTES_ 1
00023
00024 extern int quietModeRequested;
00025
00026 CreateLBFunc_Def(RefineTopoLB,"TopoLB: Balance objects based on the network topology")
00027
00028
00029 RefineTopoLB::RefineTopoLB(const CkLBOptions &opt) : CBase_RefineTopoLB (opt)
00030 {
00031 lbname = "RefineTopoLB";
00032 if (CkMyPe () == 0 && !quietModeRequested) {
00033 CkPrintf ("CharmLB> RefineTopoLB created.\n");
00034 }
00035 }
00036
00037 bool RefineTopoLB::QueryBalanceNow (int _step)
00038 {
00039 return true;
00040 }
00041
00042 void RefineTopoLB :: work(LDStats *stats)
00043 {
00044 int i, j;
00045 int n_pes = stats->nprocs();
00046
00047 if (_lb_args.debug() >= 2) {
00048 CkPrintf("In TopoLB Strategy...\n");
00049 }
00050
00051
00052 int proc;
00053 for (proc = 0; proc < n_pes; proc++) {
00054 if (stats->procs[proc].available)
00055 break;
00056 }
00057
00058 if (proc == n_pes) {
00059 CmiAbort ("TopoLB: no available processors!");
00060 }
00061
00062 removeNonMigratable(stats, n_pes);
00063
00064 if(_lb_debug_on) {
00065 CkPrintf("Num of procs: %d\n", n_pes);
00066 CkPrintf("Num of objs: %d\n", stats->n_objs);
00067 }
00068
00069
00070 LBtopoFn topofn;
00071 topofn = LBTopoLookup(_lbtopo);
00072 if (topofn == NULL)
00073 {
00074 char str[1024];
00075 CmiPrintf("TopoLB> Fatal error: Unknown topology: %s. Choose from:\n", _lbtopo);
00076 printoutTopo();
00077 sprintf(str, "TopoLB> Fatal error: Unknown topology: %s", _lbtopo);
00078 CmiAbort(str);
00079 }
00080 topo = topofn(n_pes);
00081
00082 if(_lb_debug_on)
00083 CkPrintf("before computing partitions...\n");
00084
00085 int *newmap = new int[stats->n_objs];
00086 if(_make_new_grouping_)
00087 computePartitions(stats, n_pes, newmap);
00088 else
00089 {
00090 for(int i=0;i<stats->n_objs;i++)
00091 {
00092 newmap[i]=stats->from_proc[i];
00093 }
00094 }
00095
00096 if(_lb_debug_on)
00097 CkPrintf("before allocating dataStructures...\n");
00098 allocateDataStructures(n_pes);
00099 if(_lb_debug_on)
00100 CkPrintf("before initizlizing dataStructures...\n");
00101 initDataStructures(stats, n_pes, newmap);
00102 if(_lb_debug_on)
00103 CkPrintf("After initizlizing dataStructures...\n");
00104
00105 for(i = 0; i < n_pes; i++)
00106 assign[i]=i;
00107
00108
00109
00110 if(_lb_debug_on)
00111 printDataStructures(n_pes, stats->n_objs,newmap);
00112
00113 bool *swapdone=new bool[n_pes];
00114 for(i = 0; i < n_pes; i++)
00115 swapdone[i]=false;
00116
00117
00118
00119
00120
00121
00122 double totalGain=0;
00123 for(i = 0; i < n_pes; i++)
00124 {
00125
00126 if(_USE_MAX_HOPBYTES_)
00127 {
00128 updateCommUA(n_pes);
00129 }
00130 int cpart=-1;
00131 double maxComm=-1;
00132 for(j = 0; j < n_pes; j++)
00133 {
00134 if(swapdone[j]) continue;
00135 if(commUA[j]>maxComm)
00136 {
00137 maxComm=commUA[j];
00138 cpart=j;
00139 }
00140 }
00141 CmiAssert(cpart!=-1);
00142
00143
00144 int swapcpart=-1;
00145 double gainMax=-1;
00146 double gain=-1;;
00147
00148 for(j = 0; j < n_pes; j++)
00149 {
00150 if(j==cpart)
00151 continue;
00152
00153 gain=findSwapGain(j, cpart, n_pes);
00154
00155
00156 if(gain>gainMax && gain>0)
00157 {
00158 gainMax=gain;
00159 swapcpart=j;
00160 }
00161 }
00162 if(swapcpart==-1)
00163 {
00164 swapdone[cpart]=true;
00165 continue;
00166 }
00167 totalGain+=gainMax;
00168 CmiAssert(swapcpart!=-1);
00169
00170
00171 int temp=assign[cpart];
00172 assign[cpart]=assign[swapcpart];
00173 assign[swapcpart]=temp;
00174 swapdone[cpart]=true;
00175
00176
00177
00178
00179 }
00180
00181 for(i=0;i<stats->n_objs;i++)
00182 {
00183 stats->to_proc[i]= assign[newmap[i]];
00184 }
00185 if(_lb_debug2_on)
00186 {
00187
00188 double hbval1=getHopBytesNew(NULL, n_pes);
00189 CkPrintf(" Original hopBytes : %lf Avg comm hops: %lf\n", hbval1,hbval1/total_comm);
00190 double hbval2=getHopBytesNew(assign, n_pes);
00191 CkPrintf(" Resulting hopBytes : %lf Avg comm hops: %lf\n", hbval2,hbval2/total_comm);
00192 CkPrintf(" Percentage gain %.2lf\n",(hbval1-hbval2)*100/hbval1);
00193 CkPrintf("\n");
00194 }
00195 freeDataStructures(n_pes);
00196 delete[] newmap;
00197 delete[] swapdone;
00198 }
00199
00200 double RefineTopoLB::findSwapGain(int cpart1, int cpart2, int n_pes)
00201 {
00202 double oldvalue=0;
00203 int proc1=assign[cpart1];
00204 int proc2=assign[cpart2];
00205 int proci=-1;
00206
00207 for(int i = 0; i < n_pes; i++)
00208 {
00209 proci=assign[i];
00210 if(i!=cpart1 && i!=cpart2)
00211 {
00212
00213
00214 oldvalue+=(comm[cpart1][i]-comm[cpart2][i])*(dist[proc1][proci]-dist[proc2][proci]);
00215
00216 }
00217 }
00218 return oldvalue;
00219 }
00220
00221 double RefineTopoLB::getCpartHopBytes(int cpart, int proc, int count)
00222 {
00223 double totalHB=0;
00224 for(int i=0;i<count;i++)
00225 {
00226 if(assign[i]==proc)
00227 {
00228 totalHB+=comm[cpart][i]*dist[proc][assign[cpart]];
00229 }
00230 else
00231 {
00232 totalHB+=comm[cpart][i]*dist[proc][assign[i]];
00233 }
00234 }
00235 return totalHB;
00236 }
00237
00238
00239
00240
00241
00242
00243
00244
00245
00246
00247
00248
00249
00250
00251
00252
00253
00254
00255
00256
00257
00258
00259
00260 void RefineTopoLB::updateCommUA(int count)
00261 {
00262 int i,j;
00263 for(i=0;i<count;i++)
00264 {
00265 commUA[i]=0;
00266 for(j=0;j<count;j++)
00267 commUA[i]+=comm[i][j]*dist[assign[i]][assign[j]];
00268 }
00269 }
00270
00271 #include "RefineTopoLB.def.h"