00001
00005
00010 #include "Refiner.h"
00011
00012 int* Refiner::AllocProcs(int count, BaseLB::LDStats* stats)
00013 {
00014 return new int[stats->n_objs];
00015 }
00016
00017 void Refiner::FreeProcs(int* bufs)
00018 {
00019 delete [] bufs;
00020 }
00021
00022 void Refiner::create(int count, BaseLB::LDStats* stats, int* procs)
00023 {
00024 int i;
00025
00026
00027
00028
00029 numAvail = 0;
00030 for(i=0; i < P; i++) {
00031 processors[i].Id = i;
00032 processors[i].backgroundLoad = stats->procs[i].bg_walltime;
00033 processors[i].load = processors[i].backgroundLoad;
00034 processors[i].computeLoad = 0;
00035 processors[i].computeSet = new Set();
00036 processors[i].pe_speed = stats->procs[i].pe_speed;
00037
00038 processors[i].available = stats->procs[i].available;
00039 if (processors[i].available == CmiTrue) numAvail++;
00040 }
00041
00042 for (i=0; i<stats->n_objs; i++)
00043 {
00044 LDObjData &odata = stats->objData[i];
00045 computes[i].Id = i;
00046 computes[i].id = odata.objID();
00047
00048 computes[i].load = odata.wallTime;
00049 computes[i].processor = -1;
00050 computes[i].oldProcessor = procs[i];
00051 computes[i].migratable = odata.migratable;
00052 if (computes[i].oldProcessor >= P) {
00053 if (stats->complete_flag)
00054 CmiAbort("LB Panic: the old processor in RefineLB cannot be found, is this in a simulation mode?");
00055 else {
00056
00057 computes[i].oldProcessor = CrnRand()%P;
00058 }
00059 }
00060 }
00061
00062
00063 }
00064
00065 void Refiner::assign(computeInfo *c, int processor)
00066 {
00067 assign(c, &(processors[processor]));
00068 }
00069
00070 void Refiner::assign(computeInfo *c, processorInfo *p)
00071 {
00072 c->processor = p->Id;
00073 p->computeSet->insert((InfoRecord *) c);
00074 p->computeLoad += c->load;
00075 p->load = p->computeLoad + p->backgroundLoad;
00076 }
00077
00078 void Refiner::deAssign(computeInfo *c, processorInfo *p)
00079 {
00080 c->processor = -1;
00081 p->computeSet->remove(c);
00082 p->computeLoad -= c->load;
00083 p->load = p->computeLoad + p->backgroundLoad;
00084 }
00085
00086 void Refiner::computeAverage()
00087 {
00088 int i;
00089 double total = 0.;
00090 for (i=0; i<numComputes; i++) total += computes[i].load;
00091
00092 for (i=0; i<P; i++)
00093 if (processors[i].available == CmiTrue)
00094 total += processors[i].backgroundLoad;
00095
00096 averageLoad = total/numAvail;
00097 }
00098
00099 double Refiner::computeMax()
00100 {
00101 int i;
00102 double max = -1.0;
00103 for (i=0; i<P; i++) {
00104 if (processors[i].available == CmiTrue && processors[i].load > max)
00105 max = processors[i].load;
00106 }
00107 return max;
00108 }
00109
00110 int Refiner::isHeavy(processorInfo *p)
00111 {
00112 if (p->available == CmiTrue)
00113 return p->load > overLoad*averageLoad;
00114 else {
00115 return p->computeSet->numElements() != 0;
00116 }
00117 }
00118
00119 int Refiner::isLight(processorInfo *p)
00120 {
00121 if (p->available == CmiTrue)
00122 return p->load < averageLoad;
00123 else
00124 return 0;
00125 }
00126
00127
00128 void Refiner::removeComputes()
00129 {
00130 int first;
00131 Iterator nextCompute;
00132
00133 if (numAvail < P) {
00134 if (numAvail == 0) CmiAbort("No processor available!");
00135 for (first=0; first<P; first++)
00136 if (processors[first].available == CmiTrue) break;
00137 for (int i=0; i<P; i++) {
00138 if (processors[i].available == CmiFalse) {
00139 computeInfo *c = (computeInfo *)
00140 processors[i].computeSet->iterator((Iterator *)&nextCompute);
00141 while (c) {
00142 deAssign(c, &processors[i]);
00143 assign(c, &processors[first]);
00144 nextCompute.id++;
00145 c = (computeInfo *)
00146 processors[i].computeSet->next((Iterator *)&nextCompute);
00147 }
00148 }
00149 }
00150 }
00151 }
00152
00153 int Refiner::refine()
00154 {
00155 int i;
00156 int finish = 1;
00157 maxHeap *heavyProcessors = new maxHeap(P);
00158
00159 Set *lightProcessors = new Set();
00160 for (i=0; i<P; i++) {
00161 if (isHeavy(&processors[i])) {
00162
00163
00164 heavyProcessors->insert((InfoRecord *) &(processors[i]));
00165 } else if (isLight(&processors[i])) {
00166
00167
00168 lightProcessors->insert((InfoRecord *) &(processors[i]));
00169 }
00170 }
00171 int done = 0;
00172
00173 while (!done) {
00174 double bestSize;
00175 computeInfo *bestCompute;
00176 processorInfo *bestP;
00177
00178 processorInfo *donor = (processorInfo *) heavyProcessors->deleteMax();
00179 if (!donor) break;
00180
00181
00182 Iterator nextProcessor;
00183 processorInfo *p = (processorInfo *)
00184 lightProcessors->iterator((Iterator *) &nextProcessor);
00185 bestSize = 0;
00186 bestP = 0;
00187 bestCompute = 0;
00188
00189 while (p) {
00190 Iterator nextCompute;
00191 nextCompute.id = 0;
00192 computeInfo *c = (computeInfo *)
00193 donor->computeSet->iterator((Iterator *)&nextCompute);
00194
00195
00196 while (c) {
00197 if (!c->migratable) {
00198 nextCompute.id++;
00199 c = (computeInfo *)
00200 donor->computeSet->next((Iterator *)&nextCompute);
00201 continue;
00202 }
00203
00204
00205 if ( c->load + p->load < overLoad*averageLoad) {
00206
00207
00208
00209 if(c->load > bestSize) {
00210 bestSize = c->load;
00211 bestCompute = c;
00212 bestP = p;
00213 }
00214 }
00215 nextCompute.id++;
00216 c = (computeInfo *)
00217 donor->computeSet->next((Iterator *)&nextCompute);
00218 }
00219 p = (processorInfo *)
00220 lightProcessors->next((Iterator *) &nextProcessor);
00221 }
00222
00223 if (bestCompute) {
00224
00225
00226
00227 deAssign(bestCompute, donor);
00228 assign(bestCompute, bestP);
00229 } else {
00230 finish = 0;
00231 break;
00232 }
00233
00234 if (bestP->load > averageLoad)
00235 lightProcessors->remove(bestP);
00236
00237 if (isHeavy(donor))
00238 heavyProcessors->insert((InfoRecord *) donor);
00239 else if (isLight(donor))
00240 lightProcessors->insert((InfoRecord *) donor);
00241 }
00242
00243 delete heavyProcessors;
00244 delete lightProcessors;
00245
00246 return finish;
00247 }
00248
00249 int Refiner::multirefine()
00250 {
00251 computeAverage();
00252 double avg = averageLoad;
00253 double max = computeMax();
00254
00255 const double overloadStep = 0.01;
00256 const double overloadStart = 1.001;
00257 double dCurOverload = max / avg;
00258
00259 int minOverload = 0;
00260 int maxOverload = (int)((dCurOverload - overloadStart)/overloadStep + 1);
00261 double dMinOverload = minOverload * overloadStep + overloadStart;
00262 double dMaxOverload = maxOverload * overloadStep + overloadStart;
00263 int curOverload;
00264 int refineDone = 0;
00265 if (_lb_args.debug()>=1)
00266 CmiPrintf("dMinOverload: %f dMaxOverload: %f\n", dMinOverload, dMaxOverload);
00267
00268 overLoad = dMinOverload;
00269 if (refine())
00270 refineDone = 1;
00271 else {
00272 overLoad = dMaxOverload;
00273 if (!refine()) {
00274 CmiPrintf("ERROR: Could not refine at max overload\n");
00275 refineDone = 1;
00276 }
00277 }
00278
00279
00280 while (!refineDone) {
00281 if (maxOverload - minOverload <= 1)
00282 refineDone = 1;
00283 else {
00284 curOverload = (maxOverload + minOverload ) / 2;
00285
00286 overLoad = curOverload * overloadStep + overloadStart;
00287 if (_lb_args.debug()>=1)
00288 CmiPrintf("Testing curOverload %d = %f [min,max]= %d, %d\n", curOverload, overLoad, minOverload, maxOverload);
00289 if (refine())
00290 maxOverload = curOverload;
00291 else
00292 minOverload = curOverload;
00293 }
00294 }
00295 return 1;
00296 }
00297
00298 void Refiner::Refine(int count, BaseLB::LDStats* stats,
00299 int* cur_p, int* new_p)
00300 {
00301
00302
00303 P = count;
00304 numComputes = stats->n_objs;
00305 computes = new computeInfo[numComputes];
00306 processors = new processorInfo[count];
00307
00308 create(count, stats, cur_p);
00309
00310 int i;
00311 for (i=0; i<numComputes; i++)
00312 assign((computeInfo *) &(computes[i]),
00313 (processorInfo *) &(processors[computes[i].oldProcessor]));
00314
00315 removeComputes();
00316
00317 computeAverage();
00318
00319 if (_lb_args.debug()>2) {
00320 CkPrintf("Old PE load (bg load): ");
00321 for (i=0; i<count; i++) CkPrintf("%d:%f(%f) ", i, processors[i].load, processors[i].backgroundLoad);
00322 CkPrintf("\n");
00323 }
00324
00325 multirefine();
00326
00327 int nmoves = 0;
00328 for (int pe=0; pe < P; pe++) {
00329 Iterator nextCompute;
00330 nextCompute.id = 0;
00331 computeInfo *c = (computeInfo *)
00332 processors[pe].computeSet->iterator((Iterator *)&nextCompute);
00333 while(c) {
00334 new_p[c->Id] = c->processor;
00335 if (new_p[c->Id] != cur_p[c->Id]) nmoves++;
00336
00337
00338 nextCompute.id++;
00339 c = (computeInfo *) processors[pe].computeSet->
00340 next((Iterator *)&nextCompute);
00341 }
00342 }
00343 if (_lb_args.debug()>2) {
00344 CkPrintf("New PE load: ");
00345 for (i=0; i<count; i++) CkPrintf("%f ", processors[i].load);
00346 CkPrintf("\n");
00347 }
00348 if (_lb_args.debug()>1)
00349 CkPrintf("Refiner: moving %d obejcts. \n", nmoves);
00350 delete [] computes;
00351 delete [] processors;
00352 }
00353
00354