00001
00005
00010 #include "RefinerTemp.h"
00011
00012 int* RefinerTemp::AllocProcs(int count, BaseLB::LDStats* stats)
00013 {
00014 return new int[stats->n_objs];
00015 }
00016
00017 void RefinerTemp::FreeProcs(int* bufs)
00018 {
00019 delete [] bufs;
00020 }
00021
00022 void RefinerTemp::create(int count, BaseLB::LDStats* stats, int* procs)
00023 {
00024 #ifdef TEMP_LDB
00025 int i;
00026
00027
00028
00029
00030 numAvail = 0;
00031 for(i=0; i < P; i++) {
00032 processors[i].Id = i;
00033 processors[i].backgroundLoad = stats->procs[i].bg_walltime;
00034 processors[i].load = processors[i].backgroundLoad;
00035 processors[i].computeLoad = 0;
00036 processors[i].computeSet = new Set();
00037 processors[i].pe_speed = stats->procs[i].pe_speed;
00038
00039 processors[i].available = stats->procs[i].available;
00040 if (processors[i].available == true) numAvail++;
00041 }
00042
00043 for (i=0; i<stats->n_objs; i++)
00044 {
00045 LDObjData &odata = stats->objData[i];
00046 computes[i].Id = i;
00047 computes[i].id = odata.objID();
00048
00049 computes[i].load = odata.wallTime;
00050 computes[i].processor = -1;
00051 computes[i].oldProcessor = procs[i];
00052 computes[i].omid = odata.omID().id.idx;
00053 computes[i].migratable = odata.migratable;
00054 if (computes[i].oldProcessor >= P) {
00055 if (stats->complete_flag)
00056 CmiAbort("LB Panic: the old processor in RefineLB cannot be found, is this in a simulation mode?");
00057 else {
00058
00059 computes[i].oldProcessor = CrnRand()%P;
00060 }
00061 }
00062 }
00063
00064
00065 #endif
00066 }
00067
00068 void RefinerTemp::assign(computeInfo *c, int processor)
00069 {
00070 assign(c, &(processors[processor]));
00071 }
00072
00073 void RefinerTemp::assign(computeInfo *c, processorInfo *p)
00074 {
00075 c->processor = p->Id;
00076 p->computeSet->insert((InfoRecord *) c);
00077 int oldPe=c->oldProcessor;
00078 p->computeLoad += c->load*procFreq[oldPe];
00079 p->load = p->computeLoad + p->backgroundLoad*procFreq[p->Id];
00080
00081
00082
00083
00084
00085 }
00086
00087 void RefinerTemp::deAssign(computeInfo *c, processorInfo *p)
00088 {
00089 c->processor = -1;
00090 p->computeSet->remove(c);
00091 int oldPe=c->oldProcessor;
00092 p->computeLoad -= c->load*procFreq[p->Id];
00093 p->load = p->computeLoad + p->backgroundLoad*procFreq[p->Id];
00094 }
00095
00096 void RefinerTemp::computeAverage()
00097 {
00098 int i;
00099 double total = 0.;
00100 for (i=0; i<numComputes; i++) total += computes[i].load*procFreq[computes[i].oldProcessor];
00101
00102 for (i=0; i<P; i++)
00103 if (processors[i].available == true)
00104 total += processors[i].backgroundLoad*procFreq[processors[i].Id];
00105
00106 averageLoad = total/numAvail;
00107 totalInst=total;
00108 }
00109
00110 double RefinerTemp::computeMax()
00111 {
00112 int i;
00113 double max = -1.0;
00114 for (i=0; i<P; i++) {
00115 if (processors[i].available == true && processors[i].load > max)
00116
00117 max=processors[i].load/procFreqNew[processors[i].Id];
00118 }
00119 return max;
00120 }
00121
00122 double RefinerTemp::computeMax(int *maxPe)
00123 {
00124 int i;
00125 double max = -1.0,maxratio=-1.0;
00126 for (i=0; i<P; i++) {
00127
00128 if (processors[i].available == true && processors[i].load/procFreqNew[processors[i].Id] > maxratio)
00129 {
00130
00131
00132 maxratio=processors[i].load/procFreqNew[processors[i].Id];
00133 max=processors[i].load;
00134 *maxPe=processors[i].Id;
00135 }
00136 }
00137 return max;
00138 }
00139
00140 bool RefinerTemp::isHeavy(processorInfo *p)
00141 {
00142 if (p->available == true)
00143
00144 return p->load > overLoad*(totalInst*procFreqNew[p->Id]/sumFreqs);
00145 else {
00146 return p->computeSet->numElements() != 0;
00147 }
00148 }
00149
00150 bool RefinerTemp::isLight(processorInfo *p)
00151 {
00152 if (p->available == true)
00153
00154 return p->load < totalInst*procFreqNew[p->Id]/sumFreqs;
00155 else
00156 return false;
00157 }
00158
00159
00160 void RefinerTemp::removeComputes()
00161 {
00162 int first;
00163 Iterator nextCompute;
00164
00165 if (numAvail < P) {
00166 if (numAvail == 0) CmiAbort("No processor available!");
00167 for (first=0; first<P; first++)
00168 if (processors[first].available == true) break;
00169 for (int i=0; i<P; i++) {
00170 if (processors[i].available == false) {
00171 computeInfo *c = (computeInfo *)
00172 processors[i].computeSet->iterator((Iterator *)&nextCompute);
00173 while (c) {
00174 deAssign(c, &processors[i]);
00175 assign(c, &processors[first]);
00176 nextCompute.id++;
00177 c = (computeInfo *)
00178 processors[i].computeSet->next((Iterator *)&nextCompute);
00179 }
00180 }
00181 }
00182 }
00183 }
00184
00185 int RefinerTemp::refine()
00186 {
00187 #ifdef TEMP_LDB
00188 int i;
00189 int finish = 1;
00190 maxHeap *heavyProcessors = new maxHeap(P);
00191
00192 Set *lightProcessors = new Set();
00193 for (i=0; i<P; i++) {
00194 if (isHeavy(&processors[i])) {
00195
00196
00197 heavyProcessors->insert((InfoRecord *) &(processors[i]));
00198 } else if (isLight(&processors[i])) {
00199
00200
00201 lightProcessors->insert((InfoRecord *) &(processors[i]));
00202 }
00203 }
00204 int done = 0;
00205
00206 while (!done) {
00207 double bestSize;
00208 computeInfo *bestCompute;
00209 processorInfo *bestP;
00210
00211 processorInfo *donor = (processorInfo *) heavyProcessors->deleteMax();
00212 if (!donor) break;
00213
00214
00215 Iterator nextProcessor;
00216 processorInfo *p = (processorInfo *)
00217 lightProcessors->iterator((Iterator *) &nextProcessor);
00218 bestSize = 0;
00219 bestP = 0;
00220 bestCompute = 0;
00221
00222 while (p) {
00223 Iterator nextCompute;
00224 nextCompute.id = 0;
00225 computeInfo *c = (computeInfo *)
00226 donor->computeSet->iterator((Iterator *)&nextCompute);
00227
00228
00229 while (c) {
00230 int ind1 = c->id.getID()[0];
00231 int ind2 = c->id.getID()[1];
00232
00233 if (!c->migratable) {
00234 nextCompute.id++;
00235 c = (computeInfo *)
00236 donor->computeSet->next((Iterator *)&nextCompute);
00237 continue;
00238 }
00239
00240
00241
00242
00243
00244 if ( c->load*procFreq[c->oldProcessor] + p->load < overLoad*(totalInst*procFreqNew[p->Id]/sumFreqs)
00245 && ind1!=0 && ind2!=0) {
00246
00247
00248
00249
00250 if(c->load*procFreq[c->oldProcessor] > bestSize) {
00251
00252 bestSize = c->load*procFreq[c->oldProcessor];
00253 bestCompute = c;
00254 bestP = p;
00255 }
00256 }
00257 nextCompute.id++;
00258 c = (computeInfo *)
00259 donor->computeSet->next((Iterator *)&nextCompute);
00260 }
00261 p = (processorInfo *)
00262 lightProcessors->next((Iterator *) &nextProcessor);
00263 }
00264
00265
00266 if (bestCompute) {
00267
00268
00269
00270
00271
00272 deAssign(bestCompute, donor);
00273 assign(bestCompute, bestP);
00274 } else {
00275 finish = 0;
00276 break;
00277 }
00278
00279
00280 if(bestP->load > totalInst*procFreqNew[bestP->Id]/sumFreqs)
00281 lightProcessors->remove(bestP);
00282
00283 if (isHeavy(donor))
00284 heavyProcessors->insert((InfoRecord *) donor);
00285 else if (isLight(donor))
00286 lightProcessors->insert((InfoRecord *) donor);
00287 }
00288
00289 delete heavyProcessors;
00290 delete lightProcessors;
00291
00292 return finish;
00293 #else
00294 return 0;
00295 #endif
00296 }
00297
00298 int RefinerTemp::multirefine()
00299 {
00300 computeAverage();
00301 double avg = averageLoad;
00302 int maxPe=-1;
00303
00304 double max = computeMax(&maxPe);
00305
00306
00307 const double overloadStep = 0.01;
00308 const double overloadStart = 1.001;
00309
00310 double dCurOverload = max /(totalInst*procFreqNew[maxPe]/sumFreqs);
00311
00312 int minOverload = 0;
00313 int maxOverload = (int)((dCurOverload - overloadStart)/overloadStep + 1);
00314 double dMinOverload = minOverload * overloadStep + overloadStart;
00315 double dMaxOverload = maxOverload * overloadStep + overloadStart;
00316 int curOverload;
00317 int refineDone = 0;
00318
00319
00320 if (_lb_args.debug()>=1)
00321 CmiPrintf("dMinOverload: %f dMaxOverload: %f\n", dMinOverload, dMaxOverload);
00322
00323 overLoad = dMinOverload;
00324 if (refine())
00325 refineDone = 1;
00326 else {
00327 overLoad = dMaxOverload;
00328 if (!refine()) {
00329 CmiPrintf("ERROR: Could not refine at max overload\n");
00330 refineDone = 1;
00331 }
00332 }
00333
00334
00335 while (!refineDone) {
00336 if (maxOverload - minOverload <= 1)
00337 refineDone = 1;
00338 else {
00339 curOverload = (maxOverload + minOverload ) / 2;
00340
00341 overLoad = curOverload * overloadStep + overloadStart;
00342 if (_lb_args.debug()>=1)
00343 CmiPrintf("Testing curOverload %d = %f [min,max]= %d, %d\n", curOverload, overLoad, minOverload, maxOverload);
00344 if (refine())
00345 maxOverload = curOverload;
00346 else
00347 minOverload = curOverload;
00348 }
00349 }
00350 return 1;
00351 }
00352
00353
00354 RefinerTemp::RefinerTemp(
00355 double _overload,int *p,int *pn,int nProcs) {
00356 P=nProcs;
00357 overLoad = _overload; computes=0; processors=0;
00358 procFreq=p;procFreqNew=pn;
00359 sumFreqs=0;
00360 for(int i=0;i<P;i++)
00361 {
00362 sumFreqs+=procFreqNew[i];
00363 }
00364
00365 }
00366
00367 void RefinerTemp::Refine(int count, BaseLB::LDStats* stats,
00368 int* cur_p, int* new_p)
00369 {
00370 #ifdef TEMP_LDB
00371
00372
00373 P = count;
00374 numComputes = stats->n_objs;
00375 computes = new computeInfo[numComputes];
00376 processors = new processorInfo[count];
00377
00378 create(count, stats, cur_p);
00379
00380 int i;
00381 for (i=0; i<numComputes; i++)
00382 {
00383 int ind1 = computes[i].id.getID()[0];
00384 int ind2 = computes[i].id.getID()[1];
00385 if(ind1==0 && ind2==48) CkPrintf("----- assigning oldproc%d load:%f\n",computes[i].oldProcessor,computes[i].load);
00386
00387 assign((computeInfo *) &(computes[i]),
00388 (processorInfo *) &(processors[computes[i].oldProcessor]));
00389 }
00390 removeComputes();
00391
00392 computeAverage();
00393
00394 if (_lb_args.debug()>2) {
00395 CkPrintf("Old PE load (bg load): ");
00396 for (i=0; i<count; i++) CkPrintf("%d:%f(%f) ", i, processors[i].load, processors[i].backgroundLoad);
00397 CkPrintf("\n");
00398 }
00399
00400 multirefine();
00401
00402 int nmoves = 0;
00403 for (int pe=0; pe < P; pe++) {
00404 Iterator nextCompute;
00405 nextCompute.id = 0;
00406 computeInfo *c = (computeInfo *)
00407 processors[pe].computeSet->iterator((Iterator *)&nextCompute);
00408 while(c) {
00409 new_p[c->Id] = c->processor;
00410 if (new_p[c->Id] != cur_p[c->Id]) nmoves++;
00411
00412
00413 nextCompute.id++;
00414 c = (computeInfo *) processors[pe].computeSet->
00415 next((Iterator *)&nextCompute);
00416 }
00417 }
00418 if (_lb_args.debug()>2) {
00419 CkPrintf("New PE load: ");
00420 for (i=0; i<count; i++) CkPrintf("%f ", processors[i].load);
00421 CkPrintf("\n");
00422 }
00423 if (_lb_args.debug()>1)
00424 CkPrintf("RefinerTemp: moving %d obejcts. \n", nmoves);
00425 delete [] computes;
00426 delete [] processors;
00427 #endif
00428 }
00429
00430