00001 #include <charm++.h>
00002
00003
00004
00005 #include "controlPoints.h"
00006 #include "trace-controlPoints.h"
00007 #include "LBDatabase.h"
00008 #include "controlPoints.h"
00009 #include "charm++.h"
00010 #include "trace-projections.h"
00011 #include <pathHistory.h>
00012 #include "cp_effects.h"
00013 #include <iostream>
00014 #include <math.h>
00015 #include <climits>
00016
00017 #if CMK_WITH_CONTROLPOINT
00018
00019 #define roundDouble(x) ((long)(x+0.5))
00020 #define myAbs(x) (((x)>=0.0)?(x):(-1.0*(x)))
00021 #define isInRange(v,a,b) ( ((v)<=(a)&&(v)>=(b)) || ((v)<=(b)&&(v)>=(a)) )
00022
00023 inline double closestInRange(double v, double a, double b){
00024 return (v<a) ? a : ((v>b)?b:v);
00025 }
00026
00027
00028
00029
00030
00034 using namespace std;
00035
00036 #define DEFAULT_CONTROL_POINT_SAMPLE_PERIOD 10000000
00037
00038
00039
00040
00041
00042 static void periodicProcessControlPoints(void* ptr, double currWallTime);
00043
00044
00045
00046 CProxy_controlPointManager controlPointManagerProxy;
00047 int random_seed;
00048 long controlPointSamplePeriod;
00049 int whichTuningScheme;
00050 bool writeDataFileAtShutdown;
00051 bool shouldFilterOutputData;
00052 bool loadDataFileAtStartup;
00053 bool shouldGatherMemoryUsage;
00054 bool shouldGatherUtilization;
00055 bool shouldGatherAll;
00056 char CPDataFilename[512];
00057
00058 extern bool enableCPTracing;
00059
00062 std::map<std::string, int> defaultControlPointValues;
00063
00064
00065
00066 typedef enum tuningSchemeEnum {RandomSelection, SimulatedAnnealing, ExhaustiveSearch, CriticalPathAutoPrioritization, UseBestKnownTiming, UseSteering, MemoryAware, Simplex, DivideAndConquer, AlwaysDefaults, LDBPeriod, LDBPeriodLinear, LDBPeriodQuadratic, LDBPeriodOptimal} tuningScheme;
00067
00068
00069
00070 void printTuningScheme(){
00071 switch(whichTuningScheme){
00072 case RandomSelection:
00073 CkPrintf("Tuning Scheme: RandomSelection\n");
00074 break;
00075 case AlwaysDefaults:
00076 CkPrintf("Tuning Scheme: AlwaysDefaults\n");
00077 break;
00078 case SimulatedAnnealing:
00079 CkPrintf("Tuning Scheme: SimulatedAnnealing\n");
00080 break;
00081 case ExhaustiveSearch:
00082 CkPrintf("Tuning Scheme: ExhaustiveSearch\n");
00083 break;
00084 case CriticalPathAutoPrioritization:
00085 CkPrintf("Tuning Scheme: CriticalPathAutoPrioritization\n");
00086 break;
00087 case UseBestKnownTiming:
00088 CkPrintf("Tuning Scheme: UseBestKnownTiming\n");
00089 break;
00090 case UseSteering:
00091 CkPrintf("Tuning Scheme: UseSteering\n");
00092 break;
00093 case MemoryAware:
00094 CkPrintf("Tuning Scheme: MemoryAware\n");
00095 break;
00096 case Simplex:
00097 CkPrintf("Tuning Scheme: Simplex Algorithm\n");
00098 break;
00099 case DivideAndConquer:
00100 CkPrintf("Tuning Scheme: Divide & Conquer Algorithm\n");
00101 break;
00102 case LDBPeriod:
00103 CkPrintf("Tuning Scheme: Load Balancing Period Steering (Constant Prediction)\n");
00104 break;
00105 case LDBPeriodLinear:
00106 CkPrintf("Tuning Scheme: Load Balancing Period Steering (Linear Prediction)\n");
00107 break;
00108 case LDBPeriodQuadratic:
00109 CkPrintf("Tuning Scheme: Load Balancing Period Steering (Quadratic Prediction)\n");
00110 break;
00111 case LDBPeriodOptimal:
00112 CkPrintf("Tuning Scheme: Load Balancing Period Steering (Optimal Prediction)\n");
00113 break;
00114 default:
00115 CkPrintf("Unknown tuning scheme\n");
00116 break;
00117 }
00118 fflush(stdout);
00119 }
00120
00121
00122
00124 CkReduction::reducerType idleTimeReductionType;
00126 CkReductionMsg *idleTimeReduction(int nMsg,CkReductionMsg **msgs){
00127 double ret[3];
00128 if(nMsg > 0){
00129 CkAssert(msgs[0]->getSize()==3*sizeof(double));
00130 double *m=(double *)msgs[0]->getData();
00131 ret[0]=m[0];
00132 ret[1]=m[1];
00133 ret[2]=m[2];
00134 }
00135 for (int i=1;i<nMsg;i++) {
00136 CkAssert(msgs[i]->getSize()==3*sizeof(double));
00137 double *m=(double *)msgs[i]->getData();
00138 ret[0]=min(ret[0],m[0]);
00139 ret[1]+=m[1];
00140 ret[2]=max(ret[2],m[2]);
00141 }
00142 return CkReductionMsg::buildNew(3*sizeof(double),ret);
00143 }
00144
00145
00146
00148 CkReduction::reducerType allMeasuresReductionType;
00150 #define ALL_REDUCTION_SIZE 12
00151 CkReductionMsg *allMeasuresReduction(int nMsg,CkReductionMsg **msgs){
00152 double ret[ALL_REDUCTION_SIZE];
00153 if(nMsg > 0){
00154 CkAssert(msgs[0]->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
00155 double *m=(double *)msgs[0]->getData();
00156 memcpy(ret, m, ALL_REDUCTION_SIZE*sizeof(double) );
00157 }
00158 for (int i=1;i<nMsg;i++) {
00159 CkAssert(msgs[i]->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
00160 double *m=(double *)msgs[i]->getData();
00161
00162 ret[0]=min(ret[0],m[0]);
00163 ret[1]+=m[1];
00164 ret[2]=max(ret[2],m[2]);
00165
00166 ret[3]=min(ret[3],m[3]);
00167 ret[4]+=m[4];
00168 ret[5]=max(ret[5],m[5]);
00169
00170 ret[6]=max(ret[6],m[6]);
00171
00172 ret[7]+=m[7];
00173 ret[8]+=m[8];
00174 ret[9]+=m[9];
00175 ret[10]+=m[10];
00176
00177 ret[11]+=m[11];
00178 }
00179 return CkReductionMsg::buildNew(ALL_REDUCTION_SIZE*sizeof(double),ret);
00180 }
00181
00182
00184 void registerCPReductions(void) {
00185 idleTimeReductionType=CkReduction::addReducer(idleTimeReduction);
00186 allMeasuresReductionType=CkReduction::addReducer(allMeasuresReduction);
00187 }
00188
00189
00190
00191
00192
00193
00196 unsigned int randInt(unsigned int num, const char* name, int seed=0){
00197 CkAssert(num > 0);
00198
00199 unsigned long hash = 0;
00200 unsigned int c;
00201 unsigned char * str = (unsigned char*)name;
00202
00203 while (c = *str++){
00204 unsigned int c2 = (c+64)%128;
00205 unsigned int c3 = (c2*5953)%127;
00206 hash = c3 + (hash << 6) + (hash << 16) - hash;
00207 }
00208
00209 unsigned long shuffled1 = (hash*2083)%7907;
00210 unsigned long shuffled2 = (seed*4297)%2017;
00211
00212 unsigned long shuffled3 = (random_seed*4799)%7919;
00213
00214 unsigned int namehash = shuffled3 ^ shuffled1 ^ shuffled2;
00215
00216 unsigned int result = ((namehash * 6029) % 1117) % num;
00217
00218 CkAssert(result >=0 && result < num);
00219 return result;
00220 }
00221
00222
00223
00224 controlPointManager::controlPointManager() {
00225 generatedPlanForStep = -1;
00226
00227 exitWhenReady = false;
00228 alreadyRequestedMemoryUsage = false;
00229 alreadyRequestedIdleTime = false;
00230 alreadyRequestedAll = false;
00231
00232 instrumentedPhase * newPhase = new instrumentedPhase();
00233 allData.phases.push_back(newPhase);
00234
00235 frameworkShouldAdvancePhase = false;
00236 haveControlPointChangeCallback = false;
00237
00238
00239 phase_id = 0;
00240
00241 if(loadDataFileAtStartup){
00242 loadDataFile();
00243 }
00244
00245
00246 if(CkMyPe() == 0){
00247 CcdCallFnAfterOnPE((CcdVoidFn)periodicProcessControlPoints, (void*)NULL, controlPointSamplePeriod, CkMyPe());
00248 }
00249
00250 traceRegisterUserEvent("No currently executing message", 5000);
00251 traceRegisterUserEvent("Zero time along critical path", 5010);
00252 traceRegisterUserEvent("Positive total time along critical path", 5020);
00253 traceRegisterUserEvent("env->setPathHistory()", 6000);
00254 traceRegisterUserEvent("Critical Path", 5900);
00255 traceRegisterUserEvent("Table Entry", 5901);
00256
00257
00258 #if USER_EVENT_FOR_REGISTERTERMINALPATH
00259 traceRegisterUserEvent("registerTerminalPath", 100);
00260 #endif
00261
00262 }
00263
00264
00265 controlPointManager::~controlPointManager(){
00266
00267 }
00268
00269 void controlPointManager::pup(PUP::er &p)
00270 {
00271 CBase_controlPointManager::pup(p);
00272
00273
00274 p|generatedPlanForStep;
00275 p|exitWhenReady;
00276 p|alreadyRequestedMemoryUsage;
00277 p|alreadyRequestedIdleTime;
00278 p|alreadyRequestedAll;
00279 p|frameworkShouldAdvancePhase;
00280 p|haveControlPointChangeCallback;
00281 p|phase_id;
00282 }
00283
00284
00286 void controlPointManager::loadDataFile(){
00287 ifstream infile(CPDataFilename);
00288 vector<std::string> names;
00289 std::string line;
00290
00291 while(getline(infile,line)){
00292 if(line[0] != '#')
00293 break;
00294 }
00295
00296 int numTimings = 0;
00297 std::istringstream n(line);
00298 n >> numTimings;
00299
00300 while(getline(infile,line)){
00301 if(line[0] != '#')
00302 break;
00303 }
00304
00305 int numControlPointNames = 0;
00306 std::istringstream n2(line);
00307 n2 >> numControlPointNames;
00308
00309 for(int i=0; i<numControlPointNames; i++){
00310 getline(infile,line);
00311 names.push_back(line);
00312 }
00313
00314 for(int i=0;i<numTimings;i++){
00315 getline(infile,line);
00316 while(line[0] == '#')
00317 getline(infile,line);
00318
00319 instrumentedPhase * ips = new instrumentedPhase();
00320
00321 std::istringstream iss(line);
00322
00323
00324 iss >> ips->memoryUsageMB;
00325
00326
00327
00328 iss >> ips->idleTime.min;
00329 iss >> ips->idleTime.avg;
00330 iss >> ips->idleTime.max;
00331
00332
00333 iss >> ips->overheadTime.min;
00334 iss >> ips->overheadTime.avg;
00335 iss >> ips->overheadTime.max;
00336
00337
00338 iss >> ips->bytesPerInvoke;
00339
00340
00341 iss >> ips->grainSize;
00342
00343
00344 for(int cp=0;cp<numControlPointNames;cp++){
00345 int cpvalue;
00346 iss >> cpvalue;
00347 ips->controlPoints.insert(make_pair(names[cp],cpvalue));
00348 }
00349
00350
00351 double mt;
00352 iss >> mt;
00353
00354
00355
00356 double time;
00357
00358 while(iss >> time){
00359 ips->times.push_back(time);
00360 #if DEBUGPRINT > 5
00361 CkPrintf("read time %lf from file\n", time);
00362 #endif
00363 }
00364
00365 allData.phases.push_back(ips);
00366
00367 }
00368
00369 infile.close();
00370 }
00371
00372
00373
00375 void controlPointManager::writeDataFile(){
00376 CkPrintf("============= writeDataFile() to %s ============\n", CPDataFilename);
00377 ofstream outfile(CPDataFilename);
00378 allData.cleanupNames();
00379
00380
00381
00382
00383 if(shouldFilterOutputData){
00384 allData.verify();
00385 allData.filterOutIncompletePhases();
00386 }
00387
00388
00389
00390 if(allData.toString().length() > 10){
00391 outfile << allData.toString();
00392 } else {
00393 outfile << " No data available to save to disk " << endl;
00394 }
00395 outfile.close();
00396 }
00397
00399 void controlPointManager::setCPCallback(CkCallback cb, bool _frameworkShouldAdvancePhase){
00400 frameworkShouldAdvancePhase = _frameworkShouldAdvancePhase;
00401 controlPointChangeCallback = cb;
00402 haveControlPointChangeCallback = true;
00403 }
00404
00405
00407 void controlPointManager::setFrameworkAdvancePhase(bool _frameworkShouldAdvancePhase){
00408 frameworkShouldAdvancePhase = _frameworkShouldAdvancePhase;
00409 }
00410
00413 void controlPointManager::processControlPoints(){
00414
00415 #if DEBUGPRINT
00416 CkPrintf("[%d] processControlPoints() haveControlPointChangeCallback=%d frameworkShouldAdvancePhase=%d\n", CkMyPe(), (int)haveControlPointChangeCallback, (int)frameworkShouldAdvancePhase);
00417 #endif
00418
00419
00420
00421
00422 const int s = allData.phases.size();
00423
00424 #if DEBUGPRINT
00425 CkPrintf("\n\nExamining critical paths and priorities and idle times (num phases=%d)\n", s );
00426 for(int p=0;p<s;++p){
00427 const instrumentedPhase &phase = allData.phases[p];
00428 const idleTimeContainer &idle = phase.idleTime;
00429
00430 vector<double> const × = phase.times;
00431
00432 CkPrintf("Phase %d:\n", p);
00433 idle.print();
00434
00435
00436
00437
00438
00439
00440
00441
00442
00443
00444 CkPrintf("Timings:\n");
00445 for(int i=0;i<times.size(); i++){
00446 CkPrintf("%lf ", times[i]);
00447 }
00448 CkPrintf("\n");
00449
00450 }
00451
00452 CkPrintf("\n\n");
00453
00454
00455 #endif
00456
00457
00458
00459
00460
00461 #if 0
00462 if( s%5 == 4) {
00463
00464
00465 int whichPhase=-1;
00466 for(int p=0; p<s; ++p){
00467 const instrumentedPhase &phase = allData.phases[p];
00468 const idleTimeContainer &idle = phase.idleTime;
00469 if(idle.isValid() && phase.criticalPaths.size()>0 ){
00470 whichPhase = p;
00471 }
00472 }
00473
00474
00475 CkPrintf("Examining phase %d which has a valid idle time and critical paths\n", whichPhase);
00476 const instrumentedPhase &phase = allData.phases[whichPhase];
00477 const idleTimeContainer &idle = phase.idleTime;
00478
00479 if(idle.min > 0.1){
00480 CkPrintf("Min PE idle is HIGH. %.2lf%% > 10%%\n", idle.min*100.0);
00481
00482
00483 int medianCriticalPathIdx = phase.medianCriticalPathIdx();
00484 const PathHistory &path = phase.criticalPaths[medianCriticalPathIdx];
00485 CkAssert(phase.criticalPaths.size() > 0);
00486 CkAssert(phase.criticalPaths.size() > medianCriticalPathIdx);
00487
00488 CkPrintf("Median Critical Path has time %lf\n", path.getTotalTime());
00489
00490 if(phase.times[medianCriticalPathIdx] > 1.2 * path.getTotalTime()){
00491 CkPrintf("The application step(%lf) is taking significantly longer than the critical path(%lf). BAD\n",phase.times[medianCriticalPathIdx], path.getTotalTime() );
00492
00493
00494 CkPrintf("Finding control points related to the critical path\n");
00495 int cpcount = 0;
00496 std::set<std::string> controlPointsAffectingCriticalPath;
00497
00498
00499 for(int e=0;e<path.getNumUsed();e++){
00500 if(path.getUsedCount(e)>0){
00501 int ep = path.getUsedEp(e);
00502
00503 std::map<std::string, std::set<int> >::iterator iter;
00504 for(iter=affectsPrioritiesEP.begin(); iter!= affectsPrioritiesEP.end(); ++iter){
00505 if(iter->second.count(ep)>0){
00506 controlPointsAffectingCriticalPath.insert(iter->first);
00507 CkPrintf("Control Point \"%s\" affects the critical path\n", iter->first.c_str());
00508 cpcount++;
00509 }
00510 }
00511
00512 }
00513 }
00514
00515
00516 if(cpcount==0){
00517 CkPrintf("No control points are known to affect the critical path\n");
00518 } else {
00519 double beginTime = CmiWallTimer();
00520
00521 CkPrintf("Attempting to modify control point values for %d control points that affect the critical path\n", controlPointsAffectingCriticalPath.size());
00522
00523 newControlPoints = phase.controlPoints;
00524
00525 if(frameworkShouldAdvancePhase){
00526 gotoNextPhase();
00527 }
00528
00529 if(haveControlPointChangeCallback){
00530 #if DEBUGPRINT
00531 CkPrintf("Calling control point change callback\n");
00532 #endif
00533
00534 controlPointMsg *msg = new (8*sizeof(int)) controlPointMsg;
00535 *((int*)CkPriorityPtr(msg)) = -INT_MAX;
00536 CkSetQueueing(msg, CK_QUEUEING_IFIFO);
00537 controlPointChangeCallback.send(msg);
00538
00539 }
00540
00541
00542
00543
00544 char textDescription[4096*2];
00545 textDescription[0] = '\0';
00546
00547 std::map<std::string,int>::iterator newCP;
00548 for(newCP = newControlPoints.begin(); newCP != newControlPoints.end(); ++ newCP){
00549 if( controlPointsAffectingCriticalPath.count(newCP->first) > 0 ){
00550
00551 int lowerbound = controlPointSpace[newCP->first].first;
00552 if(newCP->second > lowerbound){
00553 newControlPointsAvailable = true;
00554 newCP->second --;
00555 }
00556 }
00557 }
00558
00559
00560 if(1){
00561 std::map<std::string,int>::iterator newCP;
00562 for(newCP = newControlPoints.begin(); newCP != newControlPoints.end(); ++ newCP){
00563 sprintf(textDescription+strlen(textDescription), "<br>\"%s\"=%d", newCP->first.c_str(), newCP->second);
00564 }
00565 }
00566
00567 char *userEventString = new char[4096*2];
00568 sprintf(userEventString, "Adjusting control points affecting critical path: %s ***", textDescription);
00569
00570 static int userEventCounter = 20000;
00571 CkPrintf("USER EVENT: %s\n", userEventString);
00572
00573 traceRegisterUserEvent(userEventString, userEventCounter);
00574 traceUserBracketEvent(userEventCounter, beginTime, CmiWallTimer());
00575 userEventCounter++;
00576
00577 }
00578
00579 } else {
00580 CkPrintf("The application step(%lf) is dominated mostly by the critical path(%lf). GOOD\n",phase.times[medianCriticalPathIdx], path.getTotalTime() );
00581 }
00582
00583
00584 }
00585
00586 } else {
00587
00588
00589 }
00590
00591 #endif
00592
00593
00594
00595 if(frameworkShouldAdvancePhase){
00596 gotoNextPhase();
00597 }
00598
00599 if(haveControlPointChangeCallback){
00600
00601 controlPointMsg *msg = new (8*sizeof(int)) controlPointMsg;
00602 *((int*)CkPriorityPtr(msg)) = -INT_MAX;
00603 CkSetQueueing(msg, CK_QUEUEING_IFIFO);
00604 controlPointChangeCallback.send(msg);
00605 }
00606
00607 }
00608
00610 bool controlPointManager::controlPointAffectsThisEP(int ep){
00611 std::map<std::string, std::set<int> >::iterator iter;
00612 for(iter=affectsPrioritiesEP.begin(); iter!= affectsPrioritiesEP.end(); ++iter){
00613 if(iter->second.count(ep)>0){
00614 return true;
00615 }
00616 }
00617 return false;
00618 }
00619
00621 bool controlPointManager::controlPointAffectsThisArray(int array){
00622 std::map<std::string, std::set<int> >::iterator iter;
00623 for(iter=affectsPrioritiesArray.begin(); iter!= affectsPrioritiesArray.end(); ++iter){
00624 if(iter->second.count(array)>0){
00625 return true;
00626 }
00627 }
00628 return false;
00629 }
00630
00631
00633 instrumentedPhase * controlPointManager::currentPhaseData(){
00634 int s = allData.phases.size();
00635 CkAssert(s>=1);
00636 return allData.phases[s-1];
00637 }
00638
00639
00641 instrumentedPhase * controlPointManager::previousPhaseData(){
00642 int s = allData.phases.size();
00643 if(s >= 2 && phase_id > 0) {
00644 return allData.phases[s-2];
00645 } else {
00646 return NULL;
00647 }
00648 }
00649
00651 instrumentedPhase * controlPointManager::twoAgoPhaseData(){
00652 int s = allData.phases.size();
00653 if(s >= 3 && phase_id > 0) {
00654 return allData.phases[s-3];
00655 } else {
00656 return NULL;
00657 }
00658 }
00659
00660
00662 void controlPointManager::gotoNextPhase(){
00663 CkPrintf("gotoNextPhase shouldGatherAll=%d enableCPTracing=%d\n", (int)shouldGatherAll, (int)enableCPTracing);
00664 fflush(stdout);
00665
00666 if(enableCPTracing){
00667 if(shouldGatherAll && CkMyPe() == 0 && !alreadyRequestedAll){
00668 alreadyRequestedAll = true;
00669 CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherAll(NULL), 0, thisProxy);
00670 CkPrintf("Requesting all measurements\n");
00671 thisProxy.requestAll(*cb);
00672 delete cb;
00673
00674 } else {
00675
00676 if(shouldGatherMemoryUsage && CkMyPe() == 0 && !alreadyRequestedMemoryUsage){
00677 alreadyRequestedMemoryUsage = true;
00678 CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherMemoryUsage(NULL), 0, thisProxy);
00679 thisProxy.requestMemoryUsage(*cb);
00680 delete cb;
00681 }
00682
00683 if(shouldGatherUtilization && CkMyPe() == 0 && !alreadyRequestedIdleTime){
00684 alreadyRequestedIdleTime = true;
00685 CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherIdleTime(NULL), 0, thisProxy);
00686 thisProxy.requestIdleTime(*cb);
00687 delete cb;
00688 }
00689 }
00690 }
00691
00692
00693
00694 #if CMK_LBDB_ON && 0
00695 LBDatabase * myLBdatabase = LBDatabaseObj();
00696 LBDB * myLBDB = myLBdatabase->getLBDB();
00697 const CkVec<LBObj*> objs = myLBDB->getObjs();
00698 const int objCount = myLBDB->getObjCount();
00699 CkPrintf("LBDB info: objCount=%d objs contains %d LBObj* \n", objCount, objs.length());
00700
00701 LBRealType maxObjWallTime = -1.0;
00702
00703 for(int i=0;i<objs.length();i++){
00704 LBObj* o = objs[i];
00705 const LDObjData d = o->ObjData();
00706 LBRealType cpuTime = d.cpuTime;
00707 LBRealType wallTime = d.wallTime;
00708
00709 CkPrintf("[%d] LBDB Object[%d]: cpuTime=%f wallTime=%f\n", CkMyPe(), i, cpuTime, wallTime);
00710 if(wallTime > maxObjWallTime){
00711
00712 }
00713
00714 }
00715
00716 myLBDB->ClearLoads();
00717
00718 #endif
00719
00720
00721
00722
00723 phase_id++;
00724
00725
00726
00727 instrumentedPhase * newPhase = new instrumentedPhase();
00728 allData.phases.push_back(newPhase);
00729
00730 CkPrintf("Now in phase %d allData.phases.size()=%d\n", phase_id, allData.phases.size());
00731
00732 }
00733
00735 void controlPointManager::setTiming(double time){
00736 currentPhaseData()->times.push_back(time);
00737
00738 #ifdef USE_CRITICAL_PATH_HEADER_ARRAY
00739
00740
00741
00742
00743
00744
00745
00746
00747
00748
00749
00750
00751 #endif
00752
00753 }
00754
00756 void controlPointManager::requestIdleTime(CkCallback cb){
00757 CkAssert(enableCPTracing);
00758
00759 double i = localControlPointTracingInstance()->idleRatio();
00760 double idle[3];
00761 idle[0] = i;
00762 idle[1] = i;
00763 idle[2] = i;
00764
00765
00766
00767 localControlPointTracingInstance()->resetTimings();
00768
00769 contribute(3*sizeof(double),idle,idleTimeReductionType, cb);
00770 }
00771
00773 void controlPointManager::gatherIdleTime(CkReductionMsg *msg){
00774 CkAssert(enableCPTracing);
00775
00776 int size=msg->getSize() / sizeof(double);
00777 CkAssert(size==3);
00778 double *r=(double *) msg->getData();
00779
00780 instrumentedPhase* prevPhase = previousPhaseData();
00781 if(prevPhase != NULL){
00782 prevPhase->idleTime.min = r[0];
00783 prevPhase->idleTime.avg = r[1]/CkNumPes();
00784 prevPhase->idleTime.max = r[2];
00785 prevPhase->idleTime.print();
00786 CkPrintf("Stored idle time min=%lf avg=%lf max=%lf in prevPhase=%p\n", prevPhase->idleTime.min, prevPhase->idleTime.avg, prevPhase->idleTime.max, prevPhase);
00787 } else {
00788 CkPrintf("There is no previous phase to store the idle time measurements\n");
00789 }
00790
00791 alreadyRequestedIdleTime = false;
00792 checkForShutdown();
00793 delete msg;
00794 }
00795
00796
00797
00798
00799
00800
00802 void controlPointManager::requestAll(CkCallback cb){
00803 CkAssert(enableCPTracing);
00804
00805 TraceControlPoints *t = localControlPointTracingInstance();
00806
00807 double data[ALL_REDUCTION_SIZE];
00808
00809 double *idle = data;
00810 double *over = data+3;
00811 double *mem = data+6;
00812 double *msgBytes = data+7;
00813 double *grainsize = data+11;
00814
00815 const double i = t->idleRatio();
00816 idle[0] = i;
00817 idle[1] = i;
00818 idle[2] = i;
00819
00820 const double o = t->overheadRatio();
00821 over[0] = o;
00822 over[1] = o;
00823 over[2] = o;
00824
00825 const double m = t->memoryUsageMB();
00826 mem[0] = m;
00827
00828 msgBytes[0] = t->b2;
00829 msgBytes[1] = t->b3;
00830 msgBytes[2] = t->b2mlen;
00831 msgBytes[3] = t->b3mlen;
00832
00833 grainsize[0] = t->grainSize();
00834
00835 localControlPointTracingInstance()->resetAll();
00836
00837 contribute(ALL_REDUCTION_SIZE*sizeof(double),data,allMeasuresReductionType, cb);
00838 }
00839
00841 void controlPointManager::gatherAll(CkReductionMsg *msg){
00842 CkAssert(enableCPTracing);
00843
00844 CkAssert(msg->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
00845 int size=msg->getSize() / sizeof(double);
00846 double *data=(double *) msg->getData();
00847
00848 double *idle = data;
00849 double *over = data+3;
00850 double *mem = data+6;
00851 double *msgBytes = data+7;
00852 double *granularity = data+11;
00853
00854
00855
00856
00857 instrumentedPhase* prevPhase = previousPhaseData();
00858 if(prevPhase != NULL){
00859 prevPhase->idleTime.min = idle[0];
00860 prevPhase->idleTime.avg = idle[1]/CkNumPes();
00861 prevPhase->idleTime.max = idle[2];
00862
00863 prevPhase->overheadTime.min = over[0];
00864 prevPhase->overheadTime.avg = over[1]/CkNumPes();
00865 prevPhase->overheadTime.max = over[2];
00866
00867 prevPhase->memoryUsageMB = mem[0];
00868
00869 CkPrintf("Stored idle time min=%lf avg=%lf max=%lf mem=%lf in prevPhase=%p\n", (double)prevPhase->idleTime.min, prevPhase->idleTime.avg, prevPhase->idleTime.max, (double)prevPhase->memoryUsageMB, prevPhase);
00870
00871 double bytesPerInvoke2 = msgBytes[2] / msgBytes[0];
00872 double bytesPerInvoke3 = msgBytes[3] / msgBytes[1];
00873
00875 prevPhase->grainSize = (granularity[0] / (double)CkNumPes());
00876
00877 CkPrintf("Bytes Per Invokation 2 = %f\n", bytesPerInvoke2);
00878 CkPrintf("Bytes Per Invokation 3 = %f\n", bytesPerInvoke3);
00879
00880 CkPrintf("Bytes Per us of work 2 = %f\n", bytesPerInvoke2/prevPhase->grainSize);
00881 CkPrintf("Bytes Per us of work 3 = %f\n", bytesPerInvoke3/prevPhase->grainSize);
00882
00883 if(bytesPerInvoke2 > bytesPerInvoke3)
00884 prevPhase->bytesPerInvoke = bytesPerInvoke2;
00885 else
00886 prevPhase->bytesPerInvoke = bytesPerInvoke3;
00887
00888
00889
00890
00891
00892
00893
00894
00895 } else {
00896 CkPrintf("There is no previous phase to store measurements\n");
00897 }
00898
00899 alreadyRequestedAll = false;
00900 checkForShutdown();
00901 delete msg;
00902 }
00903
00904
00905
00906
00907 void controlPointManager::checkForShutdown(){
00908 if( exitWhenReady && !alreadyRequestedAll && !alreadyRequestedMemoryUsage && !alreadyRequestedIdleTime && CkMyPe()==0){
00909 doExitNow();
00910 }
00911 }
00912
00913
00914 void controlPointManager::exitIfReady(){
00915 if( !alreadyRequestedMemoryUsage && !alreadyRequestedAll && !alreadyRequestedIdleTime && CkMyPe()==0){
00916
00917 doExitNow();
00918 } else {
00919
00920 exitWhenReady = true;
00921 }
00922 }
00923
00924
00925
00926 void controlPointManager::doExitNow(){
00927 _TRACE_BEGIN_EXECUTE_DETAILED(-1, -1, _threadEP,CkMyPe(), 0, NULL);
00928 writeOutputToDisk();
00929
00930 CkExit();
00931 }
00932
00933 void controlPointManager::writeOutputToDisk(){
00934 if(writeDataFileAtShutdown){
00935 controlPointManagerProxy.ckLocalBranch()->writeDataFile();
00936 }
00937 }
00938
00939
00941 void controlPointManager::requestMemoryUsage(CkCallback cb){
00942 int m = CmiMaxMemoryUsage() / 1024 / 1024;
00943 CmiResetMaxMemory();
00944
00945 contribute(sizeof(int),&m,CkReduction::max_int, cb);
00946 }
00947
00949 void controlPointManager::gatherMemoryUsage(CkReductionMsg *msg){
00950 int size=msg->getSize() / sizeof(int);
00951 CkAssert(size==1);
00952 int *m=(int *) msg->getData();
00953
00954 CkPrintf("[%d] Max Memory Usage for all processors is %d MB\n", CkMyPe(), m[0]);
00955
00956 instrumentedPhase* prevPhase = previousPhaseData();
00957 if(prevPhase != NULL){
00958 prevPhase->memoryUsageMB = m[0];
00959 } else {
00960 CkPrintf("No place to store memory usage");
00961 }
00962
00963 alreadyRequestedMemoryUsage = false;
00964 checkForShutdown();
00965 delete msg;
00966 }
00967
00968
00969
00970
00971
00972
00973
00974
00975
00976
00977
00978
00979
00980
00981
00982
00983
00984
00985
00986
00987
00988
00989
00990
00991
00992
00993
00994
00995
00996
00997
00998
00999
01000
01001
01002
01003
01004
01005
01006
01007
01008
01009
01010
01011
01012
01013
01014
01015
01016
01017
01018
01019
01020
01021
01022
01023
01024
01025
01026
01027
01029 void gotoNextPhase(){
01030 controlPointManagerProxy.ckLocalBranch()->gotoNextPhase();
01031 }
01032
01033 FDECL void FTN_NAME(GOTONEXTPHASE,gotonextphase)()
01034 {
01035 gotoNextPhase();
01036 }
01037
01038
01040 class controlPointMain : public CBase_controlPointMain {
01041 public:
01042 controlPointMain(CkArgMsg* args){
01043 #if OLDRANDSEED
01044 struct timeval tp;
01045 gettimeofday(& tp, NULL);
01046 random_seed = (int)tp.tv_usec ^ (int)tp.tv_sec;
01047 #else
01048 double time = CmiWallTimer();
01049 int sec = (int)time;
01050 int usec = (int)((time - (double)sec)*1000000.0);
01051 random_seed = sec ^ usec;
01052 #endif
01053
01054
01055 double period, periodms;
01056 bool haveSamplePeriod = CmiGetArgDoubleDesc(args->argv,"+CPSamplePeriod", &period,"The time between Control Point Framework samples (in seconds)");
01057 bool haveSamplePeriodMs = CmiGetArgDoubleDesc(args->argv,"+CPSamplePeriodMs", &periodms,"The time between Control Point Framework samples (in milliseconds)");
01058 if(haveSamplePeriod){
01059 CkPrintf("controlPointSamplePeriod = %lf sec\n", period);
01060 controlPointSamplePeriod = (int)(period * 1000);
01061 } else if(haveSamplePeriodMs){
01062 CkPrintf("controlPointSamplePeriodMs = %lf ms\n", periodms);
01063 controlPointSamplePeriod = periodms;
01064 } else {
01065 controlPointSamplePeriod = DEFAULT_CONTROL_POINT_SAMPLE_PERIOD;
01066 }
01067
01068
01069
01070 whichTuningScheme = RandomSelection;
01071
01072
01073 if( CmiGetArgFlagDesc(args->argv,"+CPSchemeRandom","Randomly Select Control Point Values") ){
01074 whichTuningScheme = RandomSelection;
01075 } else if ( CmiGetArgFlagDesc(args->argv,"+CPExhaustiveSearch","Exhaustive Search of Control Point Values") ){
01076 whichTuningScheme = ExhaustiveSearch;
01077 } else if ( CmiGetArgFlagDesc(args->argv,"+CPAlwaysUseDefaults","Always Use The Provided Default Control Point Values") ){
01078 whichTuningScheme = AlwaysDefaults;
01079 } else if ( CmiGetArgFlagDesc(args->argv,"+CPSimulAnneal","Simulated Annealing Search of Control Point Values") ){
01080 whichTuningScheme = SimulatedAnnealing;
01081 } else if ( CmiGetArgFlagDesc(args->argv,"+CPCriticalPathPrio","Use Critical Path to adapt Control Point Values") ){
01082 whichTuningScheme = CriticalPathAutoPrioritization;
01083 } else if ( CmiGetArgFlagDesc(args->argv,"+CPBestKnown","Use BestKnown Timing for Control Point Values") ){
01084 whichTuningScheme = UseBestKnownTiming;
01085 } else if ( CmiGetArgFlagDesc(args->argv,"+CPSteering","Use Steering to adjust Control Point Values") ){
01086 whichTuningScheme = UseSteering;
01087 } else if ( CmiGetArgFlagDesc(args->argv,"+CPMemoryAware", "Adjust control points to approach available memory") ){
01088 whichTuningScheme = MemoryAware;
01089 } else if ( CmiGetArgFlagDesc(args->argv,"+CPSimplex", "Nelder-Mead Simplex Algorithm") ){
01090 whichTuningScheme = Simplex;
01091 } else if ( CmiGetArgFlagDesc(args->argv,"+CPDivideConquer", "A divide and conquer program specific steering scheme") ){
01092 whichTuningScheme = DivideAndConquer;
01093 } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriod", "Adjust the load balancing period (Constant Predictor)") ){
01094 whichTuningScheme = LDBPeriod;
01095 } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriodLinear", "Adjust the load balancing period (Linear Predictor)") ){
01096 whichTuningScheme = LDBPeriodLinear;
01097 } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriodQuadratic", "Adjust the load balancing period (Quadratic Predictor)") ){
01098 whichTuningScheme = LDBPeriodQuadratic;
01099 } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriodOptimal", "Adjust the load balancing period (Optimal Predictor)") ){
01100 whichTuningScheme = LDBPeriodOptimal;
01101 }
01102
01103 char *defValStr = NULL;
01104 if( CmiGetArgStringDesc(args->argv, "+CPDefaultValues", &defValStr, "Specify the default control point values used for the first couple phases") ){
01105 CkPrintf("You specified default value string: %s\n", defValStr);
01106
01107
01108
01109
01110 char *tok = strtok(defValStr, ",");
01111 while (tok) {
01112
01113 int len = strlen(tok);
01114 char *eqsign = strchr(tok, '=');
01115 if(eqsign != NULL && eqsign != tok){
01116 *eqsign = '\0';
01117 char *cpname = tok;
01118 std::string cpName(tok);
01119 char *cpDefVal = eqsign+1;
01120 int v=-1;
01121 if(sscanf(cpDefVal, "%d", &v) == 1){
01122 CkPrintf("Command Line Argument Specifies that Control Point \"%s\" defaults to %d\n", cpname, v);
01123 CkAssert(CkMyPe() == 0);
01124 defaultControlPointValues[cpName] = v;
01125 }
01126 }
01127 tok = strtok(NULL, ",");
01128 }
01129
01130 }
01131
01132 shouldGatherAll = false;
01133 shouldGatherMemoryUsage = false;
01134 shouldGatherUtilization = false;
01135
01136 if ( CmiGetArgFlagDesc(args->argv,"+CPGatherAll","Gather all types of measurements for each phase") ){
01137 shouldGatherAll = true;
01138 } else {
01139 if ( CmiGetArgFlagDesc(args->argv,"+CPGatherMemoryUsage","Gather memory usage after each phase") ){
01140 shouldGatherMemoryUsage = true;
01141 }
01142 if ( CmiGetArgFlagDesc(args->argv,"+CPGatherUtilization","Gather utilization & Idle time after each phase") ){
01143 shouldGatherUtilization = true;
01144 }
01145 }
01146
01147 writeDataFileAtShutdown = false;
01148 if( CmiGetArgFlagDesc(args->argv,"+CPSaveData","Save Control Point timings & configurations at completion") ){
01149 writeDataFileAtShutdown = true;
01150 }
01151
01152 shouldFilterOutputData = true;
01153 if( CmiGetArgFlagDesc(args->argv,"+CPNoFilterData","Don't filter phases from output data") ){
01154 shouldFilterOutputData = false;
01155 }
01156
01157
01158 loadDataFileAtStartup = false;
01159 if( CmiGetArgFlagDesc(args->argv,"+CPLoadData","Load Control Point timings & configurations at startup") ){
01160 loadDataFileAtStartup = true;
01161 }
01162
01163 char *cpdatafile;
01164 if( CmiGetArgStringDesc(args->argv, "+CPDataFilename", &cpdatafile, "Specify control point data file to save/load") ){
01165 sprintf(CPDataFilename, "%s", cpdatafile);
01166 } else {
01167 sprintf(CPDataFilename, "controlPointData.txt");
01168 }
01169
01170
01171 controlPointManagerProxy = CProxy_controlPointManager::ckNew();
01172 }
01173 ~controlPointMain(){}
01174 };
01175
01177 void registerCPChangeCallback(CkCallback cb, bool frameworkShouldAdvancePhase){
01178 CkAssert(CkMyPe() == 0);
01179 CkPrintf("Application has registered a control point change callback\n");
01180 controlPointManagerProxy.ckLocalBranch()->setCPCallback(cb, frameworkShouldAdvancePhase);
01181 }
01182
01184 void setFrameworkAdvancePhase(bool frameworkShouldAdvancePhase){
01185 if(CkMyPe() == 0){
01186 CkPrintf("Application has specified that framework should %sadvance phase\n", frameworkShouldAdvancePhase?"":"not ");
01187 controlPointManagerProxy.ckLocalBranch()->setFrameworkAdvancePhase(frameworkShouldAdvancePhase);
01188 }
01189 }
01190
01192 void registerControlPointTiming(double time){
01193 CkAssert(CkMyPe() == 0);
01194 #if DEBUGPRINT>0
01195 CkPrintf("Program registering its own timing with registerControlPointTiming(time=%lf)\n", time);
01196 #endif
01197 controlPointManagerProxy.ckLocalBranch()->setTiming(time);
01198 }
01199
01201 void controlPointTimingStamp() {
01202 CkAssert(CkMyPe() == 0);
01203 #if DEBUGPRINT>0
01204 CkPrintf("Program registering its own timing with controlPointTimingStamp()\n", time);
01205 #endif
01206
01207 static double prev_time = 0.0;
01208 double t = CmiWallTimer();
01209 double duration = t - prev_time;
01210 prev_time = t;
01211
01212 controlPointManagerProxy.ckLocalBranch()->setTiming(duration);
01213 }
01214
01215 FDECL void FTN_NAME(CONTROLPOINTTIMINGSTAMP,controlpointtimingstamp)()
01216 {
01217 controlPointTimingStamp();
01218 }
01219
01220
01221 FDECL void FTN_NAME(SETFRAMEWORKADVANCEPHASEF,setframeworkadvancephasef)(CMK_TYPEDEF_INT4 *value)
01222 {
01223 setFrameworkAdvancePhase(*value);
01224 }
01225
01226
01227
01228
01230 extern "C" void controlPointShutdown(){
01231 if(CkMyPe() == 0){
01232
01233
01234 controlPointManagerProxy.ckLocalBranch()->exitIfReady();
01235
01236 }
01237 }
01238
01240 void controlPointInitNode(){
01241
01242 registerExitFn(controlPointShutdown);
01243 }
01244
01246 static void periodicProcessControlPoints(void* ptr, double currWallTime){
01247 #ifdef DEBUGPRINT
01248 CkPrintf("[%d] periodicProcessControlPoints()\n", CkMyPe());
01249 #endif
01250 controlPointManagerProxy.ckLocalBranch()->processControlPoints();
01251 CcdCallFnAfterOnPE((CcdVoidFn)periodicProcessControlPoints, (void*)NULL, controlPointSamplePeriod, CkMyPe());
01252 }
01253
01254
01255
01256
01257
01261 void controlPointManager::generatePlan() {
01262 const double startGenerateTime = CmiWallTimer();
01263 const int phase_id = this->phase_id;
01264 const int effective_phase = allData.phases.size();
01265
01266
01267 if(generatedPlanForStep == phase_id)
01268 return;
01269 generatedPlanForStep = phase_id;
01270
01271 CkPrintf("Generating Plan for phase %d\n", phase_id);
01272 printTuningScheme();
01273
01274
01275 instrumentedPhase *prevPhase = previousPhaseData();
01276 for(std::map<std::string, int >::const_iterator cpsIter=prevPhase->controlPoints.begin(); cpsIter != prevPhase->controlPoints.end(); ++cpsIter){
01277 newControlPoints[cpsIter->first] = cpsIter->second;
01278 }
01279
01280
01281 if( whichTuningScheme == RandomSelection ){
01282 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
01283 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
01284 const std::string &name = cpsIter->first;
01285 const std::pair<int,int> &bounds = cpsIter->second;
01286 const int lb = bounds.first;
01287 const int ub = bounds.second;
01288 newControlPoints[name] = lb + randInt(ub-lb+1, name.c_str(), phase_id);
01289 }
01290 } else if( whichTuningScheme == CriticalPathAutoPrioritization) {
01291
01292
01293
01294
01295
01296
01297
01298
01299
01300 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
01301 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
01302 const std::string &name = cpsIter->first;
01303 const std::pair<int,int> &bounds = cpsIter->second;
01304 const int lb = bounds.first;
01305 const int ub = bounds.second;
01306 newControlPoints[name] = (lb+ub)/2;
01307 }
01308
01309 } else if ( whichTuningScheme == MemoryAware ) {
01310
01311
01312
01313
01314 instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
01315 instrumentedPhase *prevPhase = previousPhaseData();
01316
01317 if(phase_id%2 == 0){
01318 CkPrintf("Steering (memory based) based on 2 phases ago:\n");
01319 twoAgoPhase->print();
01320 CkPrintf("\n");
01321 fflush(stdout);
01322
01323
01324 double memUsage = twoAgoPhase->memoryUsageMB;
01325 CkPrintf("Steering (memory based) encountered memory usage of (%f MB)\n", memUsage);
01326 fflush(stdout);
01327 if(memUsage < 1100.0 && memUsage > 0.0){
01328 CkPrintf("Steering (memory based) encountered low memory usage (%f) < 1200 \n", memUsage);
01329 CkPrintf("Steering (memory based) controlPointSpace.size()=\n", controlPointSpace.size());
01330
01331
01332 newControlPoints = twoAgoPhase->controlPoints;
01333
01334
01335 CkPrintf("Steering (memory based) initialized plan\n");
01336 fflush(stdout);
01337
01338
01339 std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["MemoryConsumption"];
01340
01341
01342 bool found = false;
01343 std::string cpName;
01344 std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
01345 std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
01346 for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
01347 cpName = iter->first;
01348 info = &iter->second;
01349 found = true;
01350 break;
01351 }
01352
01353
01354 if(found){
01355 CkPrintf("Steering found knob to turn that should increase memory consumption\n");
01356 fflush(stdout);
01357 const int twoAgoValue = twoAgoPhase->controlPoints[cpName];
01358 const int maxValue = controlPointSpace[cpName].second;
01359
01360 if(twoAgoValue+1 <= maxValue){
01361 newControlPoints[cpName] = twoAgoValue+1;
01362 }
01363 }
01364
01365 }
01366 }
01367
01368 CkPrintf("Steering (memory based) done for this phase\n");
01369 fflush(stdout);
01370
01371 } else if ( whichTuningScheme == UseBestKnownTiming ) {
01372
01373
01374
01375 static bool firstTime = true;
01376 if(firstTime){
01377 firstTime = false;
01378 instrumentedPhase *best = allData.findBest();
01379 CkPrintf("Best known phase is:\n");
01380 best->print();
01381 CkPrintf("\n");
01382
01383 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
01384 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter) {
01385 const std::string &name = cpsIter->first;
01386 newControlPoints[name] = best->controlPoints[name];
01387 }
01388 }
01389 } else if( whichTuningScheme == LDBPeriod) {
01390
01391
01392
01393
01394
01395
01396
01397 instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
01398 instrumentedPhase *prevPhase = previousPhaseData();
01399
01400
01401 const std::vector<double> × = twoAgoPhase->times;
01402 const int oldNumTimings = times.size();
01403
01404
01405 const std::vector<double> ×New = prevPhase->times;
01406 const int newNumTimings = timesNew.size();
01407
01408
01409 if(oldNumTimings > 4 && newNumTimings > 4){
01410
01411
01412
01413
01414 double oldSum = 0;
01415
01416 for(int i=2; i<oldNumTimings; i++){
01417 oldSum += times[i];
01418 }
01419
01420 const double oldAvg = oldSum / (oldNumTimings-2);
01421
01422
01423
01424
01425
01426 const double expectedTotalTime = oldAvg * newNumTimings;
01427
01428
01429
01430 double newSum = 0.0;
01431 for(int i=2; i<newNumTimings; ++i){
01432 newSum += timesNew[i];
01433 }
01434
01435 const double newAvg = newSum / (newNumTimings-2);
01436 const double newTotalTimeExcludingLBSteps = newAvg * ((double)newNumTimings);
01437
01438 const double benefit = expectedTotalTime - newTotalTimeExcludingLBSteps;
01439
01440
01441 const double lbcost = timesNew[0] + timesNew[1] - 2.0*newAvg;
01442
01443 const double benefitAfterLB = benefit - lbcost;
01444
01445
01446
01447 CkPrintf("Constant Model: lbcost = %f, expected = %f, actual = %f\n", lbcost, expectedTotalTime, newTotalTimeExcludingLBSteps);
01448
01449
01450 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
01451 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
01452 const std::string &name = cpsIter->first;
01453 const std::pair<int,int> &bounds = cpsIter->second;
01454 const int lb = bounds.first;
01455 const int ub = bounds.second;
01456
01457 if(benefitAfterLB > 0){
01458 CkPrintf("Constant Model: Beneficial LB\n");
01459 int newval = newControlPoints[name] / 2;
01460 if(newval > lb)
01461 newControlPoints[name] = newval;
01462 else
01463 newControlPoints[name] = lb;
01464 } else {
01465 CkPrintf("Constant Model: Detrimental LB\n");
01466 int newval = newControlPoints[name] * 2;
01467 if(newval < ub)
01468 newControlPoints[name] = newval;
01469 else
01470 newControlPoints[name] = ub;
01471 }
01472 }
01473 }
01474
01475
01476 } else if( whichTuningScheme == LDBPeriodLinear) {
01477
01478
01479
01480
01481
01482
01483
01484 instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
01485 instrumentedPhase *prevPhase = previousPhaseData();
01486
01487 const std::vector<double> × = twoAgoPhase->times;
01488 const int oldNumTimings = times.size();
01489
01490 const std::vector<double> ×New = prevPhase->times;
01491 const int newNumTimings = timesNew.size();
01492
01493
01494 if(oldNumTimings > 4 && newNumTimings > 4){
01495
01496
01497
01498 const int b1 = 2 + (oldNumTimings-2)/2;
01499 double s1 = 0;
01500 double s2 = 0;
01501
01502 const double ldbStepsTime = times[0] + times[1];
01503
01504 for(int i=2; i<b1; i++){
01505 s1 += times[i];
01506 }
01507 for(int i=b1; i<oldNumTimings; i++){
01508 s2 += times[i];
01509 }
01510
01511
01512
01513
01514 const double a1 = s1 / (double)(b1-2);
01515 const double a2 = s2 / (double)(oldNumTimings-b1);
01516 const double aold = (a1+a2)/2.0;
01517
01518 const double expectedTotalTime = newNumTimings*(aold+(oldNumTimings+newNumTimings)*(a2-a1)/oldNumTimings);
01519
01520
01521
01522 double sum = 0.0;
01523 for(int i=2; i<newNumTimings; ++i){
01524 sum += timesNew[i];
01525 }
01526
01527 const double avg = sum / ((double)(newNumTimings-2));
01528 const double actualTotalTime = avg * ((double)newNumTimings);
01529
01530 const double benefit = expectedTotalTime - actualTotalTime;
01531
01532
01533 const double lbcost = timesNew[0] + timesNew[1] - 2.0*avg;
01534
01535 const double benefitAfterLB = benefit - lbcost;
01536
01537
01538
01539 CkPrintf("Linear Model: lbcost = %f, expected = %f, actual = %f\n", lbcost, expectedTotalTime, actualTotalTime);
01540
01541
01542
01543 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
01544 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
01545 const std::string &name = cpsIter->first;
01546 const std::pair<int,int> &bounds = cpsIter->second;
01547 const int lb = bounds.first;
01548 const int ub = bounds.second;
01549
01550 if(benefitAfterLB > 0){
01551 CkPrintf("Linear Model: Beneficial LB\n");
01552 int newval = newControlPoints[name] / 2;
01553 if(newval > lb)
01554 newControlPoints[name] = newval;
01555 else
01556 newControlPoints[name] = lb;
01557 } else {
01558 CkPrintf("Linear Model: Detrimental LB\n");
01559 int newval = newControlPoints[name] * 2;
01560 if(newval < ub)
01561 newControlPoints[name] = newval;
01562 else
01563 newControlPoints[name] = ub;
01564 }
01565 }
01566 }
01567
01568 }
01569
01570 else if( whichTuningScheme == LDBPeriodQuadratic) {
01571
01572
01573
01574
01575
01576
01577
01578 instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
01579 instrumentedPhase *prevPhase = previousPhaseData();
01580
01581 const std::vector<double> × = twoAgoPhase->times;
01582 const int oldNumTimings = times.size();
01583
01584 const std::vector<double> ×New = prevPhase->times;
01585 const int newNumTimings = timesNew.size();
01586
01587
01588 if(oldNumTimings > 4 && newNumTimings > 4){
01589
01590
01591
01592
01593 const int b1 = 2 + (oldNumTimings-2)/3;
01594 const int b2 = 2 + (2*(oldNumTimings-2))/3;
01595 double s1 = 0;
01596 double s2 = 0;
01597 double s3 = 0;
01598
01599 const double ldbStepsTime = times[0] + times[1];
01600
01601 for(int i=2; i<b1; i++){
01602 s1 += times[i];
01603 }
01604 for(int i=b1; i<b2; i++){
01605 s2 += times[i];
01606 }
01607 for(int i=b2; i<oldNumTimings; i++){
01608 s3 += times[i];
01609 }
01610
01611
01612
01613
01614 const double a1 = s1 / (double)(b1-2);
01615 const double a2 = s2 / (double)(b2-b1);
01616 const double a3 = s3 / (double)(oldNumTimings-b2);
01617
01618 const double a = (a1-2.0*a2+a3)/2.0;
01619 const double b = (a1-4.0*a2+3.0*a3)/2.0;
01620 const double c = a3;
01621
01622
01623 const double x1 = (double)newNumTimings / (double)oldNumTimings * 3.0 + 0.5;
01624 const double x2 = 0.5;
01625 const double expectedTotalTime = a*x1*x1*x1/3.0 + b*x1*x1/2.0 + c*x1 - (a*x2*x2*x2/3.0 + b*x2*x2/2.0 + c*x2);
01626
01627
01628
01629 double sum = 0.0;
01630 for(int i=2; i<newNumTimings; ++i){
01631 sum += timesNew[i];
01632 }
01633
01634 const double avg = sum / ((double)(newNumTimings-2));
01635 const double actualTotalTime = avg * ((double)newNumTimings);
01636
01637 const double benefit = expectedTotalTime - actualTotalTime;
01638
01639
01640 const double lbcost = timesNew[0] + timesNew[1] - 2.0*avg;
01641
01642 const double benefitAfterLB = benefit - lbcost;
01643
01644
01645
01646 CkPrintf("Quadratic Model: lbcost = %f, expected = %f, actual = %f, x1=%f, a1=%f, a2=%f, a3=%f, b1=%d, b2=%d, a=%f, b=%f, c=%f\n", lbcost, expectedTotalTime, actualTotalTime, x1, a1, a2, a3, b1, b2, a, b, c);
01647
01648
01649
01650 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
01651 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
01652 const std::string &name = cpsIter->first;
01653 const std::pair<int,int> &bounds = cpsIter->second;
01654 const int lb = bounds.first;
01655 const int ub = bounds.second;
01656
01657 if(benefitAfterLB > 0){
01658 CkPrintf("QuadraticModel: Beneficial LB\n");
01659 int newval = newControlPoints[name] / 2;
01660 if(newval > lb)
01661 newControlPoints[name] = newval;
01662 else
01663 newControlPoints[name] = lb;
01664 } else {
01665 CkPrintf("QuadraticModel: Detrimental LB\n");
01666 int newval = newControlPoints[name] * 2;
01667 if(newval < ub)
01668 newControlPoints[name] = newval;
01669 else
01670 newControlPoints[name] = ub;
01671 }
01672
01673 }
01674 }
01675
01676
01677 } else if( whichTuningScheme == LDBPeriodOptimal) {
01678
01679
01680
01681
01682
01683
01684
01685
01686 instrumentedPhase *prevPhase = previousPhaseData();
01687
01688 const std::vector<double> × = prevPhase->times;
01689 const int numTimings = times.size();
01690
01691 if( numTimings > 4){
01692
01693 const int b1 = 2 + (numTimings-2)/2;
01694 double s1 = 0;
01695 double s2 = 0;
01696
01697
01698 for(int i=2; i<b1; i++){
01699 s1 += times[i];
01700 }
01701 for(int i=b1; i<numTimings; i++){
01702 s2 += times[i];
01703 }
01704
01705
01706 const double a1 = s1 / (double)(b1-2);
01707 const double a2 = s2 / (double)(numTimings-b1);
01708 const double avg = (a1+a1) / 2.0;
01709
01710 const double m = (a2-a1)/((double)(numTimings-2)/2.0);
01711
01712 const double ldbStepsTime = times[0] + times[1];
01713 const double lbcost = ldbStepsTime - 2.0*avg;
01714
01715
01716 int newval = roundDouble(sqrt(2.0*lbcost/m));
01717
01718
01719 if(m<=0)
01720 newval = 2*numTimings;
01721
01722 CkPrintf("Optimal Model (double when negative): lbcost = %f, m = %f, new ldbperiod should be %d\n", lbcost, m, newval);
01723
01724
01725 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
01726 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
01727
01728 const std::string &name = cpsIter->first;
01729 const std::pair<int,int> &bounds = cpsIter->second;
01730 const int lb = bounds.first;
01731 const int ub = bounds.second;
01732
01733 if(newval < lb){
01734 newControlPoints[name] = lb;
01735 } else if(newval > ub){
01736 newControlPoints[name] = ub;
01737 } else {
01738 newControlPoints[name] = newval;
01739 }
01740
01741 }
01742
01743
01744 }
01745
01746
01747
01748 } else if ( whichTuningScheme == UseSteering ) {
01749
01750
01751
01752
01753
01754
01755 instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
01756 instrumentedPhase *prevPhase = previousPhaseData();
01757
01758 if(phase_id%4 == 0){
01759 CkPrintf("Steering based on 2 phases ago:\n");
01760 twoAgoPhase->print();
01761 CkPrintf("\n");
01762 fflush(stdout);
01763
01764 std::vector<std::map<std::string,int> > possibleNextStepPlans;
01765
01766
01767
01768
01769 {
01770 double idleTime = twoAgoPhase->idleTime.avg;
01771 CkPrintf("Steering encountered idle time (%f)\n", idleTime);
01772 fflush(stdout);
01773 if(idleTime > 0.10){
01774 CkPrintf("Steering encountered high idle time(%f) > 10%%\n", idleTime);
01775 CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
01776
01777 std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["Concurrency"];
01778
01779 bool found = false;
01780 std::string cpName;
01781 std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
01782 std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
01783 for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
01784 cpName = iter->first;
01785 info = &iter->second;
01786
01787
01788 std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
01789
01790 CkPrintf("Steering found knob to turn\n");
01791 fflush(stdout);
01792
01793 if(info->first == ControlPoint::EFF_INC){
01794 const int maxValue = controlPointSpace[cpName].second;
01795 const int twoAgoValue = twoAgoPhase->controlPoints[cpName];
01796 if(twoAgoValue+1 <= maxValue){
01797 aNewPlan[cpName] = twoAgoValue+1;
01798 }
01799 } else {
01800 const int minValue = controlPointSpace[cpName].second;
01801 const int twoAgoValue = twoAgoPhase->controlPoints[cpName];
01802 if(twoAgoValue-1 >= minValue){
01803 aNewPlan[cpName] = twoAgoValue-1;
01804 }
01805 }
01806
01807 possibleNextStepPlans.push_back(aNewPlan);
01808
01809 }
01810 }
01811 }
01812
01813
01814
01815 {
01816 double overheadTime = twoAgoPhase->overheadTime.avg;
01817 CkPrintf("Steering encountered overhead time (%f)\n", overheadTime);
01818 fflush(stdout);
01819 if(overheadTime > 0.10){
01820 CkPrintf("Steering encountered high overhead time(%f) > 10%%\n", overheadTime);
01821 CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
01822
01823 std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["GrainSize"];
01824
01825 bool found = false;
01826 std::string cpName;
01827 std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
01828 std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
01829 for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
01830 cpName = iter->first;
01831 info = &iter->second;
01832
01833
01834 std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
01835
01836
01837
01838 CkPrintf("Steering found knob to turn\n");
01839 fflush(stdout);
01840
01841 if(info->first == ControlPoint::EFF_INC){
01842 const int maxValue = controlPointSpace[cpName].second;
01843 const int twoAgoValue = twoAgoPhase->controlPoints[cpName];
01844 if(twoAgoValue+1 <= maxValue){
01845 aNewPlan[cpName] = twoAgoValue+1;
01846 }
01847 } else {
01848 const int minValue = controlPointSpace[cpName].second;
01849 const int twoAgoValue = twoAgoPhase->controlPoints[cpName];
01850 if(twoAgoValue-1 >= minValue){
01851 aNewPlan[cpName] = twoAgoValue-1;
01852 }
01853 }
01854
01855 possibleNextStepPlans.push_back(aNewPlan);
01856
01857 }
01858
01859 }
01860 }
01861
01862
01863 {
01864 double idleTime = twoAgoPhase->idleTime.avg;
01865 CkPrintf("Steering encountered idle time (%f)\n", idleTime);
01866 fflush(stdout);
01867 if(idleTime > 0.10){
01868 CkPrintf("Steering encountered high idle time(%f) > 10%%\n", idleTime);
01869 CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
01870
01871 std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["GPUOffloadedWork"];
01872
01873 bool found = false;
01874 std::string cpName;
01875 std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
01876 std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
01877 for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
01878 cpName = iter->first;
01879 info = &iter->second;
01880
01881
01882 std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
01883
01884
01885 CkPrintf("Steering found knob to turn\n");
01886 fflush(stdout);
01887
01888 if(info->first == ControlPoint::EFF_DEC){
01889 const int maxValue = controlPointSpace[cpName].second;
01890 const int twoAgoValue = twoAgoPhase->controlPoints[cpName];
01891 if(twoAgoValue+1 <= maxValue){
01892 aNewPlan[cpName] = twoAgoValue+1;
01893 }
01894 } else {
01895 const int minValue = controlPointSpace[cpName].second;
01896 const int twoAgoValue = twoAgoPhase->controlPoints[cpName];
01897 if(twoAgoValue-1 >= minValue){
01898 aNewPlan[cpName] = twoAgoValue-1;
01899 }
01900 }
01901
01902 possibleNextStepPlans.push_back(aNewPlan);
01903
01904 }
01905
01906 }
01907 }
01908
01909
01910
01911
01912 if(possibleNextStepPlans.size() > 0){
01913 newControlPoints = possibleNextStepPlans[0];
01914 }
01915
01916
01917 CkPrintf("Steering done for this phase\n");
01918 fflush(stdout);
01919
01920 } else {
01921
01922 CkPrintf("not a phase to do steering, so stick with previously planned values (one phase ago)\n");
01923 fflush(stdout);
01924 }
01925
01926
01927
01928 } else if( whichTuningScheme == SimulatedAnnealing ) {
01929
01930
01931
01932
01933
01934
01935
01936
01937 CkPrintf("Finding best phase\n");
01938 instrumentedPhase *bestPhase = allData.findBest();
01939 CkPrintf("best found:\n");
01940 bestPhase->print();
01941 CkPrintf("\n");
01942
01943
01944 const int convergeByPhase = 100;
01945
01946 const double progress = (double) min(effective_phase, convergeByPhase) / (double)convergeByPhase;
01947
01948 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
01949 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
01950 const std::string &name = cpsIter->first;
01951 const std::pair<int,int> &bounds = cpsIter->second;
01952 const int minValue = bounds.first;
01953 const int maxValue = bounds.second;
01954
01955 const int before = bestPhase->controlPoints[name];
01956
01957 const int range = (int)((maxValue-minValue+1)*(1.0-progress));
01958
01959 int high = min(before+range, maxValue);
01960 int low = max(before-range, minValue);
01961
01962 newControlPoints[name] = low;
01963 if(high-low > 0){
01964 newControlPoints[name] += randInt(high-low, name.c_str(), phase_id);
01965 }
01966
01967 }
01968
01969 } else if ( whichTuningScheme == DivideAndConquer ) {
01970
01971
01972
01973
01974
01975
01976
01977
01978
01979
01980 instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
01981 instrumentedPhase *prevPhase = previousPhaseData();
01982
01983 if(phase_id%4 == 0){
01984 CkPrintf("Divide & Conquer Steering based on 2 phases ago:\n");
01985 twoAgoPhase->print();
01986 CkPrintf("\n");
01987 fflush(stdout);
01988
01989 std::vector<std::map<std::string,int> > possibleNextStepPlans;
01990
01991
01992
01993
01994 {
01995 double idleTime = twoAgoPhase->idleTime.avg;
01996 double overheadTime = twoAgoPhase->overheadTime.avg;
01997
01998
01999 CkPrintf("Divide & Conquer Steering encountered overhead time (%f) idle time (%f)\n",overheadTime, idleTime);
02000 fflush(stdout);
02001 if(idleTime+overheadTime > 0.10){
02002 CkPrintf("Steering encountered high idle+overheadTime time(%f) > 10%%\n", idleTime+overheadTime);
02003 CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
02004
02005 int direction = -1;
02006 if (idleTime>overheadTime){
02007
02008 direction = 1;
02009 }
02010
02011 std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["Concurrency"];
02012
02013 bool found = false;
02014 std::string cpName;
02015 std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
02016 std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
02017 for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
02018 cpName = iter->first;
02019 info = &iter->second;
02020
02021
02022 std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
02023
02024 CkPrintf("Divide & Conquer Steering found knob to turn\n");
02025 fflush(stdout);
02026
02027 int adjustByAmount = (int)(myAbs(idleTime-overheadTime)*5.0);
02028
02029 if(info->first == ControlPoint::EFF_INC){
02030 const int minValue = controlPointSpace[cpName].first;
02031 const int maxValue = controlPointSpace[cpName].second;
02032 const int twoAgoValue = twoAgoPhase->controlPoints[cpName];
02033 const int newVal = closestInRange(twoAgoValue+adjustByAmount*direction, minValue, maxValue);
02034 CkAssert(newVal <= maxValue && newVal >= minValue);
02035 aNewPlan[cpName] = newVal;
02036 } else {
02037 const int minValue = controlPointSpace[cpName].first;
02038 const int maxValue = controlPointSpace[cpName].second;
02039 const int twoAgoValue = twoAgoPhase->controlPoints[cpName];
02040 const int newVal = closestInRange(twoAgoValue-adjustByAmount*direction, minValue, maxValue);
02041 CkAssert(newVal <= maxValue && newVal >= minValue);
02042 aNewPlan[cpName] = newVal;
02043 }
02044
02045 possibleNextStepPlans.push_back(aNewPlan);
02046 }
02047 }
02048 }
02049
02050 if(possibleNextStepPlans.size() > 0){
02051 CkPrintf("Divide & Conquer Steering found %d possible next phases, using first one\n", possibleNextStepPlans.size());
02052 newControlPoints = possibleNextStepPlans[0];
02053 } else {
02054 CkPrintf("Divide & Conquer Steering found no possible next phases\n");
02055 }
02056 }
02057
02058 } else if( whichTuningScheme == Simplex ) {
02059
02060
02061
02062
02063
02064
02065
02066 s.adapt(controlPointSpace, newControlPoints, phase_id, allData);
02067
02068 } else if( whichTuningScheme == ExhaustiveSearch ) {
02069
02070
02071
02072
02073 int numDimensions = controlPointSpace.size();
02074 CkAssert(numDimensions > 0);
02075
02076 vector<int> lowerBounds(numDimensions);
02077 vector<int> upperBounds(numDimensions);
02078
02079 int d=0;
02080 std::map<std::string, pair<int,int> >::iterator iter;
02081 for(iter = controlPointSpace.begin(); iter != controlPointSpace.end(); iter++){
02082
02083 lowerBounds[d] = iter->second.first;
02084 upperBounds[d] = iter->second.second;
02085 d++;
02086 }
02087
02088
02089 vector<std::string> names(numDimensions);
02090 d=0;
02091 for(std::map<std::string, pair<int,int> >::iterator niter=controlPointSpace.begin(); niter!=controlPointSpace.end(); niter++){
02092 names[d] = niter->first;
02093 d++;
02094 }
02095
02096
02097
02098 vector<int> config = lowerBounds;
02099 config.push_back(0);
02100
02101
02102 allData.cleanupNames();
02103 std::vector<instrumentedPhase*> &phases = allData.phases;
02104
02105 while(true){
02106
02107 std::vector<instrumentedPhase*>::iterator piter;
02108 bool testedConfiguration = false;
02109 for(piter = phases.begin(); piter != phases.end(); piter++){
02110
02111
02112 bool match = true;
02113 for(int j=0;j<numDimensions;j++){
02114 match &= (*piter)->controlPoints[names[j]] == config[j];
02115 }
02116
02117 if(match){
02118 testedConfiguration = true;
02119 break;
02120 }
02121 }
02122
02123 if(testedConfiguration == false){
02124 break;
02125 }
02126
02127
02128 config[0] ++;
02129
02130 for(int i=0;i<numDimensions;i++){
02131 if(config[i] > upperBounds[i]){
02132 config[i+1]++;
02133 config[i] = lowerBounds[i];
02134 }
02135 }
02136
02137 if(config[numDimensions] > 0){
02138 break;
02139 }
02140
02141 }
02142
02143 if(config[numDimensions] > 0){
02144 for(int i=0;i<numDimensions;i++){
02145 config[i]= (lowerBounds[i]+upperBounds[i]) / 2;
02146 }
02147 }
02148
02149
02150
02151 for(int i=0; i<numDimensions; i++){
02152 newControlPoints[names[i]] = config[i];
02153 CkPrintf("Exhaustive search chose: %s -> %d\n", names[i].c_str(), config[i]);
02154 }
02155
02156
02157 } else {
02158 CkAbort("Some Control Point tuning strategy must be enabled.\n");
02159 }
02160
02161
02162 const double endGenerateTime = CmiWallTimer();
02163
02164 CkPrintf("Time to generate next control point configuration(s): %f sec\n", (endGenerateTime - startGenerateTime) );
02165
02166 }
02167
02168
02169
02170
02171
02173 int controlPoint(const char *name, int lb, int ub){
02174 instrumentedPhase *thisPhaseData = controlPointManagerProxy.ckLocalBranch()->currentPhaseData();
02175 const int phase_id = controlPointManagerProxy.ckLocalBranch()->phase_id;
02176 std::map<std::string, pair<int,int> > &controlPointSpace = controlPointManagerProxy.ckLocalBranch()->controlPointSpace;
02177 int result;
02178
02179
02180 if( thisPhaseData->controlPoints.count(std::string(name))>0 && thisPhaseData->controlPoints[std::string(name)]>=0 ){
02181 CkPrintf("Already have control point values for phase. %s -> %d\n", name, (int)(thisPhaseData->controlPoints[std::string(name)]) );
02182 return thisPhaseData->controlPoints[std::string(name)];
02183 }
02184
02185
02186 if( phase_id < 4 || whichTuningScheme == AlwaysDefaults){
02187
02188
02189 result = lb;
02190
02191 if(defaultControlPointValues.count(std::string(name)) > 0){
02192 int v = defaultControlPointValues[std::string(name)];
02193 CkPrintf("Startup phase using default value of %d for \"%s\"\n", v, name);
02194 result = v;
02195 }
02196
02197 } else if(controlPointSpace.count(std::string(name)) == 0){
02198
02199 result = lb;
02200
02201 } else {
02202
02203 controlPointManagerProxy.ckLocalBranch()->generatePlan();
02204
02205
02206 result = controlPointManagerProxy.ckLocalBranch()->newControlPoints[std::string(name)];
02207
02208 }
02209
02210 if(!isInRange(result,ub,lb)){
02211 std::cerr << "control point = " << result << " is out of range: " << lb << " " << ub << std::endl;
02212 fflush(stdout);
02213 fflush(stderr);
02214 }
02215 CkAssert(isInRange(result,ub,lb));
02216 thisPhaseData->controlPoints[std::string(name)] = result;
02217
02218 controlPointSpace.insert(std::make_pair(std::string(name),std::make_pair(lb,ub)));
02219
02220 CkPrintf("Control Point \"%s\" for phase %d is: %d\n", name, phase_id, result);
02221
02222
02223 return result;
02224 }
02225
02226
02227 FDECL int FTN_NAME(CONTROLPOINT, controlpoint)(CMK_TYPEDEF_INT4 *lb, CMK_TYPEDEF_INT4 *ub){
02228 CkAssert(CkMyPe() == 0);
02229 return controlPoint("FortranCP", *lb, *ub);
02230 }
02231
02232
02233
02234
02236
02237
02238
02239
02240
02241
02242
02243
02244
02245
02246
02247
02248
02249
02250
02251
02252
02253
02268 void simplexScheme::adapt(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
02269
02270 if(useBestKnown){
02271 CkPrintf("Simplex Tuning: Simplex algorithm is done, using best known phase:\n");
02272 return;
02273 }
02274
02275
02276 if(firstSimplexPhase< 0){
02277 firstSimplexPhase = allData.phases.size()-1;
02278 CkPrintf("First simplex phase is %d\n", firstSimplexPhase);
02279 }
02280
02281 int n = controlPointSpace.size();
02282
02283 CkAssert(n>=2);
02284
02285
02286 if(simplexState == beginning){
02287
02288 if(allData.phases.size() < firstSimplexPhase + n+2 ){
02289 CkPrintf("Simplex Tuning: chose random configuration\n");
02290
02291
02292 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
02293 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
02294 const std::string &name = cpsIter->first;
02295 const std::pair<int,int> &bounds = cpsIter->second;
02296 const int lb = bounds.first;
02297 const int ub = bounds.second;
02298 newControlPoints[name] = lb + randInt(ub-lb+1, name.c_str(), phase_id);
02299 }
02300 } else {
02301
02302 for(int i=0; i<n+1; i++){
02303 simplexIndices.insert(firstSimplexPhase+i);
02304 }
02305 CkAssert(simplexIndices.size() == n+1);
02306
02307
02308 doReflection(controlPointSpace, newControlPoints, phase_id, allData);
02309
02310 }
02311
02312 } else if (simplexState == reflecting){
02313 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
02314 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
02315
02316
02317 double highestTimeForOthersInSimplex = 0.0;
02318 for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
02319 double t = allData.phases[*iter]->medianTime();
02320 if(*iter != worstPhase && t > highestTimeForOthersInSimplex) {
02321 highestTimeForOthersInSimplex = t;
02322 }
02323 }
02324
02325 CkPrintf("After reflecting, the median time for the phase is %f, previous worst phase %d time was %f\n", recentPhaseTime, worstPhase, previousWorstPhaseTime);
02326
02327 if(recentPhaseTime < highestTimeForOthersInSimplex){
02328
02329 doExpansion(controlPointSpace, newControlPoints, phase_id, allData);
02330
02331 } else if (recentPhaseTime <= highestTimeForOthersInSimplex){
02332
02333 CkAssert(simplexIndices.size() == n+1);
02334 simplexIndices.erase(worstPhase);
02335 CkAssert(simplexIndices.size() == n);
02336 simplexIndices.insert(pPhase);
02337 CkAssert(simplexIndices.size() == n+1);
02338
02339 doReflection(controlPointSpace, newControlPoints, phase_id, allData);
02340
02341 } else {
02342
02343 if(recentPhaseTime <= worstTime){
02344
02345 CkAssert(simplexIndices.size() == n+1);
02346 simplexIndices.erase(worstPhase);
02347 CkAssert(simplexIndices.size() == n);
02348 simplexIndices.insert(pPhase);
02349 CkAssert(simplexIndices.size() == n+1);
02350
02351 worstPhase = pPhase;
02352
02353 worst.clear();
02354 }
02355
02356
02357 doContraction(controlPointSpace, newControlPoints, phase_id, allData);
02358
02359 }
02360
02361 } else if (simplexState == doneExpanding){
02362 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
02363 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
02364
02365
02366
02367 if(recentPhaseTime < bestTime){
02368
02369 CkAssert(simplexIndices.size() == n+1);
02370 simplexIndices.erase(worstPhase);
02371 CkAssert(simplexIndices.size() == n);
02372 simplexIndices.insert(p2Phase);
02373 CkAssert(simplexIndices.size() == n+1);
02374 } else {
02375
02376 CkAssert(simplexIndices.size() == n+1);
02377 simplexIndices.erase(worstPhase);
02378 CkAssert(simplexIndices.size() == n);
02379 simplexIndices.insert(pPhase);
02380 CkAssert(simplexIndices.size() == n+1);
02381 }
02382
02383
02384 doReflection(controlPointSpace, newControlPoints, phase_id, allData);
02385
02386 } else if (simplexState == contracting){
02387 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
02388 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
02389
02390
02391
02392 if(recentPhaseTime <= worstTime){
02393
02394 CkPrintf("Replacing phase %d with %d\n", worstPhase, p2Phase);
02395 CkAssert(simplexIndices.size() == n+1);
02396 simplexIndices.erase(worstPhase);
02397 CkAssert(simplexIndices.size() == n);
02398 simplexIndices.insert(p2Phase);
02399 CkAssert(simplexIndices.size() == n+1);
02400
02401 doReflection(controlPointSpace, newControlPoints, phase_id, allData);
02402
02403 } else {
02404
02405 simplexState = stillContracting;
02406
02407
02408 stillMustContractList = simplexIndices;
02409
02410 CkPrintf("Simplex Tuning: Switched to state: stillContracting\n");
02411 }
02412
02413 } else if (simplexState == stillContracting){
02414 CkPrintf("Simplex Tuning: stillContracting found %d configurations left to try\n", stillMustContractList.size());
02415
02416 if(stillMustContractList.size()>0){
02417 int c = *stillMustContractList.begin();
02418 stillMustContractList.erase(c);
02419 CkPrintf("Simplex Tuning: stillContracting evaluating configuration derived from phase %d\n", c);
02420
02421 std::vector<double> cPhaseConfig = pointCoords(allData, c);
02422
02423
02424 int v = 0;
02425 for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
02426 const std::string &name = cpsIter->first;
02427 const std::pair<int,int> &bounds = cpsIter->second;
02428 const int lb = bounds.first;
02429 const int ub = bounds.second;
02430
02431 double val = (cPhaseConfig[v] + best[v])/2.0;
02432
02433 newControlPoints[name] = keepInRange((int)val,lb,ub);
02434 CkPrintf("Simplex Tuning: v=%d Reflected worst %d %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
02435 v++;
02436 }
02437 } else {
02438
02439 CkAssert(stillMustContractList.size() == 0);
02440 simplexIndices.clear();
02441 CkAssert(simplexIndices.size()==0);
02442 for(int i=0; i<n+1; i++){
02443 simplexIndices.insert(allData.phases.size()-2-i);
02444 }
02445 CkAssert(simplexIndices.size()==n+1);
02446
02447
02448 doReflection(controlPointSpace, newControlPoints, phase_id, allData);
02449
02450 }
02451
02452
02453 } else if (simplexState == expanding){
02454 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
02455 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
02456
02457
02458
02459 if(recentPhaseTime < bestTime){
02460
02461 CkAssert(simplexIndices.size() == n+1);
02462 simplexIndices.erase(worstPhase);
02463 CkAssert(simplexIndices.size() == n);
02464 simplexIndices.insert(p2Phase);
02465 CkAssert(simplexIndices.size() == n+1);
02466
02467 doReflection(controlPointSpace, newControlPoints, phase_id, allData);
02468
02469 } else {
02470
02471 CkAssert(simplexIndices.size() == n+1);
02472 simplexIndices.erase(worstPhase);
02473 CkAssert(simplexIndices.size() == n);
02474 simplexIndices.insert(pPhase);
02475 CkAssert(simplexIndices.size() == n+1);
02476
02477 doReflection(controlPointSpace, newControlPoints, phase_id, allData);
02478 }
02479
02480
02481 } else {
02482 CkAbort("Unknown simplexState");
02483 }
02484
02485 }
02486
02487
02488
02490 void simplexScheme::doReflection(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
02491
02492 int n = controlPointSpace.size();
02493
02494 printSimplex(allData);
02495
02496 computeCentroidBestWorst(controlPointSpace, newControlPoints, phase_id, allData);
02497
02498
02499
02500 double maxr = 0.0;
02501 for(int i=0; i<n+1; i++){
02502
02503 double r2 = 0.0;
02504 std::vector<double> p = pointCoords(allData, i);
02505 for(int d=0; d<p.size(); d++){
02506 double r1 = (p[d] * centroid[d]);
02507 r2 += r1*r1;
02508 }
02509 if(r2 > maxr)
02510 maxr = r2;
02511 }
02512
02513 #if 0
02514
02515 if(maxr < 10){
02516 useBestKnown = true;
02517 instrumentedPhase *best = allData.findBest();
02518 CkPrintf("Simplex Tuning: Simplex diameter is small, so switching over to best known phase:\n");
02519
02520 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
02521 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter) {
02522 const std::string &name = cpsIter->first;
02523 newControlPoints[name] = best->controlPoints[name];
02524 }
02525 }
02526 #endif
02527
02528
02529
02530 pPhase = allData.phases.size()-1;
02531 P.resize(n);
02532 for(int i=0; i<n; i++){
02533 P[i] = (1.0+alpha) * centroid[i] - alpha * worst[i] ;
02534 }
02535
02536 for(int i=0; i<P.size(); i++){
02537 CkPrintf("Simplex Tuning: P dimension %d is %f\n", i, P[i]);
02538 }
02539
02540
02541
02542 int v = 0;
02543 for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
02544 const std::string &name = cpsIter->first;
02545 const std::pair<int,int> &bounds = cpsIter->second;
02546 const int lb = bounds.first;
02547 const int ub = bounds.second;
02548 newControlPoints[name] = keepInRange((int)P[v],lb,ub);
02549 CkPrintf("Simplex Tuning: v=%d Reflected worst %d %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
02550 v++;
02551 }
02552
02553
02554
02555 simplexState = reflecting;
02556 CkPrintf("Simplex Tuning: Switched to state: reflecting\n");
02557
02558 }
02559
02560
02561
02562
02564 void simplexScheme::doExpansion(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
02565 int n = controlPointSpace.size();
02566 printSimplex(allData);
02567
02568
02569
02570
02571
02572
02573
02574 p2Phase = allData.phases.size()-1;
02575 P2.resize(n);
02576 for(int i=0; i<n; i++){
02577 P2[i] = ( (1.0+gamma) * P[i] - gamma * centroid[i] );
02578 }
02579
02580 for(int i=0; i<P2.size(); i++){
02581 CkPrintf("P2 aka P** dimension %d is %f\n", i, P2[i]);
02582 }
02583
02584
02585
02586 int v = 0;
02587 for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
02588 const std::string &name = cpsIter->first;
02589 const std::pair<int,int> &bounds = cpsIter->second;
02590 const int lb = bounds.first;
02591 const int ub = bounds.second;
02592 newControlPoints[name] = keepInRange((int)P2[v],lb,ub);
02593 CkPrintf("Simplex Tuning: v=%d worstPhase=%d Expanding %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
02594 v++;
02595 }
02596
02597
02598
02599 simplexState = doneExpanding;
02600 CkPrintf("Simplex Tuning: Switched to state: doneExpanding\n");
02601
02602 }
02603
02604
02605
02606
02608 void simplexScheme::doContraction(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
02609 int n = controlPointSpace.size();
02610 printSimplex(allData);
02611
02612
02613
02614
02615 p2Phase = allData.phases.size()-1;
02616 P2.resize(n);
02617 for(int i=0; i<n; i++){
02618 P2[i] = ( beta*worst[i] + (1.0-beta)*centroid[i] );
02619 }
02620
02621 for(int i=0; i<P2.size(); i++){
02622 CkPrintf("P2 aka P** dimension %d is %f\n", i, P2[i]);
02623 }
02624
02625
02626
02627 int v = 0;
02628 for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
02629 const std::string &name = cpsIter->first;
02630 const std::pair<int,int> &bounds = cpsIter->second;
02631 const int lb = bounds.first;
02632 const int ub = bounds.second;
02633 newControlPoints[name] = keepInRange((int)P2[v],lb,ub);
02634 CkPrintf("Simplex Tuning: v=%d worstPhase=%d Contracting %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
02635 v++;
02636 }
02637
02638
02639
02640 simplexState = contracting;
02641 CkPrintf("Simplex Tuning: Switched to state: contracting\n");
02642
02643 }
02644
02645
02646
02647
02648 void simplexScheme::computeCentroidBestWorst(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
02649 int n = controlPointSpace.size();
02650
02651
02652 worstPhase = -1;
02653 worstTime = -1.0;
02654 bestPhase = 10000000;
02655 bestTime = 10000000;
02656 for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
02657 double t = allData.phases[*iter]->medianTime();
02658 if(t > worstTime){
02659 worstTime = t;
02660 worstPhase = *iter;
02661 }
02662 if(t < bestTime){
02663 bestTime = t;
02664 bestPhase = *iter;
02665 }
02666 }
02667 CkAssert(worstTime != -1.0 && worstPhase != -1 && bestTime != 10000000 && bestPhase != 10000000);
02668
02669 best = pointCoords(allData, bestPhase);
02670 CkAssert(best.size() == n);
02671
02672 worst = pointCoords(allData, worstPhase);
02673 CkAssert(worst.size() == n);
02674
02675
02676 centroid.resize(n);
02677 for(int i=0; i<n; i++){
02678 centroid[i] = 0.0;
02679 }
02680
02681 int numPts = 0;
02682
02683 for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
02684 if(*iter != worstPhase){
02685 numPts ++;
02686
02687 int c = 0;
02688 for(std::map<std::string,int>::iterator citer = allData.phases[*iter]->controlPoints.begin(); citer != allData.phases[*iter]->controlPoints.end(); ++citer){
02689 centroid[c] += citer->second;
02690 c++;
02691 }
02692
02693 }
02694 }
02695
02696
02697 for(int v = 0; v<centroid.size(); v++) {
02698 centroid[v] /= (double)numPts;
02699 }
02700
02701 CkAssert(centroid.size() == n);
02702
02703 for(int i=0; i<centroid.size(); i++){
02704 CkPrintf("Centroid dimension %d is %f\n", i, centroid[i]);
02705 }
02706
02707
02708 }
02709
02710
02711
02712 std::vector<double> simplexScheme::pointCoords(instrumentedData &allData, int i){
02713 std::vector<double> result;
02714 for(std::map<std::string,int>::iterator citer = allData.phases[i]->controlPoints.begin(); citer != allData.phases[i]->controlPoints.end(); ++citer){
02715 result.push_back((double)citer->second);
02716 }
02717 return result;
02718 }
02719
02720
02721
02722
02723 void ControlPointWriteOutputToDisk(){
02724 CkAssert(CkMyPe() == 0);
02725 controlPointManagerProxy.ckLocalBranch()->writeOutputToDisk();
02726 }
02727
02728
02729
02732 #include "ControlPoints.def.h"
02733
02734 #endif