00001 #include <charm++.h>
00002
00003
00004
00005 #include "controlPoints.h"
00006 #include "trace-controlPoints.h"
00007 #include "LBDatabase.h"
00008 #include "controlPoints.h"
00009 #include "charm++.h"
00010 #include "trace-projections.h"
00011 #include <pathHistory.h>
00012 #include "cp_effects.h"
00013 #include <iostream>
00014 #include <math.h>
00015 #include <climits>
00016
00017 #if CMK_WITH_CONTROLPOINT
00018
00019 #define roundDouble(x) ((long)(x+0.5))
00020 #define myAbs(x) (((x)>=0.0)?(x):(-1.0*(x)))
00021 #define isInRange(v,a,b) ( ((v)<=(a)&&(v)>=(b)) || ((v)<=(b)&&(v)>=(a)) )
00022
00023 inline double closestInRange(double v, double a, double b){
00024 return (v<a) ? a : ((v>b)?b:v);
00025 }
00026
00027
00028
00029
00030
00034 using namespace std;
00035
00036 #define DEFAULT_CONTROL_POINT_SAMPLE_PERIOD 10000000
00037
00038
00039
00040
00041
00042 static void periodicProcessControlPoints(void* ptr, double currWallTime);
00043
00044
00045
00046 CProxy_controlPointManager controlPointManagerProxy;
00047 int random_seed;
00048 long controlPointSamplePeriod;
00049 int whichTuningScheme;
00050 bool writeDataFileAtShutdown;
00051 bool shouldFilterOutputData;
00052 bool loadDataFileAtStartup;
00053 bool shouldGatherMemoryUsage;
00054 bool shouldGatherUtilization;
00055 bool shouldGatherAll;
00056 char CPDataFilename[512];
00057
00058 extern bool enableCPTracing;
00059
00062 std::map<std::string, int> defaultControlPointValues;
00063
00064
00065
00066 typedef enum tuningSchemeEnum {RandomSelection, SimulatedAnnealing, ExhaustiveSearch, CriticalPathAutoPrioritization, UseBestKnownTiming, UseSteering, MemoryAware, Simplex, DivideAndConquer, AlwaysDefaults, LDBPeriod, LDBPeriodLinear, LDBPeriodQuadratic, LDBPeriodOptimal} tuningScheme;
00067
00068
00069
00070 void printTuningScheme(){
00071 switch(whichTuningScheme){
00072 case RandomSelection:
00073 CkPrintf("Tuning Scheme: RandomSelection\n");
00074 break;
00075 case AlwaysDefaults:
00076 CkPrintf("Tuning Scheme: AlwaysDefaults\n");
00077 break;
00078 case SimulatedAnnealing:
00079 CkPrintf("Tuning Scheme: SimulatedAnnealing\n");
00080 break;
00081 case ExhaustiveSearch:
00082 CkPrintf("Tuning Scheme: ExhaustiveSearch\n");
00083 break;
00084 case CriticalPathAutoPrioritization:
00085 CkPrintf("Tuning Scheme: CriticalPathAutoPrioritization\n");
00086 break;
00087 case UseBestKnownTiming:
00088 CkPrintf("Tuning Scheme: UseBestKnownTiming\n");
00089 break;
00090 case UseSteering:
00091 CkPrintf("Tuning Scheme: UseSteering\n");
00092 break;
00093 case MemoryAware:
00094 CkPrintf("Tuning Scheme: MemoryAware\n");
00095 break;
00096 case Simplex:
00097 CkPrintf("Tuning Scheme: Simplex Algorithm\n");
00098 break;
00099 case DivideAndConquer:
00100 CkPrintf("Tuning Scheme: Divide & Conquer Algorithm\n");
00101 break;
00102 case LDBPeriod:
00103 CkPrintf("Tuning Scheme: Load Balancing Period Steering (Constant Prediction)\n");
00104 break;
00105 case LDBPeriodLinear:
00106 CkPrintf("Tuning Scheme: Load Balancing Period Steering (Linear Prediction)\n");
00107 break;
00108 case LDBPeriodQuadratic:
00109 CkPrintf("Tuning Scheme: Load Balancing Period Steering (Quadratic Prediction)\n");
00110 break;
00111 case LDBPeriodOptimal:
00112 CkPrintf("Tuning Scheme: Load Balancing Period Steering (Optimal Prediction)\n");
00113 break;
00114 default:
00115 CkPrintf("Unknown tuning scheme\n");
00116 break;
00117 }
00118 fflush(stdout);
00119 }
00120
00121
00122
00124 CkReduction::reducerType idleTimeReductionType;
00126 CkReductionMsg *idleTimeReduction(int nMsg,CkReductionMsg **msgs){
00127 double ret[3];
00128 if(nMsg > 0){
00129 CkAssert(msgs[0]->getSize()==3*sizeof(double));
00130 double *m=(double *)msgs[0]->getData();
00131 ret[0]=m[0];
00132 ret[1]=m[1];
00133 ret[2]=m[2];
00134 }
00135 for (int i=1;i<nMsg;i++) {
00136 CkAssert(msgs[i]->getSize()==3*sizeof(double));
00137 double *m=(double *)msgs[i]->getData();
00138 ret[0]=min(ret[0],m[0]);
00139 ret[1]+=m[1];
00140 ret[2]=max(ret[2],m[2]);
00141 }
00142 return CkReductionMsg::buildNew(3*sizeof(double),ret);
00143 }
00144
00145
00146
00148 CkReduction::reducerType allMeasuresReductionType;
00150 #define ALL_REDUCTION_SIZE 12
00151 CkReductionMsg *allMeasuresReduction(int nMsg,CkReductionMsg **msgs){
00152 double ret[ALL_REDUCTION_SIZE];
00153 if(nMsg > 0){
00154 CkAssert(msgs[0]->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
00155 double *m=(double *)msgs[0]->getData();
00156 memcpy(ret, m, ALL_REDUCTION_SIZE*sizeof(double) );
00157 }
00158 for (int i=1;i<nMsg;i++) {
00159 CkAssert(msgs[i]->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
00160 double *m=(double *)msgs[i]->getData();
00161
00162 ret[0]=min(ret[0],m[0]);
00163 ret[1]+=m[1];
00164 ret[2]=max(ret[2],m[2]);
00165
00166 ret[3]=min(ret[3],m[3]);
00167 ret[4]+=m[4];
00168 ret[5]=max(ret[5],m[5]);
00169
00170 ret[6]=max(ret[6],m[6]);
00171
00172 ret[7]+=m[7];
00173 ret[8]+=m[8];
00174 ret[9]+=m[9];
00175 ret[10]+=m[10];
00176
00177 ret[11]+=m[11];
00178 }
00179 return CkReductionMsg::buildNew(ALL_REDUCTION_SIZE*sizeof(double),ret);
00180 }
00181
00182
00184 void registerCPReductions(void) {
00185 idleTimeReductionType=CkReduction::addReducer(idleTimeReduction, false, "idleTimeReduction");
00186 allMeasuresReductionType=CkReduction::addReducer(allMeasuresReduction, false, "allMeasuresReduction");
00187 }
00188
00189
00190
00191
00192
00193
00196 unsigned int randInt(unsigned int num, const char* name, int seed=0){
00197 CkAssert(num > 0);
00198
00199 unsigned long hash = 0;
00200 unsigned int c;
00201 unsigned char * str = (unsigned char*)name;
00202
00203 while ((c = *str++)){
00204 unsigned int c2 = (c+64)%128;
00205 unsigned int c3 = (c2*5953)%127;
00206 hash = c3 + (hash << 6) + (hash << 16) - hash;
00207 }
00208
00209 unsigned long shuffled1 = (hash*2083)%7907;
00210 unsigned long shuffled2 = (seed*4297)%2017;
00211
00212 unsigned long shuffled3 = (random_seed*4799)%7919;
00213
00214 unsigned int namehash = shuffled3 ^ shuffled1 ^ shuffled2;
00215
00216 unsigned int result = ((namehash * 6029) % 1117) % num;
00217
00218 CkAssert(result >=0 && result < num);
00219 return result;
00220 }
00221
00222
00223
00224 controlPointManager::controlPointManager() {
00225 generatedPlanForStep = -1;
00226
00227 exitWhenReady = false;
00228 alreadyRequestedMemoryUsage = false;
00229 alreadyRequestedIdleTime = false;
00230 alreadyRequestedAll = false;
00231
00232 instrumentedPhase * newPhase = new instrumentedPhase();
00233 allData.phases.push_back(newPhase);
00234
00235 frameworkShouldAdvancePhase = false;
00236 haveControlPointChangeCallback = false;
00237
00238
00239 phase_id = 0;
00240
00241 if(loadDataFileAtStartup){
00242 loadDataFile();
00243 }
00244
00245
00246 if(CkMyPe() == 0){
00247 CcdCallFnAfterOnPE((CcdVoidFn)periodicProcessControlPoints, (void*)NULL, controlPointSamplePeriod, CkMyPe());
00248 }
00249
00250 traceRegisterUserEvent("No currently executing message", 5000);
00251 traceRegisterUserEvent("Zero time along critical path", 5010);
00252 traceRegisterUserEvent("Positive total time along critical path", 5020);
00253 traceRegisterUserEvent("env->setPathHistory()", 6000);
00254 traceRegisterUserEvent("Critical Path", 5900);
00255 traceRegisterUserEvent("Table Entry", 5901);
00256
00257
00258 #if USER_EVENT_FOR_REGISTERTERMINALPATH
00259 traceRegisterUserEvent("registerTerminalPath", 100);
00260 #endif
00261
00262 }
00263
00264
00265 controlPointManager::~controlPointManager(){
00266
00267 }
00268
00269 void controlPointManager::pup(PUP::er &p)
00270 {
00271
00272
00273 p|generatedPlanForStep;
00274 p|exitWhenReady;
00275 p|alreadyRequestedMemoryUsage;
00276 p|alreadyRequestedIdleTime;
00277 p|alreadyRequestedAll;
00278 p|frameworkShouldAdvancePhase;
00279 p|haveControlPointChangeCallback;
00280 p|phase_id;
00281 }
00282
00283
00285 void controlPointManager::loadDataFile(){
00286 ifstream infile(CPDataFilename);
00287 vector<std::string> names;
00288 std::string line;
00289
00290 while(getline(infile,line)){
00291 if(line[0] != '#')
00292 break;
00293 }
00294
00295 int numTimings = 0;
00296 std::istringstream n(line);
00297 n >> numTimings;
00298
00299 while(getline(infile,line)){
00300 if(line[0] != '#')
00301 break;
00302 }
00303
00304 int numControlPointNames = 0;
00305 std::istringstream n2(line);
00306 n2 >> numControlPointNames;
00307
00308 for(int i=0; i<numControlPointNames; i++){
00309 getline(infile,line);
00310 names.push_back(line);
00311 }
00312
00313 for(int i=0;i<numTimings;i++){
00314 getline(infile,line);
00315 while(line[0] == '#')
00316 getline(infile,line);
00317
00318 instrumentedPhase * ips = new instrumentedPhase();
00319
00320 std::istringstream iss(line);
00321
00322
00323 iss >> ips->memoryUsageMB;
00324
00325
00326
00327 iss >> ips->idleTime.min;
00328 iss >> ips->idleTime.avg;
00329 iss >> ips->idleTime.max;
00330
00331
00332 iss >> ips->overheadTime.min;
00333 iss >> ips->overheadTime.avg;
00334 iss >> ips->overheadTime.max;
00335
00336
00337 iss >> ips->bytesPerInvoke;
00338
00339
00340 iss >> ips->grainSize;
00341
00342
00343 for(int cp=0;cp<numControlPointNames;cp++){
00344 int cpvalue;
00345 iss >> cpvalue;
00346 ips->controlPoints.insert(make_pair(names[cp],cpvalue));
00347 }
00348
00349
00350 double mt;
00351 iss >> mt;
00352
00353
00354
00355 double time;
00356
00357 while(iss >> time){
00358 ips->times.push_back(time);
00359 #if DEBUGPRINT > 5
00360 CkPrintf("read time %lf from file\n", time);
00361 #endif
00362 }
00363
00364 allData.phases.push_back(ips);
00365
00366 }
00367
00368 infile.close();
00369 }
00370
00371
00372
00374 void controlPointManager::writeDataFile(){
00375 CkPrintf("============= writeDataFile() to %s ============\n", CPDataFilename);
00376 ofstream outfile(CPDataFilename);
00377 allData.cleanupNames();
00378
00379
00380
00381
00382 if(shouldFilterOutputData){
00383 allData.verify();
00384 allData.filterOutIncompletePhases();
00385 }
00386
00387
00388
00389 if(allData.toString().length() > 10){
00390 outfile << allData.toString();
00391 } else {
00392 outfile << " No data available to save to disk " << endl;
00393 }
00394 outfile.close();
00395 }
00396
00398 void controlPointManager::setCPCallback(CkCallback cb, bool _frameworkShouldAdvancePhase){
00399 frameworkShouldAdvancePhase = _frameworkShouldAdvancePhase;
00400 controlPointChangeCallback = cb;
00401 haveControlPointChangeCallback = true;
00402 }
00403
00404
00406 void controlPointManager::setFrameworkAdvancePhase(bool _frameworkShouldAdvancePhase){
00407 frameworkShouldAdvancePhase = _frameworkShouldAdvancePhase;
00408 }
00409
00412 void controlPointManager::processControlPoints(){
00413
00414 #if DEBUGPRINT
00415 CkPrintf("[%d] processControlPoints() haveControlPointChangeCallback=%d frameworkShouldAdvancePhase=%d\n", CkMyPe(), (int)haveControlPointChangeCallback, (int)frameworkShouldAdvancePhase);
00416 #endif
00417
00418
00419
00420
00421 const int s = allData.phases.size();
00422
00423 #if DEBUGPRINT
00424 CkPrintf("\n\nExamining critical paths and priorities and idle times (num phases=%d)\n", s );
00425 for(int p=0;p<s;++p){
00426 const instrumentedPhase &phase = allData.phases[p];
00427 const idleTimeContainer &idle = phase.idleTime;
00428
00429 vector<double> const × = phase.times;
00430
00431 CkPrintf("Phase %d:\n", p);
00432 idle.print();
00433
00434
00435
00436
00437
00438
00439
00440
00441
00442
00443 CkPrintf("Timings:\n");
00444 for(int i=0;i<times.size(); i++){
00445 CkPrintf("%lf ", times[i]);
00446 }
00447 CkPrintf("\n");
00448
00449 }
00450
00451 CkPrintf("\n\n");
00452
00453
00454 #endif
00455
00456
00457
00458
00459
00460 #if 0
00461 if( s%5 == 4) {
00462
00463
00464 int whichPhase=-1;
00465 for(int p=0; p<s; ++p){
00466 const instrumentedPhase &phase = allData.phases[p];
00467 const idleTimeContainer &idle = phase.idleTime;
00468 if(idle.isValid() && phase.criticalPaths.size()>0 ){
00469 whichPhase = p;
00470 }
00471 }
00472
00473
00474 CkPrintf("Examining phase %d which has a valid idle time and critical paths\n", whichPhase);
00475 const instrumentedPhase &phase = allData.phases[whichPhase];
00476 const idleTimeContainer &idle = phase.idleTime;
00477
00478 if(idle.min > 0.1){
00479 CkPrintf("Min PE idle is HIGH. %.2lf%% > 10%%\n", idle.min*100.0);
00480
00481
00482 int medianCriticalPathIdx = phase.medianCriticalPathIdx();
00483 const PathHistory &path = phase.criticalPaths[medianCriticalPathIdx];
00484 CkAssert(phase.criticalPaths.size() > 0);
00485 CkAssert(phase.criticalPaths.size() > medianCriticalPathIdx);
00486
00487 CkPrintf("Median Critical Path has time %lf\n", path.getTotalTime());
00488
00489 if(phase.times[medianCriticalPathIdx] > 1.2 * path.getTotalTime()){
00490 CkPrintf("The application step(%lf) is taking significantly longer than the critical path(%lf). BAD\n",phase.times[medianCriticalPathIdx], path.getTotalTime() );
00491
00492
00493 CkPrintf("Finding control points related to the critical path\n");
00494 int cpcount = 0;
00495 std::set<std::string> controlPointsAffectingCriticalPath;
00496
00497
00498 for(int e=0;e<path.getNumUsed();e++){
00499 if(path.getUsedCount(e)>0){
00500 int ep = path.getUsedEp(e);
00501
00502 std::map<std::string, std::set<int> >::iterator iter;
00503 for(iter=affectsPrioritiesEP.begin(); iter!= affectsPrioritiesEP.end(); ++iter){
00504 if(iter->second.count(ep)>0){
00505 controlPointsAffectingCriticalPath.insert(iter->first);
00506 CkPrintf("Control Point \"%s\" affects the critical path\n", iter->first.c_str());
00507 cpcount++;
00508 }
00509 }
00510
00511 }
00512 }
00513
00514
00515 if(cpcount==0){
00516 CkPrintf("No control points are known to affect the critical path\n");
00517 } else {
00518 double beginTime = CmiWallTimer();
00519
00520 CkPrintf("Attempting to modify control point values for %d control points that affect the critical path\n", controlPointsAffectingCriticalPath.size());
00521
00522 newControlPoints = phase.controlPoints;
00523
00524 if(frameworkShouldAdvancePhase){
00525 gotoNextPhase();
00526 }
00527
00528 if(haveControlPointChangeCallback){
00529 #if DEBUGPRINT
00530 CkPrintf("Calling control point change callback\n");
00531 #endif
00532
00533 controlPointMsg *msg = new (8*sizeof(int)) controlPointMsg;
00534 *((int*)CkPriorityPtr(msg)) = -INT_MAX;
00535 CkSetQueueing(msg, CK_QUEUEING_IFIFO);
00536 controlPointChangeCallback.send(msg);
00537
00538 }
00539
00540
00541
00542
00543 char textDescription[4096*2];
00544 textDescription[0] = '\0';
00545
00546 std::map<std::string,int>::iterator newCP;
00547 for(newCP = newControlPoints.begin(); newCP != newControlPoints.end(); ++ newCP){
00548 if( controlPointsAffectingCriticalPath.count(newCP->first) > 0 ){
00549
00550 int lowerbound = controlPointSpace[newCP->first].first;
00551 if(newCP->second > lowerbound){
00552 newControlPointsAvailable = true;
00553 newCP->second --;
00554 }
00555 }
00556 }
00557
00558
00559 if(1){
00560 std::map<std::string,int>::iterator newCP;
00561 for(newCP = newControlPoints.begin(); newCP != newControlPoints.end(); ++ newCP){
00562 sprintf(textDescription+strlen(textDescription), "<br>\"%s\"=%d", newCP->first.c_str(), newCP->second);
00563 }
00564 }
00565
00566 char *userEventString = new char[4096*2];
00567 sprintf(userEventString, "Adjusting control points affecting critical path: %s ***", textDescription);
00568
00569 static int userEventCounter = 20000;
00570 CkPrintf("USER EVENT: %s\n", userEventString);
00571
00572 traceRegisterUserEvent(userEventString, userEventCounter);
00573 traceUserBracketEvent(userEventCounter, beginTime, CmiWallTimer());
00574 userEventCounter++;
00575
00576 }
00577
00578 } else {
00579 CkPrintf("The application step(%lf) is dominated mostly by the critical path(%lf). GOOD\n",phase.times[medianCriticalPathIdx], path.getTotalTime() );
00580 }
00581
00582
00583 }
00584
00585 } else {
00586
00587
00588 }
00589
00590 #endif
00591
00592
00593
00594 if(frameworkShouldAdvancePhase){
00595 gotoNextPhase();
00596 }
00597
00598 if(haveControlPointChangeCallback){
00599
00600 controlPointMsg *msg = new (8*sizeof(int)) controlPointMsg;
00601 *((int*)CkPriorityPtr(msg)) = -INT_MAX;
00602 CkSetQueueing(msg, CK_QUEUEING_IFIFO);
00603 controlPointChangeCallback.send(msg);
00604 }
00605
00606 }
00607
00609 bool controlPointManager::controlPointAffectsThisEP(int ep){
00610 std::map<std::string, std::set<int> >::iterator iter;
00611 for(iter=affectsPrioritiesEP.begin(); iter!= affectsPrioritiesEP.end(); ++iter){
00612 if(iter->second.count(ep)>0){
00613 return true;
00614 }
00615 }
00616 return false;
00617 }
00618
00620 bool controlPointManager::controlPointAffectsThisArray(int array){
00621 std::map<std::string, std::set<int> >::iterator iter;
00622 for(iter=affectsPrioritiesArray.begin(); iter!= affectsPrioritiesArray.end(); ++iter){
00623 if(iter->second.count(array)>0){
00624 return true;
00625 }
00626 }
00627 return false;
00628 }
00629
00630
00632 instrumentedPhase * controlPointManager::currentPhaseData(){
00633 int s = allData.phases.size();
00634 CkAssert(s>=1);
00635 return allData.phases[s-1];
00636 }
00637
00638
00640 instrumentedPhase * controlPointManager::previousPhaseData(){
00641 int s = allData.phases.size();
00642 if(s >= 2 && phase_id > 0) {
00643 return allData.phases[s-2];
00644 } else {
00645 return NULL;
00646 }
00647 }
00648
00650 instrumentedPhase * controlPointManager::twoAgoPhaseData(){
00651 int s = allData.phases.size();
00652 if(s >= 3 && phase_id > 0) {
00653 return allData.phases[s-3];
00654 } else {
00655 return NULL;
00656 }
00657 }
00658
00659
00661 void controlPointManager::gotoNextPhase(){
00662 CkPrintf("gotoNextPhase shouldGatherAll=%d enableCPTracing=%d\n", (int)shouldGatherAll, (int)enableCPTracing);
00663 fflush(stdout);
00664
00665 if(enableCPTracing){
00666 if(shouldGatherAll && CkMyPe() == 0 && !alreadyRequestedAll){
00667 alreadyRequestedAll = true;
00668 CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherAll(NULL), 0, thisProxy);
00669 CkPrintf("Requesting all measurements\n");
00670 thisProxy.requestAll(*cb);
00671 delete cb;
00672
00673 } else {
00674
00675 if(shouldGatherMemoryUsage && CkMyPe() == 0 && !alreadyRequestedMemoryUsage){
00676 alreadyRequestedMemoryUsage = true;
00677 CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherMemoryUsage(NULL), 0, thisProxy);
00678 thisProxy.requestMemoryUsage(*cb);
00679 delete cb;
00680 }
00681
00682 if(shouldGatherUtilization && CkMyPe() == 0 && !alreadyRequestedIdleTime){
00683 alreadyRequestedIdleTime = true;
00684 CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherIdleTime(NULL), 0, thisProxy);
00685 thisProxy.requestIdleTime(*cb);
00686 delete cb;
00687 }
00688 }
00689 }
00690
00691
00692
00693 #if CMK_LBDB_ON && 0
00694 LBDatabase * myLBdatabase = LBDatabaseObj();
00695 LBDB * myLBDB = myLBdatabase->getLBDB();
00696 const CkVec<LBObj*> objs = myLBDB->getObjs();
00697
00698 LBRealType maxObjWallTime = -1.0;
00699
00700 for(int i=0;i<objs.length();i++){
00701 LBObj* o = objs[i];
00702 const LDObjData d = o->ObjData();
00703 LBRealType cpuTime = d.cpuTime;
00704 LBRealType wallTime = d.wallTime;
00705
00706 CkPrintf("[%d] LBDB Object[%d]: cpuTime=%f wallTime=%f\n", CkMyPe(), i, cpuTime, wallTime);
00707 if(wallTime > maxObjWallTime){
00708
00709 }
00710
00711 }
00712
00713 myLBDB->ClearLoads();
00714
00715 #endif
00716
00717
00718
00719
00720 phase_id++;
00721
00722
00723
00724 instrumentedPhase * newPhase = new instrumentedPhase();
00725 allData.phases.push_back(newPhase);
00726
00727 CkPrintf("Now in phase %d allData.phases.size()=%d\n", phase_id, allData.phases.size());
00728
00729 }
00730
00732 void controlPointManager::setTiming(double time){
00733 currentPhaseData()->times.push_back(time);
00734
00735 #if USE_CRITICAL_PATH_HEADER_ARRAY
00736
00737
00738
00739
00740
00741
00742
00743
00744
00745
00746
00747
00748 #endif
00749
00750 }
00751
00753 void controlPointManager::requestIdleTime(CkCallback cb){
00754 CkAssert(enableCPTracing);
00755
00756 double i = localControlPointTracingInstance()->idleRatio();
00757 double idle[3];
00758 idle[0] = i;
00759 idle[1] = i;
00760 idle[2] = i;
00761
00762
00763
00764 localControlPointTracingInstance()->resetTimings();
00765
00766 contribute(3*sizeof(double),idle,idleTimeReductionType, cb);
00767 }
00768
00770 void controlPointManager::gatherIdleTime(CkReductionMsg *msg){
00771 CkAssert(enableCPTracing);
00772
00773 int size=msg->getSize() / sizeof(double);
00774 CkAssert(size==3);
00775 double *r=(double *) msg->getData();
00776
00777 instrumentedPhase* prevPhase = previousPhaseData();
00778 if(prevPhase != NULL){
00779 prevPhase->idleTime.min = r[0];
00780 prevPhase->idleTime.avg = r[1]/CkNumPes();
00781 prevPhase->idleTime.max = r[2];
00782 prevPhase->idleTime.print();
00783 CkPrintf("Stored idle time min=%lf avg=%lf max=%lf in prevPhase=%p\n", prevPhase->idleTime.min, prevPhase->idleTime.avg, prevPhase->idleTime.max, prevPhase);
00784 } else {
00785 CkPrintf("There is no previous phase to store the idle time measurements\n");
00786 }
00787
00788 alreadyRequestedIdleTime = false;
00789 checkForShutdown();
00790 delete msg;
00791 }
00792
00793
00794
00795
00796
00797
00799 void controlPointManager::requestAll(CkCallback cb){
00800 CkAssert(enableCPTracing);
00801
00802 TraceControlPoints *t = localControlPointTracingInstance();
00803
00804 double data[ALL_REDUCTION_SIZE];
00805
00806 double *idle = data;
00807 double *over = data+3;
00808 double *mem = data+6;
00809 double *msgBytes = data+7;
00810 double *grainsize = data+11;
00811
00812 const double i = t->idleRatio();
00813 idle[0] = i;
00814 idle[1] = i;
00815 idle[2] = i;
00816
00817 const double o = t->overheadRatio();
00818 over[0] = o;
00819 over[1] = o;
00820 over[2] = o;
00821
00822 const double m = t->memoryUsageMB();
00823 mem[0] = m;
00824
00825 msgBytes[0] = t->b2;
00826 msgBytes[1] = t->b3;
00827 msgBytes[2] = t->b2mlen;
00828 msgBytes[3] = t->b3mlen;
00829
00830 grainsize[0] = t->grainSize();
00831
00832 localControlPointTracingInstance()->resetAll();
00833
00834 contribute(ALL_REDUCTION_SIZE*sizeof(double),data,allMeasuresReductionType, cb);
00835 }
00836
00838 void controlPointManager::gatherAll(CkReductionMsg *msg){
00839 CkAssert(enableCPTracing);
00840
00841 CkAssert(msg->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
00842 int size=msg->getSize() / sizeof(double);
00843 double *data=(double *) msg->getData();
00844
00845 double *idle = data;
00846 double *over = data+3;
00847 double *mem = data+6;
00848 double *msgBytes = data+7;
00849 double *granularity = data+11;
00850
00851
00852
00853
00854 instrumentedPhase* prevPhase = previousPhaseData();
00855 if(prevPhase != NULL){
00856 prevPhase->idleTime.min = idle[0];
00857 prevPhase->idleTime.avg = idle[1]/CkNumPes();
00858 prevPhase->idleTime.max = idle[2];
00859
00860 prevPhase->overheadTime.min = over[0];
00861 prevPhase->overheadTime.avg = over[1]/CkNumPes();
00862 prevPhase->overheadTime.max = over[2];
00863
00864 prevPhase->memoryUsageMB = mem[0];
00865
00866 CkPrintf("Stored idle time min=%lf avg=%lf max=%lf mem=%lf in prevPhase=%p\n", (double)prevPhase->idleTime.min, prevPhase->idleTime.avg, prevPhase->idleTime.max, (double)prevPhase->memoryUsageMB, prevPhase);
00867
00868 double bytesPerInvoke2 = msgBytes[2] / msgBytes[0];
00869 double bytesPerInvoke3 = msgBytes[3] / msgBytes[1];
00870
00872 prevPhase->grainSize = (granularity[0] / (double)CkNumPes());
00873
00874 CkPrintf("Bytes Per Invokation 2 = %f\n", bytesPerInvoke2);
00875 CkPrintf("Bytes Per Invokation 3 = %f\n", bytesPerInvoke3);
00876
00877 CkPrintf("Bytes Per us of work 2 = %f\n", bytesPerInvoke2/prevPhase->grainSize);
00878 CkPrintf("Bytes Per us of work 3 = %f\n", bytesPerInvoke3/prevPhase->grainSize);
00879
00880 if(bytesPerInvoke2 > bytesPerInvoke3)
00881 prevPhase->bytesPerInvoke = bytesPerInvoke2;
00882 else
00883 prevPhase->bytesPerInvoke = bytesPerInvoke3;
00884
00885
00886
00887
00888
00889
00890
00891
00892 } else {
00893 CkPrintf("There is no previous phase to store measurements\n");
00894 }
00895
00896 alreadyRequestedAll = false;
00897 checkForShutdown();
00898 delete msg;
00899 }
00900
00901
00902
00903
00904 void controlPointManager::checkForShutdown(){
00905 if( exitWhenReady && !alreadyRequestedAll && !alreadyRequestedMemoryUsage && !alreadyRequestedIdleTime && CkMyPe()==0){
00906 doExitNow();
00907 }
00908 }
00909
00910
00911 void controlPointManager::exitIfReady(){
00912 if( !alreadyRequestedMemoryUsage && !alreadyRequestedAll && !alreadyRequestedIdleTime && CkMyPe()==0){
00913
00914 doExitNow();
00915 } else {
00916
00917 exitWhenReady = true;
00918 }
00919 }
00920
00921
00922
00923 void controlPointManager::doExitNow(){
00924 _TRACE_BEGIN_EXECUTE_DETAILED(-1, -1, _threadEP,CkMyPe(), 0, NULL, this);
00925 writeOutputToDisk();
00926
00927 CkContinueExit();
00928 }
00929
00930 void controlPointManager::writeOutputToDisk(){
00931 if(writeDataFileAtShutdown){
00932 controlPointManagerProxy.ckLocalBranch()->writeDataFile();
00933 }
00934 }
00935
00936
00938 void controlPointManager::requestMemoryUsage(CkCallback cb){
00939 int m = CmiMaxMemoryUsage() / 1024 / 1024;
00940 CmiResetMaxMemory();
00941
00942 contribute(sizeof(int),&m,CkReduction::max_int, cb);
00943 }
00944
00946 void controlPointManager::gatherMemoryUsage(CkReductionMsg *msg){
00947 int size=msg->getSize() / sizeof(int);
00948 CkAssert(size==1);
00949 int *m=(int *) msg->getData();
00950
00951 CkPrintf("[%d] Max Memory Usage for all processors is %d MB\n", CkMyPe(), m[0]);
00952
00953 instrumentedPhase* prevPhase = previousPhaseData();
00954 if(prevPhase != NULL){
00955 prevPhase->memoryUsageMB = m[0];
00956 } else {
00957 CkPrintf("No place to store memory usage");
00958 }
00959
00960 alreadyRequestedMemoryUsage = false;
00961 checkForShutdown();
00962 delete msg;
00963 }
00964
00965
00966
00967
00968
00969
00970
00971
00972
00973
00974
00975
00976
00977
00978
00979
00980
00981
00982
00983
00984
00985
00986
00987
00988
00989
00990
00991
00992
00993
00994
00995
00996
00997
00998
00999
01000
01001
01002
01003
01004
01005
01006
01007
01008
01009
01010
01011
01012
01013
01014
01015
01016
01017
01018
01019
01020
01021
01022
01023
01024
01026 void gotoNextPhase(){
01027 controlPointManagerProxy.ckLocalBranch()->gotoNextPhase();
01028 }
01029
01030 FLINKAGE void FTN_NAME(GOTONEXTPHASE,gotonextphase)()
01031 {
01032 gotoNextPhase();
01033 }
01034
01035
01037 class controlPointMain : public CBase_controlPointMain {
01038 public:
01039 controlPointMain(CkArgMsg* args){
01040 #if OLDRANDSEED
01041 struct timeval tp;
01042 gettimeofday(& tp, NULL);
01043 random_seed = (int)tp.tv_usec ^ (int)tp.tv_sec;
01044 #else
01045 double time = CmiWallTimer();
01046 int sec = (int)time;
01047 int usec = (int)((time - (double)sec)*1000000.0);
01048 random_seed = sec ^ usec;
01049 #endif
01050
01051
01052 double period, periodms;
01053 bool haveSamplePeriod = CmiGetArgDoubleDesc(args->argv,"+CPSamplePeriod", &period,"The time between Control Point Framework samples (in seconds)");
01054 bool haveSamplePeriodMs = CmiGetArgDoubleDesc(args->argv,"+CPSamplePeriodMs", &periodms,"The time between Control Point Framework samples (in milliseconds)");
01055 if(haveSamplePeriod){
01056 CkPrintf("controlPointSamplePeriod = %lf sec\n", period);
01057 controlPointSamplePeriod = (int)(period * 1000);
01058 } else if(haveSamplePeriodMs){
01059 CkPrintf("controlPointSamplePeriodMs = %lf ms\n", periodms);
01060 controlPointSamplePeriod = periodms;
01061 } else {
01062 controlPointSamplePeriod = DEFAULT_CONTROL_POINT_SAMPLE_PERIOD;
01063 }
01064
01065
01066
01067 whichTuningScheme = RandomSelection;
01068
01069
01070 if( CmiGetArgFlagDesc(args->argv,"+CPSchemeRandom","Randomly Select Control Point Values") ){
01071 whichTuningScheme = RandomSelection;
01072 } else if ( CmiGetArgFlagDesc(args->argv,"+CPExhaustiveSearch","Exhaustive Search of Control Point Values") ){
01073 whichTuningScheme = ExhaustiveSearch;
01074 } else if ( CmiGetArgFlagDesc(args->argv,"+CPAlwaysUseDefaults","Always Use The Provided Default Control Point Values") ){
01075 whichTuningScheme = AlwaysDefaults;
01076 } else if ( CmiGetArgFlagDesc(args->argv,"+CPSimulAnneal","Simulated Annealing Search of Control Point Values") ){
01077 whichTuningScheme = SimulatedAnnealing;
01078 } else if ( CmiGetArgFlagDesc(args->argv,"+CPCriticalPathPrio","Use Critical Path to adapt Control Point Values") ){
01079 whichTuningScheme = CriticalPathAutoPrioritization;
01080 } else if ( CmiGetArgFlagDesc(args->argv,"+CPBestKnown","Use BestKnown Timing for Control Point Values") ){
01081 whichTuningScheme = UseBestKnownTiming;
01082 } else if ( CmiGetArgFlagDesc(args->argv,"+CPSteering","Use Steering to adjust Control Point Values") ){
01083 whichTuningScheme = UseSteering;
01084 } else if ( CmiGetArgFlagDesc(args->argv,"+CPMemoryAware", "Adjust control points to approach available memory") ){
01085 whichTuningScheme = MemoryAware;
01086 } else if ( CmiGetArgFlagDesc(args->argv,"+CPSimplex", "Nelder-Mead Simplex Algorithm") ){
01087 whichTuningScheme = Simplex;
01088 } else if ( CmiGetArgFlagDesc(args->argv,"+CPDivideConquer", "A divide and conquer program specific steering scheme") ){
01089 whichTuningScheme = DivideAndConquer;
01090 } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriod", "Adjust the load balancing period (Constant Predictor)") ){
01091 whichTuningScheme = LDBPeriod;
01092 } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriodLinear", "Adjust the load balancing period (Linear Predictor)") ){
01093 whichTuningScheme = LDBPeriodLinear;
01094 } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriodQuadratic", "Adjust the load balancing period (Quadratic Predictor)") ){
01095 whichTuningScheme = LDBPeriodQuadratic;
01096 } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriodOptimal", "Adjust the load balancing period (Optimal Predictor)") ){
01097 whichTuningScheme = LDBPeriodOptimal;
01098 }
01099
01100 char *defValStr = NULL;
01101 if( CmiGetArgStringDesc(args->argv, "+CPDefaultValues", &defValStr, "Specify the default control point values used for the first couple phases") ){
01102 CkPrintf("You specified default value string: %s\n", defValStr);
01103
01104
01105
01106
01107 char *tok = strtok(defValStr, ",");
01108 while (tok) {
01109
01110 int len = strlen(tok);
01111 char *eqsign = strchr(tok, '=');
01112 if(eqsign != NULL && eqsign != tok){
01113 *eqsign = '\0';
01114 char *cpname = tok;
01115 std::string cpName(tok);
01116 char *cpDefVal = eqsign+1;
01117 int v=-1;
01118 if(sscanf(cpDefVal, "%d", &v) == 1){
01119 CkPrintf("Command Line Argument Specifies that Control Point \"%s\" defaults to %d\n", cpname, v);
01120 CkAssert(CkMyPe() == 0);
01121 defaultControlPointValues[cpName] = v;
01122 }
01123 }
01124 tok = strtok(NULL, ",");
01125 }
01126
01127 }
01128
01129 shouldGatherAll = false;
01130 shouldGatherMemoryUsage = false;
01131 shouldGatherUtilization = false;
01132
01133 if ( CmiGetArgFlagDesc(args->argv,"+CPGatherAll","Gather all types of measurements for each phase") ){
01134 shouldGatherAll = true;
01135 } else {
01136 if ( CmiGetArgFlagDesc(args->argv,"+CPGatherMemoryUsage","Gather memory usage after each phase") ){
01137 shouldGatherMemoryUsage = true;
01138 }
01139 if ( CmiGetArgFlagDesc(args->argv,"+CPGatherUtilization","Gather utilization & Idle time after each phase") ){
01140 shouldGatherUtilization = true;
01141 }
01142 }
01143
01144 writeDataFileAtShutdown = false;
01145 if( CmiGetArgFlagDesc(args->argv,"+CPSaveData","Save Control Point timings & configurations at completion") ){
01146 writeDataFileAtShutdown = true;
01147 }
01148
01149 shouldFilterOutputData = true;
01150 if( CmiGetArgFlagDesc(args->argv,"+CPNoFilterData","Don't filter phases from output data") ){
01151 shouldFilterOutputData = false;
01152 }
01153
01154
01155 loadDataFileAtStartup = false;
01156 if( CmiGetArgFlagDesc(args->argv,"+CPLoadData","Load Control Point timings & configurations at startup") ){
01157 loadDataFileAtStartup = true;
01158 }
01159
01160 char *cpdatafile;
01161 if( CmiGetArgStringDesc(args->argv, "+CPDataFilename", &cpdatafile, "Specify control point data file to save/load") ){
01162 sprintf(CPDataFilename, "%s", cpdatafile);
01163 } else {
01164 sprintf(CPDataFilename, "controlPointData.txt");
01165 }
01166
01167
01168 controlPointManagerProxy = CProxy_controlPointManager::ckNew();
01169
01170 delete args;
01171 }
01172 ~controlPointMain(){}
01173 };
01174
01176 void registerCPChangeCallback(CkCallback cb, bool frameworkShouldAdvancePhase){
01177 CkAssert(CkMyPe() == 0);
01178 CkPrintf("Application has registered a control point change callback\n");
01179 controlPointManagerProxy.ckLocalBranch()->setCPCallback(cb, frameworkShouldAdvancePhase);
01180 }
01181
01183 void setFrameworkAdvancePhase(bool frameworkShouldAdvancePhase){
01184 if(CkMyPe() == 0){
01185 CkPrintf("Application has specified that framework should %sadvance phase\n", frameworkShouldAdvancePhase?"":"not ");
01186 controlPointManagerProxy.ckLocalBranch()->setFrameworkAdvancePhase(frameworkShouldAdvancePhase);
01187 }
01188 }
01189
01191 void registerControlPointTiming(double time){
01192 CkAssert(CkMyPe() == 0);
01193 #if DEBUGPRINT>0
01194 CkPrintf("Program registering its own timing with registerControlPointTiming(time=%lf)\n", time);
01195 #endif
01196 controlPointManagerProxy.ckLocalBranch()->setTiming(time);
01197 }
01198
01200 void controlPointTimingStamp() {
01201 CkAssert(CkMyPe() == 0);
01202 #if DEBUGPRINT>0
01203 CkPrintf("Program registering its own timing with controlPointTimingStamp()\n", time);
01204 #endif
01205
01206 static double prev_time = 0.0;
01207 double t = CmiWallTimer();
01208 double duration = t - prev_time;
01209 prev_time = t;
01210
01211 controlPointManagerProxy.ckLocalBranch()->setTiming(duration);
01212 }
01213
01214 FLINKAGE void FTN_NAME(CONTROLPOINTTIMINGSTAMP,controlpointtimingstamp)()
01215 {
01216 controlPointTimingStamp();
01217 }
01218
01219
01220 FLINKAGE void FTN_NAME(SETFRAMEWORKADVANCEPHASEF,setframeworkadvancephasef)(CMK_TYPEDEF_INT4 *value)
01221 {
01222 setFrameworkAdvancePhase(*value);
01223 }
01224
01225
01226
01227
01229 extern "C" void controlPointShutdown(){
01230 if(CkMyPe() == 0){
01231
01232 if (!controlPointManagerProxy.ckGetGroupID().isZero()) {
01233
01234 controlPointManagerProxy.ckLocalBranch()->exitIfReady();
01235 } else {
01236 CkContinueExit();
01237 }
01238 }
01239 }
01240
01242 void controlPointInitNode(){
01243 registerExitFn(controlPointShutdown);
01244 }
01245
01247 static void periodicProcessControlPoints(void* ptr, double currWallTime){
01248 #ifdef DEBUGPRINT
01249 CkPrintf("[%d] periodicProcessControlPoints()\n", CkMyPe());
01250 #endif
01251 controlPointManagerProxy.ckLocalBranch()->processControlPoints();
01252 CcdCallFnAfterOnPE((CcdVoidFn)periodicProcessControlPoints, (void*)NULL, controlPointSamplePeriod, CkMyPe());
01253 }
01254
01255
01256
01257
01258
01262 void controlPointManager::generatePlan() {
01263 const double startGenerateTime = CmiWallTimer();
01264 const int phase_id = this->phase_id;
01265 const int effective_phase = allData.phases.size();
01266
01267
01268 if(generatedPlanForStep == phase_id)
01269 return;
01270 generatedPlanForStep = phase_id;
01271
01272 CkPrintf("Generating Plan for phase %d\n", phase_id);
01273 printTuningScheme();
01274
01275
01276 instrumentedPhase *prevPhase = previousPhaseData();
01277 for(std::map<std::string, int >::const_iterator cpsIter=prevPhase->controlPoints.begin(); cpsIter != prevPhase->controlPoints.end(); ++cpsIter){
01278 newControlPoints[cpsIter->first] = cpsIter->second;
01279 }
01280
01281
01282 if( whichTuningScheme == RandomSelection ){
01283 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
01284 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
01285 const std::string &name = cpsIter->first;
01286 const std::pair<int,int> &bounds = cpsIter->second;
01287 const int lb = bounds.first;
01288 const int ub = bounds.second;
01289 newControlPoints[name] = lb + randInt(ub-lb+1, name.c_str(), phase_id);
01290 }
01291 } else if( whichTuningScheme == CriticalPathAutoPrioritization) {
01292
01293
01294
01295
01296
01297
01298
01299
01300
01301 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
01302 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
01303 const std::string &name = cpsIter->first;
01304 const std::pair<int,int> &bounds = cpsIter->second;
01305 const int lb = bounds.first;
01306 const int ub = bounds.second;
01307 newControlPoints[name] = (lb+ub)/2;
01308 }
01309
01310 } else if ( whichTuningScheme == MemoryAware ) {
01311
01312
01313
01314
01315 instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
01316 instrumentedPhase *prevPhase = previousPhaseData();
01317
01318 if(phase_id%2 == 0){
01319 CkPrintf("Steering (memory based) based on 2 phases ago:\n");
01320 twoAgoPhase->print();
01321 CkPrintf("\n");
01322 fflush(stdout);
01323
01324
01325 double memUsage = twoAgoPhase->memoryUsageMB;
01326 CkPrintf("Steering (memory based) encountered memory usage of (%f MB)\n", memUsage);
01327 fflush(stdout);
01328 if(memUsage < 1100.0 && memUsage > 0.0){
01329 CkPrintf("Steering (memory based) encountered low memory usage (%f) < 1200 \n", memUsage);
01330 CkPrintf("Steering (memory based) controlPointSpace.size()=\n", controlPointSpace.size());
01331
01332
01333 newControlPoints = twoAgoPhase->controlPoints;
01334
01335
01336 CkPrintf("Steering (memory based) initialized plan\n");
01337 fflush(stdout);
01338
01339
01340 std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["MemoryConsumption"];
01341
01342
01343 bool found = false;
01344 std::string cpName;
01345 std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
01346 std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
01347 for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
01348 cpName = iter->first;
01349 info = &iter->second;
01350 found = true;
01351 break;
01352 }
01353
01354
01355 if(found){
01356 CkPrintf("Steering found knob to turn that should increase memory consumption\n");
01357 fflush(stdout);
01358 const int twoAgoValue = twoAgoPhase->controlPoints[cpName];
01359 const int maxValue = controlPointSpace[cpName].second;
01360
01361 if(twoAgoValue+1 <= maxValue){
01362 newControlPoints[cpName] = twoAgoValue+1;
01363 }
01364 }
01365
01366 }
01367 }
01368
01369 CkPrintf("Steering (memory based) done for this phase\n");
01370 fflush(stdout);
01371
01372 } else if ( whichTuningScheme == UseBestKnownTiming ) {
01373
01374
01375
01376 static bool firstTime = true;
01377 if(firstTime){
01378 firstTime = false;
01379 instrumentedPhase *best = allData.findBest();
01380 CkPrintf("Best known phase is:\n");
01381 best->print();
01382 CkPrintf("\n");
01383
01384 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
01385 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter) {
01386 const std::string &name = cpsIter->first;
01387 newControlPoints[name] = best->controlPoints[name];
01388 }
01389 }
01390 } else if( whichTuningScheme == LDBPeriod) {
01391
01392
01393
01394
01395
01396
01397
01398 instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
01399 instrumentedPhase *prevPhase = previousPhaseData();
01400
01401
01402 const std::vector<double> × = twoAgoPhase->times;
01403 const int oldNumTimings = times.size();
01404
01405
01406 const std::vector<double> ×New = prevPhase->times;
01407 const int newNumTimings = timesNew.size();
01408
01409
01410 if(oldNumTimings > 4 && newNumTimings > 4){
01411
01412
01413
01414
01415 double oldSum = 0;
01416
01417 for(int i=2; i<oldNumTimings; i++){
01418 oldSum += times[i];
01419 }
01420
01421 const double oldAvg = oldSum / (oldNumTimings-2);
01422
01423
01424
01425
01426
01427 const double expectedTotalTime = oldAvg * newNumTimings;
01428
01429
01430
01431 double newSum = 0.0;
01432 for(int i=2; i<newNumTimings; ++i){
01433 newSum += timesNew[i];
01434 }
01435
01436 const double newAvg = newSum / (newNumTimings-2);
01437 const double newTotalTimeExcludingLBSteps = newAvg * ((double)newNumTimings);
01438
01439 const double benefit = expectedTotalTime - newTotalTimeExcludingLBSteps;
01440
01441
01442 const double lbcost = timesNew[0] + timesNew[1] - 2.0*newAvg;
01443
01444 const double benefitAfterLB = benefit - lbcost;
01445
01446
01447
01448 CkPrintf("Constant Model: lbcost = %f, expected = %f, actual = %f\n", lbcost, expectedTotalTime, newTotalTimeExcludingLBSteps);
01449
01450
01451 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
01452 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
01453 const std::string &name = cpsIter->first;
01454 const std::pair<int,int> &bounds = cpsIter->second;
01455 const int lb = bounds.first;
01456 const int ub = bounds.second;
01457
01458 if(benefitAfterLB > 0){
01459 CkPrintf("Constant Model: Beneficial LB\n");
01460 int newval = newControlPoints[name] / 2;
01461 if(newval > lb)
01462 newControlPoints[name] = newval;
01463 else
01464 newControlPoints[name] = lb;
01465 } else {
01466 CkPrintf("Constant Model: Detrimental LB\n");
01467 int newval = newControlPoints[name] * 2;
01468 if(newval < ub)
01469 newControlPoints[name] = newval;
01470 else
01471 newControlPoints[name] = ub;
01472 }
01473 }
01474 }
01475
01476
01477 } else if( whichTuningScheme == LDBPeriodLinear) {
01478
01479
01480
01481
01482
01483
01484
01485 instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
01486 instrumentedPhase *prevPhase = previousPhaseData();
01487
01488 const std::vector<double> × = twoAgoPhase->times;
01489 const int oldNumTimings = times.size();
01490
01491 const std::vector<double> ×New = prevPhase->times;
01492 const int newNumTimings = timesNew.size();
01493
01494
01495 if(oldNumTimings > 4 && newNumTimings > 4){
01496
01497
01498
01499 const int b1 = 2 + (oldNumTimings-2)/2;
01500 double s1 = 0;
01501 double s2 = 0;
01502
01503 const double ldbStepsTime = times[0] + times[1];
01504
01505 for(int i=2; i<b1; i++){
01506 s1 += times[i];
01507 }
01508 for(int i=b1; i<oldNumTimings; i++){
01509 s2 += times[i];
01510 }
01511
01512
01513
01514
01515 const double a1 = s1 / (double)(b1-2);
01516 const double a2 = s2 / (double)(oldNumTimings-b1);
01517 const double aold = (a1+a2)/2.0;
01518
01519 const double expectedTotalTime = newNumTimings*(aold+(oldNumTimings+newNumTimings)*(a2-a1)/oldNumTimings);
01520
01521
01522
01523 double sum = 0.0;
01524 for(int i=2; i<newNumTimings; ++i){
01525 sum += timesNew[i];
01526 }
01527
01528 const double avg = sum / ((double)(newNumTimings-2));
01529 const double actualTotalTime = avg * ((double)newNumTimings);
01530
01531 const double benefit = expectedTotalTime - actualTotalTime;
01532
01533
01534 const double lbcost = timesNew[0] + timesNew[1] - 2.0*avg;
01535
01536 const double benefitAfterLB = benefit - lbcost;
01537
01538
01539
01540 CkPrintf("Linear Model: lbcost = %f, expected = %f, actual = %f\n", lbcost, expectedTotalTime, actualTotalTime);
01541
01542
01543
01544 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
01545 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
01546 const std::string &name = cpsIter->first;
01547 const std::pair<int,int> &bounds = cpsIter->second;
01548 const int lb = bounds.first;
01549 const int ub = bounds.second;
01550
01551 if(benefitAfterLB > 0){
01552 CkPrintf("Linear Model: Beneficial LB\n");
01553 int newval = newControlPoints[name] / 2;
01554 if(newval > lb)
01555 newControlPoints[name] = newval;
01556 else
01557 newControlPoints[name] = lb;
01558 } else {
01559 CkPrintf("Linear Model: Detrimental LB\n");
01560 int newval = newControlPoints[name] * 2;
01561 if(newval < ub)
01562 newControlPoints[name] = newval;
01563 else
01564 newControlPoints[name] = ub;
01565 }
01566 }
01567 }
01568
01569 }
01570
01571 else if( whichTuningScheme == LDBPeriodQuadratic) {
01572
01573
01574
01575
01576
01577
01578
01579 instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
01580 instrumentedPhase *prevPhase = previousPhaseData();
01581
01582 const std::vector<double> × = twoAgoPhase->times;
01583 const int oldNumTimings = times.size();
01584
01585 const std::vector<double> ×New = prevPhase->times;
01586 const int newNumTimings = timesNew.size();
01587
01588
01589 if(oldNumTimings > 4 && newNumTimings > 4){
01590
01591
01592
01593
01594 const int b1 = 2 + (oldNumTimings-2)/3;
01595 const int b2 = 2 + (2*(oldNumTimings-2))/3;
01596 double s1 = 0;
01597 double s2 = 0;
01598 double s3 = 0;
01599
01600 const double ldbStepsTime = times[0] + times[1];
01601
01602 for(int i=2; i<b1; i++){
01603 s1 += times[i];
01604 }
01605 for(int i=b1; i<b2; i++){
01606 s2 += times[i];
01607 }
01608 for(int i=b2; i<oldNumTimings; i++){
01609 s3 += times[i];
01610 }
01611
01612
01613
01614
01615 const double a1 = s1 / (double)(b1-2);
01616 const double a2 = s2 / (double)(b2-b1);
01617 const double a3 = s3 / (double)(oldNumTimings-b2);
01618
01619 const double a = (a1-2.0*a2+a3)/2.0;
01620 const double b = (a1-4.0*a2+3.0*a3)/2.0;
01621 const double c = a3;
01622
01623
01624 const double x1 = (double)newNumTimings / (double)oldNumTimings * 3.0 + 0.5;
01625 const double x2 = 0.5;
01626 const double expectedTotalTime = a*x1*x1*x1/3.0 + b*x1*x1/2.0 + c*x1 - (a*x2*x2*x2/3.0 + b*x2*x2/2.0 + c*x2);
01627
01628
01629
01630 double sum = 0.0;
01631 for(int i=2; i<newNumTimings; ++i){
01632 sum += timesNew[i];
01633 }
01634
01635 const double avg = sum / ((double)(newNumTimings-2));
01636 const double actualTotalTime = avg * ((double)newNumTimings);
01637
01638 const double benefit = expectedTotalTime - actualTotalTime;
01639
01640
01641 const double lbcost = timesNew[0] + timesNew[1] - 2.0*avg;
01642
01643 const double benefitAfterLB = benefit - lbcost;
01644
01645
01646
01647 CkPrintf("Quadratic Model: lbcost = %f, expected = %f, actual = %f, x1=%f, a1=%f, a2=%f, a3=%f, b1=%d, b2=%d, a=%f, b=%f, c=%f\n", lbcost, expectedTotalTime, actualTotalTime, x1, a1, a2, a3, b1, b2, a, b, c);
01648
01649
01650
01651 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
01652 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
01653 const std::string &name = cpsIter->first;
01654 const std::pair<int,int> &bounds = cpsIter->second;
01655 const int lb = bounds.first;
01656 const int ub = bounds.second;
01657
01658 if(benefitAfterLB > 0){
01659 CkPrintf("QuadraticModel: Beneficial LB\n");
01660 int newval = newControlPoints[name] / 2;
01661 if(newval > lb)
01662 newControlPoints[name] = newval;
01663 else
01664 newControlPoints[name] = lb;
01665 } else {
01666 CkPrintf("QuadraticModel: Detrimental LB\n");
01667 int newval = newControlPoints[name] * 2;
01668 if(newval < ub)
01669 newControlPoints[name] = newval;
01670 else
01671 newControlPoints[name] = ub;
01672 }
01673
01674 }
01675 }
01676
01677
01678 } else if( whichTuningScheme == LDBPeriodOptimal) {
01679
01680
01681
01682
01683
01684
01685
01686
01687 instrumentedPhase *prevPhase = previousPhaseData();
01688
01689 const std::vector<double> × = prevPhase->times;
01690 const int numTimings = times.size();
01691
01692 if( numTimings > 4){
01693
01694 const int b1 = 2 + (numTimings-2)/2;
01695 double s1 = 0;
01696 double s2 = 0;
01697
01698
01699 for(int i=2; i<b1; i++){
01700 s1 += times[i];
01701 }
01702 for(int i=b1; i<numTimings; i++){
01703 s2 += times[i];
01704 }
01705
01706
01707 const double a1 = s1 / (double)(b1-2);
01708 const double a2 = s2 / (double)(numTimings-b1);
01709 const double avg = (a1+a1) / 2.0;
01710
01711 const double m = (a2-a1)/((double)(numTimings-2)/2.0);
01712
01713 const double ldbStepsTime = times[0] + times[1];
01714 const double lbcost = ldbStepsTime - 2.0*avg;
01715
01716
01717 int newval = roundDouble(sqrt(2.0*lbcost/m));
01718
01719
01720 if(m<=0)
01721 newval = 2*numTimings;
01722
01723 CkPrintf("Optimal Model (double when negative): lbcost = %f, m = %f, new ldbperiod should be %d\n", lbcost, m, newval);
01724
01725
01726 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
01727 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
01728
01729 const std::string &name = cpsIter->first;
01730 const std::pair<int,int> &bounds = cpsIter->second;
01731 const int lb = bounds.first;
01732 const int ub = bounds.second;
01733
01734 if(newval < lb){
01735 newControlPoints[name] = lb;
01736 } else if(newval > ub){
01737 newControlPoints[name] = ub;
01738 } else {
01739 newControlPoints[name] = newval;
01740 }
01741
01742 }
01743
01744
01745 }
01746
01747
01748
01749 } else if ( whichTuningScheme == UseSteering ) {
01750
01751
01752
01753
01754
01755
01756 instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
01757 instrumentedPhase *prevPhase = previousPhaseData();
01758
01759 if(phase_id%4 == 0){
01760 CkPrintf("Steering based on 2 phases ago:\n");
01761 twoAgoPhase->print();
01762 CkPrintf("\n");
01763 fflush(stdout);
01764
01765 std::vector<std::map<std::string,int> > possibleNextStepPlans;
01766
01767
01768
01769
01770 {
01771 double idleTime = twoAgoPhase->idleTime.avg;
01772 CkPrintf("Steering encountered idle time (%f)\n", idleTime);
01773 fflush(stdout);
01774 if(idleTime > 0.10){
01775 CkPrintf("Steering encountered high idle time(%f) > 10%%\n", idleTime);
01776 CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
01777
01778 std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["Concurrency"];
01779
01780 bool found = false;
01781 std::string cpName;
01782 std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
01783 std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
01784 for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
01785 cpName = iter->first;
01786 info = &iter->second;
01787
01788
01789 std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
01790
01791 CkPrintf("Steering found knob to turn\n");
01792 fflush(stdout);
01793
01794 if(info->first == ControlPoint::EFF_INC){
01795 const int maxValue = controlPointSpace[cpName].second;
01796 const int twoAgoValue = twoAgoPhase->controlPoints[cpName];
01797 if(twoAgoValue+1 <= maxValue){
01798 aNewPlan[cpName] = twoAgoValue+1;
01799 }
01800 } else {
01801 const int minValue = controlPointSpace[cpName].second;
01802 const int twoAgoValue = twoAgoPhase->controlPoints[cpName];
01803 if(twoAgoValue-1 >= minValue){
01804 aNewPlan[cpName] = twoAgoValue-1;
01805 }
01806 }
01807
01808 possibleNextStepPlans.push_back(aNewPlan);
01809
01810 }
01811 }
01812 }
01813
01814
01815
01816 {
01817 double overheadTime = twoAgoPhase->overheadTime.avg;
01818 CkPrintf("Steering encountered overhead time (%f)\n", overheadTime);
01819 fflush(stdout);
01820 if(overheadTime > 0.10){
01821 CkPrintf("Steering encountered high overhead time(%f) > 10%%\n", overheadTime);
01822 CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
01823
01824 std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["GrainSize"];
01825
01826 bool found = false;
01827 std::string cpName;
01828 std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
01829 std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
01830 for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
01831 cpName = iter->first;
01832 info = &iter->second;
01833
01834
01835 std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
01836
01837
01838
01839 CkPrintf("Steering found knob to turn\n");
01840 fflush(stdout);
01841
01842 if(info->first == ControlPoint::EFF_INC){
01843 const int maxValue = controlPointSpace[cpName].second;
01844 const int twoAgoValue = twoAgoPhase->controlPoints[cpName];
01845 if(twoAgoValue+1 <= maxValue){
01846 aNewPlan[cpName] = twoAgoValue+1;
01847 }
01848 } else {
01849 const int minValue = controlPointSpace[cpName].second;
01850 const int twoAgoValue = twoAgoPhase->controlPoints[cpName];
01851 if(twoAgoValue-1 >= minValue){
01852 aNewPlan[cpName] = twoAgoValue-1;
01853 }
01854 }
01855
01856 possibleNextStepPlans.push_back(aNewPlan);
01857
01858 }
01859
01860 }
01861 }
01862
01863
01864 {
01865 double idleTime = twoAgoPhase->idleTime.avg;
01866 CkPrintf("Steering encountered idle time (%f)\n", idleTime);
01867 fflush(stdout);
01868 if(idleTime > 0.10){
01869 CkPrintf("Steering encountered high idle time(%f) > 10%%\n", idleTime);
01870 CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
01871
01872 std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["GPUOffloadedWork"];
01873
01874 bool found = false;
01875 std::string cpName;
01876 std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
01877 std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
01878 for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
01879 cpName = iter->first;
01880 info = &iter->second;
01881
01882
01883 std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
01884
01885
01886 CkPrintf("Steering found knob to turn\n");
01887 fflush(stdout);
01888
01889 if(info->first == ControlPoint::EFF_DEC){
01890 const int maxValue = controlPointSpace[cpName].second;
01891 const int twoAgoValue = twoAgoPhase->controlPoints[cpName];
01892 if(twoAgoValue+1 <= maxValue){
01893 aNewPlan[cpName] = twoAgoValue+1;
01894 }
01895 } else {
01896 const int minValue = controlPointSpace[cpName].second;
01897 const int twoAgoValue = twoAgoPhase->controlPoints[cpName];
01898 if(twoAgoValue-1 >= minValue){
01899 aNewPlan[cpName] = twoAgoValue-1;
01900 }
01901 }
01902
01903 possibleNextStepPlans.push_back(aNewPlan);
01904
01905 }
01906
01907 }
01908 }
01909
01910
01911
01912
01913 if(possibleNextStepPlans.size() > 0){
01914 newControlPoints = possibleNextStepPlans[0];
01915 }
01916
01917
01918 CkPrintf("Steering done for this phase\n");
01919 fflush(stdout);
01920
01921 } else {
01922
01923 CkPrintf("not a phase to do steering, so stick with previously planned values (one phase ago)\n");
01924 fflush(stdout);
01925 }
01926
01927
01928
01929 } else if( whichTuningScheme == SimulatedAnnealing ) {
01930
01931
01932
01933
01934
01935
01936
01937
01938 CkPrintf("Finding best phase\n");
01939 instrumentedPhase *bestPhase = allData.findBest();
01940 CkPrintf("best found:\n");
01941 bestPhase->print();
01942 CkPrintf("\n");
01943
01944
01945 const int convergeByPhase = 100;
01946
01947 const double progress = (double) min(effective_phase, convergeByPhase) / (double)convergeByPhase;
01948
01949 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
01950 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
01951 const std::string &name = cpsIter->first;
01952 const std::pair<int,int> &bounds = cpsIter->second;
01953 const int minValue = bounds.first;
01954 const int maxValue = bounds.second;
01955
01956 const int before = bestPhase->controlPoints[name];
01957
01958 const int range = (int)((maxValue-minValue+1)*(1.0-progress));
01959
01960 int high = min(before+range, maxValue);
01961 int low = max(before-range, minValue);
01962
01963 newControlPoints[name] = low;
01964 if(high-low > 0){
01965 newControlPoints[name] += randInt(high-low, name.c_str(), phase_id);
01966 }
01967
01968 }
01969
01970 } else if ( whichTuningScheme == DivideAndConquer ) {
01971
01972
01973
01974
01975
01976
01977
01978
01979
01980
01981 instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
01982 instrumentedPhase *prevPhase = previousPhaseData();
01983
01984 if(phase_id%4 == 0){
01985 CkPrintf("Divide & Conquer Steering based on 2 phases ago:\n");
01986 twoAgoPhase->print();
01987 CkPrintf("\n");
01988 fflush(stdout);
01989
01990 std::vector<std::map<std::string,int> > possibleNextStepPlans;
01991
01992
01993
01994
01995 {
01996 double idleTime = twoAgoPhase->idleTime.avg;
01997 double overheadTime = twoAgoPhase->overheadTime.avg;
01998
01999
02000 CkPrintf("Divide & Conquer Steering encountered overhead time (%f) idle time (%f)\n",overheadTime, idleTime);
02001 fflush(stdout);
02002 if(idleTime+overheadTime > 0.10){
02003 CkPrintf("Steering encountered high idle+overheadTime time(%f) > 10%%\n", idleTime+overheadTime);
02004 CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
02005
02006 int direction = -1;
02007 if (idleTime>overheadTime){
02008
02009 direction = 1;
02010 }
02011
02012 std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["Concurrency"];
02013
02014 bool found = false;
02015 std::string cpName;
02016 std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
02017 std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
02018 for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
02019 cpName = iter->first;
02020 info = &iter->second;
02021
02022
02023 std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
02024
02025 CkPrintf("Divide & Conquer Steering found knob to turn\n");
02026 fflush(stdout);
02027
02028 int adjustByAmount = (int)(myAbs(idleTime-overheadTime)*5.0);
02029
02030 if(info->first == ControlPoint::EFF_INC){
02031 const int minValue = controlPointSpace[cpName].first;
02032 const int maxValue = controlPointSpace[cpName].second;
02033 const int twoAgoValue = twoAgoPhase->controlPoints[cpName];
02034 const int newVal = closestInRange(twoAgoValue+adjustByAmount*direction, minValue, maxValue);
02035 CkAssert(newVal <= maxValue && newVal >= minValue);
02036 aNewPlan[cpName] = newVal;
02037 } else {
02038 const int minValue = controlPointSpace[cpName].first;
02039 const int maxValue = controlPointSpace[cpName].second;
02040 const int twoAgoValue = twoAgoPhase->controlPoints[cpName];
02041 const int newVal = closestInRange(twoAgoValue-adjustByAmount*direction, minValue, maxValue);
02042 CkAssert(newVal <= maxValue && newVal >= minValue);
02043 aNewPlan[cpName] = newVal;
02044 }
02045
02046 possibleNextStepPlans.push_back(aNewPlan);
02047 }
02048 }
02049 }
02050
02051 if(possibleNextStepPlans.size() > 0){
02052 CkPrintf("Divide & Conquer Steering found %d possible next phases, using first one\n", possibleNextStepPlans.size());
02053 newControlPoints = possibleNextStepPlans[0];
02054 } else {
02055 CkPrintf("Divide & Conquer Steering found no possible next phases\n");
02056 }
02057 }
02058
02059 } else if( whichTuningScheme == Simplex ) {
02060
02061
02062
02063
02064
02065
02066
02067 s.adapt(controlPointSpace, newControlPoints, phase_id, allData);
02068
02069 } else if( whichTuningScheme == ExhaustiveSearch ) {
02070
02071
02072
02073
02074 int numDimensions = controlPointSpace.size();
02075 CkAssert(numDimensions > 0);
02076
02077 vector<int> lowerBounds(numDimensions);
02078 vector<int> upperBounds(numDimensions);
02079
02080 int d=0;
02081 std::map<std::string, pair<int,int> >::iterator iter;
02082 for(iter = controlPointSpace.begin(); iter != controlPointSpace.end(); iter++){
02083
02084 lowerBounds[d] = iter->second.first;
02085 upperBounds[d] = iter->second.second;
02086 d++;
02087 }
02088
02089
02090 vector<std::string> names(numDimensions);
02091 d=0;
02092 for(std::map<std::string, pair<int,int> >::iterator niter=controlPointSpace.begin(); niter!=controlPointSpace.end(); niter++){
02093 names[d] = niter->first;
02094 d++;
02095 }
02096
02097
02098
02099 vector<int> config = lowerBounds;
02100 config.push_back(0);
02101
02102
02103 allData.cleanupNames();
02104 std::vector<instrumentedPhase*> &phases = allData.phases;
02105
02106 while(true){
02107
02108 std::vector<instrumentedPhase*>::iterator piter;
02109 bool testedConfiguration = false;
02110 for(piter = phases.begin(); piter != phases.end(); piter++){
02111
02112
02113 bool match = true;
02114 for(int j=0;j<numDimensions;j++){
02115 match &= (*piter)->controlPoints[names[j]] == config[j];
02116 }
02117
02118 if(match){
02119 testedConfiguration = true;
02120 break;
02121 }
02122 }
02123
02124 if(testedConfiguration == false){
02125 break;
02126 }
02127
02128
02129 config[0] ++;
02130
02131 for(int i=0;i<numDimensions;i++){
02132 if(config[i] > upperBounds[i]){
02133 config[i+1]++;
02134 config[i] = lowerBounds[i];
02135 }
02136 }
02137
02138 if(config[numDimensions] > 0){
02139 break;
02140 }
02141
02142 }
02143
02144 if(config[numDimensions] > 0){
02145 for(int i=0;i<numDimensions;i++){
02146 config[i]= (lowerBounds[i]+upperBounds[i]) / 2;
02147 }
02148 }
02149
02150
02151
02152 for(int i=0; i<numDimensions; i++){
02153 newControlPoints[names[i]] = config[i];
02154 CkPrintf("Exhaustive search chose: %s -> %d\n", names[i].c_str(), config[i]);
02155 }
02156
02157
02158 } else {
02159 CkAbort("Some Control Point tuning strategy must be enabled.\n");
02160 }
02161
02162
02163 const double endGenerateTime = CmiWallTimer();
02164
02165 CkPrintf("Time to generate next control point configuration(s): %f sec\n", (endGenerateTime - startGenerateTime) );
02166
02167 }
02168
02169
02170
02171
02172
02174 int controlPoint(const char *name, int lb, int ub){
02175 instrumentedPhase *thisPhaseData = controlPointManagerProxy.ckLocalBranch()->currentPhaseData();
02176 const int phase_id = controlPointManagerProxy.ckLocalBranch()->phase_id;
02177 std::map<std::string, pair<int,int> > &controlPointSpace = controlPointManagerProxy.ckLocalBranch()->controlPointSpace;
02178 int result;
02179
02180
02181 if( thisPhaseData->controlPoints.count(std::string(name))>0 && thisPhaseData->controlPoints[std::string(name)]>=0 ){
02182 CkPrintf("Already have control point values for phase. %s -> %d\n", name, (int)(thisPhaseData->controlPoints[std::string(name)]) );
02183 return thisPhaseData->controlPoints[std::string(name)];
02184 }
02185
02186
02187 if( phase_id < 4 || whichTuningScheme == AlwaysDefaults){
02188
02189
02190 result = lb;
02191
02192 if(defaultControlPointValues.count(std::string(name)) > 0){
02193 int v = defaultControlPointValues[std::string(name)];
02194 CkPrintf("Startup phase using default value of %d for \"%s\"\n", v, name);
02195 result = v;
02196 }
02197
02198 } else if(controlPointSpace.count(std::string(name)) == 0){
02199
02200 result = lb;
02201
02202 } else {
02203
02204 controlPointManagerProxy.ckLocalBranch()->generatePlan();
02205
02206
02207 result = controlPointManagerProxy.ckLocalBranch()->newControlPoints[std::string(name)];
02208
02209 }
02210
02211 if(!isInRange(result,ub,lb)){
02212 std::cerr << "control point = " << result << " is out of range: " << lb << " " << ub << std::endl;
02213 fflush(stdout);
02214 fflush(stderr);
02215 }
02216 CkAssert(isInRange(result,ub,lb));
02217 thisPhaseData->controlPoints[std::string(name)] = result;
02218
02219 controlPointSpace.insert(std::make_pair(std::string(name),std::make_pair(lb,ub)));
02220
02221 CkPrintf("Control Point \"%s\" for phase %d is: %d\n", name, phase_id, result);
02222
02223
02224 return result;
02225 }
02226
02227
02228 FLINKAGE int FTN_NAME(CONTROLPOINT, controlpoint)(CMK_TYPEDEF_INT4 *lb, CMK_TYPEDEF_INT4 *ub){
02229 CkAssert(CkMyPe() == 0);
02230 return controlPoint("FortranCP", *lb, *ub);
02231 }
02232
02233
02234
02235
02237
02238
02239
02240
02241
02242
02243
02244
02245
02246
02247
02248
02249
02250
02251
02252
02253
02254
02269 void simplexScheme::adapt(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
02270
02271 if(useBestKnown){
02272 CkPrintf("Simplex Tuning: Simplex algorithm is done, using best known phase:\n");
02273 return;
02274 }
02275
02276
02277 if(firstSimplexPhase< 0){
02278 firstSimplexPhase = allData.phases.size()-1;
02279 CkPrintf("First simplex phase is %d\n", firstSimplexPhase);
02280 }
02281
02282 int n = controlPointSpace.size();
02283
02284 CkAssert(n>=2);
02285
02286
02287 if(simplexState == beginning){
02288
02289 if(allData.phases.size() < firstSimplexPhase + n+2 ){
02290 CkPrintf("Simplex Tuning: chose random configuration\n");
02291
02292
02293 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
02294 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
02295 const std::string &name = cpsIter->first;
02296 const std::pair<int,int> &bounds = cpsIter->second;
02297 const int lb = bounds.first;
02298 const int ub = bounds.second;
02299 newControlPoints[name] = lb + randInt(ub-lb+1, name.c_str(), phase_id);
02300 }
02301 } else {
02302
02303 for(int i=0; i<n+1; i++){
02304 simplexIndices.insert(firstSimplexPhase+i);
02305 }
02306 CkAssert(simplexIndices.size() == n+1);
02307
02308
02309 doReflection(controlPointSpace, newControlPoints, phase_id, allData);
02310
02311 }
02312
02313 } else if (simplexState == reflecting){
02314 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
02315 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
02316
02317
02318 double highestTimeForOthersInSimplex = 0.0;
02319 for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
02320 double t = allData.phases[*iter]->medianTime();
02321 if(*iter != worstPhase && t > highestTimeForOthersInSimplex) {
02322 highestTimeForOthersInSimplex = t;
02323 }
02324 }
02325
02326 CkPrintf("After reflecting, the median time for the phase is %f, previous worst phase %d time was %f\n", recentPhaseTime, worstPhase, previousWorstPhaseTime);
02327
02328 if(recentPhaseTime < highestTimeForOthersInSimplex){
02329
02330 doExpansion(controlPointSpace, newControlPoints, phase_id, allData);
02331
02332 } else if (recentPhaseTime <= highestTimeForOthersInSimplex){
02333
02334 CkAssert(simplexIndices.size() == n+1);
02335 simplexIndices.erase(worstPhase);
02336 CkAssert(simplexIndices.size() == n);
02337 simplexIndices.insert(pPhase);
02338 CkAssert(simplexIndices.size() == n+1);
02339
02340 doReflection(controlPointSpace, newControlPoints, phase_id, allData);
02341
02342 } else {
02343
02344 if(recentPhaseTime <= worstTime){
02345
02346 CkAssert(simplexIndices.size() == n+1);
02347 simplexIndices.erase(worstPhase);
02348 CkAssert(simplexIndices.size() == n);
02349 simplexIndices.insert(pPhase);
02350 CkAssert(simplexIndices.size() == n+1);
02351
02352 worstPhase = pPhase;
02353
02354 worst.clear();
02355 }
02356
02357
02358 doContraction(controlPointSpace, newControlPoints, phase_id, allData);
02359
02360 }
02361
02362 } else if (simplexState == doneExpanding){
02363 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
02364 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
02365
02366
02367
02368 if(recentPhaseTime < bestTime){
02369
02370 CkAssert(simplexIndices.size() == n+1);
02371 simplexIndices.erase(worstPhase);
02372 CkAssert(simplexIndices.size() == n);
02373 simplexIndices.insert(p2Phase);
02374 CkAssert(simplexIndices.size() == n+1);
02375 } else {
02376
02377 CkAssert(simplexIndices.size() == n+1);
02378 simplexIndices.erase(worstPhase);
02379 CkAssert(simplexIndices.size() == n);
02380 simplexIndices.insert(pPhase);
02381 CkAssert(simplexIndices.size() == n+1);
02382 }
02383
02384
02385 doReflection(controlPointSpace, newControlPoints, phase_id, allData);
02386
02387 } else if (simplexState == contracting){
02388 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
02389 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
02390
02391
02392
02393 if(recentPhaseTime <= worstTime){
02394
02395 CkPrintf("Replacing phase %d with %d\n", worstPhase, p2Phase);
02396 CkAssert(simplexIndices.size() == n+1);
02397 simplexIndices.erase(worstPhase);
02398 CkAssert(simplexIndices.size() == n);
02399 simplexIndices.insert(p2Phase);
02400 CkAssert(simplexIndices.size() == n+1);
02401
02402 doReflection(controlPointSpace, newControlPoints, phase_id, allData);
02403
02404 } else {
02405
02406 simplexState = stillContracting;
02407
02408
02409 stillMustContractList = simplexIndices;
02410
02411 CkPrintf("Simplex Tuning: Switched to state: stillContracting\n");
02412 }
02413
02414 } else if (simplexState == stillContracting){
02415 CkPrintf("Simplex Tuning: stillContracting found %d configurations left to try\n", stillMustContractList.size());
02416
02417 if(stillMustContractList.size()>0){
02418 int c = *stillMustContractList.begin();
02419 stillMustContractList.erase(c);
02420 CkPrintf("Simplex Tuning: stillContracting evaluating configuration derived from phase %d\n", c);
02421
02422 std::vector<double> cPhaseConfig = pointCoords(allData, c);
02423
02424
02425 int v = 0;
02426 for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
02427 const std::string &name = cpsIter->first;
02428 const std::pair<int,int> &bounds = cpsIter->second;
02429 const int lb = bounds.first;
02430 const int ub = bounds.second;
02431
02432 double val = (cPhaseConfig[v] + best[v])/2.0;
02433
02434 newControlPoints[name] = keepInRange((int)val,lb,ub);
02435 CkPrintf("Simplex Tuning: v=%d Reflected worst %d %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
02436 v++;
02437 }
02438 } else {
02439
02440 CkAssert(stillMustContractList.size() == 0);
02441 simplexIndices.clear();
02442 CkAssert(simplexIndices.size()==0);
02443 for(int i=0; i<n+1; i++){
02444 simplexIndices.insert(allData.phases.size()-2-i);
02445 }
02446 CkAssert(simplexIndices.size()==n+1);
02447
02448
02449 doReflection(controlPointSpace, newControlPoints, phase_id, allData);
02450
02451 }
02452
02453
02454 } else if (simplexState == expanding){
02455 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
02456 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
02457
02458
02459
02460 if(recentPhaseTime < bestTime){
02461
02462 CkAssert(simplexIndices.size() == n+1);
02463 simplexIndices.erase(worstPhase);
02464 CkAssert(simplexIndices.size() == n);
02465 simplexIndices.insert(p2Phase);
02466 CkAssert(simplexIndices.size() == n+1);
02467
02468 doReflection(controlPointSpace, newControlPoints, phase_id, allData);
02469
02470 } else {
02471
02472 CkAssert(simplexIndices.size() == n+1);
02473 simplexIndices.erase(worstPhase);
02474 CkAssert(simplexIndices.size() == n);
02475 simplexIndices.insert(pPhase);
02476 CkAssert(simplexIndices.size() == n+1);
02477
02478 doReflection(controlPointSpace, newControlPoints, phase_id, allData);
02479 }
02480
02481
02482 } else {
02483 CkAbort("Unknown simplexState");
02484 }
02485
02486 }
02487
02488
02489
02491 void simplexScheme::doReflection(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
02492
02493 int n = controlPointSpace.size();
02494
02495 printSimplex(allData);
02496
02497 computeCentroidBestWorst(controlPointSpace, newControlPoints, phase_id, allData);
02498
02499
02500
02501 double maxr = 0.0;
02502 for(int i=0; i<n+1; i++){
02503
02504 double r2 = 0.0;
02505 std::vector<double> p = pointCoords(allData, i);
02506 for(int d=0; d<p.size(); d++){
02507 double r1 = (p[d] * centroid[d]);
02508 r2 += r1*r1;
02509 }
02510 if(r2 > maxr)
02511 maxr = r2;
02512 }
02513
02514 #if 0
02515
02516 if(maxr < 10){
02517 useBestKnown = true;
02518 instrumentedPhase *best = allData.findBest();
02519 CkPrintf("Simplex Tuning: Simplex diameter is small, so switching over to best known phase:\n");
02520
02521 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
02522 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter) {
02523 const std::string &name = cpsIter->first;
02524 newControlPoints[name] = best->controlPoints[name];
02525 }
02526 }
02527 #endif
02528
02529
02530
02531 pPhase = allData.phases.size()-1;
02532 P.resize(n);
02533 for(int i=0; i<n; i++){
02534 P[i] = (1.0+alpha) * centroid[i] - alpha * worst[i] ;
02535 }
02536
02537 for(int i=0; i<P.size(); i++){
02538 CkPrintf("Simplex Tuning: P dimension %d is %f\n", i, P[i]);
02539 }
02540
02541
02542
02543 int v = 0;
02544 for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
02545 const std::string &name = cpsIter->first;
02546 const std::pair<int,int> &bounds = cpsIter->second;
02547 const int lb = bounds.first;
02548 const int ub = bounds.second;
02549 newControlPoints[name] = keepInRange((int)P[v],lb,ub);
02550 CkPrintf("Simplex Tuning: v=%d Reflected worst %d %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
02551 v++;
02552 }
02553
02554
02555
02556 simplexState = reflecting;
02557 CkPrintf("Simplex Tuning: Switched to state: reflecting\n");
02558
02559 }
02560
02561
02562
02563
02565 void simplexScheme::doExpansion(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
02566 int n = controlPointSpace.size();
02567 printSimplex(allData);
02568
02569
02570
02571
02572
02573
02574
02575 p2Phase = allData.phases.size()-1;
02576 P2.resize(n);
02577 for(int i=0; i<n; i++){
02578 P2[i] = ( (1.0+gamma) * P[i] - gamma * centroid[i] );
02579 }
02580
02581 for(int i=0; i<P2.size(); i++){
02582 CkPrintf("P2 aka P** dimension %d is %f\n", i, P2[i]);
02583 }
02584
02585
02586
02587 int v = 0;
02588 for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
02589 const std::string &name = cpsIter->first;
02590 const std::pair<int,int> &bounds = cpsIter->second;
02591 const int lb = bounds.first;
02592 const int ub = bounds.second;
02593 newControlPoints[name] = keepInRange((int)P2[v],lb,ub);
02594 CkPrintf("Simplex Tuning: v=%d worstPhase=%d Expanding %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
02595 v++;
02596 }
02597
02598
02599
02600 simplexState = doneExpanding;
02601 CkPrintf("Simplex Tuning: Switched to state: doneExpanding\n");
02602
02603 }
02604
02605
02606
02607
02609 void simplexScheme::doContraction(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
02610 int n = controlPointSpace.size();
02611 printSimplex(allData);
02612
02613
02614
02615
02616 p2Phase = allData.phases.size()-1;
02617 P2.resize(n);
02618 for(int i=0; i<n; i++){
02619 P2[i] = ( beta*worst[i] + (1.0-beta)*centroid[i] );
02620 }
02621
02622 for(int i=0; i<P2.size(); i++){
02623 CkPrintf("P2 aka P** dimension %d is %f\n", i, P2[i]);
02624 }
02625
02626
02627
02628 int v = 0;
02629 for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
02630 const std::string &name = cpsIter->first;
02631 const std::pair<int,int> &bounds = cpsIter->second;
02632 const int lb = bounds.first;
02633 const int ub = bounds.second;
02634 newControlPoints[name] = keepInRange((int)P2[v],lb,ub);
02635 CkPrintf("Simplex Tuning: v=%d worstPhase=%d Contracting %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
02636 v++;
02637 }
02638
02639
02640
02641 simplexState = contracting;
02642 CkPrintf("Simplex Tuning: Switched to state: contracting\n");
02643
02644 }
02645
02646
02647
02648
02649 void simplexScheme::computeCentroidBestWorst(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
02650 int n = controlPointSpace.size();
02651
02652
02653 worstPhase = -1;
02654 worstTime = -1.0;
02655 bestPhase = 10000000;
02656 bestTime = 10000000;
02657 for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
02658 double t = allData.phases[*iter]->medianTime();
02659 if(t > worstTime){
02660 worstTime = t;
02661 worstPhase = *iter;
02662 }
02663 if(t < bestTime){
02664 bestTime = t;
02665 bestPhase = *iter;
02666 }
02667 }
02668 CkAssert(worstTime != -1.0 && worstPhase != -1 && bestTime != 10000000 && bestPhase != 10000000);
02669
02670 best = pointCoords(allData, bestPhase);
02671 CkAssert(best.size() == n);
02672
02673 worst = pointCoords(allData, worstPhase);
02674 CkAssert(worst.size() == n);
02675
02676
02677 centroid.resize(n);
02678 for(int i=0; i<n; i++){
02679 centroid[i] = 0.0;
02680 }
02681
02682 int numPts = 0;
02683
02684 for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
02685 if(*iter != worstPhase){
02686 numPts ++;
02687
02688 int c = 0;
02689 for(std::map<std::string,int>::iterator citer = allData.phases[*iter]->controlPoints.begin(); citer != allData.phases[*iter]->controlPoints.end(); ++citer){
02690 centroid[c] += citer->second;
02691 c++;
02692 }
02693
02694 }
02695 }
02696
02697
02698 for(int v = 0; v<centroid.size(); v++) {
02699 centroid[v] /= (double)numPts;
02700 }
02701
02702 CkAssert(centroid.size() == n);
02703
02704 for(int i=0; i<centroid.size(); i++){
02705 CkPrintf("Centroid dimension %d is %f\n", i, centroid[i]);
02706 }
02707
02708
02709 }
02710
02711
02712
02713 std::vector<double> simplexScheme::pointCoords(instrumentedData &allData, int i){
02714 std::vector<double> result;
02715 for(std::map<std::string,int>::iterator citer = allData.phases[i]->controlPoints.begin(); citer != allData.phases[i]->controlPoints.end(); ++citer){
02716 result.push_back((double)citer->second);
02717 }
02718 return result;
02719 }
02720
02721
02722
02723
02724 void ControlPointWriteOutputToDisk(){
02725 CkAssert(CkMyPe() == 0);
02726 controlPointManagerProxy.ckLocalBranch()->writeOutputToDisk();
02727 }
02728
02729
02730
02733 #include "ControlPoints.def.h"
02734
02735 #endif