00001
00002
00003 #include "LBSimulation.h"
00004
00005
00006
00007
00008
00009 int LBSimulation::dumpStep = -1;
00010 int LBSimulation::dumpStepSize = 1;
00011 char* LBSimulation::dumpFile = (char*)"lbdata.dat";
00012 int LBSimulation::doSimulation = 0;
00013 int LBSimulation::simStep = -1;
00014 int LBSimulation::simStepSize = 1;
00015 int LBSimulation::simProcs = 0;
00016 int LBSimulation::procsChanged = 0;
00017
00018 int LBSimulation::showDecisionsOnly = 0;
00019 int _lb_version = LB_FORMAT_VERSION;
00020
00021
00022
00023
00024
00025 LBInfo::LBInfo(int count): numPes(count), minObjLoad(0.0), maxObjLoad(0.0)
00026 {
00027 peLoads = new LBRealType[numPes];
00028 objLoads = new LBRealType[numPes];
00029 comLoads = new LBRealType[numPes];
00030 bgLoads = new LBRealType[numPes];
00031 clear();
00032 }
00033
00034 LBInfo::~LBInfo()
00035 {
00036
00037 if (peLoads && bgLoads) {
00038 delete [] bgLoads;
00039 delete [] comLoads;
00040 delete [] objLoads;
00041 delete [] peLoads;
00042 }
00043 }
00044
00045 void LBInfo::clear()
00046 {
00047 for (int i=0; i<numPes; i++) {
00048 peLoads[i] = 0.0;
00049 if (objLoads) objLoads[i] = 0.0;
00050 if (comLoads) comLoads[i] = 0.0;
00051 if (bgLoads) bgLoads[i] = 0.0;
00052 }
00053 minObjLoad = 0.0;
00054 maxObjLoad = 0.0;
00055 msgCount = msgBytes = 0;
00056 }
00057
00058 void LBInfo::getInfo(BaseLB::LDStats* stats, int count, int considerComm)
00059 {
00060 #if CMK_LBDB_ON
00061 int i, pe;
00062
00063 CmiAssert(peLoads);
00064
00065 clear();
00066
00067 double alpha = _lb_args.alpha();
00068 double beta = _lb_args.beta();
00069
00070 minObjLoad = 1.0e20;
00071 maxObjLoad = 0.0;
00072
00073 msgCount = 0;
00074 msgBytes = 0;
00075
00076 if (considerComm) stats->makeCommHash();
00077
00078
00079 if (bgLoads)
00080 for(pe = 0; pe < count; pe++)
00081 bgLoads[pe] = stats->procs[pe].bg_walltime;
00082
00083 for(pe = 0; pe < count; pe++)
00084 peLoads[pe] = stats->procs[pe].bg_walltime;
00085
00086 for(int obj = 0; obj < stats->n_objs; obj++)
00087 {
00088 int pe = stats->to_proc[obj];
00089 if (pe == -1) continue;
00090 CmiAssert(pe >=0 && pe < count);
00091 double oload = stats->objData[obj].wallTime;
00092 if (oload < minObjLoad) minObjLoad = oload;
00093 if (oload > maxObjLoad) maxObjLoad = oload;
00094 peLoads[pe] += oload;
00095 if (objLoads) objLoads[pe] += oload;
00096 }
00097
00098
00099 if (considerComm) {
00100 int* msgSentCount = new int[count];
00101 int* msgRecvCount = new int[count];
00102 int* byteSentCount = new int[count];
00103 int* byteRecvCount = new int[count];
00104 for(i = 0; i < count; i++)
00105 msgSentCount[i] = msgRecvCount[i] = byteSentCount[i] = byteRecvCount[i] = 0;
00106
00107 int mcast_count = 0;
00108 for (int cidx=0; cidx < stats->n_comm; cidx++) {
00109 LDCommData& cdata = stats->commData[cidx];
00110 int senderPE, receiverPE;
00111 if (cdata.from_proc())
00112 senderPE = cdata.src_proc;
00113 else {
00114 int idx = stats->getHash(cdata.sender);
00115 if (idx == -1) continue;
00116 senderPE = stats->to_proc[idx];
00117 CmiAssert(senderPE != -1);
00118 }
00119 CmiAssert(senderPE < count && senderPE >= 0);
00120
00121
00122 int receiver_type = cdata.receiver.get_type();
00123 if (receiver_type == LD_PROC_MSG || receiver_type == LD_OBJ_MSG) {
00124 if (receiver_type == LD_PROC_MSG)
00125 receiverPE = cdata.receiver.proc();
00126 else {
00127 int idx = stats->getHash(cdata.receiver.get_destObj());
00128 if (idx == -1) continue;
00129 receiverPE = stats->to_proc[idx];
00130 CmiAssert(receiverPE != -1);
00131 }
00132 CmiAssert(receiverPE < count && receiverPE >= 0);
00133 if(senderPE != receiverPE)
00134 {
00135 msgSentCount[senderPE] += cdata.messages;
00136 byteSentCount[senderPE] += cdata.bytes;
00137 msgRecvCount[receiverPE] += cdata.messages;
00138 byteRecvCount[receiverPE] += cdata.bytes;
00139 }
00140 }
00141 else if (receiver_type == LD_OBJLIST_MSG) {
00142 int nobjs;
00143 const LDObjKey *objs = cdata.receiver.get_destObjs(nobjs);
00144 mcast_count ++;
00145 CkVec<int> pes;
00146 for (i=0; i<nobjs; i++) {
00147 int idx = stats->getHash(objs[i]);
00148 CmiAssert(idx != -1);
00149 if (idx == -1) continue;
00150 receiverPE = stats->to_proc[idx];
00151 CmiAssert(receiverPE < count && receiverPE >= 0);
00152 int exist = 0;
00153 for (int p=0; p<pes.size(); p++)
00154 if (receiverPE == pes[p]) { exist=1; break; }
00155 if (exist) continue;
00156 pes.push_back(receiverPE);
00157 if(senderPE != receiverPE)
00158 {
00159 msgSentCount[senderPE] += cdata.messages;
00160 byteSentCount[senderPE] += cdata.bytes;
00161 msgRecvCount[receiverPE] += cdata.messages;
00162 byteRecvCount[receiverPE] += cdata.bytes;
00163 }
00164 }
00165 }
00166 }
00167 if (_lb_args.debug())
00168 CkPrintf("Number of MULTICAST: %d\n", mcast_count);
00169
00170
00171 for(i = 0; i < count; i++)
00172 {
00173 double comload = msgRecvCount[i] * PER_MESSAGE_RECV_OVERHEAD +
00174 msgSentCount[i] * alpha +
00175 byteRecvCount[i] * PER_BYTE_RECV_OVERHEAD +
00176 byteSentCount[i] * beta;
00177 peLoads[i] += comload;
00178 if (comLoads) comLoads[i] += comload;
00179 msgCount += msgRecvCount[i] + msgSentCount[i];
00180 msgBytes += byteRecvCount[i] + byteSentCount[i];
00181 }
00182 delete [] msgRecvCount;
00183 delete [] msgSentCount;
00184 delete [] byteRecvCount;
00185 delete [] byteSentCount;
00186 }
00187 #endif
00188 }
00189
00190 void LBInfo::print()
00191 {
00192 int i;
00193 double minLoad, maxLoad, maxProcObjLoad, avgProcObjLoad, maxComLoad, sum, average, avgComLoad;
00194 double avgBgLoad;
00195 int max_loaded_proc = 0;
00196 sum = .0;
00197 sum = minLoad = maxLoad = peLoads[0];
00198 avgProcObjLoad = maxProcObjLoad = objLoads[0];
00199 avgComLoad = maxComLoad = comLoads[0];
00200 avgBgLoad = bgLoads[0];
00201 for (i = 1; i < numPes; i++) {
00202 double load = peLoads[i];
00203 if (load>maxLoad) {
00204 maxLoad=load;
00205 max_loaded_proc = i;
00206 } else if (peLoads[i]<minLoad) minLoad=load;
00207 if (objLoads[i]>maxProcObjLoad) maxProcObjLoad = objLoads[i];
00208 if (comLoads[i]>maxComLoad) maxComLoad = comLoads[i];
00209 sum += load;
00210 avgProcObjLoad += objLoads[i];
00211 avgBgLoad += bgLoads[i];
00212 avgComLoad += comLoads[i];
00213 }
00214 average = sum/numPes;
00215 avgProcObjLoad /= numPes;
00216 avgBgLoad /= numPes;
00217 avgComLoad /= numPes;
00218 CmiPrintf("The processor loads are: \n");
00219 CmiPrintf("PE (Total Load) (Obj Load) (Comm Load) (BG Load)\n");
00220 if (_lb_args.debug() > 3)
00221 for(i = 0; i < numPes; i++)
00222 CmiPrintf("%-4d %10f %10f %10f %10f\n", i, peLoads[i], objLoads[i], comLoads[i], bgLoads[i]);
00223 CmiPrintf("max: %10f %10f %10f\n", maxLoad, maxProcObjLoad, maxComLoad);
00224 CmiPrintf("Min : %f Max : %f Average: %f AvgBgLoad: %f\n", minLoad, maxLoad, average, avgBgLoad);
00225 CmiPrintf("ProcObjLoad Max : %f Average: %f\n", maxProcObjLoad, avgProcObjLoad);
00226 CmiPrintf("CommLoad Max : %f Average: %f\n", maxComLoad, avgComLoad);
00227 CmiPrintf("[%d] is Maxloaded maxload: %f ObjLoad %f BgLoad %f\n",
00228 max_loaded_proc, peLoads[max_loaded_proc], objLoads[max_loaded_proc], bgLoads[max_loaded_proc]);
00229
00230 CmiPrintf("MinObj : %f MaxObj : %f\n", minObjLoad, maxObjLoad, average);
00231 CmiPrintf("Non-local comm: %d msgs %lld bytes\n", msgCount, msgBytes);
00232 }
00233
00234 void LBInfo::getSummary(LBRealType &maxLoad, LBRealType &maxCpuLoad, LBRealType &totalLoad)
00235 {
00236 totalLoad = maxLoad = peLoads[0];
00237 maxCpuLoad = objLoads[0];
00238 for (int i = 1; i < numPes; i++) {
00239 LBRealType load = peLoads[i];
00240 if (load>maxLoad) maxLoad=load;
00241 LBRealType cpuload = objLoads[i];
00242 if (cpuload>maxCpuLoad) maxCpuLoad=cpuload;
00243 totalLoad += load;
00244 }
00245 }
00246
00248
00249 LBSimulation::LBSimulation(int numPes_) : lbinfo(numPes_), numPes(numPes_)
00250 {
00251 }
00252
00253 LBSimulation::~LBSimulation()
00254 {
00255 }
00256
00257 void LBSimulation::reset()
00258 {
00259 lbinfo.clear();
00260 }
00261
00262 void LBSimulation::SetProcessorLoad(int pe, double load, double bgload)
00263 {
00264 CkAssert(0 <= pe && pe < numPes);
00265 lbinfo.peLoads[pe] = load;
00266 lbinfo.bgLoads[pe] = bgload;
00267 }
00268
00269 void LBSimulation::PrintSimulationResults()
00270 {
00271 lbinfo.print();
00272 }
00273
00274 void LBSimulation::PrintDecisions(LBMigrateMsg *m, char *simFileName,
00275 int peCount)
00276 {
00277 char *resultFile = (char *)malloc((strlen(simFileName) +
00278 strlen("results") + 2)*sizeof(char));
00279 sprintf(resultFile,"%s.results", simFileName);
00280 FILE *f = fopen(resultFile, "w");
00281 fprintf(f, "%d %d\n", peCount, m->n_moves);
00282 for (int i=0; i<m->n_moves; i++) {
00283 fprintf(f, "%" PRIu64 " ", m->moves[i].obj.id);
00284 fprintf(f, "%d\n",m->moves[i].to_pe);
00285 }
00286 }
00287
00288 void LBSimulation::PrintDifferences(LBSimulation *realSim, BaseLB::LDStats *stats)
00289 {
00290 LBRealType *peLoads = lbinfo.peLoads;
00291 LBRealType *realPeLoads = realSim->lbinfo.peLoads;
00292
00293
00294 int i;
00295
00296 CmiPrintf("Differences between predicted and real balance:\n");
00297 CmiPrintf("PE (Predicted Load) (Real Predicted) (Difference) (Real CPU) (Prediction Error)\n");
00298 for(i = 0; i < numPes; ++i) {
00299 CmiPrintf("%-4d %13f %16f %15f %12f %14f\n", i, peLoads[i], realPeLoads[i], peLoads[i]-realPeLoads[i],
00300 stats->procs[i].total_walltime-stats->procs[i].idletime, realPeLoads[i]-(stats->procs[i].total_walltime-stats->procs[i].idletime));
00301 }
00302 }