00001 #include <unistd.h>
00002 #include <stdio.h>
00003 #include <stdlib.h>
00004 #include <string.h>
00005 #include <float.h>
00006 #include <ctype.h>
00007 #include <math.h>
00008 #include <assert.h>
00009 #include "tm_mapping.h"
00010 #include "tm_timings.h"
00011 #include "tm_tree.h"
00012 #ifdef _WIN32
00013 #include <windows.h>
00014 #include <winbase.h>
00015 #define random() rand()
00016 #define srandom(x) srand(x)
00017 #endif
00018
00019 #define TEST_ERROR(n) {if(n!=0){fprintf(stderr,"Error %d Line %d\n",n,__LINE__);exit(-1);}}
00020 #undef DEBUG
00021
00022 #define LINE_SIZE 1000000
00023
00024 typedef struct{
00025 int val;
00026 long key;
00027 }hash_t;
00028
00029
00030 typedef struct{
00031 double val;
00032 int key1;
00033 int key2;
00034 }hash2_t;
00035
00036
00037 int nb_nodes(tm_topology_t *topology){
00038 return topology->nb_nodes[topology->nb_levels-1];
00039 }
00040
00041
00042
00043
00044
00045 void free_topology(tm_topology_t *topology){
00046 int i;
00047 for(i=0;i<topology->nb_levels;i++)
00048 free(topology->node_id[i]);
00049 free(topology->node_id);
00050 free(topology->nb_nodes);
00051 free(topology->arity);
00052 free(topology);
00053 }
00054
00055 double print_sol(int N,int *Value,double **comm, double **arch){
00056 double sol;
00057 int i,j;
00058 double a,c;
00059
00060 sol=0;
00061 for (i=0;i<N;i++){
00062 for (j=i+1;j<N;j++){
00063 c=comm[i][j];
00064 a=arch[Value[i]][Value[j]];
00065
00066 sol+=c/a;
00067 }
00068 }
00069 for (i = 0; i < N; i++) {
00070 printf("%d", Value[i]);
00071 if(i<N-1)
00072 printf(",");
00073
00074 }
00075 printf(" : %g\n",sol);
00076
00077 return sol;
00078 }
00079
00080
00081 void print_1D_tab(int *tab,int N){
00082 int i;
00083 for (i = 0; i < N; i++) {
00084 printf("%d", tab[i]);
00085 if(i<N-1)
00086 printf(",");
00087 }
00088 printf("\n");
00089
00090 }
00091
00092 int nb_lines(char *filename){
00093 FILE *pf;
00094 char line[LINE_SIZE];
00095 int N=0;
00096
00097 if(!(pf=fopen(filename,"r"))){
00098 fprintf(stderr,"Cannot open %s\n",filename);
00099 exit(-1);
00100 }
00101
00102 while(fgets(line,LINE_SIZE,pf))
00103 N++;
00104
00105 printf("N=%d\n",N);
00106
00107 fclose(pf);
00108 return N;
00109 }
00110
00111 void init_comm(char *filename,int N,double **comm){
00112 int i,j;
00113 FILE *pf;
00114 char *ptr;
00115 char line[LINE_SIZE];
00116
00117 if(!(pf=fopen(filename,"r"))){
00118 fprintf(stderr,"Cannot open %s\n",filename);
00119 exit(-1);
00120 }
00121
00122
00123 j=-1;
00124 i=0;
00125 while(fgets(line,LINE_SIZE,pf)){
00126
00127
00128 char *l=line;
00129 j=0;
00130
00131 while((ptr=strtok(l," \t"))){
00132 if((ptr[0]!='\n')&&(!isspace(ptr[0]))&&(*ptr)){
00133 comm[i][j]=atof(ptr);
00134
00135 j++;
00136 }
00137 }
00138 if(j!=N){
00139 fprintf(stderr,"Error at %d %d (%d!=%d)for %s\n",i,j,j,N,filename);
00140 exit(-1);
00141 }
00142 i++;
00143 }
00144 if(i!=N){
00145 fprintf(stderr,"Error at %d %d for %s\n",i,j,filename);
00146 exit(-1);
00147 }
00148
00149
00150
00151
00152
00153
00154
00155
00156
00157 }
00158
00159 int build_comm(char *filename,double ***pcomm){
00160 int i;
00161 int N;
00162 double **comm;
00163 N=nb_lines(filename);
00164 comm=(double**)malloc(N*sizeof(double*));
00165 for(i=0;i<N;i++)
00166 comm[i]=(double*)malloc(N*sizeof(double));
00167 init_comm(filename,N,comm);
00168 *pcomm=comm;
00169 return N;
00170 }
00171
00172 void map_Packed(tm_topology_t *topology,int N,int *Value){
00173 int i,depth;
00174
00175 depth=topology->nb_levels-1;
00176
00177 for(i=0;i<N;i++){
00178
00179 Value[i]=topology->node_id[depth][i];
00180 }
00181
00182 }
00183
00184 void map_RR(int N,int *Value){
00185 int i;
00186
00187 for(i=0;i<N;i++){
00188
00189 Value[i]=i;
00190 }
00191
00192 }
00193
00194
00195
00196 int hash_asc(const void* x1,const void* x2){
00197
00198 hash_t *e1,*e2;
00199
00200 e1=((hash_t*)x1);
00201 e2=((hash_t*)x2);
00202
00203
00204 return e1->key<e2->key?-1:1;
00205 }
00206
00207
00208 int *generate_random_sol(tm_topology_t *topology,int N,int level,int seed){
00209 hash_t *hash_tab;
00210 int *sol,i;
00211 int *nodes_id;
00212
00213 nodes_id=topology->node_id[level];
00214
00215
00216 hash_tab=(hash_t*)malloc(sizeof(hash_t)*N);
00217 sol=(int*)malloc(sizeof(int)*N);
00218
00219 srandom(seed);
00220
00221 for(i=0;i<N;i++){
00222 hash_tab[i].val=nodes_id[i];
00223 hash_tab[i].key=random();
00224 }
00225
00226 qsort(hash_tab,N,sizeof(hash_t),hash_asc);
00227 for(i=0;i<N;i++){
00228 sol[i]=hash_tab[i].val;
00229 }
00230 free(hash_tab);
00231 return sol;
00232 }
00233
00234
00235 double eval_sol(int *sol,int N,double **comm, double **arch){
00236 double res;
00237 int i,j;
00238 double a,c;
00239
00240 res=0;
00241 for (i=0;i<N;i++){
00242 for (j=i+1;j<N;j++){
00243 c=comm[i][j];
00244 a=arch[sol[i]][sol[j]];
00245 res+=c/a;
00246 }
00247 }
00248 return res;
00249 }
00250
00251 void exchange(int *sol,int i,int j){
00252 int tmp;
00253 tmp=sol[i];
00254 sol[i]=sol[j];
00255 sol[j]=tmp;
00256 }
00257
00258 double gain_exchange(int *sol,int l,int m,double eval1,int N,double **comm, double **arch){
00259 double eval2;
00260 if(l==m)
00261 return 0;
00262 exchange(sol,l,m);
00263 eval2=eval_sol(sol,N,comm,arch);
00264 exchange(sol,l,m);
00265 return eval1-eval2;
00266 }
00267
00268 void select_max(int *l,int *m,double **gain,int N,int *state){
00269 int i,j;
00270 double max;
00271 max=-DBL_MAX;
00272
00273 for(i=0;i<N;i++){
00274 if(!state[i]){
00275 for(j=0;j<N;j++){
00276 if((i!=j)&&(!state[j])){
00277 if(gain[i][j]>max){
00278 *l=i;*m=j;
00279 max=gain[i][j];
00280 }
00281 }
00282 }
00283 }
00284 }
00285
00286 }
00287
00288 void compute_gain(int *sol,int N,double **gain,double **comm, double **arch){
00289 int i,j;
00290 double eval1;
00291 eval1=eval_sol(sol,N,comm,arch);
00292 for(i=0;i<N;i++){
00293 for(j=0;j<=i;j++){
00294 gain[i][j]=gain[j][i]=gain_exchange(sol,i,j,eval1,N,comm,arch);
00295 }
00296 }
00297 }
00298
00299
00300
00301
00302
00303
00304
00305
00306
00307 void map_MPIPP(tm_topology_t *topology,int nb_seed,int N,int *Value,double **comm, double **arch){
00308 int *sol;
00309 int *state;
00310 double **gain;
00311 int **history;
00312 double *temp;
00313 int i,j,t,l=0,m=0,loop=0,seed=0;
00314 double max,sum,best_eval,eval;
00315
00316
00317 gain=(double**)malloc(sizeof(double*)*N);
00318 for(i=0;i<N;i++){
00319 gain[i]=(double*)malloc(sizeof(double)*N);
00320 if(!gain[i]){
00321 }
00322 }
00323 history=(int**)malloc(sizeof(int*)*N);
00324 for(i=0;i<N;i++)
00325 history[i]=(int*)malloc(sizeof(int)*3);
00326
00327 state=(int*)malloc(sizeof(int)*N);
00328 temp=(double*)malloc(sizeof(double)*N);
00329
00330 sol=generate_random_sol(topology,N,topology->nb_levels-1,seed++);
00331 for(i=0;i<N;i++)
00332 Value[i]=sol[i];
00333
00334 best_eval=DBL_MAX;
00335 while(seed<=nb_seed){
00336 loop=0;
00337 do{
00338
00339 for(i=0;i<N;i++){
00340 state[i]=0;
00341
00342 }
00343
00344 compute_gain(sol,N,gain,comm,arch);
00345
00346
00347
00348 for(i=0;i<N/2;i++){
00349 select_max(&l,&m,gain,N,state);
00350
00351 state[l]=1;state[m]=1;
00352 exchange(sol,l,m);
00353 history[i][1]=l;history[i][2]=m;
00354 temp[i]=gain[l][m];
00355 compute_gain(sol,N,gain,comm,arch);
00356 }
00357
00358 t=-1;
00359 max=0;
00360 sum=0;
00361 for(i=0;i<N/2;i++){
00362 sum+=temp[i];
00363 if(sum>max){
00364 max=sum;
00365 t=i;
00366 }
00367 }
00368
00369
00370 for(j=t+1;j<N/2;j++){
00371 exchange(sol,history[j][1],history[j][2]);
00372
00373 }
00374
00375
00376
00377
00378
00379
00380 eval=eval_sol(sol,N,comm,arch);
00381 if(eval<best_eval){
00382 best_eval=eval;
00383 for(i=0;i<N;i++)
00384 Value[i]=sol[i];
00385
00386 }
00387
00388
00389 }while(max>0);
00390
00391 sol=generate_random_sol(topology,N,topology->nb_levels-1,seed++);
00392
00393 }
00394
00395 }
00396
00397
00398
00399
00400 void map_tree(tree_t* t1,tree_t *t2){
00401
00402
00403
00404
00405
00406
00407
00408
00409
00410
00411
00412
00413
00414
00415
00416 }
00417
00418 void depth_first(tree_t *comm_tree, int *proc_list,int *i){
00419 int j;
00420 if(!comm_tree->child){
00421 proc_list[(*i)++]=comm_tree->id;
00422 return;
00423 }
00424
00425 for(j=0;j<comm_tree->arity;j++){
00426 depth_first(comm_tree->child[j],proc_list,i);
00427 }
00428 }
00429
00430 int nb_leaves(tree_t *comm_tree){
00431 int n=0,j;
00432
00433 if(!comm_tree->child){
00434 return 1;
00435 }
00436
00437 for(j=0;j<comm_tree->arity;j++){
00438 n+=nb_leaves(comm_tree->child[j]);
00439 }
00440 return n;
00441 }
00442
00443
00444
00445
00446
00447
00448
00449
00450
00451
00452
00453
00454
00455
00456
00457 void map_topology(tm_topology_t *topology,tree_t *comm_tree,int nb_proc,int level,
00458 int *sigma, int *k){
00459 int *nodes_id;
00460 int N;
00461 int *proc_list,i,l;
00462 int M;
00463 int block_size;
00464
00465
00466 M=nb_leaves(comm_tree);
00467 printf("nb_leaves=%d\n",M);
00468
00469
00470
00471 nodes_id=topology->node_id[level];
00472 N=topology->nb_nodes[level];
00473
00474
00475
00476
00477
00478 assert(N==nb_proc);
00479
00480
00481 proc_list=(int*)malloc(sizeof(int)*M);
00482 i=0;
00483 depth_first(comm_tree,proc_list,&i);
00484
00485 l=0;
00486 for(i=0;i<M;i++){
00487
00488 }
00489
00490
00491 block_size=M/N;
00492
00493
00494 if(k){
00495 printf("M=%d, N=%d, BS=%d\n",M,N,block_size);
00496 for(i=0;i<nb_nodes(topology);i++){
00497 k[i]=-1;
00498 }
00499 for(i=0;i<M;i++){
00500 if(proc_list[i]!=-1){
00501 #ifdef DEBUG
00502 printf ("%d->%d\n",proc_list[i],nodes_id[i/block_size]);
00503 #endif
00504 sigma[proc_list[i]]=nodes_id[i/block_size];
00505 k[nodes_id[i/block_size]]=proc_list[i];
00506 }
00507 }
00508 }else{
00509 printf("M=%d, N=%d, BS=%d\n",M,N,block_size);
00510 for(i=0;i<M;i++){
00511 if(proc_list[i]!=-1){
00512 #ifdef DEBUG
00513 printf ("%d->%d\n",proc_list[i],nodes_id[i/block_size]);
00514 #endif
00515 sigma[proc_list[i]]=nodes_id[i/block_size];
00516 }
00517 }
00518
00519 }
00520 free(proc_list);
00521
00522 }
00523
00524 void map_topology_simple(tm_topology_t *topology,tree_t *comm_tree, int *sigma,int *k){
00525 map_topology(topology,comm_tree,topology->nb_nodes[topology->nb_levels-1],topology->nb_levels-1,sigma,k);
00526 }
00527
00528 int int_cmp(const void* x1,const void* x2){
00529
00530 int *e1,*e2;
00531
00532 e1=((int *)x1);
00533 e2=((int *)x2);
00534
00535
00536 return (*e1)>(*e2)?-1:1;
00537 }
00538
00539
00540 int decompose(int n,int optimize,int *tab){
00541 int primes[6]={2,3,5,7,11,0},i=0;
00542 int flag=2;
00543 int j=1;
00544
00545
00546 while(primes[i]&&(n!=1)){
00547
00548 if(flag&&optimize&&(n%primes[i]!=0)){
00549 n+=primes[i]-n%primes[i];
00550 flag--;
00551 i=0;
00552 continue;
00553 }
00554
00555 if(n%primes[i]==0){
00556 tab[j++]=primes[i];
00557 n/=primes[i];
00558 }else{
00559 i++;
00560 flag=1;
00561 }
00562 }
00563 if(n!=1){
00564 tab[j++]=n;
00565 }
00566
00567 qsort(tab+1,j-1,sizeof(int),int_cmp);
00568
00569 for(i=0;i<j;i++)
00570 printf("%d:",tab[i]);
00571 printf("\n");
00572
00573 tab[j]=0;
00574
00575 return j+1;
00576 }
00577
00578
00579 tree_t *build_synthetic_topology_old(int *synt_tab,int id,int depth,int nb_levels){
00580 tree_t *res,**child;
00581 int arity=synt_tab[0];
00582 int val,i;
00583
00584 res=(tree_t*)malloc(sizeof(tree_t));
00585 val=0;
00586 if(depth>=nb_levels)
00587 child=NULL;
00588 else{
00589 child=(tree_t**)malloc(sizeof(tree_t*)*arity);
00590 for(i=0;i<arity;i++){
00591 child[i]=build_synthetic_topology_old(synt_tab+1,i,depth+1,nb_levels);
00592 child[i]->parent=res;
00593 val+=child[i]->val;
00594 }
00595 }
00596 set_node(res,child,arity,NULL,id,val+speed(depth),child[0]);
00597 return res;
00598 }
00599
00600
00601 void display_topology(tm_topology_t *topology){
00602 int i,j;
00603 for(i=0;i<topology->nb_levels;i++){
00604 printf("%d: ",i);
00605 for(j=0;j<topology->nb_nodes[i];j++)
00606 printf("%d ",topology->node_id[i][j]);
00607 printf("\n");
00608 }
00609 }
00610
00611
00612
00613
00614
00615
00616
00617
00618
00619
00620
00621
00622 tm_topology_t *build_synthetic_topology(int *arity, int nb_levels, int *core_numbering, int nb_core_per_nodes){
00623 tm_topology_t *topology;
00624 int i,j,n=1;
00625
00626 topology=(tm_topology_t*)malloc(sizeof(tm_topology_t));
00627 topology->arity=(int*)malloc(sizeof(int)*nb_levels);
00628 memcpy(topology->arity,arity,sizeof(int)*nb_levels);
00629 topology->nb_levels=nb_levels;
00630
00631 topology->node_id=(int**)malloc(sizeof(int*)*topology->nb_levels);
00632 topology->nb_nodes=(int*)malloc(sizeof(int)*topology->nb_levels);
00633
00634
00635 for(i=0;i<topology->nb_levels;i++){
00636 topology->nb_nodes[i]=n;
00637 topology->node_id[i]=(int*)malloc(sizeof(int)*n);
00638 if(i<topology->nb_levels-1){
00639 for(j=0;j<n;j++)
00640 topology->node_id[i][j]=j;
00641 }else{
00642 for(j=0;j<n;j++)
00643 topology->node_id[i][j]=core_numbering[j%nb_core_per_nodes]+(nb_core_per_nodes)*(j/nb_core_per_nodes);
00644 }
00645
00646 n*=topology->arity[i];
00647 }
00648 return topology;
00649
00650 }
00651
00652
00653
00654
00655 void build_synthetic_proc_id(tm_topology_t *topology){
00656 int n=1,i,j;
00657 topology->node_id=(int**)malloc(sizeof(int*)*topology->nb_levels);
00658 topology->nb_nodes=(int*)malloc(sizeof(int)*topology->nb_levels);
00659
00660
00661 for(i=0;i<topology->nb_levels;i++){
00662 topology->nb_nodes[i]=n;
00663 topology->node_id[i]=(int*)malloc(sizeof(int)*n);
00664 for(j=0;j<n;j++)
00665 topology->node_id[i][j]=j;
00666 n*=topology->arity[i];
00667 }
00668
00669
00670
00671 }
00672
00673 void update_comm_speed(double **comm_speed,int old_size,int new_size){
00674 double *old_tab,*new_tab;
00675 int i;
00676 printf("comm speed [%p]: ",*comm_speed);
00677
00678 old_tab=*comm_speed;
00679 new_tab=(double*)malloc(sizeof(double)*new_size);
00680 *comm_speed=new_tab;
00681
00682 for(i=0;i<new_size;i++){
00683 if(i<old_size)
00684 new_tab[i]=old_tab[i];
00685 else
00686 new_tab[i]=new_tab[i-1];
00687
00688 printf("%f ",new_tab[i]);
00689 }
00690
00691 printf("\n");
00692 }
00693
00694
00695
00696
00697 void TreeMatchMapping(int nb_obj, int nb_proc, double **comm_mat, double *obj_weight, double * comm_speed, int d, int *sol){
00698 tree_t *comm_tree;
00699 tm_topology_t *topology;
00700 double duration;
00701
00702 int i;
00703 TIC;
00704
00705 for(i=0;i<nb_obj;i++){
00706 sol[i]=i;
00707
00708 }
00709
00710
00711
00712
00713
00714 topology=(tm_topology_t*)malloc(sizeof(tm_topology_t));
00715 topology->arity=(int*)malloc(sizeof(int)*MAX_LEVELS);
00716 topology->arity[0]=nb_proc;
00717 topology->nb_levels=decompose((int)ceil((1.0*nb_obj)/nb_proc),1,topology->arity);
00718 printf("Topology nb levels=%d\n",topology->nb_levels);
00719 build_synthetic_proc_id(topology);
00720
00721 if(topology->nb_levels>d)
00722 update_comm_speed(&comm_speed,d,topology->nb_levels);
00723
00724
00725
00726
00727
00728
00729
00730
00731
00732
00733
00734 TIC;
00735 comm_tree=build_tree_from_topology(topology,comm_mat,nb_obj,obj_weight,comm_speed);
00736 printf("Tree buildinbg time=%f\n",TOC);
00737 TIC;
00738 map_topology(topology,comm_tree,nb_proc,1,sol,NULL);
00739 printf("Topology mapping time=%f\n",TOC);
00740
00741
00742 if(topology->nb_levels>d)
00743 free(comm_speed);
00744
00745 free_topology(topology);
00746 free_tree(comm_tree);
00747
00748 duration=TOC;
00749 printf("-------------- Mapping done in %.4fs!\n",duration);
00750 }
00751
00752 void display_other_heuristics(tm_topology_t *topology,int N,double **comm,double **arch){
00753 CLOCK_T time1,time0;
00754 double duration;
00755 int *sol;
00756
00757 sol=(int*)malloc(sizeof(int)*N);
00758
00759 map_Packed(topology,N,sol);
00760 printf("Packed: ");
00761 print_sol(N,sol,comm,arch);
00762
00763
00764
00765 map_RR(N,sol);
00766 printf("RR: ");
00767 print_sol(N,sol,comm,arch);
00768
00769 CLOCK(time0);
00770 map_MPIPP(topology,1,N,sol,comm,arch);
00771 CLOCK(time1);
00772 duration=CLOCK_DIFF(time1,time0);
00773 printf("MPIPP-1-D:%f\n",duration);
00774 printf("MPIPP-1: ");
00775 print_sol(N,sol,comm,arch);
00776
00777 CLOCK(time0);
00778 map_MPIPP(topology,5,N,sol,comm,arch);
00779 CLOCK(time1);
00780 duration=CLOCK_DIFF(time1,time0);
00781 printf("MPIPP-5-D:%f\n",duration);
00782 printf("MPIPP-5: ");
00783 print_sol(N,sol,comm,arch);
00784
00785 free(sol);
00786 }
00787
00788