00001 #include <unistd.h>
00002 #include <stdio.h>
00003 #include <stdlib.h>
00004 #include <string.h>
00005 #include <float.h>
00006 #include <ctype.h>
00007 #include <math.h>
00008 #include <assert.h>
00009 #include "tm_mapping.h"
00010 #include "tm_timings.h"
00011 #include "tm_tree.h"
00012 #ifdef _WIN32
00013 #include <windows.h>
00014 #include <winbase.h>
00015 #define random() rand()
00016 #define srandom(x) srand(x)
00017 #endif
00018
00019 #define TEST_ERROR(n) {if(n!=0){fprintf(stderr,"Error %d Line %d\n",n,__LINE__);exit(-1);}}
00020 #undef DEBUG
00021
00022 #define LINE_SIZE 1000000
00023
00024 typedef struct{
00025 int val;
00026 long key;
00027 }hash_t;
00028
00029
00030 typedef struct{
00031 double val;
00032 int key1;
00033 int key2;
00034 }hash2_t;
00035
00036
00037 int nb_nodes(tm_topology_t *topology){
00038 return topology->nb_nodes[topology->nb_levels-1];
00039 }
00040
00041
00042
00043
00044
00045 void free_topology(tm_topology_t *topology){
00046 int i;
00047 for(i=0;i<topology->nb_levels;i++)
00048 free(topology->node_id[i]);
00049 free(topology->node_id);
00050 free(topology->nb_nodes);
00051 free(topology->arity);
00052 free(topology);
00053 }
00054
00055 double print_sol(int N,int *Value,double **comm, double **arch){
00056 double sol;
00057 int i,j;
00058 double a,c;
00059
00060 sol=0;
00061 for (i=0;i<N;i++){
00062 for (j=i+1;j<N;j++){
00063 c=comm[i][j];
00064 a=arch[Value[i]][Value[j]];
00065
00066 sol+=c/a;
00067 }
00068 }
00069 for (i = 0; i < N; i++) {
00070 printf("%d", Value[i]);
00071 if(i<N-1)
00072 printf(",");
00073
00074 }
00075 printf(" : %g\n",sol);
00076
00077 return sol;
00078 }
00079
00080
00081 void print_1D_tab(int *tab,int N){
00082 int i;
00083 for (i = 0; i < N; i++) {
00084 printf("%d", tab[i]);
00085 if(i<N-1)
00086 printf(",");
00087 }
00088 printf("\n");
00089
00090 }
00091
00092 int nb_lines(char *filename){
00093 FILE *pf;
00094 char line[LINE_SIZE];
00095 int N=0;
00096
00097 if(!(pf=fopen(filename,"r"))){
00098 fprintf(stderr,"Cannot open %s\n",filename);
00099 exit(-1);
00100 }
00101
00102 while(fgets(line,LINE_SIZE,pf))
00103 N++;
00104
00105 printf("N=%d\n",N);
00106
00107 fclose(pf);
00108 return N;
00109 }
00110
00111 void init_comm(char *filename,int N,double **comm){
00112 int i,j;
00113 FILE *pf;
00114 char *ptr;
00115 char line[LINE_SIZE];
00116
00117 if(!(pf=fopen(filename,"r"))){
00118 fprintf(stderr,"Cannot open %s\n",filename);
00119 exit(-1);
00120 }
00121
00122
00123 j=-1;
00124 i=0;
00125 while(fgets(line,LINE_SIZE,pf)){
00126
00127
00128 char *l=line;
00129 j=0;
00130
00131 while((ptr=strtok(l," \t"))){
00132 if((ptr[0]!='\n')&&(!isspace(ptr[0]))&&(*ptr)){
00133 comm[i][j]=atof(ptr);
00134
00135 j++;
00136 }
00137 }
00138 if(j!=N){
00139 fprintf(stderr,"Error at %d %d (%d!=%d)for %s\n",i,j,j,N,filename);
00140 exit(-1);
00141 }
00142 i++;
00143 }
00144 if(i!=N){
00145 fprintf(stderr,"Error at %d %d for %s\n",i,j,filename);
00146 exit(-1);
00147 }
00148
00149
00150
00151
00152
00153
00154
00155
00156 fclose(pf);
00157 }
00158
00159 int build_comm(char *filename,double ***pcomm){
00160 int i;
00161 int N;
00162 double **comm;
00163 N=nb_lines(filename);
00164 comm=(double**)malloc(N*sizeof(double*));
00165 for(i=0;i<N;i++)
00166 comm[i]=(double*)malloc(N*sizeof(double));
00167 init_comm(filename,N,comm);
00168 *pcomm=comm;
00169 return N;
00170 }
00171
00172 void map_Packed(tm_topology_t *topology,int N,int *Value){
00173 int i,depth;
00174
00175 depth=topology->nb_levels-1;
00176
00177 for(i=0;i<N;i++){
00178
00179 Value[i]=topology->node_id[depth][i];
00180 }
00181
00182 }
00183
00184 void map_RR(int N,int *Value){
00185 int i;
00186
00187 for(i=0;i<N;i++){
00188
00189 Value[i]=i;
00190 }
00191
00192 }
00193
00194
00195
00196 int hash_asc(const void* x1,const void* x2){
00197
00198 hash_t *e1,*e2;
00199
00200 e1=((hash_t*)x1);
00201 e2=((hash_t*)x2);
00202
00203
00204 return e1->key<e2->key?-1:1;
00205 }
00206
00207
00208 int *generate_random_sol(tm_topology_t *topology,int N,int level,int seed){
00209 hash_t *hash_tab;
00210 int *sol,i;
00211 int *nodes_id;
00212
00213 nodes_id=topology->node_id[level];
00214
00215
00216 hash_tab=(hash_t*)malloc(sizeof(hash_t)*N);
00217 sol=(int*)malloc(sizeof(int)*N);
00218
00219 srandom(seed);
00220
00221 for(i=0;i<N;i++){
00222 hash_tab[i].val=nodes_id[i];
00223 hash_tab[i].key=random();
00224 }
00225
00226 qsort(hash_tab,N,sizeof(hash_t),hash_asc);
00227 for(i=0;i<N;i++){
00228 sol[i]=hash_tab[i].val;
00229 }
00230 free(hash_tab);
00231 return sol;
00232 }
00233
00234
00235 double eval_sol(int *sol,int N,double **comm, double **arch){
00236 double res;
00237 int i,j;
00238 double a,c;
00239
00240 res=0;
00241 for (i=0;i<N;i++){
00242 for (j=i+1;j<N;j++){
00243 c=comm[i][j];
00244 a=arch[sol[i]][sol[j]];
00245 res+=c/a;
00246 }
00247 }
00248 return res;
00249 }
00250
00251 void exchange(int *sol,int i,int j){
00252 int tmp;
00253 tmp=sol[i];
00254 sol[i]=sol[j];
00255 sol[j]=tmp;
00256 }
00257
00258 double gain_exchange(int *sol,int l,int m,double eval1,int N,double **comm, double **arch){
00259 double eval2;
00260 if(l==m)
00261 return 0;
00262 exchange(sol,l,m);
00263 eval2=eval_sol(sol,N,comm,arch);
00264 exchange(sol,l,m);
00265 return eval1-eval2;
00266 }
00267
00268 void select_max(int *l,int *m,double **gain,int N,int *state){
00269 int i,j;
00270 double max;
00271 max=-DBL_MAX;
00272
00273 for(i=0;i<N;i++){
00274 if(!state[i]){
00275 for(j=0;j<N;j++){
00276 if((i!=j)&&(!state[j])){
00277 if(gain[i][j]>max){
00278 *l=i;*m=j;
00279 max=gain[i][j];
00280 }
00281 }
00282 }
00283 }
00284 }
00285
00286 }
00287
00288 void compute_gain(int *sol,int N,double **gain,double **comm, double **arch){
00289 int i,j;
00290 double eval1;
00291 eval1=eval_sol(sol,N,comm,arch);
00292 for(i=0;i<N;i++){
00293 for(j=0;j<=i;j++){
00294 gain[i][j]=gain[j][i]=gain_exchange(sol,i,j,eval1,N,comm,arch);
00295 }
00296 }
00297 }
00298
00299
00300
00301
00302
00303
00304
00305
00306
00307 void map_MPIPP(tm_topology_t *topology,int nb_seed,int N,int *Value,double **comm, double **arch){
00308 int *sol;
00309 int *state;
00310 double **gain;
00311 int **history;
00312 double *temp;
00313 int i,j,t,l=0,m=0,loop=0,seed=0;
00314 double max,sum,best_eval,eval;
00315
00316
00317 gain=(double**)malloc(sizeof(double*)*N);
00318 for(i=0;i<N;i++){
00319 gain[i]=(double*)malloc(sizeof(double)*N);
00320 if(!gain[i]){
00321 }
00322 }
00323 history=(int**)malloc(sizeof(int*)*N);
00324 for(i=0;i<N;i++)
00325 history[i]=(int*)malloc(sizeof(int)*3);
00326
00327 state=(int*)malloc(sizeof(int)*N);
00328 temp=(double*)malloc(sizeof(double)*N);
00329
00330 sol=generate_random_sol(topology,N,topology->nb_levels-1,seed++);
00331 for(i=0;i<N;i++)
00332 Value[i]=sol[i];
00333
00334 best_eval=DBL_MAX;
00335 while(seed<=nb_seed){
00336 loop=0;
00337 do{
00338
00339 for(i=0;i<N;i++){
00340 state[i]=0;
00341
00342 }
00343
00344 compute_gain(sol,N,gain,comm,arch);
00345
00346
00347
00348 for(i=0;i<N/2;i++){
00349 select_max(&l,&m,gain,N,state);
00350
00351 state[l]=1;state[m]=1;
00352 exchange(sol,l,m);
00353 history[i][1]=l;history[i][2]=m;
00354 temp[i]=gain[l][m];
00355 compute_gain(sol,N,gain,comm,arch);
00356 }
00357
00358 t=-1;
00359 max=0;
00360 sum=0;
00361 for(i=0;i<N/2;i++){
00362 sum+=temp[i];
00363 if(sum>max){
00364 max=sum;
00365 t=i;
00366 }
00367 }
00368
00369
00370 for(j=t+1;j<N/2;j++){
00371 exchange(sol,history[j][1],history[j][2]);
00372
00373 }
00374
00375
00376
00377
00378
00379
00380 eval=eval_sol(sol,N,comm,arch);
00381 if(eval<best_eval){
00382 best_eval=eval;
00383 for(i=0;i<N;i++)
00384 Value[i]=sol[i];
00385
00386 }
00387
00388
00389 }while(max>0);
00390
00391 if (sol != NULL) {
00392 free(sol);
00393 sol = NULL;
00394 }
00395 sol=generate_random_sol(topology,N,topology->nb_levels-1,seed++);
00396
00397 }
00398 if (sol != NULL) {
00399 free(sol);
00400 sol = NULL;
00401 }
00402 free(state);
00403 free(temp);
00404
00405 for(i=0;i<N;i++){
00406 free(history[i]);
00407 free(gain[i]);
00408 }
00409 free(history);
00410 free(gain);
00411 }
00412
00413
00414
00415
00416 void map_tree(tree_t* t1,tree_t *t2){
00417
00418
00419
00420
00421
00422
00423
00424
00425
00426
00427
00428
00429
00430
00431
00432 }
00433
00434 void depth_first(tree_t *comm_tree, int *proc_list,int *i){
00435 int j;
00436 if(!comm_tree->child){
00437 proc_list[(*i)++]=comm_tree->id;
00438 return;
00439 }
00440
00441 for(j=0;j<comm_tree->arity;j++){
00442 depth_first(comm_tree->child[j],proc_list,i);
00443 }
00444 }
00445
00446 int nb_leaves(tree_t *comm_tree){
00447 int n=0,j;
00448
00449 if(!comm_tree->child){
00450 return 1;
00451 }
00452
00453 for(j=0;j<comm_tree->arity;j++){
00454 n+=nb_leaves(comm_tree->child[j]);
00455 }
00456 return n;
00457 }
00458
00459
00460
00461
00462
00463
00464
00465
00466
00467
00468
00469
00470
00471
00472
00473 void map_topology(tm_topology_t *topology,tree_t *comm_tree,int nb_proc,int level,
00474 int *sigma, int *k){
00475 int *nodes_id;
00476 int N;
00477 int *proc_list,i,l;
00478 int M;
00479 int block_size;
00480
00481
00482 M=nb_leaves(comm_tree);
00483 printf("nb_leaves=%d\n",M);
00484
00485
00486
00487 nodes_id=topology->node_id[level];
00488 N=topology->nb_nodes[level];
00489
00490
00491
00492
00493
00494 assert(N==nb_proc);
00495
00496
00497 proc_list=(int*)malloc(sizeof(int)*M);
00498 i=0;
00499 depth_first(comm_tree,proc_list,&i);
00500
00501 l=0;
00502 for(i=0;i<M;i++){
00503
00504 }
00505
00506
00507 block_size=M/N;
00508
00509
00510 if(k){
00511 printf("M=%d, N=%d, BS=%d\n",M,N,block_size);
00512 for(i=0;i<nb_nodes(topology);i++){
00513 k[i]=-1;
00514 }
00515 for(i=0;i<M;i++){
00516 if(proc_list[i]!=-1){
00517 #ifdef DEBUG
00518 printf ("%d->%d\n",proc_list[i],nodes_id[i/block_size]);
00519 #endif
00520 sigma[proc_list[i]]=nodes_id[i/block_size];
00521 k[nodes_id[i/block_size]]=proc_list[i];
00522 }
00523 }
00524 }else{
00525 printf("M=%d, N=%d, BS=%d\n",M,N,block_size);
00526 for(i=0;i<M;i++){
00527 if(proc_list[i]!=-1){
00528 #ifdef DEBUG
00529 printf ("%d->%d\n",proc_list[i],nodes_id[i/block_size]);
00530 #endif
00531 sigma[proc_list[i]]=nodes_id[i/block_size];
00532 }
00533 }
00534
00535 }
00536 free(proc_list);
00537
00538 }
00539
00540 void map_topology_simple(tm_topology_t *topology,tree_t *comm_tree, int *sigma,int *k){
00541 map_topology(topology,comm_tree,topology->nb_nodes[topology->nb_levels-1],topology->nb_levels-1,sigma,k);
00542 }
00543
00544 static int int_cmp(const void* x1,const void* x2){
00545
00546 int *e1,*e2;
00547
00548 e1=((int *)x1);
00549 e2=((int *)x2);
00550
00551
00552 return (*e1)>(*e2)?-1:1;
00553 }
00554
00555
00556 int decompose(int n,int optimize,int *tab){
00557 int primes[6]={2,3,5,7,11,0},i=0;
00558 int flag=2;
00559 int j=1;
00560
00561
00562 while(primes[i]&&(n!=1)){
00563
00564 if(flag&&optimize&&(n%primes[i]!=0)){
00565 n+=primes[i]-n%primes[i];
00566 flag--;
00567 i=0;
00568 continue;
00569 }
00570
00571 if(n%primes[i]==0){
00572 tab[j++]=primes[i];
00573 n/=primes[i];
00574 }else{
00575 i++;
00576 flag=1;
00577 }
00578 }
00579 if(n!=1){
00580 tab[j++]=n;
00581 }
00582
00583 qsort(tab+1,j-1,sizeof(int),int_cmp);
00584
00585 for(i=0;i<j;i++)
00586 printf("%d:",tab[i]);
00587 printf("\n");
00588
00589 tab[j]=0;
00590
00591 return j+1;
00592 }
00593
00594
00595 tree_t *build_synthetic_topology_old(int *synt_tab,int id,int depth,int nb_levels){
00596 tree_t *res,**child;
00597 int arity=synt_tab[0];
00598 int val,i;
00599
00600 res=(tree_t*)malloc(sizeof(tree_t));
00601 val=0;
00602 if(depth>=nb_levels)
00603 child=NULL;
00604 else{
00605 child=(tree_t**)malloc(sizeof(tree_t*)*arity);
00606 for(i=0;i<arity;i++){
00607 child[i]=build_synthetic_topology_old(synt_tab+1,i,depth+1,nb_levels);
00608 child[i]->parent=res;
00609 val+=child[i]->val;
00610 }
00611 }
00612 set_node(res,child,arity,NULL,id,val+speed(depth),child[0]);
00613 return res;
00614 }
00615
00616
00617 void display_topology(tm_topology_t *topology){
00618 int i,j;
00619 for(i=0;i<topology->nb_levels;i++){
00620 printf("%d: ",i);
00621 for(j=0;j<topology->nb_nodes[i];j++)
00622 printf("%d ",topology->node_id[i][j]);
00623 printf("\n");
00624 }
00625 }
00626
00627
00628
00629
00630
00631
00632
00633
00634
00635
00636
00637
00638 tm_topology_t *build_synthetic_topology(int *arity, int nb_levels, int *core_numbering, int nb_core_per_nodes){
00639 tm_topology_t *topology;
00640 int i,j,n=1;
00641
00642 topology=(tm_topology_t*)malloc(sizeof(tm_topology_t));
00643 topology->arity=(int*)malloc(sizeof(int)*nb_levels);
00644 memcpy(topology->arity,arity,sizeof(int)*nb_levels);
00645 topology->nb_levels=nb_levels;
00646
00647 topology->node_id=(int**)malloc(sizeof(int*)*topology->nb_levels);
00648 topology->nb_nodes=(int*)malloc(sizeof(int)*topology->nb_levels);
00649
00650
00651 for(i=0;i<topology->nb_levels;i++){
00652 topology->nb_nodes[i]=n;
00653 topology->node_id[i]=(int*)malloc(sizeof(int)*n);
00654 if(i<topology->nb_levels-1){
00655 for(j=0;j<n;j++)
00656 topology->node_id[i][j]=j;
00657 }else{
00658 for(j=0;j<n;j++)
00659 topology->node_id[i][j]=core_numbering[j%nb_core_per_nodes]+(nb_core_per_nodes)*(j/nb_core_per_nodes);
00660 }
00661
00662 n*=topology->arity[i];
00663 }
00664 return topology;
00665
00666 }
00667
00668
00669
00670
00671 void build_synthetic_proc_id(tm_topology_t *topology){
00672 int n=1,i,j;
00673 topology->node_id=(int**)malloc(sizeof(int*)*topology->nb_levels);
00674 topology->nb_nodes=(int*)malloc(sizeof(int)*topology->nb_levels);
00675
00676
00677 for(i=0;i<topology->nb_levels;i++){
00678 topology->nb_nodes[i]=n;
00679 topology->node_id[i]=(int*)malloc(sizeof(int)*n);
00680 for(j=0;j<n;j++)
00681 topology->node_id[i][j]=j;
00682 n*=topology->arity[i];
00683 }
00684
00685
00686
00687 }
00688
00689 void update_comm_speed(double **comm_speed,int old_size,int new_size){
00690 double *old_tab,*new_tab;
00691 int i;
00692 printf("comm speed [%p]: ", (void *)*comm_speed);
00693
00694 old_tab=*comm_speed;
00695 new_tab=(double*)malloc(sizeof(double)*new_size);
00696 *comm_speed=new_tab;
00697
00698 for(i=0;i<new_size;i++){
00699 if(i<old_size)
00700 new_tab[i]=old_tab[i];
00701 else
00702 new_tab[i]=new_tab[i-1];
00703
00704 printf("%f ",new_tab[i]);
00705 }
00706
00707 printf("\n");
00708 }
00709
00710
00711
00712
00713 void TreeMatchMapping(int nb_obj, int nb_proc, double **comm_mat, double *obj_weight, double * comm_speed, int d, int *sol){
00714 tree_t *comm_tree;
00715 tm_topology_t *topology;
00716 double duration;
00717
00718 int i;
00719 TIC;
00720
00721 for(i=0;i<nb_obj;i++){
00722 sol[i]=i;
00723
00724 }
00725
00726
00727
00728
00729
00730 topology=(tm_topology_t*)malloc(sizeof(tm_topology_t));
00731 topology->arity=(int*)malloc(sizeof(int)*MAX_LEVELS);
00732 topology->arity[0]=nb_proc;
00733 topology->nb_levels=decompose((int)ceil((1.0*nb_obj)/nb_proc),1,topology->arity);
00734 printf("Topology nb levels=%d\n",topology->nb_levels);
00735 build_synthetic_proc_id(topology);
00736
00737 if(topology->nb_levels>d)
00738 update_comm_speed(&comm_speed,d,topology->nb_levels);
00739
00740
00741
00742
00743
00744
00745
00746
00747
00748
00749
00750 TIC;
00751 comm_tree=build_tree_from_topology(topology,comm_mat,nb_obj,obj_weight,comm_speed);
00752 printf("Tree buildinbg time=%f\n",TOC);
00753 TIC;
00754 map_topology(topology,comm_tree,nb_proc,1,sol,NULL);
00755 printf("Topology mapping time=%f\n",TOC);
00756
00757
00758 if(topology->nb_levels>d)
00759 free(comm_speed);
00760
00761 free_topology(topology);
00762 free_tree(comm_tree);
00763
00764 duration=TOC;
00765 printf("-------------- Mapping done in %.4fs!\n",duration);
00766 }
00767
00768 void display_other_heuristics(tm_topology_t *topology,int N,double **comm,double **arch){
00769 CLOCK_T time1,time0;
00770 double duration;
00771 int *sol;
00772
00773 sol=(int*)malloc(sizeof(int)*N);
00774
00775 map_Packed(topology,N,sol);
00776 printf("Packed: ");
00777 print_sol(N,sol,comm,arch);
00778
00779
00780
00781 map_RR(N,sol);
00782 printf("RR: ");
00783 print_sol(N,sol,comm,arch);
00784
00785 CLOCK(time0);
00786 map_MPIPP(topology,1,N,sol,comm,arch);
00787 CLOCK(time1);
00788 duration=CLOCK_DIFF(time1,time0);
00789 printf("MPIPP-1-D:%f\n",duration);
00790 printf("MPIPP-1: ");
00791 print_sol(N,sol,comm,arch);
00792
00793 CLOCK(time0);
00794 map_MPIPP(topology,5,N,sol,comm,arch);
00795 CLOCK(time1);
00796 duration=CLOCK_DIFF(time1,time0);
00797 printf("MPIPP-5-D:%f\n",duration);
00798 printf("MPIPP-5: ");
00799 print_sol(N,sol,comm,arch);
00800
00801 free(sol);
00802 }
00803
00804