00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #include "dataloop.h"
00011
00012 static int MPIOI_Type_block(int *array_of_gsizes, int dim, int ndims,
00013 int nprocs, int rank, int darg, int order, MPI_Aint orig_extent,
00014 MPI_Datatype type_old, MPI_Datatype *type_new,
00015 MPI_Aint *st_offset);
00016 static int MPIOI_Type_cyclic(int *array_of_gsizes, int dim, int ndims, int nprocs,
00017 int rank, int darg, int order, MPI_Aint orig_extent,
00018 MPI_Datatype type_old, MPI_Datatype *type_new,
00019 MPI_Aint *st_offset);
00020
00021
00022 int PREPEND_PREFIX(Type_convert_darray)(int size,
00023 int rank,
00024 int ndims,
00025 int *array_of_gsizes,
00026 int *array_of_distribs,
00027 int *array_of_dargs,
00028 int *array_of_psizes,
00029 int order,
00030 MPI_Datatype oldtype,
00031 MPI_Datatype *newtype)
00032 {
00033 MPI_Datatype type_old, type_new=MPI_DATATYPE_NULL, types[3];
00034 int procs, tmp_rank, i, tmp_size, blklens[3], *coords;
00035 MPI_Aint *st_offsets, orig_extent, disps[3];
00036
00037 MPI_Type_extent(oldtype, &orig_extent);
00038
00039
00040
00041 coords = (int *) DLOOP_Malloc(ndims*sizeof(int));
00042 procs = size;
00043 tmp_rank = rank;
00044 for (i=0; i<ndims; i++) {
00045 procs = procs/array_of_psizes[i];
00046 coords[i] = tmp_rank/procs;
00047 tmp_rank = tmp_rank % procs;
00048 }
00049
00050 st_offsets = (MPI_Aint *) DLOOP_Malloc(ndims*sizeof(MPI_Aint));
00051 type_old = oldtype;
00052
00053 if (order == MPI_ORDER_FORTRAN) {
00054
00055 for (i=0; i<ndims; i++) {
00056 switch(array_of_distribs[i]) {
00057 case MPI_DISTRIBUTE_BLOCK:
00058 MPIOI_Type_block(array_of_gsizes, i, ndims,
00059 array_of_psizes[i],
00060 coords[i], array_of_dargs[i],
00061 order, orig_extent,
00062 type_old, &type_new,
00063 st_offsets+i);
00064 break;
00065 case MPI_DISTRIBUTE_CYCLIC:
00066 MPIOI_Type_cyclic(array_of_gsizes, i, ndims,
00067 array_of_psizes[i], coords[i],
00068 array_of_dargs[i], order,
00069 orig_extent, type_old,
00070 &type_new, st_offsets+i);
00071 break;
00072 case MPI_DISTRIBUTE_NONE:
00073
00074 MPIOI_Type_block(array_of_gsizes, i, ndims, 1, 0,
00075 MPI_DISTRIBUTE_DFLT_DARG, order,
00076 orig_extent,
00077 type_old, &type_new,
00078 st_offsets+i);
00079 break;
00080 }
00081 if (i) MPI_Type_free(&type_old);
00082 type_old = type_new;
00083 }
00084
00085
00086 disps[1] = st_offsets[0];
00087 tmp_size = 1;
00088 for (i=1; i<ndims; i++) {
00089 tmp_size *= array_of_gsizes[i-1];
00090 disps[1] += tmp_size*st_offsets[i];
00091 }
00092
00093 }
00094
00095 else {
00096
00097 for (i=ndims-1; i>=0; i--) {
00098 switch(array_of_distribs[i]) {
00099 case MPI_DISTRIBUTE_BLOCK:
00100 MPIOI_Type_block(array_of_gsizes, i, ndims, array_of_psizes[i],
00101 coords[i], array_of_dargs[i], order,
00102 orig_extent, type_old, &type_new,
00103 st_offsets+i);
00104 break;
00105 case MPI_DISTRIBUTE_CYCLIC:
00106 MPIOI_Type_cyclic(array_of_gsizes, i, ndims,
00107 array_of_psizes[i], coords[i],
00108 array_of_dargs[i], order,
00109 orig_extent, type_old, &type_new,
00110 st_offsets+i);
00111 break;
00112 case MPI_DISTRIBUTE_NONE:
00113
00114 MPIOI_Type_block(array_of_gsizes, i, ndims, array_of_psizes[i],
00115 coords[i], MPI_DISTRIBUTE_DFLT_DARG, order, orig_extent,
00116 type_old, &type_new, st_offsets+i);
00117 break;
00118 }
00119 if (i != ndims-1) MPI_Type_free(&type_old);
00120 type_old = type_new;
00121 }
00122
00123
00124 disps[1] = st_offsets[ndims-1];
00125 tmp_size = 1;
00126 for (i=ndims-2; i>=0; i--) {
00127 tmp_size *= array_of_gsizes[i+1];
00128 disps[1] += tmp_size*st_offsets[i];
00129 }
00130 }
00131
00132 disps[1] *= orig_extent;
00133
00134 disps[2] = orig_extent;
00135 for (i=0; i<ndims; i++) disps[2] *= array_of_gsizes[i];
00136
00137 disps[0] = 0;
00138 blklens[0] = blklens[1] = blklens[2] = 1;
00139 types[0] = MPI_LB;
00140 types[1] = type_new;
00141 types[2] = MPI_UB;
00142
00143 MPI_Type_struct(3, blklens, disps, types, newtype);
00144
00145 MPI_Type_free(&type_new);
00146 DLOOP_Free(st_offsets);
00147 DLOOP_Free(coords);
00148 return MPI_SUCCESS;
00149 }
00150
00151
00152
00153
00154
00155 static int MPIOI_Type_block(int *array_of_gsizes, int dim, int ndims, int nprocs,
00156 int rank, int darg, int order, MPI_Aint orig_extent,
00157 MPI_Datatype type_old, MPI_Datatype *type_new,
00158 MPI_Aint *st_offset)
00159 {
00160
00161
00162 int blksize, global_size, mysize, i, j;
00163 MPI_Aint stride;
00164
00165 global_size = array_of_gsizes[dim];
00166
00167 if (darg == MPI_DISTRIBUTE_DFLT_DARG)
00168 blksize = (global_size + nprocs - 1)/nprocs;
00169 else {
00170 blksize = darg;
00171
00172
00173 if (blksize <= 0) {
00174 return MPI_ERR_ARG;
00175 }
00176
00177 if (blksize * nprocs < global_size) {
00178 return MPI_ERR_ARG;
00179 }
00180
00181 }
00182
00183 j = global_size - blksize*rank;
00184 mysize = (blksize < j) ? blksize : j;
00185 if (mysize < 0) mysize = 0;
00186
00187 stride = orig_extent;
00188 if (order == MPI_ORDER_FORTRAN) {
00189 if (dim == 0)
00190 MPI_Type_contiguous(mysize, type_old, type_new);
00191 else {
00192 for (i=0; i<dim; i++) stride *= array_of_gsizes[i];
00193 MPI_Type_hvector(mysize, 1, stride, type_old, type_new);
00194 }
00195 }
00196 else {
00197 if (dim == ndims-1)
00198 MPI_Type_contiguous(mysize, type_old, type_new);
00199 else {
00200 for (i=ndims-1; i>dim; i--) stride *= array_of_gsizes[i];
00201 MPI_Type_hvector(mysize, 1, stride, type_old, type_new);
00202 }
00203
00204 }
00205
00206 *st_offset = blksize * rank;
00207
00208 if (mysize == 0) *st_offset = 0;
00209
00210 return MPI_SUCCESS;
00211 }
00212
00213
00214
00215
00216
00217 static int MPIOI_Type_cyclic(int *array_of_gsizes, int dim, int ndims, int nprocs,
00218 int rank, int darg, int order, MPI_Aint orig_extent,
00219 MPI_Datatype type_old, MPI_Datatype *type_new,
00220 MPI_Aint *st_offset)
00221 {
00222
00223
00224 int blksize, i, blklens[3], st_index, end_index, local_size, rem, count;
00225 MPI_Aint stride, disps[3];
00226 MPI_Datatype type_tmp, types[3];
00227
00228 if (darg == MPI_DISTRIBUTE_DFLT_DARG) blksize = 1;
00229 else blksize = darg;
00230
00231
00232 if (blksize <= 0) {
00233 return MPI_ERR_ARG;
00234 }
00235
00236
00237 st_index = rank*blksize;
00238 end_index = array_of_gsizes[dim] - 1;
00239
00240 if (end_index < st_index) local_size = 0;
00241 else {
00242 local_size = ((end_index - st_index + 1)/(nprocs*blksize))*blksize;
00243 rem = (end_index - st_index + 1) % (nprocs*blksize);
00244 local_size += (rem < blksize) ? rem : blksize;
00245 }
00246
00247 count = local_size/blksize;
00248 rem = local_size % blksize;
00249
00250 stride = nprocs*blksize*orig_extent;
00251 if (order == MPI_ORDER_FORTRAN)
00252 for (i=0; i<dim; i++) stride *= array_of_gsizes[i];
00253 else for (i=ndims-1; i>dim; i--) stride *= array_of_gsizes[i];
00254
00255 MPI_Type_hvector(count, blksize, stride, type_old, type_new);
00256
00257 if (rem) {
00258
00259
00260
00261 types[0] = *type_new;
00262 types[1] = type_old;
00263 disps[0] = 0;
00264 disps[1] = count*stride;
00265 blklens[0] = 1;
00266 blklens[1] = rem;
00267
00268 MPI_Type_struct(2, blklens, disps, types, &type_tmp);
00269
00270 MPI_Type_free(type_new);
00271 *type_new = type_tmp;
00272 }
00273
00274
00275
00276 if ( ((order == MPI_ORDER_FORTRAN) && (dim == 0)) ||
00277 ((order == MPI_ORDER_C) && (dim == ndims-1)) ) {
00278 types[0] = MPI_LB;
00279 disps[0] = 0;
00280 types[1] = *type_new;
00281 disps[1] = rank * blksize * orig_extent;
00282 types[2] = MPI_UB;
00283 disps[2] = orig_extent * array_of_gsizes[dim];
00284 blklens[0] = blklens[1] = blklens[2] = 1;
00285 MPI_Type_struct(3, blklens, disps, types, &type_tmp);
00286 MPI_Type_free(type_new);
00287 *type_new = type_tmp;
00288
00289 *st_offset = 0;
00290
00291 }
00292 else {
00293 *st_offset = rank * blksize;
00294
00295
00296 }
00297
00298 if (local_size == 0) *st_offset = 0;
00299
00300 return MPI_SUCCESS;
00301 }