00001
00002
00003
00004
00005
00006
00007
00008 #include <stdlib.h>
00009
00010 #include "./dataloop.h"
00011
00012 static DLOOP_Count DLOOP_Type_indexed_count_contig(DLOOP_Count count,
00013 int *blocklength_array,
00014 void *displacement_array,
00015 int dispinbytes,
00016 DLOOP_Offset old_extent);
00017
00018 static void DLOOP_Type_indexed_array_copy(DLOOP_Count count,
00019 DLOOP_Count contig_count,
00020 int *input_blocklength_array,
00021 void *input_displacement_array,
00022 DLOOP_Count *output_blocklength_array,
00023 DLOOP_Offset *out_disp_array,
00024 int dispinbytes,
00025 DLOOP_Offset old_extent);
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045 int PREPEND_PREFIX(Dataloop_create_indexed)(int icount,
00046 int *blocklength_array,
00047 void *displacement_array,
00048 int dispinbytes,
00049 MPI_Datatype oldtype,
00050 DLOOP_Dataloop **dlp_p,
00051 int *dlsz_p,
00052 int *dldepth_p,
00053 int flag)
00054 {
00055 int err, is_builtin;
00056 int i, new_loop_sz, old_loop_depth, blksz;
00057
00058 DLOOP_Count old_type_count = 0, contig_count, count;
00059 DLOOP_Offset old_extent;
00060 struct DLOOP_Dataloop *new_dlp;
00061
00062 count = (DLOOP_Count) icount;
00063
00064
00065
00066 if (count == 0)
00067 {
00068 err = PREPEND_PREFIX(Dataloop_create_contiguous)(0,
00069 MPI_INT,
00070 dlp_p,
00071 dlsz_p,
00072 dldepth_p,
00073 flag);
00074 return err;
00075 }
00076
00077 is_builtin = (DLOOP_Handle_hasloop_macro(oldtype)) ? 0 : 1;
00078
00079 if (is_builtin)
00080 {
00081 DLOOP_Handle_get_extent_macro(oldtype, old_extent);
00082 old_loop_depth = 0;
00083 }
00084 else
00085 {
00086 DLOOP_Handle_get_extent_macro(oldtype, old_extent);
00087 DLOOP_Handle_get_loopdepth_macro(oldtype, old_loop_depth, flag);
00088 }
00089
00090 for (i=0; i < count; i++)
00091 {
00092 old_type_count += (DLOOP_Count) blocklength_array[i];
00093 }
00094
00095 contig_count = DLOOP_Type_indexed_count_contig(count,
00096 blocklength_array,
00097 displacement_array,
00098 dispinbytes,
00099 old_extent);
00100
00101
00102 if (contig_count == 0)
00103 {
00104 err = PREPEND_PREFIX(Dataloop_create_contiguous)(0,
00105 MPI_INT,
00106 dlp_p,
00107 dlsz_p,
00108 dldepth_p,
00109 flag);
00110 return err;
00111 }
00112
00113
00114
00115
00116
00117
00118 if ((contig_count == 1) &&
00119 ((!dispinbytes && ((int *) displacement_array)[0] == 0) ||
00120 (dispinbytes && ((MPI_Aint *) displacement_array)[0] == 0)))
00121 {
00122 err = PREPEND_PREFIX(Dataloop_create_contiguous)((int) old_type_count,
00123 oldtype,
00124 dlp_p,
00125 dlsz_p,
00126 dldepth_p,
00127 flag);
00128 return err;
00129 }
00130
00131
00132
00133
00134
00135
00136
00137 if (contig_count == 1)
00138 {
00139 err = PREPEND_PREFIX(Dataloop_create_blockindexed)(1,
00140 (int) old_type_count,
00141 displacement_array,
00142 dispinbytes,
00143 oldtype,
00144 dlp_p,
00145 dlsz_p,
00146 dldepth_p,
00147 flag);
00148
00149 return err;
00150 }
00151
00152
00153
00154
00155
00156
00157 blksz = blocklength_array[0];
00158 for (i=1; i < count; i++)
00159 {
00160 if (blocklength_array[i] != blksz)
00161 {
00162 blksz--;
00163 break;
00164 }
00165 }
00166 if (blksz == blocklength_array[0])
00167 {
00168 err = PREPEND_PREFIX(Dataloop_create_blockindexed)(icount,
00169 blksz,
00170 displacement_array,
00171 dispinbytes,
00172 oldtype,
00173 dlp_p,
00174 dlsz_p,
00175 dldepth_p,
00176 flag);
00177
00178 return err;
00179 }
00180
00181
00182
00183
00184
00185
00186
00187
00188
00189
00190
00191 if (is_builtin)
00192 {
00193 PREPEND_PREFIX(Dataloop_alloc)(DLOOP_KIND_INDEXED,
00194 count,
00195 &new_dlp,
00196 &new_loop_sz);
00197
00198 if (!new_dlp) return -1;
00199
00200
00201 new_dlp->kind = DLOOP_KIND_INDEXED | DLOOP_FINAL_MASK;
00202
00203 if (flag == DLOOP_DATALOOP_ALL_BYTES)
00204 {
00205
00206 new_dlp->el_size = 1;
00207 new_dlp->el_extent = 1;
00208 new_dlp->el_type = MPI_BYTE;
00209 }
00210 else
00211 {
00212 new_dlp->el_size = old_extent;
00213 new_dlp->el_extent = old_extent;
00214 new_dlp->el_type = oldtype;
00215 }
00216 }
00217 else
00218 {
00219 DLOOP_Dataloop *old_loop_ptr = NULL;
00220 int old_loop_sz = 0;
00221
00222 DLOOP_Handle_get_loopptr_macro(oldtype, old_loop_ptr, flag);
00223 DLOOP_Handle_get_loopsize_macro(oldtype, old_loop_sz, flag);
00224
00225 PREPEND_PREFIX(Dataloop_alloc_and_copy)(DLOOP_KIND_INDEXED,
00226 contig_count,
00227 old_loop_ptr,
00228 old_loop_sz,
00229 &new_dlp,
00230 &new_loop_sz);
00231
00232 if (!new_dlp) return -1;
00233
00234
00235 new_dlp->kind = DLOOP_KIND_INDEXED;
00236
00237 DLOOP_Handle_get_size_macro(oldtype, new_dlp->el_size);
00238 DLOOP_Handle_get_extent_macro(oldtype, new_dlp->el_extent);
00239 DLOOP_Handle_get_basic_type_macro(oldtype, new_dlp->el_type);
00240 }
00241
00242 new_dlp->loop_params.i_t.count = contig_count;
00243 new_dlp->loop_params.i_t.total_blocks = old_type_count;
00244
00245
00246
00247
00248
00249 DLOOP_Type_indexed_array_copy(count,
00250 contig_count,
00251 blocklength_array,
00252 displacement_array,
00253 new_dlp->loop_params.i_t.blocksize_array,
00254 new_dlp->loop_params.i_t.offset_array,
00255 dispinbytes,
00256 old_extent);
00257
00258 if (is_builtin && (flag == DLOOP_DATALOOP_ALL_BYTES))
00259 {
00260 DLOOP_Count *tmp_blklen_array =
00261 new_dlp->loop_params.i_t.blocksize_array;
00262
00263 for (i=0; i < contig_count; i++)
00264 {
00265
00266 tmp_blklen_array[i] *= old_extent;
00267 }
00268 }
00269
00270 *dlp_p = new_dlp;
00271 *dlsz_p = new_loop_sz;
00272 *dldepth_p = old_loop_depth + 1;
00273
00274 return MPI_SUCCESS;
00275 }
00276
00277
00278
00279
00280
00281
00282
00283
00284
00285
00286
00287 static void DLOOP_Type_indexed_array_copy(DLOOP_Count count,
00288 DLOOP_Count contig_count,
00289 int *in_blklen_array,
00290 void *in_disp_array,
00291 DLOOP_Count *out_blklen_array,
00292 DLOOP_Offset *out_disp_array,
00293 int dispinbytes,
00294 DLOOP_Offset old_extent)
00295 {
00296 DLOOP_Count i, cur_idx = 0;
00297
00298 out_blklen_array[0] = (DLOOP_Count) in_blklen_array[0];
00299
00300 if (!dispinbytes)
00301 {
00302 out_disp_array[0] = (DLOOP_Offset)
00303 ((int *) in_disp_array)[0] * old_extent;
00304
00305 for (i = 1; i < count; i++)
00306 {
00307 if (in_blklen_array[i] == 0)
00308 {
00309 continue;
00310 }
00311 else if (out_disp_array[cur_idx] +
00312 ((DLOOP_Offset) out_blklen_array[cur_idx]) * old_extent ==
00313 ((DLOOP_Offset) ((int *) in_disp_array)[i]) * old_extent)
00314 {
00315
00316 out_blklen_array[cur_idx] += (DLOOP_Count) in_blklen_array[i];
00317 }
00318 else
00319 {
00320 cur_idx++;
00321 DLOOP_Assert(cur_idx < contig_count);
00322 out_disp_array[cur_idx] =
00323 ((DLOOP_Offset) ((int *) in_disp_array)[i]) * old_extent;
00324 out_blklen_array[cur_idx] = in_blklen_array[i];
00325 }
00326 }
00327 }
00328 else
00329 {
00330 out_disp_array[0] = (DLOOP_Offset) ((MPI_Aint *) in_disp_array)[0];
00331
00332 for (i = 1; i < count; i++)
00333 {
00334 if (in_blklen_array[i] == 0)
00335 {
00336 continue;
00337 }
00338 else if (out_disp_array[cur_idx] +
00339 ((DLOOP_Offset) out_blklen_array[cur_idx]) * old_extent ==
00340 ((DLOOP_Offset) ((MPI_Aint *) in_disp_array)[i]))
00341 {
00342
00343 out_blklen_array[cur_idx] += in_blklen_array[i];
00344 }
00345 else
00346 {
00347 cur_idx++;
00348 DLOOP_Assert(cur_idx < contig_count);
00349 out_disp_array[cur_idx] =
00350 (DLOOP_Offset) ((MPI_Aint *) in_disp_array)[i];
00351 out_blklen_array[cur_idx] = (DLOOP_Count) in_blklen_array[i];
00352 }
00353 }
00354 }
00355
00356 DLOOP_Assert(cur_idx == contig_count - 1);
00357 return;
00358 }
00359
00360
00361
00362
00363
00364
00365
00366
00367
00368 static DLOOP_Count DLOOP_Type_indexed_count_contig(DLOOP_Count count,
00369 int *blocklength_array,
00370 void *displacement_array,
00371 int dispinbytes,
00372 DLOOP_Offset old_extent)
00373 {
00374 DLOOP_Count i, contig_count = 1;
00375 DLOOP_Count cur_blklen = (DLOOP_Count) blocklength_array[0];
00376
00377 if (!dispinbytes)
00378 {
00379 DLOOP_Offset cur_tdisp =
00380 (DLOOP_Offset) ((int *) displacement_array)[0];
00381
00382 for (i = 1; i < count; i++)
00383 {
00384 if (blocklength_array[i] == 0)
00385 {
00386 continue;
00387 }
00388 else if (cur_tdisp + cur_blklen ==
00389 (DLOOP_Offset) ((int *) displacement_array)[i])
00390 {
00391
00392 cur_blklen += (DLOOP_Count) blocklength_array[i];
00393 }
00394 else
00395 {
00396 cur_tdisp = (DLOOP_Offset) ((int *) displacement_array)[i];
00397 cur_blklen = (DLOOP_Count) blocklength_array[i];
00398 contig_count++;
00399 }
00400 }
00401 }
00402 else
00403 {
00404 DLOOP_Offset cur_bdisp =
00405 (DLOOP_Offset) ((MPI_Aint *) displacement_array)[0];
00406
00407 for (i = 1; i < count; i++)
00408 {
00409 if (blocklength_array[i] == 0)
00410 {
00411 continue;
00412 }
00413 else if (cur_bdisp + cur_blklen * old_extent ==
00414 (DLOOP_Offset) ((MPI_Aint *) displacement_array)[i])
00415 {
00416
00417 cur_blklen += (DLOOP_Count) blocklength_array[i];
00418 }
00419 else
00420 {
00421 cur_bdisp =
00422 (DLOOP_Offset) ((MPI_Aint *) displacement_array)[i];
00423 cur_blklen = (DLOOP_Count) blocklength_array[i];
00424 contig_count++;
00425 }
00426 }
00427 }
00428 return contig_count;
00429 }