00001
00002
00003
00004
00005
00006
00007
00008 #include <stdio.h>
00009
00010 #include "./dataloop.h"
00011
00012 static DLOOP_Count DLOOP_Type_blockindexed_count_contig(DLOOP_Count count,
00013 DLOOP_Count blklen,
00014 void *disp_array,
00015 int dispinbytes,
00016 DLOOP_Offset old_extent);
00017
00018 static void DLOOP_Type_blockindexed_array_copy(DLOOP_Count count,
00019 void *disp_array,
00020 DLOOP_Offset *out_disp_array,
00021 int dispinbytes,
00022 DLOOP_Offset old_extent);
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040 int PREPEND_PREFIX(Dataloop_create_blockindexed)(int icount,
00041 int iblklen,
00042 void *disp_array,
00043 int dispinbytes,
00044 DLOOP_Type oldtype,
00045 DLOOP_Dataloop **dlp_p,
00046 int *dlsz_p,
00047 int *dldepth_p,
00048 int flag)
00049 {
00050 int err, is_builtin, is_vectorizable = 1;
00051 int i, new_loop_sz, old_loop_depth;
00052
00053 DLOOP_Count contig_count, count, blklen;
00054 DLOOP_Offset old_extent, eff_disp0, eff_disp1, last_stride;
00055 DLOOP_Dataloop *new_dlp;
00056
00057 count = (DLOOP_Count) icount;
00058 blklen = (DLOOP_Count) iblklen;
00059
00060
00061 if (count == 0 || blklen == 0)
00062 {
00063 err = PREPEND_PREFIX(Dataloop_create_contiguous)(0,
00064 MPI_INT,
00065 dlp_p,
00066 dlsz_p,
00067 dldepth_p,
00068 flag);
00069 return err;
00070 }
00071
00072 is_builtin = (DLOOP_Handle_hasloop_macro(oldtype)) ? 0 : 1;
00073
00074 if (is_builtin)
00075 {
00076 DLOOP_Handle_get_size_macro(oldtype, old_extent);
00077 old_loop_depth = 0;
00078 }
00079 else
00080 {
00081 DLOOP_Handle_get_extent_macro(oldtype, old_extent);
00082 DLOOP_Handle_get_loopdepth_macro(oldtype, old_loop_depth, flag);
00083 }
00084
00085 contig_count = DLOOP_Type_blockindexed_count_contig(count,
00086 blklen,
00087 disp_array,
00088 dispinbytes,
00089 old_extent);
00090
00091
00092
00093
00094
00095
00096 if ((contig_count == 1) &&
00097 ((!dispinbytes && ((int *) disp_array)[0] == 0) ||
00098 (dispinbytes && ((MPI_Aint *) disp_array)[0] == 0)))
00099 {
00100 err = PREPEND_PREFIX(Dataloop_create_contiguous)(icount * iblklen,
00101 oldtype,
00102 dlp_p,
00103 dlsz_p,
00104 dldepth_p,
00105 flag);
00106 return err;
00107 }
00108
00109
00110
00111
00112
00113
00114 if (contig_count == 1)
00115 {
00116
00117 blklen *= count;
00118 count = 1;
00119 iblklen *= icount;
00120 icount = 1;
00121 }
00122
00123
00124
00125
00126
00127
00128 eff_disp0 = (dispinbytes) ? ((DLOOP_Offset) ((MPI_Aint *) disp_array)[0]) :
00129 (((DLOOP_Offset) ((int *) disp_array)[0]) * old_extent);
00130
00131 if (count > 1 && eff_disp0 == (DLOOP_Offset) 0)
00132 {
00133 eff_disp1 = (dispinbytes) ?
00134 ((DLOOP_Offset) ((MPI_Aint *) disp_array)[1]) :
00135 (((DLOOP_Offset) ((int *) disp_array)[1]) * old_extent);
00136 last_stride = eff_disp1 - eff_disp0;
00137
00138 for (i=2; i < count; i++) {
00139 eff_disp0 = eff_disp1;
00140 eff_disp1 = (dispinbytes) ?
00141 ((DLOOP_Offset) ((MPI_Aint *) disp_array)[i]) :
00142 (((DLOOP_Offset) ((int *) disp_array)[i]) * old_extent);
00143 if (eff_disp1 - eff_disp0 != last_stride) {
00144 is_vectorizable = 0;
00145 break;
00146 }
00147 }
00148 if (is_vectorizable)
00149 {
00150 err = PREPEND_PREFIX(Dataloop_create_vector)(count,
00151 blklen,
00152 last_stride,
00153 1,
00154 oldtype,
00155 dlp_p,
00156 dlsz_p,
00157 dldepth_p,
00158 flag);
00159 return err;
00160 }
00161 }
00162
00163
00164
00165
00166
00167
00168
00169
00170
00171
00172
00173
00174
00175
00176
00177
00178
00179
00180
00181 if (is_builtin)
00182 {
00183 PREPEND_PREFIX(Dataloop_alloc)(DLOOP_KIND_BLOCKINDEXED,
00184 count,
00185 &new_dlp,
00186 &new_loop_sz);
00187
00188 if (!new_dlp) return -1;
00189
00190
00191 new_dlp->kind = DLOOP_KIND_BLOCKINDEXED | DLOOP_FINAL_MASK;
00192
00193 if (flag == DLOOP_DATALOOP_ALL_BYTES)
00194 {
00195 blklen *= old_extent;
00196 new_dlp->el_size = 1;
00197 new_dlp->el_extent = 1;
00198 new_dlp->el_type = MPI_BYTE;
00199 }
00200 else
00201 {
00202 new_dlp->el_size = old_extent;
00203 new_dlp->el_extent = old_extent;
00204 new_dlp->el_type = oldtype;
00205 }
00206 }
00207 else
00208 {
00209 DLOOP_Dataloop *old_loop_ptr = NULL;
00210 int old_loop_sz = 0;
00211
00212 DLOOP_Handle_get_loopptr_macro(oldtype, old_loop_ptr, flag);
00213 DLOOP_Handle_get_loopsize_macro(oldtype, old_loop_sz, flag);
00214
00215 PREPEND_PREFIX(Dataloop_alloc_and_copy)(DLOOP_KIND_BLOCKINDEXED,
00216 count,
00217 old_loop_ptr,
00218 old_loop_sz,
00219 &new_dlp,
00220 &new_loop_sz);
00221
00222 if (!new_dlp) return -1;
00223
00224
00225 new_dlp->kind = DLOOP_KIND_BLOCKINDEXED;
00226
00227 DLOOP_Handle_get_size_macro(oldtype, new_dlp->el_size);
00228 DLOOP_Handle_get_extent_macro(oldtype, new_dlp->el_extent);
00229 DLOOP_Handle_get_basic_type_macro(oldtype, new_dlp->el_type);
00230 }
00231
00232 new_dlp->loop_params.bi_t.count = count;
00233 new_dlp->loop_params.bi_t.blocksize = blklen;
00234
00235
00236
00237
00238
00239 DLOOP_Type_blockindexed_array_copy(count,
00240 disp_array,
00241 new_dlp->loop_params.bi_t.offset_array,
00242 dispinbytes,
00243 old_extent);
00244
00245 *dlp_p = new_dlp;
00246 *dlsz_p = new_loop_sz;
00247 *dldepth_p = old_loop_depth + 1;
00248
00249 return 0;
00250 }
00251
00252
00253
00254
00255
00256
00257 static void DLOOP_Type_blockindexed_array_copy(DLOOP_Count count,
00258 void *in_disp_array,
00259 DLOOP_Offset *out_disp_array,
00260 int dispinbytes,
00261 DLOOP_Offset old_extent)
00262 {
00263 int i;
00264 if (!dispinbytes)
00265 {
00266 for (i=0; i < count; i++)
00267 {
00268 out_disp_array[i] =
00269 ((DLOOP_Offset) ((int *) in_disp_array)[i]) * old_extent;
00270 }
00271 }
00272 else
00273 {
00274 for (i=0; i < count; i++)
00275 {
00276 out_disp_array[i] =
00277 ((DLOOP_Offset) ((MPI_Aint *) in_disp_array)[i]);
00278 }
00279 }
00280 return;
00281 }
00282
00283 static DLOOP_Count DLOOP_Type_blockindexed_count_contig(DLOOP_Count count,
00284 DLOOP_Count blklen,
00285 void *disp_array,
00286 int dispinbytes,
00287 DLOOP_Offset old_extent)
00288 {
00289 int i, contig_count = 1;
00290
00291 if (!dispinbytes)
00292 {
00293
00294 DLOOP_Offset cur_tdisp = (DLOOP_Offset) ((int *) disp_array)[0];
00295
00296 for (i=1; i < count; i++)
00297 {
00298 DLOOP_Offset next_tdisp = (DLOOP_Offset) ((int *) disp_array)[i];
00299
00300 if (cur_tdisp + blklen != next_tdisp)
00301 {
00302 contig_count++;
00303 }
00304 cur_tdisp = next_tdisp;
00305 }
00306 }
00307 else
00308 {
00309
00310 DLOOP_Offset cur_bdisp = (DLOOP_Offset) ((MPI_Aint *) disp_array)[0];
00311
00312 for (i=1; i < count; i++)
00313 {
00314 DLOOP_Offset next_bdisp =
00315 (DLOOP_Offset) ((MPI_Aint *) disp_array)[i];
00316
00317 if (cur_bdisp + (DLOOP_Offset) blklen * old_extent != next_bdisp)
00318 {
00319 contig_count++;
00320 }
00321 cur_bdisp = next_bdisp;
00322 }
00323 }
00324 return contig_count;
00325 }