00001
00002
00003
00004
00005
00006
00007
00008 #include <stdio.h>
00009 #include <stdlib.h>
00010
00011 #include "./dataloop.h"
00012
00013 #undef DLOOP_DEBUG_MANIPULATE
00014
00015 #ifndef PREPEND_PREFIX
00016 #error "You must explicitly include a header that sets the PREPEND_PREFIX and includes dataloop_parts.h"
00017 #endif
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028 static inline DLOOP_Count DLOOP_Stackelm_blocksize(struct DLOOP_Dataloop_stackelm *elmp);
00029 static inline DLOOP_Offset DLOOP_Stackelm_offset(struct DLOOP_Dataloop_stackelm *elmp);
00030 static inline void DLOOP_Stackelm_load(struct DLOOP_Dataloop_stackelm *elmp,
00031 struct DLOOP_Dataloop *dlp,
00032 int branch_flag);
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049 int PREPEND_PREFIX(Segment_init)(const DLOOP_Buffer buf,
00050 DLOOP_Count count,
00051 DLOOP_Handle handle,
00052 struct DLOOP_Segment *segp,
00053 int flag)
00054 {
00055 DLOOP_Offset elmsize = 0;
00056 int i, depth = 0;
00057 int branch_detected = 0;
00058
00059 struct DLOOP_Dataloop_stackelm *elmp;
00060 struct DLOOP_Dataloop *dlp = 0, *sblp = &segp->builtin_loop;
00061
00062 DLOOP_Assert(flag == DLOOP_DATALOOP_HETEROGENEOUS ||
00063 flag == DLOOP_DATALOOP_HOMOGENEOUS ||
00064 flag == DLOOP_DATALOOP_ALL_BYTES);
00065
00066 #ifdef DLOOP_DEBUG_MANIPULATE
00067 DLOOP_dbg_printf("DLOOP_Segment_init: count = %d, buf = %x\n",
00068 count,
00069 buf);
00070 #endif
00071
00072 if (!DLOOP_Handle_hasloop_macro(handle)) {
00073
00074
00075 DLOOP_Handle_get_size_macro(handle, elmsize);
00076
00077 sblp->kind = DLOOP_KIND_CONTIG | DLOOP_FINAL_MASK;
00078 sblp->loop_params.c_t.count = count;
00079 sblp->loop_params.c_t.dataloop = 0;
00080 sblp->el_size = elmsize;
00081 DLOOP_Handle_get_basic_type_macro(handle, sblp->el_type);
00082 DLOOP_Handle_get_extent_macro(handle, sblp->el_extent);
00083
00084 dlp = sblp;
00085 depth = 1;
00086 }
00087 else if (count == 0) {
00088
00089 sblp->kind = DLOOP_KIND_CONTIG | DLOOP_FINAL_MASK;
00090 sblp->loop_params.c_t.count = 0;
00091 sblp->loop_params.c_t.dataloop = 0;
00092 sblp->el_size = 0;
00093 sblp->el_extent = 0;
00094
00095 dlp = sblp;
00096 depth = 1;
00097 }
00098 else if (count == 1) {
00099
00100 DLOOP_Handle_get_loopptr_macro(handle, dlp, flag);
00101 DLOOP_Handle_get_loopdepth_macro(handle, depth, flag);
00102 }
00103 else {
00104
00105
00106
00107 DLOOP_Dataloop *oldloop;
00108 DLOOP_Offset type_size, type_extent;
00109 DLOOP_Type el_type;
00110
00111 DLOOP_Handle_get_loopdepth_macro(handle, depth, flag);
00112 if (depth >= DLOOP_MAX_DATATYPE_DEPTH) return -1;
00113
00114 DLOOP_Handle_get_loopptr_macro(handle, oldloop, flag);
00115 DLOOP_Assert(oldloop != NULL);
00116 DLOOP_Handle_get_size_macro(handle, type_size);
00117 DLOOP_Handle_get_extent_macro(handle, type_extent);
00118 DLOOP_Handle_get_basic_type_macro(handle, el_type);
00119
00120 if (depth == 1 && ((oldloop->kind & DLOOP_KIND_MASK) == DLOOP_KIND_CONTIG))
00121 {
00122 if (type_size == type_extent)
00123 {
00124
00125 sblp->kind = DLOOP_KIND_CONTIG | DLOOP_FINAL_MASK;
00126 sblp->loop_params.c_t.count = count * oldloop->loop_params.c_t.count;
00127 sblp->loop_params.c_t.dataloop = NULL;
00128 sblp->el_size = oldloop->el_size;
00129 sblp->el_extent = oldloop->el_extent;
00130 sblp->el_type = oldloop->el_type;
00131 }
00132 else
00133 {
00134
00135 sblp->kind = DLOOP_KIND_VECTOR | DLOOP_FINAL_MASK;
00136 sblp->loop_params.v_t.count = count;
00137 sblp->loop_params.v_t.blocksize = oldloop->loop_params.c_t.count;
00138 sblp->loop_params.v_t.stride = type_extent;
00139 sblp->loop_params.v_t.dataloop = NULL;
00140 sblp->el_size = oldloop->el_size;
00141 sblp->el_extent = oldloop->el_extent;
00142 sblp->el_type = oldloop->el_type;
00143 }
00144 }
00145 else
00146 {
00147
00148 sblp->kind = DLOOP_KIND_CONTIG;
00149 sblp->loop_params.c_t.count = count;
00150 sblp->loop_params.c_t.dataloop = oldloop;
00151 sblp->el_size = type_size;
00152 sblp->el_extent = type_extent;
00153 sblp->el_type = el_type;
00154
00155 depth++;
00156 }
00157
00158 dlp = sblp;
00159 }
00160
00161
00162 segp->handle = handle;
00163 segp->ptr = (DLOOP_Buffer) buf;
00164 segp->stream_off = 0;
00165 segp->cur_sp = 0;
00166 segp->valid_sp = 0;
00167
00168
00169 elmp = &(segp->stackelm[0]);
00170 DLOOP_Stackelm_load(elmp, dlp, 0);
00171 branch_detected = elmp->may_require_reloading;
00172
00173
00174 elmp->orig_offset = 0;
00175 elmp->curblock = elmp->orig_block;
00176
00177 elmp->curoffset = DLOOP_Stackelm_offset(elmp);
00178
00179 i = 1;
00180 while(!(dlp->kind & DLOOP_FINAL_MASK))
00181 {
00182
00183 switch (dlp->kind & DLOOP_KIND_MASK)
00184 {
00185 case DLOOP_KIND_CONTIG:
00186 case DLOOP_KIND_VECTOR:
00187 case DLOOP_KIND_BLOCKINDEXED:
00188 case DLOOP_KIND_INDEXED:
00189 dlp = dlp->loop_params.cm_t.dataloop;
00190 break;
00191 case DLOOP_KIND_STRUCT:
00192 dlp = dlp->loop_params.s_t.dataloop_array[0];
00193 break;
00194 default:
00195
00196 DLOOP_Assert(0);
00197 break;
00198
00199 }
00200
00201
00202
00203
00204 elmp = &(segp->stackelm[i]);
00205
00206 DLOOP_Stackelm_load(elmp, dlp, branch_detected);
00207 branch_detected = elmp->may_require_reloading;
00208 i++;
00209
00210 }
00211
00212 segp->valid_sp = depth-1;
00213
00214 return 0;
00215 }
00216
00217
00218
00219
00220 struct DLOOP_Segment * PREPEND_PREFIX(Segment_alloc)(void)
00221 {
00222 return (struct DLOOP_Segment *) DLOOP_Malloc(sizeof(struct DLOOP_Segment));
00223 }
00224
00225
00226
00227
00228
00229
00230 void PREPEND_PREFIX(Segment_free)(struct DLOOP_Segment *segp)
00231 {
00232 DLOOP_Free(segp);
00233 return;
00234 }
00235
00236
00237
00238
00239
00240
00241
00242
00243
00244
00245
00246
00247
00248
00249
00250
00251
00252
00253
00254
00255
00256
00257
/*
 * Helper macros for Segment_manipulate().
 *
 * The DLOOP_SEGMENT_* macros expand in the scope of that function and use
 * its variables by name: segp, lastp, last, cur_sp, valid_sp, stream_off and
 * cur_elmp.  Each one is a single statement (do { ... } while (0)), so it can
 * be used safely as the body of an unbraced if/else; write it with a
 * trailing semicolon.
 */

/* Write the local position back into the segment and report the stream
 * offset reached through *lastp.
 */
#define DLOOP_SEGMENT_SAVE_LOCAL_VALUES         \
    do {                                        \
        segp->cur_sp     = cur_sp;              \
        segp->valid_sp   = valid_sp;            \
        segp->stream_off = stream_off;          \
        *lastp           = stream_off;          \
    } while (0)

/* Read the requested end offset and the segment's saved position into the
 * local variables.
 */
#define DLOOP_SEGMENT_LOAD_LOCAL_VALUES         \
    do {                                        \
        last       = *lastp;                    \
        cur_sp     = segp->cur_sp;              \
        valid_sp   = segp->valid_sp;            \
        stream_off = segp->stream_off;          \
        cur_elmp   = &(segp->stackelm[cur_sp]); \
    } while (0)

/* Rewind the segment to the start of the stream: stream offset and stack
 * pointer go to 0 and the bottom stack element is reset.  Only the segment
 * fields and cur_elmp are changed; the locals cur_sp / stream_off are not.
 */
#define DLOOP_SEGMENT_RESET_VALUES                                      \
    do {                                                                \
        segp->stream_off     = 0;                                       \
        segp->cur_sp         = 0;                                       \
        cur_elmp             = &(segp->stackelm[0]);                    \
        cur_elmp->curcount   = cur_elmp->orig_count;                    \
        cur_elmp->orig_block = DLOOP_Stackelm_blocksize(cur_elmp);      \
        cur_elmp->curblock   = cur_elmp->orig_block;                    \
        cur_elmp->curoffset  = cur_elmp->orig_offset +                  \
                               DLOOP_Stackelm_offset(cur_elmp);         \
    } while (0)

/* Pop one stack level.  If the stack becomes empty, save the position and
 * RETURN from the enclosing (void) function.
 */
#define DLOOP_SEGMENT_POP_AND_MAYBE_EXIT                        \
    do {                                                        \
        cur_sp--;                                               \
        if (cur_sp >= 0) {                                      \
            cur_elmp = &segp->stackelm[cur_sp];                 \
        }                                                       \
        else {                                                  \
            DLOOP_SEGMENT_SAVE_LOCAL_VALUES;                    \
            return;                                             \
        }                                                       \
    } while (0)

/* Move to the next (deeper) stack element. */
#define DLOOP_SEGMENT_PUSH                              \
    do {                                                \
        cur_sp++;                                       \
        cur_elmp = &segp->stackelm[cur_sp];             \
    } while (0)

/* Per-entry accessors into the loop referenced by stack element elmp_;
 * curcount_ is the array index.  All expand to lvalues.
 */
#define DLOOP_STACKELM_BLOCKINDEXED_OFFSET(elmp_, curcount_) \
    ((elmp_)->loop_p->loop_params.bi_t.offset_array[(curcount_)])

#define DLOOP_STACKELM_INDEXED_OFFSET(elmp_, curcount_) \
    ((elmp_)->loop_p->loop_params.i_t.offset_array[(curcount_)])

#define DLOOP_STACKELM_INDEXED_BLOCKSIZE(elmp_, curcount_) \
    ((elmp_)->loop_p->loop_params.i_t.blocksize_array[(curcount_)])

#define DLOOP_STACKELM_STRUCT_OFFSET(elmp_, curcount_) \
    ((elmp_)->loop_p->loop_params.s_t.offset_array[(curcount_)])

#define DLOOP_STACKELM_STRUCT_BLOCKSIZE(elmp_, curcount_) \
    ((elmp_)->loop_p->loop_params.s_t.blocksize_array[(curcount_)])

#define DLOOP_STACKELM_STRUCT_EL_EXTENT(elmp_, curcount_) \
    ((elmp_)->loop_p->loop_params.s_t.el_extent_array[(curcount_)])

#define DLOOP_STACKELM_STRUCT_DATALOOP(elmp_, curcount_) \
    ((elmp_)->loop_p->loop_params.s_t.dataloop_array[(curcount_)])
00323
00324 void PREPEND_PREFIX(Segment_manipulate)(struct DLOOP_Segment *segp,
00325 DLOOP_Offset first,
00326 DLOOP_Offset *lastp,
00327 int (*contigfn) (DLOOP_Offset *blocks_p,
00328 DLOOP_Type el_type,
00329 DLOOP_Offset rel_off,
00330 DLOOP_Buffer bufp,
00331 void *v_paramp),
00332 int (*vectorfn) (DLOOP_Offset *blocks_p,
00333 DLOOP_Count count,
00334 DLOOP_Count blklen,
00335 DLOOP_Offset stride,
00336 DLOOP_Type el_type,
00337 DLOOP_Offset rel_off,
00338 DLOOP_Buffer bufp,
00339 void *v_paramp),
00340 int (*blkidxfn) (DLOOP_Offset *blocks_p,
00341 DLOOP_Count count,
00342 DLOOP_Count blklen,
00343 DLOOP_Offset *offsetarray,
00344 DLOOP_Type el_type,
00345 DLOOP_Offset rel_off,
00346 DLOOP_Buffer bufp,
00347 void *v_paramp),
00348 int (*indexfn) (DLOOP_Offset *blocks_p,
00349 DLOOP_Count count,
00350 DLOOP_Count *blockarray,
00351 DLOOP_Offset *offsetarray,
00352 DLOOP_Type el_type,
00353 DLOOP_Offset rel_off,
00354 DLOOP_Buffer bufp,
00355 void *v_paramp),
00356 DLOOP_Offset (*sizefn) (DLOOP_Type el_type),
00357 void *pieceparams)
00358 {
00359
00360 int cur_sp, valid_sp;
00361 DLOOP_Offset last, stream_off;
00362
00363 struct DLOOP_Dataloop_stackelm *cur_elmp;
00364 enum { PF_NULL, PF_CONTIG, PF_VECTOR, PF_BLOCKINDEXED, PF_INDEXED } piecefn_type = PF_NULL;
00365
00366 DLOOP_SEGMENT_LOAD_LOCAL_VALUES;
00367
00368 if (first == *lastp) {
00369
00370 DLOOP_dbg_printf("dloop_segment_manipulate: warning: first == last (%d)\n", (int) first);
00371 return;
00372 }
00373
00374
00375 if (first != stream_off) {
00376 #ifdef DLOOP_DEBUG_MANIPULATE
00377 DLOOP_dbg_printf("first=%d; stream_off=%ld; resetting.\n",
00378 first, stream_off);
00379 #endif
00380
00381 if (first < stream_off) {
00382 DLOOP_SEGMENT_RESET_VALUES;
00383 stream_off = 0;
00384 }
00385
00386 if (first != stream_off) {
00387 DLOOP_Offset tmp_last = first;
00388
00389
00390
00391
00392 PREPEND_PREFIX(Segment_manipulate)(segp,
00393 stream_off,
00394 &tmp_last,
00395 NULL,
00396 NULL,
00397 NULL,
00398 NULL,
00399 sizefn,
00400 NULL);
00401
00402
00403
00404 if (tmp_last != first) DLOOP_Assert(0);
00405
00406 }
00407
00408 DLOOP_SEGMENT_LOAD_LOCAL_VALUES;
00409
00410 #ifdef DLOOP_DEBUG_MANIPULATE
00411 DLOOP_dbg_printf("done repositioning stream_off; first=%d, stream_off=%ld, last=%d\n",
00412 first, stream_off, last);
00413 #endif
00414 }
00415
00416 for (;;) {
00417 #ifdef DLOOP_DEBUG_MANIPULATE
00418 #if 0
00419 DLOOP_dbg_printf("looptop; cur_sp=%d, cur_elmp=%x\n",
00420 cur_sp, (unsigned) cur_elmp);
00421 #endif
00422 #endif
00423
00424 if (cur_elmp->loop_p->kind & DLOOP_FINAL_MASK) {
00425 int piecefn_indicated_exit = -1;
00426 DLOOP_Offset myblocks, local_el_size, stream_el_size;
00427 DLOOP_Type el_type;
00428
00429
00430 DLOOP_Assert((cur_elmp->loop_p->kind & DLOOP_KIND_MASK) !=
00431 DLOOP_KIND_STRUCT);
00432
00433
00434 if (cur_elmp->curcount == 0) DLOOP_SEGMENT_POP_AND_MAYBE_EXIT;
00435
00436
00437
00438
00439 local_el_size = cur_elmp->loop_p->el_size;
00440 el_type = cur_elmp->loop_p->el_type;
00441 stream_el_size = (sizefn) ? sizefn(el_type) : local_el_size;
00442
00443
00444
00445
00446 myblocks = cur_elmp->curblock;
00447 piecefn_type = (contigfn ? PF_CONTIG : PF_NULL);
00448
00449
00450 switch (cur_elmp->loop_p->kind & DLOOP_KIND_MASK) {
00451 case DLOOP_KIND_CONTIG:
00452 break;
00453 case DLOOP_KIND_BLOCKINDEXED:
00454
00455 if (blkidxfn &&
00456 cur_elmp->orig_block == cur_elmp->curblock &&
00457 cur_elmp->orig_count == cur_elmp->curcount)
00458 {
00459
00460 myblocks = cur_elmp->curblock * cur_elmp->curcount;
00461 piecefn_type = PF_BLOCKINDEXED;
00462 }
00463 break;
00464 case DLOOP_KIND_INDEXED:
00465
00466
00467
00468
00469 if (indexfn &&
00470 cur_elmp->orig_count == cur_elmp->curcount &&
00471 cur_elmp->curblock == DLOOP_STACKELM_INDEXED_BLOCKSIZE(cur_elmp, 0))
00472 {
00473
00474 myblocks = cur_elmp->loop_p->loop_params.i_t.total_blocks;
00475 piecefn_type = PF_INDEXED;
00476 }
00477 break;
00478 case DLOOP_KIND_VECTOR:
00479
00480
00481
00482 if (vectorfn && cur_elmp->orig_block == cur_elmp->curblock)
00483 {
00484 myblocks = cur_elmp->curblock * cur_elmp->curcount;
00485 piecefn_type = PF_VECTOR;
00486 }
00487 break;
00488 default:
00489
00490 DLOOP_Assert(0);
00491 break;
00492
00493 }
00494
00495 #ifdef DLOOP_DEBUG_MANIPULATE
00496 DLOOP_dbg_printf("\thit leaf; cur_sp=%d, elmp=%x, piece_sz=%d\n",
00497 cur_sp,
00498 (unsigned) cur_elmp, myblocks * local_el_size);
00499 #endif
00500
00501
00502 if (last != SEGMENT_IGNORE_LAST &&
00503 (stream_off + (myblocks * stream_el_size) > last))
00504 {
00505 myblocks = ((last - stream_off) / stream_el_size);
00506 #ifdef DLOOP_DEBUG_MANIPULATE
00507 DLOOP_dbg_printf("\tpartial block count=%d (%d bytes)\n",
00508 myblocks,
00509 (int) myblocks * stream_el_size);
00510 #endif
00511 if (myblocks == 0) {
00512 DLOOP_SEGMENT_SAVE_LOCAL_VALUES;
00513 return;
00514 }
00515 }
00516
00517
00518 switch (piecefn_type) {
00519 case PF_NULL:
00520 piecefn_indicated_exit = 0;
00521 #ifdef DLOOP_DEBUG_MANIPULATE
00522 DLOOP_dbg_printf("\tNULL piecefn for this piece\n");
00523 #endif
00524 break;
00525 case PF_CONTIG:
00526 DLOOP_Assert(myblocks <= cur_elmp->curblock);
00527 piecefn_indicated_exit =
00528 contigfn(&myblocks,
00529 el_type,
00530 cur_elmp->curoffset,
00531 segp->ptr,
00532 pieceparams);
00533 break;
00534 case PF_VECTOR:
00535 piecefn_indicated_exit =
00536 vectorfn(&myblocks,
00537 cur_elmp->curcount,
00538 cur_elmp->orig_block,
00539 cur_elmp->loop_p->loop_params.v_t.stride,
00540 el_type,
00541 cur_elmp->curoffset,
00542 segp->ptr,
00543 pieceparams);
00544 break;
00545 case PF_BLOCKINDEXED:
00546 piecefn_indicated_exit =
00547 blkidxfn(&myblocks,
00548 cur_elmp->curcount,
00549 cur_elmp->orig_block,
00550 cur_elmp->loop_p->loop_params.bi_t.offset_array,
00551 el_type,
00552 cur_elmp->orig_offset,
00553 segp->ptr,
00554 pieceparams);
00555 break;
00556 case PF_INDEXED:
00557 piecefn_indicated_exit =
00558 indexfn(&myblocks,
00559 cur_elmp->curcount,
00560 cur_elmp->loop_p->loop_params.i_t.blocksize_array,
00561 cur_elmp->loop_p->loop_params.i_t.offset_array,
00562 el_type,
00563 cur_elmp->orig_offset,
00564 segp->ptr,
00565 pieceparams);
00566 break;
00567 }
00568
00569
00570
00571
00572 DLOOP_Assert(piecefn_indicated_exit >= 0);
00573 DLOOP_Assert(myblocks >= 0);
00574 stream_off += myblocks * stream_el_size;
00575
00576
00577
00578
00579 if (myblocks == 0) {
00580 DLOOP_SEGMENT_SAVE_LOCAL_VALUES;
00581 return;
00582 }
00583 else if (myblocks < cur_elmp->curblock) {
00584 cur_elmp->curoffset += myblocks * local_el_size;
00585 cur_elmp->curblock -= myblocks;
00586
00587 DLOOP_SEGMENT_SAVE_LOCAL_VALUES;
00588 return;
00589 }
00590 else {
00591 int count_index = 0;
00592
00593
00594
00595
00596
00597
00598 switch (cur_elmp->loop_p->kind & DLOOP_KIND_MASK) {
00599 case DLOOP_KIND_INDEXED:
00600 while (myblocks > 0 && myblocks >= cur_elmp->curblock) {
00601 myblocks -= cur_elmp->curblock;
00602 cur_elmp->curcount--;
00603 DLOOP_Assert(cur_elmp->curcount >= 0);
00604
00605 count_index = cur_elmp->orig_count -
00606 cur_elmp->curcount;
00607 cur_elmp->curblock =
00608 DLOOP_STACKELM_INDEXED_BLOCKSIZE(cur_elmp,
00609 count_index);
00610 }
00611
00612 if (cur_elmp->curcount == 0) {
00613
00614 DLOOP_Assert(myblocks == 0);
00615 DLOOP_SEGMENT_POP_AND_MAYBE_EXIT;
00616 }
00617 else {
00618 cur_elmp->orig_block = cur_elmp->curblock;
00619 cur_elmp->curoffset = cur_elmp->orig_offset +
00620 DLOOP_STACKELM_INDEXED_OFFSET(cur_elmp,
00621 count_index);
00622
00623 cur_elmp->curblock -= myblocks;
00624 cur_elmp->curoffset += myblocks * local_el_size;
00625 }
00626 break;
00627 case DLOOP_KIND_VECTOR:
00628
00629 cur_elmp->curcount -= myblocks / cur_elmp->curblock;
00630 if (cur_elmp->curcount == 0) {
00631 DLOOP_Assert(myblocks % cur_elmp->curblock == 0);
00632 DLOOP_SEGMENT_POP_AND_MAYBE_EXIT;
00633 }
00634 else {
00635
00636
00637
00638 cur_elmp->curblock = cur_elmp->orig_block -
00639 (myblocks % cur_elmp->curblock);
00640
00641
00642
00643
00644 cur_elmp->curoffset = cur_elmp->orig_offset +
00645 ((cur_elmp->orig_count - cur_elmp->curcount) *
00646 cur_elmp->loop_p->loop_params.v_t.stride) +
00647 ((cur_elmp->orig_block - cur_elmp->curblock) *
00648 local_el_size);
00649 }
00650 break;
00651 case DLOOP_KIND_CONTIG:
00652
00653
00654
00655 DLOOP_Assert(myblocks == cur_elmp->curblock &&
00656 cur_elmp->curcount == 1);
00657 DLOOP_SEGMENT_POP_AND_MAYBE_EXIT;
00658 break;
00659 case DLOOP_KIND_BLOCKINDEXED:
00660 while (myblocks > 0 && myblocks >= cur_elmp->curblock)
00661 {
00662 myblocks -= cur_elmp->curblock;
00663 cur_elmp->curcount--;
00664 DLOOP_Assert(cur_elmp->curcount >= 0);
00665
00666 count_index = cur_elmp->orig_count -
00667 cur_elmp->curcount;
00668 cur_elmp->curblock = cur_elmp->orig_block;
00669 }
00670 if (cur_elmp->curcount == 0) {
00671
00672 DLOOP_Assert(myblocks == 0);
00673 DLOOP_SEGMENT_POP_AND_MAYBE_EXIT;
00674 }
00675 else {
00676
00677 cur_elmp->curoffset = cur_elmp->orig_offset +
00678 DLOOP_STACKELM_BLOCKINDEXED_OFFSET(cur_elmp,
00679 count_index);
00680 cur_elmp->curblock -= myblocks;
00681 cur_elmp->curoffset += myblocks * local_el_size;
00682 }
00683 break;
00684 }
00685 }
00686
00687 if (piecefn_indicated_exit) {
00688
00689 DLOOP_SEGMENT_SAVE_LOCAL_VALUES;
00690 return;
00691 }
00692 }
00693 else if (cur_elmp->curblock == 0) {
00694 #ifdef DLOOP_DEBUG_MANIPULATE
00695 DLOOP_dbg_printf("\thit end of block; elmp=%x [%d]\n",
00696 (unsigned) cur_elmp, cur_sp);
00697 #endif
00698 cur_elmp->curcount--;
00699
00700
00701
00702
00703 switch (cur_elmp->loop_p->kind & DLOOP_KIND_MASK) {
00704 case DLOOP_KIND_CONTIG:
00705 case DLOOP_KIND_VECTOR:
00706 case DLOOP_KIND_BLOCKINDEXED:
00707 break;
00708 case DLOOP_KIND_INDEXED:
00709 cur_elmp->orig_block =
00710 DLOOP_STACKELM_INDEXED_BLOCKSIZE(cur_elmp, cur_elmp->curcount ? cur_elmp->orig_count - cur_elmp->curcount : 0);
00711 break;
00712 case DLOOP_KIND_STRUCT:
00713 cur_elmp->orig_block =
00714 DLOOP_STACKELM_STRUCT_BLOCKSIZE(cur_elmp, cur_elmp->curcount ? cur_elmp->orig_count - cur_elmp->curcount : 0);
00715 break;
00716 default:
00717
00718 DLOOP_Assert(0);
00719 break;
00720
00721 }
00722 cur_elmp->curblock = cur_elmp->orig_block;
00723
00724 if (cur_elmp->curcount == 0) {
00725 #ifdef DLOOP_DEBUG_MANIPULATE
00726 DLOOP_dbg_printf("\talso hit end of count; elmp=%x [%d]\n",
00727 (unsigned) cur_elmp, cur_sp);
00728 #endif
00729 DLOOP_SEGMENT_POP_AND_MAYBE_EXIT;
00730 }
00731 }
00732 else {
00733 DLOOP_Dataloop_stackelm *next_elmp;
00734 int count_index, block_index;
00735
00736 count_index = cur_elmp->orig_count - cur_elmp->curcount;
00737 block_index = cur_elmp->orig_block - cur_elmp->curblock;
00738
00739
00740 next_elmp = &(segp->stackelm[cur_sp + 1]);
00741 if (cur_elmp->may_require_reloading) {
00742 DLOOP_Dataloop *load_dlp = NULL;
00743 switch (cur_elmp->loop_p->kind & DLOOP_KIND_MASK) {
00744 case DLOOP_KIND_CONTIG:
00745 case DLOOP_KIND_VECTOR:
00746 case DLOOP_KIND_BLOCKINDEXED:
00747 case DLOOP_KIND_INDEXED:
00748 load_dlp = cur_elmp->loop_p->loop_params.cm_t.dataloop;
00749 break;
00750 case DLOOP_KIND_STRUCT:
00751 load_dlp = DLOOP_STACKELM_STRUCT_DATALOOP(cur_elmp,
00752 count_index);
00753 break;
00754 default:
00755
00756 DLOOP_Assert(0);
00757 break;
00758
00759 }
00760
00761 #ifdef DLOOP_DEBUG_MANIPULATE
00762 DLOOP_dbg_printf("\tloading dlp=%x, elmp=%x [%d]\n",
00763 (unsigned) load_dlp,
00764 (unsigned) next_elmp,
00765 cur_sp+1);
00766 #endif
00767
00768 DLOOP_Stackelm_load(next_elmp,
00769 load_dlp,
00770 1);
00771 }
00772
00773 #ifdef DLOOP_DEBUG_MANIPULATE
00774 DLOOP_dbg_printf("\tpushing type, elmp=%x [%d], count=%d, block=%d\n",
00775 (unsigned) cur_elmp, cur_sp, count_index,
00776 block_index);
00777 #endif
00778
00779
00780
00781
00782 switch (cur_elmp->loop_p->kind & DLOOP_KIND_MASK) {
00783 case DLOOP_KIND_CONTIG:
00784 next_elmp->orig_offset = cur_elmp->curoffset +
00785 block_index * cur_elmp->loop_p->el_extent;
00786 break;
00787 case DLOOP_KIND_VECTOR:
00788
00789 next_elmp->orig_offset = cur_elmp->orig_offset +
00790 count_index * cur_elmp->loop_p->loop_params.v_t.stride +
00791 block_index * cur_elmp->loop_p->el_extent;
00792 break;
00793 case DLOOP_KIND_BLOCKINDEXED:
00794 next_elmp->orig_offset = cur_elmp->orig_offset +
00795 block_index * cur_elmp->loop_p->el_extent +
00796 DLOOP_STACKELM_BLOCKINDEXED_OFFSET(cur_elmp,
00797 count_index);
00798 break;
00799 case DLOOP_KIND_INDEXED:
00800 next_elmp->orig_offset = cur_elmp->orig_offset +
00801 block_index * cur_elmp->loop_p->el_extent +
00802 DLOOP_STACKELM_INDEXED_OFFSET(cur_elmp, count_index);
00803 break;
00804 case DLOOP_KIND_STRUCT:
00805 next_elmp->orig_offset = cur_elmp->orig_offset +
00806 block_index * DLOOP_STACKELM_STRUCT_EL_EXTENT(cur_elmp, count_index) +
00807 DLOOP_STACKELM_STRUCT_OFFSET(cur_elmp, count_index);
00808 break;
00809 default:
00810
00811 DLOOP_Assert(0);
00812 break;
00813
00814 }
00815
00816 #ifdef DLOOP_DEBUG_MANIPULATE
00817 DLOOP_dbg_printf("\tstep 1: next orig_offset = %d (0x%x)\n",
00818 next_elmp->orig_offset,
00819 next_elmp->orig_offset);
00820 #endif
00821
00822 switch (next_elmp->loop_p->kind & DLOOP_KIND_MASK) {
00823 case DLOOP_KIND_CONTIG:
00824 case DLOOP_KIND_VECTOR:
00825 next_elmp->curcount = next_elmp->orig_count;
00826 next_elmp->curblock = next_elmp->orig_block;
00827 next_elmp->curoffset = next_elmp->orig_offset;
00828 break;
00829 case DLOOP_KIND_BLOCKINDEXED:
00830 next_elmp->curcount = next_elmp->orig_count;
00831 next_elmp->curblock = next_elmp->orig_block;
00832 next_elmp->curoffset = next_elmp->orig_offset +
00833 DLOOP_STACKELM_BLOCKINDEXED_OFFSET(next_elmp, 0);
00834 break;
00835 case DLOOP_KIND_INDEXED:
00836 next_elmp->curcount = next_elmp->orig_count;
00837 next_elmp->curblock =
00838 DLOOP_STACKELM_INDEXED_BLOCKSIZE(next_elmp, 0);
00839 next_elmp->curoffset = next_elmp->orig_offset +
00840 DLOOP_STACKELM_INDEXED_OFFSET(next_elmp, 0);
00841 break;
00842 case DLOOP_KIND_STRUCT:
00843 next_elmp->curcount = next_elmp->orig_count;
00844 next_elmp->curblock =
00845 DLOOP_STACKELM_STRUCT_BLOCKSIZE(next_elmp, 0);
00846 next_elmp->curoffset = next_elmp->orig_offset +
00847 DLOOP_STACKELM_STRUCT_OFFSET(next_elmp, 0);
00848 break;
00849 default:
00850
00851 DLOOP_Assert(0);
00852 break;
00853
00854 }
00855
00856 #ifdef DLOOP_DEBUG_MANIPULATE
00857 DLOOP_dbg_printf("\tstep 2: next curoffset = %d (0x%x)\n",
00858 next_elmp->curoffset,
00859 next_elmp->curoffset);
00860 #endif
00861
00862 cur_elmp->curblock--;
00863 DLOOP_SEGMENT_PUSH;
00864 }
00865 }
00866
00867 #ifdef DLOOP_DEBUG_MANIPULATE
00868 DLOOP_dbg_printf("hit end of datatype\n");
00869 #endif
00870
00871 DLOOP_SEGMENT_SAVE_LOCAL_VALUES;
00872 return;
00873 }
00874
00875
00876
00877
00878
00879
00880
00881
00882 static inline DLOOP_Count DLOOP_Stackelm_blocksize(struct DLOOP_Dataloop_stackelm *elmp)
00883 {
00884 struct DLOOP_Dataloop *dlp = elmp->loop_p;
00885
00886 switch(dlp->kind & DLOOP_KIND_MASK) {
00887 case DLOOP_KIND_CONTIG:
00888
00889
00890
00891
00892 return dlp->loop_params.c_t.count;
00893 break;
00894 case DLOOP_KIND_VECTOR:
00895 return dlp->loop_params.v_t.blocksize;
00896 break;
00897 case DLOOP_KIND_BLOCKINDEXED:
00898 return dlp->loop_params.bi_t.blocksize;
00899 break;
00900 case DLOOP_KIND_INDEXED:
00901 return dlp->loop_params.i_t.blocksize_array[elmp->orig_count - elmp->curcount];
00902 break;
00903 case DLOOP_KIND_STRUCT:
00904 return dlp->loop_params.s_t.blocksize_array[elmp->orig_count - elmp->curcount];
00905 break;
00906 default:
00907
00908 DLOOP_Assert(0);
00909 break;
00910
00911 }
00912 return -1;
00913 }
00914
00915
00916
00917
00918
00919
00920
00921
00922
00923
00924
00925 static inline DLOOP_Offset DLOOP_Stackelm_offset(struct DLOOP_Dataloop_stackelm *elmp)
00926 {
00927 struct DLOOP_Dataloop *dlp = elmp->loop_p;
00928
00929 switch(dlp->kind & DLOOP_KIND_MASK) {
00930 case DLOOP_KIND_VECTOR:
00931 case DLOOP_KIND_CONTIG:
00932 return 0;
00933 break;
00934 case DLOOP_KIND_BLOCKINDEXED:
00935 return dlp->loop_params.bi_t.offset_array[elmp->orig_count - elmp->curcount];
00936 break;
00937 case DLOOP_KIND_INDEXED:
00938 return dlp->loop_params.i_t.offset_array[elmp->orig_count - elmp->curcount];
00939 break;
00940 case DLOOP_KIND_STRUCT:
00941 return dlp->loop_params.s_t.offset_array[elmp->orig_count - elmp->curcount];
00942 break;
00943 default:
00944
00945 DLOOP_Assert(0);
00946 break;
00947
00948 }
00949 return -1;
00950 }
00951
00952
00953
00954
00955
00956 static inline void DLOOP_Stackelm_load(struct DLOOP_Dataloop_stackelm *elmp,
00957 struct DLOOP_Dataloop *dlp,
00958 int branch_flag)
00959 {
00960 elmp->loop_p = dlp;
00961
00962 if ((dlp->kind & DLOOP_KIND_MASK) == DLOOP_KIND_CONTIG) {
00963 elmp->orig_count = 1;
00964 }
00965 else {
00966 elmp->orig_count = dlp->loop_params.count;
00967 }
00968
00969 if (branch_flag || (dlp->kind & DLOOP_KIND_MASK) == DLOOP_KIND_STRUCT)
00970 {
00971 elmp->may_require_reloading = 1;
00972 }
00973 else {
00974 elmp->may_require_reloading = 0;
00975 }
00976
00977
00978 elmp->curcount = elmp->orig_count;
00979
00980 elmp->orig_block = DLOOP_Stackelm_blocksize(elmp);
00981
00982 }
00983
00984
00985
00986
00987
00988