00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00043 #include "ctrump/analyzer/loop.h"
00044 #include "ctrump/analyzer/peel.h"
00045 #include "ctrump/analyzer/deptest.h"
00046 #include "ctrump/common/intmap.h"
00047 #include "ctrump/common/varray.h"
00048 #include "ctrump/common/queue.h"
00049 #include "ctrump/common/mempool.h"
00050 #include "ctrump/common/bitmap.h"
00051 #include "ctrump/common/abort.h"
00052 #include "ctrump/ast/ast.h"
00053 #include <stdlib.h>
00054 #include <stdio.h>
00055 #include <string.h>
00056 #include <assert.h>
00057
00058 #define zero_bmp ctrump_bmp_zero
00059 #define fill_bmp ctrump_bmp_fill
00060
00064 enum loop_cfg_node_code {
00065 LOOP_CFG_NODE_INNER_LOOP,
00066 LOOP_CFG_NODE_BB
00067 };
00068
00072 struct loop_cfg_node {
00073 enum loop_cfg_node_code code;
00074 union {
00075 struct ctrump_loop_info_node *loop;
00076 struct ctrump_bb *bb;
00077 } u;
00078 };
00079
00084 static void
00085 find_invariant(struct ctrump_cfg *cfg,
00086 ctrump_bitmap_t invariant,
00087 ctrump_bitmap_t use,
00088 ctrump_bitmap_t modify)
00089 {
00090 int num_var = cfg->num_var;
00091
00092
00093 ctrump_bitmap_nand(invariant, use, modify, num_var);
00094
00095
00096
00097
00098
00099 }
00100
00104 struct iv_info {
00105 struct ctrump_pdg_node *reach_at_loop_entry;
00106 int incr;
00107 };
00108
00115 static int
00116 recog_inductive(struct iv_info *recog,
00117 struct ctrump_pdg_node *pdg)
00118 {
00119 int add_val;
00120 int is_const;
00121
00122 switch (pdg->code) {
00123 case CTRUMP_PDG_VALUE_REDUCTION:
00124 if (pdg->u.reduction.op == CTRUMP_REDUCTIVE_ADD) {
00125 add_val = ctrump_ast_fold_const_sint(pdg->u.reduction.value, &is_const);
00126 if (is_const) {
00127 recog->incr += add_val;
00128 return 1;
00129 }
00130 }
00131 return 0;
00132
00133 case CTRUMP_PDG_VALUE_INCREMENTAL:
00134 if (pdg->u.incr.op == CTRUMP_PDG_INC) {
00135 recog->incr++;
00136 return 1;
00137 }
00138 return 0;
00139
00140 default:
00141 break;
00142 }
00143
00144 return 0;
00145 }
00146
00150 static void
00151 find_loop_carry_dependency(ctrump_bitmap_t modified_and_live_after_loop,
00152 ctrump_bitmap_t loop_carry_dep,
00153 struct ctrump_bb *cond_bb,
00154 struct ctrump_bb *next_bb,
00155 int num_bb,
00156 ctrump_bitmap_t used,
00157 ctrump_bitmap_t modify,
00158 ctrump_bitmap_t inductive,
00159 int num_var)
00160 {
00161
00162
00163 ctrump_bitmap_nand_or(modified_and_live_after_loop,
00164 next_bb->live, next_bb->kill,
00165 next_bb->use, num_var);
00166 ctrump_bitmap_and(modified_and_live_after_loop,
00167 modified_and_live_after_loop, modify, num_var);
00168
00169
00170 ctrump_bitmap_nand_or(loop_carry_dep,
00171 next_bb->live,
00172 next_bb->kill,
00173 next_bb->use,
00174 num_var);
00175
00176
00177
00178
00179 ctrump_bitmap_nand_or(loop_carry_dep,
00180 loop_carry_dep,
00181 cond_bb->kill,
00182 cond_bb->use,
00183 num_var);
00184
00185
00186 ctrump_bitmap_and(loop_carry_dep, loop_carry_dep, modify, num_var);
00187 ctrump_bitmap_nand(loop_carry_dep, loop_carry_dep, inductive, num_var);
00188 }
00189
00201 static void
00202 find_iv(ctrump_bitmap_t inductive,
00203 struct loop_cfg_node *bbs,
00204 struct ctrump_loop_cfg_info *loop_cfg,
00205 ctrump_bitmap_t is_conditional_bb,
00206 int num_bb,
00207 int num_var,
00208 struct iv_info *iv_recogs,
00209 struct ctrump_intmap *var_info_map)
00210 {
00211 int i, j, n;
00212 int pred_loop_index;
00213 struct ctrump_bb *cond_bb = loop_cfg->cond_bb;
00214 struct ctrump_bb *loop_pred = loop_cfg->loop_pred;
00215
00216 assert(loop_pred);
00217
00218 n = cond_bb->num_preds;
00219
00220 for (i=0; i<n; i++) {
00221 if (cond_bb->preds[i] == loop_pred)
00222 break;
00223 }
00224
00225 assert(i < n);
00226 pred_loop_index = i;
00227
00228
00229
00230
00231 n = cond_bb->num_phi;
00232 for (i=0; i<n; i++) {
00233 struct ctrump_phi_node *p = &cond_bb->phi_nodes[i];
00234 int var_idx = p->var_index;
00235 struct ctrump_pdg_node *reach_at_loop_entry = p->nodes[pred_loop_index];
00236 struct iv_info *iv = &iv_recogs[var_idx];
00237
00238 iv->reach_at_loop_entry = reach_at_loop_entry;
00239 ctrump_bitmap_set(inductive, var_idx);
00240 }
00241
00242
00243 for (i=0; i<num_bb; i++) {
00244 struct loop_cfg_node *node = &bbs[i];
00245
00246 switch (node->code) {
00247 case LOOP_CFG_NODE_INNER_LOOP:
00248
00249 ctrump_bitmap_nand(inductive, inductive, node->u.loop->modify, num_var);
00250 break;
00251
00252 case LOOP_CFG_NODE_BB:
00253 if (ctrump_bitmap_p(is_conditional_bb, i)) {
00254
00255 ctrump_bitmap_nand(inductive, inductive, node->u.bb->kill, num_var);
00256 }
00257 }
00258 }
00259
00260
00261 for (i=0; i<num_bb; i++) {
00262 struct loop_cfg_node *node = &bbs[i];
00263
00264 switch (node->code) {
00265 case LOOP_CFG_NODE_INNER_LOOP:
00266
00267 break;
00268
00269 case LOOP_CFG_NODE_BB:
00270 if (! ctrump_bitmap_p(is_conditional_bb, i)) {
00271 struct ctrump_bb *bb = node->u.bb;
00272 struct ctrump_var_store *stores = bb->load_store.stores;
00273 int num_st = bb->load_store.num_store;
00274 for (j=0; j<num_st; j++) {
00275 struct ctrump_var *v = stores[j].var;
00276 struct ctrump_cfg_var_info *vi;
00277 struct ctrump_pdg_node *pdg = stores[j].new_val;
00278 struct iv_info *recog;
00279 int found, index;
00280
00281 vi = ctrump_intmap_lookup(var_info_map, &found, v->id);
00282 index = vi->index;
00283 recog = &iv_recogs[index];
00284
00285 if (ctrump_bitmap_p(inductive, index)) {
00286 if (! recog_inductive(recog, pdg)) {
00287 ctrump_bitmap_clear(inductive,index);
00288 }
00289 }
00290 }
00291 }
00292 }
00293 }
00294 }
00295
00296
00297 enum parallel_access_type {
00298 LOAD,
00299 STORE,
00300 };
00301
00302 #define PARALLEL_ACCESS_PARTIAL (1<<0)
00303 #define PARALLEL_ACCESS_OP_ASSIGN (1<<1)
00304 #define PARALLEL_ACCESS_INCPTR (1<<2)
00305 #define PARALLEL_ACCESS_INVARIANT_PTR (1<<3)
00306
00310 struct parallel_access_info {
00311 enum parallel_access_type type;
00313 int flags;
00315 struct ctrump_var *invariant;
00317 int num_subscript;
00318 struct ctrump_loop_subscript *subscripts;
00319 int num_constraint;
00320 struct ctrump_loop_subscript_constraint *constraints;
00321 struct ctrump_stmt *at_stmt;
00322 struct ctrump_expr *at_expr;
00323 };
00324
00325
00331 static struct parallel_access_info *
00332 append_access_info(struct ctrump_varray *access_info,
00333 struct ctrump_var *invariant_var,
00334 int num_subscript,
00335 struct ctrump_loop_subscript *subscripts,
00336 int num_constraint,
00337 struct ctrump_loop_subscript_constraint *constraints,
00338 enum parallel_access_type type,
00339 int flags,
00340 struct ctrump_stmt *at_stmt,
00341 struct ctrump_expr *at_expr,
00342 struct ctrump_mempool *alloc)
00343 {
00344 struct parallel_access_info *ai = access_info->elements;
00345
00346 VA_NEWELEM_P(access_info, alloc);
00347 ai = VA_LAST_PTR(struct parallel_access_info, access_info);
00348
00349 ai->invariant = invariant_var;
00350 ai->num_subscript = num_subscript;
00351 ai->subscripts = subscripts;
00352 ai->num_constraint = num_constraint;
00353 ai->constraints = constraints;
00354 ai->type = type;
00355 ai->at_stmt = at_stmt;
00356 ai->at_expr = at_expr;
00357 ai->flags = flags;
00358
00359 return NULL;
00360 }
00361
00366 static int
00367 check_var_bmp_p(struct ctrump_var *var,
00368 ctrump_bitmap_t bmp,
00369 struct ctrump_intmap *var_info_map)
00370 {
00371 struct ctrump_cfg_var_info *vi;
00372 int found, index;
00373 vi = ctrump_intmap_lookup(var_info_map, &found, var->id);
00374 index = vi->index;
00375 return ctrump_bitmap_p(bmp, index);
00376 }
00377
00381 #define MAX(a,b) (((a)>(b))?(a):(b))
00382
00386 static void
00387 parallel_store(struct ctrump_varray *access_info,
00388 struct ctrump_var *invariant_var,
00389 int num_subscript,
00390 struct ctrump_loop_subscript *subscripts,
00391 int num_constraint,
00392 struct ctrump_loop_subscript_constraint *constraints,
00393 int flags,
00394 struct ctrump_stmt *at_stmt,
00395 struct ctrump_expr *at_expr,
00396 struct ctrump_mempool *tmp_alloc)
00397 {
00398 if ((flags & PARALLEL_ACCESS_PARTIAL) ||
00399 (flags & PARALLEL_ACCESS_OP_ASSIGN)) {
00400 append_access_info(access_info, invariant_var,
00401 num_subscript, subscripts,
00402 num_constraint, constraints,
00403 LOAD, flags, at_stmt, at_expr, tmp_alloc);
00404 }
00405
00406 append_access_info(access_info, invariant_var,
00407 num_subscript, subscripts,
00408 num_constraint, constraints,
00409 STORE, flags, at_stmt, at_expr, tmp_alloc);
00410 }
00414 static void
00415 parallel_load(struct ctrump_varray *access_info,
00416 struct ctrump_var *invariant_var,
00417 int num_subscript,
00418 struct ctrump_loop_subscript *subscripts,
00419 int num_constraint,
00420 struct ctrump_loop_subscript_constraint *constraints,
00421 int flags,
00422 struct ctrump_stmt *at_stmt,
00423 struct ctrump_expr *at_expr,
00424 struct ctrump_mempool *tmp_alloc)
00425 {
00426 append_access_info(access_info, invariant_var,
00427 num_subscript, subscripts,
00428 num_constraint, constraints,
00429 LOAD, flags, at_stmt, at_expr, tmp_alloc);
00430 }
00431
00432 static void
00433 append_random_access_array(struct ctrump_varray *access,
00434 struct ctrump_expr *array,
00435 int num_subscript,
00436 struct ctrump_subscript *subscripts,
00437 struct ctrump_stmt *at_stmt,
00438 struct ctrump_expr *at_expr,
00439 enum ctrump_random_access_code code,
00440 struct ctrump_mempool *alloc)
00441 {
00442 struct ctrump_random_access *a;
00443 VA_NEWELEM_P(access, alloc);
00444 a = VA_LAST_PTR(struct ctrump_random_access, access);
00445
00446 a->code = code;
00447 a->access_at_stmt = at_stmt;
00448 a->access_at_expr = at_expr;
00449 a->u.arr_not_invariant.array = array;
00450 a->u.arr_not_invariant.num_subscript = num_subscript;
00451 a->u.arr_not_invariant.subscripts = subscripts;
00452 }
00453
00454 #define append_array_not_invariant(a,arr,ns,ss,ats,ate,alloc) \
00455 append_random_access_array(a,arr,ns,ss,ats,ate,CTRUMP_RANDOM_ACCESS_ARRAY_NOT_INVARIANT,alloc)
00456 #define append_subscript_not_inductive(a,arr,ns,ss,ats,ate,alloc) \
00457 append_random_access_array(a,arr,ns,ss,ats,ate,CTRUMP_RANDOM_ACCESS_SUBSCRIPT_NOT_INDUCTIVE,alloc)
00458 #define append_complicated_array(a,arr,ns,ss,ats,ate,alloc) \
00459 append_random_access_array(a,arr,ns,ss,ats,ate,CTRUMP_RANDOM_ACCESS_COMPLICATED_ARRAY,alloc)
00460
00461 static void
00462 append_complicated_subscript(struct ctrump_varray *access,
00463 struct ctrump_expr *subscript_expr,
00464 struct ctrump_stmt *at_stmt,
00465 struct ctrump_expr *at_expr,
00466 struct ctrump_mempool *alloc)
00467 {
00468 struct ctrump_random_access *a;
00469 VA_NEWELEM_P(access, alloc);
00470 a = VA_LAST_PTR(struct ctrump_random_access, access);
00471
00472 a->code = CTRUMP_RANDOM_ACCESS_COMPLICATED_SUBSCRIPT;
00473 a->access_at_stmt = at_stmt;
00474 a->access_at_expr = at_expr;
00475
00476 a->u.complicated_subscript = subscript_expr;
00477 }
00478
00479 static void
00480 append_index_var_not_inductive(struct ctrump_varray *access,
00481 struct ctrump_var *var,
00482 struct ctrump_stmt *at_stmt,
00483 struct ctrump_expr *at_expr,
00484 struct ctrump_mempool *alloc)
00485 {
00486 struct ctrump_random_access *a;
00487 VA_NEWELEM_P(access, alloc);
00488 a = VA_LAST_PTR(struct ctrump_random_access, access);
00489
00490 a->code = CTRUMP_RANDOM_ACCESS_INDEX_VAR_NOT_INDUCTIVE;
00491 a->access_at_stmt = at_stmt;
00492 a->access_at_expr = at_expr;
00493
00494 a->u.index_var_not_inductive = var;
00495 }
00496
00497
00498 static void
00499 append_multiple_scaled_index(struct ctrump_varray *access,
00500 struct ctrump_expr *index0,
00501 struct ctrump_expr *index1,
00502 struct ctrump_stmt *at_stmt,
00503 struct ctrump_expr *at_expr,
00504 struct ctrump_mempool *alloc)
00505 {
00506 struct ctrump_random_access *a;
00507 VA_NEWELEM_P(access, alloc);
00508 a = VA_LAST_PTR(struct ctrump_random_access, access);
00509
00510 a->code = CTRUMP_RANDOM_ACCESS_MULTIPLE_SCALED_INDEX;
00511 a->access_at_stmt = at_stmt;
00512 a->access_at_expr = at_expr;
00513
00514 a->u.multiple_scaled_index.index0 = index0;
00515 a->u.multiple_scaled_index.index1 = index1;
00516 }
00517
00518
00522 static int
00523 analyze_subscript_coef(struct ctrump_expr *e,
00524 struct ctrump_expr **next_level_index_expr,
00525 struct ctrump_var **coef_var,
00526 ctrump_bitmap_t invariant,
00527 struct ctrump_intmap *var_info_map)
00528 {
00529
00530
00531
00532
00533 if (e->code == CTRUMP_EXPR_BIN_MUL) {
00534 struct ctrump_expr *lhs = e->u.binary.lhs, *rhs = e->u.binary.rhs;
00535 struct ctrump_var *v;
00536 struct ctrump_cfg_var_info *vi;
00537 int found;
00538 lhs = ctrump_peel_paren(lhs);
00539 rhs = ctrump_peel_paren(rhs);
00540 if (lhs->code == CTRUMP_EXPR_VARREF) {
00541 v = lhs->u.varref.var;
00542 vi = ctrump_intmap_lookup(var_info_map, &found, v->id);
00543 assert(found);
00544
00545 if (ctrump_bitmap_p(invariant, vi->index)) {
00546 *next_level_index_expr = rhs;
00547 *coef_var = v;
00548 return 1;
00549 }
00550 }
00551
00552 if (rhs->code == CTRUMP_EXPR_VARREF) {
00553 v = rhs->u.varref.var;
00554 vi = ctrump_intmap_lookup(var_info_map, &found, v->id);
00555 assert(found);
00556
00557 if (ctrump_bitmap_p(invariant, vi->index)) {
00558 *next_level_index_expr = lhs;
00559 *coef_var = v;
00560 return 1;
00561 }
00562 }
00563 }
00564
00565 return 0;
00566 }
00567
00571 struct add_stack_elem {
00572 int is_sign;
00573 struct ctrump_expr *e;
00574 };
00575
00593 static int
00594 single_expr_subscripts_to_loop_subscript(struct ctrump_varray *random_access,
00595 struct ctrump_varray *indices,
00596 int *constant_offset,
00597 struct ctrump_expr *subscript_expr,
00598 struct ctrump_expr **next_level_index_ret,
00599 struct ctrump_var **coef_var,
00600 struct iv_info **iv_level,
00601 ctrump_bitmap_t invariant,
00602 ctrump_bitmap_t *inductive_bmp,
00603 struct ctrump_intmap *var_info_map,
00604 int loop_level,
00605 struct ctrump_stmt *at_stmt,
00606 struct ctrump_expr *at_expr,
00607 struct ctrump_mempool *tmp_alloc)
00608 {
00609 int offset_sum = 0;
00610 struct ctrump_expr *next_level_index = NULL;
00611
00612 struct ctrump_varray add_stack;
00613 struct add_stack_elem ae;
00614 ctrump_varray_init(&add_stack, 4, sizeof(struct add_stack_elem));
00615
00616 ae.e = subscript_expr;
00617 ae.is_sign = 0;
00618 VA_PUSH(struct add_stack_elem, &add_stack, ae);
00619
00620 *next_level_index_ret = NULL;
00621
00622
00623
00624
00625 while (add_stack.nelem) {
00626 int j, r;
00627 struct ctrump_var *var;
00628 struct ctrump_cfg_var_info *vi;
00629 int off, incr = 0, found, index;
00630 struct ctrump_expr *index_expr;
00631 struct ctrump_loop_index *loop_index;
00632 int is_sign;
00633
00634 ae = VA_POP(struct add_stack_elem, &add_stack);
00635
00636 subscript_expr = ctrump_peel_paren(ae.e);
00637 is_sign = ae.is_sign;
00638
00639 ctrump_ast_fold_const_offset_sint(&off, &index_expr, subscript_expr);
00640
00641 offset_sum += off;
00642
00643 if (! index_expr)
00644 continue;
00645
00646 index_expr = ctrump_peel_paren(index_expr);
00647
00648 if (index_expr->code == CTRUMP_EXPR_BIN_ADD ||
00649 index_expr->code == CTRUMP_EXPR_BIN_SUB) {
00650
00651
00652 ae.e = index_expr->u.binary.rhs;
00653 ae.is_sign = (index_expr->code == CTRUMP_EXPR_BIN_SUB);
00654 VA_PUSH(struct add_stack_elem, &add_stack, ae);
00655 index_expr = ctrump_peel_paren(index_expr->u.binary.lhs);
00656 }
00657
00658 if (analyze_subscript_coef(index_expr, &next_level_index,
00659 coef_var, invariant, var_info_map)) {
00660 if (*next_level_index_ret) {
00661
00662
00663 append_multiple_scaled_index(random_access,
00664 *next_level_index_ret, next_level_index,
00665 at_stmt, at_expr, tmp_alloc);
00666 goto error;
00667 }
00668
00669 *next_level_index_ret = next_level_index;
00670
00671 continue;
00672 }
00673
00674 if (index_expr->code != CTRUMP_EXPR_VARREF) {
00675 append_complicated_subscript(random_access, index_expr, at_stmt, at_expr, tmp_alloc);
00676 goto error;
00677 }
00678
00679 var = index_expr->u.varref.var;
00680 vi = ctrump_intmap_lookup(var_info_map, &found, var->id);
00681 assert(found);
00682
00683 index = vi->index;
00684
00685 for (j=loop_level; j>=0; j--) {
00686 incr = iv_level[j][index].incr;
00687 r = ctrump_bitmap_p(inductive_bmp[j], index);
00688
00689 if (r) {
00690 break;
00691 }
00692 }
00693
00694 if (j==-1) {
00695
00696
00697 if (! ctrump_bitmap_p(invariant, index)) {
00698 append_index_var_not_inductive(random_access,
00699 var, at_stmt, at_expr, tmp_alloc);
00700 goto error;
00701 }
00702
00703 VA_NEWELEM(indices);
00704 loop_index = VA_LAST_PTR(struct ctrump_loop_index, indices);
00705 loop_index->code = CTRUMP_LOOP_INDEX_INVARIANT;
00706 loop_index->u.invariant = vi->var;
00707 } else {
00708
00709 VA_NEWELEM(indices);
00710 loop_index = VA_LAST_PTR(struct ctrump_loop_index, indices);
00711
00712 loop_index->code = CTRUMP_LOOP_INDEX_INDUCTIVE;
00713 loop_index->u.iv.incr = incr;
00714 loop_index->u.iv.iv_level = j;
00715 loop_index->u.iv.var = var;
00716 loop_index->u.iv.reach_at_loop_entry = iv_level[j][index].reach_at_loop_entry;
00717 }
00718
00719 if (is_sign) {
00720 loop_index->flags = CTRUMP_LOOP_INDEX_NEGATIVE;
00721 } else {
00722 loop_index->flags = 0;
00723 }
00724 }
00725
00726 *constant_offset = offset_sum;
00727 return 1;
00728
00729 error:
00730 return 0;
00731 }
00732
00733
00734
00739 static int
00740 expr_subscripts_to_loop_subscript(struct ctrump_varray *random_access,
00741 int num_subscript,
00742 struct ctrump_subscript *subscripts,
00743 int *num_loop_subscript,
00744 struct ctrump_loop_subscript **ret_subscripts,
00745 int *num_constraint,
00746 struct ctrump_loop_subscript_constraint **ret_constraints,
00747 int ptr_incr,
00748 int ptr_incr_iv_level,
00749 int loop_level,
00750 struct iv_info **iv_level,
00751 struct ctrump_loop_exit **exit_level,
00752 ctrump_bitmap_t invariant,
00753 ctrump_bitmap_t *inductive_bmp,
00754 struct ctrump_intmap *var_info_map,
00755 struct ctrump_expr *array,
00756 struct ctrump_stmt *at_stmt,
00757 struct ctrump_expr *at_expr,
00758 struct ctrump_mempool *tmp_alloc,
00759 struct ctrump_mempool *alloc,
00760 int *id)
00761 {
00762 struct ctrump_varray loop_subscripts, loop_indices;
00763 struct ctrump_loop_subscript *reverse, *p;
00764 int i, n, nd, num_nd;
00765
00766 ctrump_varray_init(&loop_subscripts, 4, sizeof(struct ctrump_loop_subscript));
00767 ctrump_varray_init(&loop_indices, 4, sizeof(struct ctrump_loop_index));
00768
00769
00770
00771
00772
00773
00774
00775
00776
00777
00778
00779
00780
00781
00782
00783
00784 for (i=0; i<num_subscript; i++) {
00785
00786
00787 union ctrump_loop_subscript_scale subscript_scale = {0};
00788 enum ctrump_loop_subscript_code subscript_code = -1;
00789
00790 struct ctrump_subscript *tree_subscript = &subscripts[i];
00791
00792 num_nd = tree_subscript->num_nd_subscript;
00793
00794 for (nd=0; nd<num_nd; nd++) {
00795
00796 struct ctrump_expr *subscript_expr;
00797 int subscript_coef;
00798 struct ctrump_subscript_nd *nds = &subscripts[i].nd_subscripts[nd];
00799 struct ctrump_loop_subscript *e;
00800
00801 switch (nds->code) {
00802 case CTRUMP_SUBSCRIPT_RECORD_MEMBER:
00803 VA_NEWELEM(&loop_subscripts);
00804 e = VA_LAST_PTR(struct ctrump_loop_subscript, &loop_subscripts);
00805 e->id = (*id)++;
00806 e->code = CTRUMP_LOOP_SUBSCRIPT_RECORD_MEMBER;
00807 e->u.record_member.member_name = nds->u.record_member.member_name;
00808 e->u.record_member.record_type = nds->u.record_member.record_type;
00809 e->indices = NULL;
00810 e->num_index = 0;
00811 e->offset = 0;
00812 break;
00813
00814 case CTRUMP_SUBSCRIPT_RECORD_MEMBER_TERMINAL:
00815 VA_NEWELEM(&loop_subscripts);
00816 e = VA_LAST_PTR(struct ctrump_loop_subscript, &loop_subscripts);
00817 e->id = (*id)++;
00818 e->code = CTRUMP_LOOP_SUBSCRIPT_RECORD_MEMBER_TERMINAL;
00819 e->u.record_member_terminal.member_name = nds->u.record_member_terminal.member_name;
00820 e->u.record_member_terminal.record_type = nds->u.record_member_terminal.record_type;
00821 e->u.record_member_terminal.load_type = nds->u.record_member_terminal.load_type;
00822 e->indices = NULL;
00823 e->num_index = 0;
00824 e->offset = 0;
00825 break;
00826
00827 case CTRUMP_SUBSCRIPT_LOAD_RECORD:
00828 case CTRUMP_SUBSCRIPT_COEF_ARRAY:
00829 case CTRUMP_SUBSCRIPT_TERMINAL:
00830 switch (nds->code) {
00831 case CTRUMP_SUBSCRIPT_COEF_ARRAY:
00832 subscript_expr = nds->u.array_coef.expr;
00833 subscript_coef = nds->u.array_coef.coef;
00834 subscript_code = CTRUMP_LOOP_SUBSCRIPT_COEF_ARRAYSIZE;
00835 subscript_scale.array_size_coef = subscript_coef;
00836 break;
00837
00838 case CTRUMP_SUBSCRIPT_TERMINAL:
00839 subscript_expr = nds->u.terminal.expr;
00840 subscript_code = CTRUMP_LOOP_SUBSCRIPT_COEF_TERMINAL;
00841 subscript_scale.load_type = nds->u.terminal.load_type;
00842 break;
00843
00844 case CTRUMP_SUBSCRIPT_LOAD_RECORD:
00845 subscript_expr = nds->u.load_record.expr;
00846 subscript_code = CTRUMP_LOOP_SUBSCRIPT_LOAD_RECORD;
00847 subscript_scale.load_record.load_type = nds->u.load_record.load_type;
00848 break;
00849
00850 default:
00851 ctrump_unreachable("compiler bug");
00852 break;
00853 }
00854
00855
00856
00857
00858
00859
00860
00861
00862
00863
00864
00865
00866
00867
00868
00869
00870
00871
00872
00873
00874
00875
00876
00877 while (1) {
00878
00879
00880 struct ctrump_expr *next_level_index_expr = NULL;
00881 struct ctrump_var *coef_var = NULL;
00882 int subscript_off;
00883
00884 loop_indices.nelem = 0;
00885 VA_NEWELEM(&loop_subscripts);
00886 e = VA_LAST_PTR(struct ctrump_loop_subscript, &loop_subscripts);
00887 e->id = (*id)++;
00888
00889 if (subscript_expr) {
00890
00891 int r = single_expr_subscripts_to_loop_subscript(random_access,
00892 &loop_indices,
00893 &subscript_off,
00894 subscript_expr,
00895 &next_level_index_expr,
00896 &coef_var,
00897 iv_level,
00898 invariant,
00899 inductive_bmp,
00900 var_info_map,
00901 loop_level,
00902 at_stmt, at_expr,
00903 tmp_alloc);
00904
00905 if (! r)
00906 goto error;
00907
00908 e->code = subscript_code;
00909 e->u = subscript_scale;
00910 e->num_index = loop_indices.nelem;
00911 e->offset = subscript_off;
00912
00913 if (next_level_index_expr == NULL) {
00914 if (nd == (num_nd-1) && i == (num_subscript-1)) {
00915
00916
00917 if (ptr_incr != 0) {
00918 struct ctrump_loop_index *loop_index;
00919 VA_NEWELEM(&loop_indices);
00920 loop_index = VA_LAST_PTR(struct ctrump_loop_index, &loop_indices);
00921
00922 loop_index->code = CTRUMP_LOOP_INDEX_POINTER_INC;
00923 loop_index->flags = 0;
00924 loop_index->u.ptrinc.incr = ptr_incr;
00925 loop_index->u.ptrinc.iv_level = ptr_incr_iv_level;
00926 }
00927 }
00928
00929 e->indices = ctrump_varray_copy(&loop_indices, alloc);
00930
00931 break;
00932 }
00933
00934 e->indices = ctrump_varray_copy(&loop_indices, alloc);
00935
00936 subscript_code = CTRUMP_LOOP_SUBSCRIPT_COEF_SCALE;
00937 subscript_scale.scale_array_coef = coef_var;
00938 subscript_expr = ctrump_peel_paren(next_level_index_expr);
00939 assert(coef_var);
00940 } else {
00941
00942
00943 if (ptr_incr != 0) {
00944 struct ctrump_loop_index *loop_index;
00945 loop_index = ctrump_mempool_alloc(alloc, sizeof(*loop_index));
00946
00947 loop_index->code = CTRUMP_LOOP_INDEX_POINTER_INC;
00948 loop_index->flags = 0;
00949 loop_index->u.ptrinc.incr = ptr_incr;
00950 loop_index->u.ptrinc.iv_level = ptr_incr_iv_level;
00951
00952 e->code = subscript_code;
00953 e->u = subscript_scale;
00954 e->num_index = 1;
00955 e->indices = loop_index;
00956 e->offset = 0;
00957 } else {
00958 e->code = subscript_code;
00959 e->u = subscript_scale;
00960 e->num_index = 0;
00961 e->indices = 0;
00962 e->offset = 0;
00963 }
00964
00965 break;
00966 }
00967 }
00968 break;
00969 }
00970 }
00971 }
00972
00973 *num_constraint = 0;
00974 *ret_constraints = NULL;
00975
00976 if (ptr_incr != 0) {
00977
00978 }
00979
00980 n = *num_loop_subscript = loop_subscripts.nelem;
00981 reverse = ctrump_mempool_alloc(alloc, sizeof(*reverse)*n);
00982 p = loop_subscripts.elements;
00983
00984
00985 for (i=0; i<n; i++) {
00986 reverse[i] = p[n-1-i];
00987 }
00988
00989 *ret_subscripts = reverse;
00990
00991 ctrump_varray_discard(&loop_indices);
00992
00993 return 1;
00994
00995 error:
00996 ctrump_varray_discard(&loop_subscripts);
00997 ctrump_varray_discard(&loop_indices);
00998
00999 *num_constraint = 0;
01000 *ret_constraints = NULL;
01001
01002 *num_loop_subscript = 0;
01003 *ret_subscripts = NULL;
01004
01005 return 0;
01006 }
01007
01008 typedef void (*parallel_array_func_t)(struct ctrump_varray *access_info,
01009 struct ctrump_var *invariant_var,
01010 int num_subscript,
01011 struct ctrump_loop_subscript *subscripts,
01012 int num_constraint,
01013 struct ctrump_loop_subscript_constraint *constraints,
01014 int flags,
01015 struct ctrump_stmt *at_stmt,
01016 struct ctrump_expr *at_expr,
01017 struct ctrump_mempool *alloc);
01018
01022 static void
01023 recog_array_index(parallel_array_func_t parallel_func,
01024 struct ctrump_varray *parallel_access,
01025 struct ctrump_varray *random_access,
01026 struct ctrump_memory_access *memory_access,
01027 ctrump_bitmap_t invariant,
01028 ctrump_bitmap_t invariant_on_loop_top,
01029 ctrump_bitmap_t *inductive,
01030 struct ctrump_stmt *at_stmt,
01031 struct ctrump_expr *at_expr,
01032 struct ctrump_intmap *var_info_map,
01033 int flags,
01034 struct iv_info **iv_level,
01035 struct ctrump_loop_exit **exit_level,
01036 int loop_level,
01037 const struct ctrump_abi *abi,
01038 struct ctrump_mempool *tmp_alloc,
01039 struct ctrump_mempool *loop_alloc,
01040 int *id)
01041 {
01042 struct ctrump_expr *arr = memory_access->array;
01043 struct ctrump_subscript *subscripts = memory_access->subscripts;
01044 int num_subscript = memory_access->num_subscript;
01045
01046 struct ctrump_var *av;
01047 struct ctrump_loop_subscript *loop_subscripts;
01048 struct ctrump_loop_subscript_constraint *constraints;
01049
01050 int num_loop_subscript, num_constraint, r;
01051
01052
01053 arr = ctrump_peel_cast_to_pointer_from_array(arr);
01054
01055 if (arr->code == CTRUMP_EXPR_VARREF) {
01056 int ptr_incr = 0;
01057 int ptr_incr_iv_level = 0;
01058
01059 av = arr->u.varref.var;
01060
01061 if (! check_var_bmp_p(av, invariant, var_info_map)) {
01062 int found;
01063 struct ctrump_cfg_var_info *vi = ctrump_intmap_lookup(var_info_map,
01064 &found, av->id);
01065 assert(found);
01066
01067
01068 if (!ctrump_bitmap_p(inductive[loop_level], vi->index)) {
01069 append_array_not_invariant(random_access, arr, num_subscript, subscripts, at_stmt, at_expr, tmp_alloc);
01070 return;
01071 }
01072
01073
01074 ptr_incr = iv_level[loop_level][vi->index].incr;
01075 ptr_incr_iv_level = loop_level;
01076 }
01077
01078 r = expr_subscripts_to_loop_subscript(random_access,
01079 num_subscript,
01080 subscripts,
01081 &num_loop_subscript,
01082 &loop_subscripts,
01083 &num_constraint,
01084 &constraints,
01085 ptr_incr, ptr_incr_iv_level,
01086 loop_level,
01087 iv_level,
01088 exit_level,
01089 invariant_on_loop_top,
01090 inductive,
01091 var_info_map,
01092 arr, at_stmt, at_expr, tmp_alloc, loop_alloc, id);
01093
01094 if (! r) {
01095 return;
01096 }
01097
01098 (*parallel_func)(parallel_access, av,
01099 num_loop_subscript, loop_subscripts,
01100 num_constraint, constraints, flags, at_stmt, at_expr, tmp_alloc);
01101 } else {
01102 append_complicated_array(random_access, arr, num_subscript,
01103 subscripts, at_stmt, at_expr, tmp_alloc);
01104 }
01105 }
01106
01107
01108 int
01109 ctrump_loop_index_equal(const struct ctrump_loop_index *i1,
01110 const struct ctrump_loop_index *i2)
01111 {
01112 if (i1->code != i2->code)
01113 return 0;
01114 if (i1->flags != i2->flags)
01115 return 0;
01116
01117 switch (i1->code) {
01118 case CTRUMP_LOOP_INDEX_INDUCTIVE:
01119 if (i1->u.iv.var != i2->u.iv.var)
01120 return 0;
01121
01122 case CTRUMP_LOOP_INDEX_INVARIANT:
01123 if (i1->u.invariant != i2->u.invariant)
01124 return 0;
01125
01126 case CTRUMP_LOOP_INDEX_POINTER_INC:
01127 if (! (i1->u.ptrinc.iv_level == i2->u.ptrinc.iv_level &&
01128 i1->u.ptrinc.incr == i2->u.ptrinc.incr))
01129 return 0;
01130 }
01131
01132 return 1;
01133 }
01134
01135
01136 int
01137 ctrump_loop_subscript_equal(const struct ctrump_loop_subscript *s1,
01138 const struct ctrump_loop_subscript *s2)
01139 {
01140 int i, n;
01141
01142 if (s1->offset != s2->offset)
01143 return 0;
01144 if (s1->num_index != s2->num_index)
01145 return 0;
01146 if (s1->code != s2->code)
01147 return 0;
01148
01149 switch (s1->code) {
01150 case CTRUMP_LOOP_SUBSCRIPT_RECORD_MEMBER:
01151 case CTRUMP_LOOP_SUBSCRIPT_RECORD_MEMBER_TERMINAL:
01152 if (s1->u.record_member.member_name != s2->u.record_member.member_name)
01153 return 0;
01154 break;
01155
01156
01157
01158 case CTRUMP_LOOP_SUBSCRIPT_COEF_ARRAYSIZE:
01159 case CTRUMP_LOOP_SUBSCRIPT_COEF_TERMINAL:
01160 case CTRUMP_LOOP_SUBSCRIPT_COEF_SCALE:
01161 case CTRUMP_LOOP_SUBSCRIPT_COEF_CONSTANT:
01162 case CTRUMP_LOOP_SUBSCRIPT_LOAD_RECORD:
01163 break;
01164 }
01165
01166 n = s1->num_index;
01167
01168 for (i=0; i<n; i++) {
01169 if (!ctrump_loop_index_equal(&s1->indices[i],
01170 &s2->indices[i]))
01171 return 0;
01172 }
01173
01174 return 1;
01175 }
01176
01177 int
01178 ctrump_loop_index_hash(const struct ctrump_loop_index *i0)
01179 {
01180 switch (i0->code) {
01181 case CTRUMP_LOOP_INDEX_INDUCTIVE:
01182 return i0->u.iv.var->name->hashval+1;
01183
01184 case CTRUMP_LOOP_INDEX_INVARIANT:
01185 return i0->u.invariant->name->hashval+2;
01186
01187 case CTRUMP_LOOP_INDEX_POINTER_INC:
01188 return (i0->u.ptrinc.iv_level<<10) | (i0->u.ptrinc.incr);
01189 }
01190
01191 ctrump_unreachable("index_hash");
01192 }
01193
01194 int
01195 ctrump_loop_subscript_hash(const struct ctrump_loop_subscript *s0)
01196 {
01197 int i, n;
01198 int v = 0;
01199
01200 switch (s0->code) {
01201 case CTRUMP_LOOP_SUBSCRIPT_RECORD_MEMBER:
01202 v = s0->u.record_member.member_name->hashval + 5;
01203 break;
01204
01205 case CTRUMP_LOOP_SUBSCRIPT_RECORD_MEMBER_TERMINAL:
01206 v = s0->u.record_member.member_name->hashval + 4;
01207 break;
01208
01209 case CTRUMP_LOOP_SUBSCRIPT_COEF_ARRAYSIZE:
01210 v = s0->u.array_size_coef * 7;
01211 break;
01212
01213 case CTRUMP_LOOP_SUBSCRIPT_COEF_TERMINAL:
01214 v = 99;
01215 break;
01216
01217 case CTRUMP_LOOP_SUBSCRIPT_COEF_SCALE:
01218 v = s0->u.scale_array_coef->name->hashval + 9;
01219 break;
01220
01221 case CTRUMP_LOOP_SUBSCRIPT_COEF_CONSTANT:
01222 v = s0->u.array_size_coef + 13;
01223 break;
01224
01225 case CTRUMP_LOOP_SUBSCRIPT_LOAD_RECORD:
01226 v = s0->u.load_record.load_type->id*8;
01227 }
01228
01229 v += s0->offset<<10;
01230 n = s0->num_index;
01231
01232 for (i=0; i<n; i++) {
01233 v += ctrump_loop_index_hash(&s0->indices[i]);
01234 }
01235
01236 return v;
01237 }
01238
01239
01240
01241 typedef void (*parallel_pointer_func_t)(struct ctrump_varray *access_info,
01242 struct ctrump_var *inductive_var,
01243 struct ctrump_mempool *alloc);
01244
01245 static void
01246 append_memory_access(struct ctrump_varray *acc,
01247 struct ctrump_ordered_memory_load_store_node *ordered,
01248 enum ctrump_load_store_t lscode,
01249 struct parallel_access_info *ai,
01250 struct ctrump_mempool *tmp_alloc,
01251 struct ctrump_mempool *loop_alloc,
01252 struct ctrump_intmap *var_info_map,
01253 struct iv_info *iv_info,
01254 int *id)
01255 {
01256 int index = acc->nelem;
01257 struct ctrump_loop_memory_access *ma;
01258
01259 ordered->op_index = index;
01260 ordered->code = lscode;
01261
01262 VA_NEWELEM_P(acc, tmp_alloc);
01263 ma = VA_LAST_PTR(struct ctrump_loop_memory_access, acc);
01264
01265
01266 ma->id = (*id)++;
01267 ma->array.var = ai->invariant;
01268 ma->num_subscript = ai->num_subscript;
01269 ma->subscripts = ai->subscripts;
01270 ma->num_constraint = ai->num_constraint;
01271 ma->constraints = ai->constraints;
01272 ma->at_stmt = ai->at_stmt;
01273 ma->at_expr = ai->at_expr;
01274 }
01275
01276
01280 static void
01281 classify_memop(struct ctrump_loop_info_node *loop_info_node,
01282 struct loop_cfg_node *bbs,
01283 ctrump_bitmap_t is_conditional_bb,
01284 int num_bb,
01285 int all_num_bb_in_cfg,
01286 ctrump_bitmap_t invariant,
01287 ctrump_bitmap_t invariant_on_loop_top,
01288 ctrump_bitmap_t *inductive,
01289 int num_var,
01290 struct ctrump_intmap *var_info_map,
01291 struct iv_info **iv_level,
01292 struct ctrump_loop_exit **exit_level,
01293 const struct ctrump_abi *abi,
01294 struct ctrump_mempool *loop_alloc,
01295 struct ctrump_mempool *tmp_alloc,
01296 int *id)
01297 {
01298 int i,j, n;
01299 int loop_level = loop_info_node->nest_level;
01300 struct iv_info *iv_info = iv_level[loop_info_node->nest_level];
01301 struct ctrump_varray access_info;
01302 struct ctrump_varray random_access, load, store;
01303 struct ctrump_ordered_memory_load_store_node *ordered;
01304 struct parallel_access_info *ai, *a;
01305 ctrump_bitmap_t visited;
01306 struct stack_node {
01307 int i;
01308 struct ctrump_bb *bb;
01309 } *sp;
01310 struct ctrump_bb *exit, *entry;
01311 struct ctrump_varray dfs_stack;
01312 struct ctrump_mempool memop_pool;
01313
01314 ctrump_mempool_init(&memop_pool, 256);
01315
01316 ctrump_varray_init_pool(&access_info, 16, sizeof(struct parallel_access_info), &memop_pool);
01317 ctrump_varray_init_pool(&random_access, 16, sizeof(struct ctrump_random_access), &memop_pool);
01318
01319 visited = zero_bmp(all_num_bb_in_cfg, &memop_pool);
01320 ctrump_varray_init_pool(&dfs_stack, 16, sizeof(struct stack_node), &memop_pool);
01321 VA_NEWELEM_P(&dfs_stack, &memop_pool);
01322 sp = VA_LAST_PTR(struct stack_node, &dfs_stack);
01323 entry = loop_info_node->cfg_info->cond_bb;
01324 sp->bb = entry;
01325 sp->i = 0;
01326 exit = loop_info_node->cfg_info->break_bb;
01327
01328 while (dfs_stack.nelem) {
01329 struct ctrump_bb *bb;
01330
01331 next:
01332 sp = VA_LAST_PTR(struct stack_node, &dfs_stack);
01333 bb = sp->bb;
01334 for (i=sp->i; i<bb->num_succs; i++) {
01335 struct ctrump_bb *suc = bb->succs[i];
01336
01337 if ((! ctrump_bitmap_p(visited,suc->id_in_cfg)) &&
01338 (suc != exit)) {
01339 ctrump_bitmap_set(visited, suc->id_in_cfg);
01340
01341
01342 sp->i = i;
01343
01344
01345 VA_NEWELEM_P(&dfs_stack, &memop_pool);
01346 sp = VA_LAST_PTR(struct stack_node ,&dfs_stack);
01347 sp->i = 0;
01348 sp->bb = suc;
01349 goto next;
01350 }
01351 }
01352
01353 dfs_stack.nelem--;
01354
01355 if (bb->loop_belong_to != loop_info_node->cfg_info)
01356 continue;
01357
01358
01359 struct ctrump_load_store_set *memop = &bb->load_store;
01360 struct ctrump_memory_load *loads = memop->mem_loads;
01361 struct ctrump_memory_store *stores = memop->mem_stores;
01362 struct ctrump_ordered_memory_load_store_node *ord = memop->ordered_load_store;
01363 int n;
01364 int is_cond_bb;
01365 int flags;
01366
01367 if (bb->num_dfs == 1 &&
01368 bb->dfs[0] == entry) {
01369
01370
01371
01372 is_cond_bb = 0;
01373 } else {
01374 is_cond_bb = 1;
01375 }
01376
01377 n = memop->num_memop;
01378 for (j=0; j<n; j++) {
01379 struct ctrump_ordered_memory_load_store_node *n = &ord[j];
01380 struct ctrump_memory_store *as;
01381 struct ctrump_memory_load *al;
01382
01383 switch (n->code) {
01384 case CTRUMP_MEMORY_LOAD:
01385 al = &loads[n->op_index];
01386 recog_array_index(parallel_load,
01387 &access_info, &random_access,
01388 &al->mem_access, invariant, invariant_on_loop_top,
01389 inductive,
01390 al->mem_access.at_stmt, al->mem_access.at_expr,
01391 var_info_map, 0, iv_level,
01392 exit_level, loop_level, abi, &memop_pool, loop_alloc, id);
01393 break;
01394
01395 case CTRUMP_MEMORY_STORE:
01396 as = &stores[n->op_index];
01397
01398 flags = 0;
01399 if (is_cond_bb ||
01400 (as->mem_access.flags & CTRUMP_MEMORY_ACCESS_PARTIAL)) {
01401 flags |= PARALLEL_ACCESS_PARTIAL;
01402 }
01403
01404 if (IS_BIN_OP_ASSIGN(as->store_op)) {
01405 flags |= PARALLEL_ACCESS_OP_ASSIGN;
01406 }
01407
01408 recog_array_index(parallel_store,
01409 &access_info, &random_access,
01410 &as->mem_access,
01411 invariant, invariant_on_loop_top,
01412 inductive, as->mem_access.at_stmt, as->mem_access.at_expr,
01413 var_info_map, flags, iv_level,
01414 exit_level, loop_level, abi, &memop_pool, loop_alloc, id);
01415 break;
01416 }
01417 }
01418 }
01419
01420 loop_info_node->num_random_access = random_access.nelem;
01421 loop_info_node->random_accesses = ctrump_varray_copy(&random_access, loop_alloc);
01422
01423 ctrump_varray_init_pool(&load, access_info.nelem, sizeof(struct ctrump_loop_memory_access), &memop_pool);
01424 ctrump_varray_init_pool(&store, access_info.nelem, sizeof(struct ctrump_loop_memory_access), &memop_pool);
01425 ordered = ctrump_mempool_alloc(loop_alloc, sizeof(*ordered)*access_info.nelem);
01426
01427 ai = access_info.elements;
01428 n = access_info.nelem;
01429
01430 for (i=0; i<n; i++) {
01431 a = &ai[i];
01432
01433 switch (a->type) {
01434 case LOAD:
01435 append_memory_access(&load, &ordered[i], CTRUMP_MEMORY_LOAD,
01436 a, &memop_pool, loop_alloc, var_info_map, iv_info, id);
01437 break;
01438 case STORE:
01439 append_memory_access(&store, &ordered[i], CTRUMP_MEMORY_STORE,
01440 a, &memop_pool, loop_alloc, var_info_map, iv_info, id);
01441 break;
01442 }
01443 }
01444
01445 loop_info_node->num_parallel_load = load.nelem;
01446 loop_info_node->parallel_loads = ctrump_varray_copy(&load, loop_alloc);
01447
01448 loop_info_node->num_parallel_store = store.nelem;
01449 loop_info_node->parallel_stores = ctrump_varray_copy(&store, loop_alloc);
01450
01451 loop_info_node->num_ordered_loadstore = access_info.nelem;
01452 loop_info_node->ordered_loadstore = ordered;
01453
01454 ctrump_mempool_destroy(&memop_pool);
01455 }
01456
01460 struct bb_info {
01461 struct ctrump_bb *bb;
01462 int index;
01463 };
01464
01465 static struct bb_info *
01466 alloc_bb_info(struct ctrump_mempool *alloc,
01467 struct ctrump_bb *bb)
01468 {
01469 struct bb_info *info = ctrump_mempool_alloc(alloc, sizeof(*info));
01470
01471 info->bb = bb;
01472
01473 return info;
01474 }
01475
01480 static int
01481 build_bfs_order(struct ctrump_intmap *ret,
01482 struct ctrump_bb *cond_bb,
01483 struct ctrump_bb *body_bb,
01484 struct loop_cfg_node **bfs_order_ret_ptr,
01485 struct ctrump_mempool *tmp_alloc)
01486 {
01487 struct ctrump_intmap_bucket *bucket;
01488 int found;
01489 struct ctrump_queue queue;
01490 struct ctrump_intmap index_map;
01491 struct bb_info *bb_info;
01492 int index = 0, i, n;
01493 int q_elem;
01494 struct ctrump_varray bfs_order;
01495 struct loop_cfg_node *bfs_order_ret;
01496 struct ctrump_bb **bbs;
01497
01498 ctrump_intmap_init(&index_map, 53);
01499 ctrump_queue_init(&queue, 16, sizeof(struct bb_info*));
01500 ctrump_varray_init(&bfs_order, 16, sizeof(struct ctrump_bb*));
01501
01502 bucket = ctrump_intmap_lookup_add(&index_map, &found, cond_bb->id);
01503
01504 bb_info = alloc_bb_info(tmp_alloc, cond_bb);
01505 bb_info->index = index;
01506 index++;
01507 VA_PUSH(struct ctrump_bb*, &bfs_order, cond_bb);
01508
01509 bucket->val = bb_info;
01510
01511 bb_info = alloc_bb_info(tmp_alloc, body_bb);
01512 QUEUE_PUSH(struct bb_info*, &queue, bb_info);
01513
01514 q_elem = 1;
01515
01516 while (q_elem) {
01517 int i, n;
01518 struct ctrump_bb *bb;
01519
01520 QUEUE_POP(struct bb_info*, &queue, bb_info);
01521 bb = bb_info->bb;
01522 bb_info->index = index;
01523
01524 n = bb->num_succs;
01525 VA_PUSH(struct ctrump_bb*, &bfs_order, bb);
01526
01527 index++;
01528 q_elem--;
01529
01530 if (CTRUMP_BB_IS_LOOP_ENTRY(bb)) {
01531
01532
01533 struct ctrump_bb *succ = bb->loop_belong_to->break_bb;
01534 bucket = ctrump_intmap_lookup_add(&index_map, &found, succ->id);
01535 if (!found) {
01536 bb_info = alloc_bb_info(tmp_alloc, succ);
01537 QUEUE_PUSH(struct bb_info*, &queue, bb_info);
01538 bucket->val = bb_info;
01539 q_elem++;
01540 }
01541 } else {
01542 for (i=0; i<n; i++) {
01543 struct ctrump_bb *succ = bb->succs[i];
01544 bucket = ctrump_intmap_lookup_add(&index_map, &found, succ->id);
01545 if (!found) {
01546 bb_info = alloc_bb_info(tmp_alloc, succ);
01547 QUEUE_PUSH(struct bb_info*, &queue, bb_info);
01548 bucket->val = bb_info;
01549 q_elem++;
01550 }
01551 }
01552 }
01553 }
01554
01555 ctrump_intmap_resize_pool(ret, &index_map, tmp_alloc);
01556 ctrump_intmap_free(&index_map);
01557 ctrump_queue_destroy(&queue);
01558
01559 n = bfs_order.nelem;
01560 bbs = bfs_order.elements;
01561 bfs_order_ret = ctrump_mempool_alloc(tmp_alloc, sizeof(*ret)*n);
01562
01563 for (i=0; i<n; i++) {
01564 if (bbs[i] != cond_bb && CTRUMP_BB_IS_LOOP_ENTRY(bbs[i])) {
01565
01566 bfs_order_ret[i].code = LOOP_CFG_NODE_INNER_LOOP;
01567 bfs_order_ret[i].u.loop = bbs[i]->loop_belong_to->u.loop_info_node;
01568 } else {
01569 bfs_order_ret[i].code = LOOP_CFG_NODE_BB;
01570 bfs_order_ret[i].u.bb = bbs[i];
01571 }
01572 }
01573
01574 *bfs_order_ret_ptr = bfs_order_ret;
01575 ctrump_varray_discard(&bfs_order);
01576
01577 return n;
01578 }
01579
01580 static int
01581 get_var_id(struct ctrump_var *var,
01582 struct ctrump_intmap *var_info_map)
01583 {
01584 struct ctrump_cfg_var_info *vi;
01585 int found;
01586 vi = ctrump_intmap_lookup(var_info_map, &found, var->id);
01587
01588 return vi->index;
01589 }
01590
01594 static int
01595 is_invariant_expr(struct ctrump_expr *expr,
01596 ctrump_bitmap_t invariant,
01597 struct ctrump_intmap *var_info_map)
01598 {
01599 struct ctrump_var *v;
01600
01601 switch (expr->code) {
01602 case CTRUMP_EXPR_VARREF:
01603 v = expr->u.varref.var;
01604 return ctrump_bitmap_p(invariant, get_var_id(v, var_info_map));
01605
01606 case CTRUMP_CASE_CONSTANT_TERM:
01607 return 1;
01608
01609 case CTRUMP_CASE_BIN_ARITH_EXPR:
01610 case CTRUMP_CASE_BIN_LOG_EXPR:
01611 return is_invariant_expr(expr->u.binary.lhs,invariant,var_info_map) &&
01612 is_invariant_expr(expr->u.binary.rhs,invariant,var_info_map);
01613
01614 default:
01615 break;
01616 }
01617
01618 return 0;
01619 }
01620
01624 static void
01625 recog_exit_cond_cmp(struct ctrump_loop_exit *loop_exit,
01626 struct ctrump_expr *lhs,
01627 struct ctrump_expr *rhs,
01628 ctrump_bitmap_t inductive,
01629 ctrump_bitmap_t invariant,
01630 struct ctrump_intmap *var_info_map,
01631 struct iv_info *iv,
01632 enum ctrump_expr_code cmp_op)
01633 {
01634 int is_bound_invariant;
01635 int ind_var_info_id;
01636 struct ctrump_var *ind_var;
01637
01638 if (lhs->code != CTRUMP_EXPR_VARREF) {
01639 goto unpredicatable;
01640 }
01641
01642 ind_var = lhs->u.varref.var;
01643 ind_var_info_id = get_var_id(ind_var, var_info_map);
01644
01645 if (! BMP_P(inductive, ind_var_info_id)) {
01646 goto unpredicatable;
01647 }
01648
01649 is_bound_invariant = is_invariant_expr(rhs, invariant, var_info_map);
01650 if (! is_bound_invariant) {
01651 goto unpredicatable;
01652 }
01653
01654
01655 loop_exit->code = CTRUMP_LOOP_COUNT_PREDICTABLE;
01656 loop_exit->u.pred.cmp_op = cmp_op;
01657 loop_exit->u.pred.incr = iv[ind_var_info_id].incr;
01658 loop_exit->u.pred.inductive = ind_var;
01659 loop_exit->u.pred.iv_loop_entry_value = iv[ind_var_info_id].reach_at_loop_entry;
01660 loop_exit->u.pred.bound = rhs;
01661 return;
01662
01663 unpredicatable:
01664 loop_exit->code = CTRUMP_LOOP_COUNT_UNPREDICTABLE;
01665 }
01666
01670 static void
01671 recog_exit_cond(struct ctrump_loop_exit *loop_exit,
01672 struct ctrump_expr *cond_expr,
01673 ctrump_bitmap_t inductive,
01674 ctrump_bitmap_t invariant,
01675 struct ctrump_intmap *var_info_map,
01676 struct iv_info *iv_info)
01677 {
01678 switch (cond_expr->code) {
01679 case CTRUMP_EXPR_BIN_GT:
01680 case CTRUMP_EXPR_BIN_GE:
01681 case CTRUMP_EXPR_BIN_LT:
01682 case CTRUMP_EXPR_BIN_LE:
01683 case CTRUMP_EXPR_BIN_EQ:
01684 recog_exit_cond_cmp(loop_exit,
01685 cond_expr->u.binary.lhs, cond_expr->u.binary.rhs,
01686 inductive, invariant, var_info_map, iv_info, cond_expr->code);
01687 break;
01688
01689 default:
01690 loop_exit->code = CTRUMP_LOOP_COUNT_UNPREDICTABLE;
01691 break;
01692 }
01693 }
01694
01698 struct analyze_loop_node_info {
01699 ctrump_bitmap_t is_conditional_bb;
01700 ctrump_bitmap_t tmp_bmp;
01701 int num_bb;
01702 struct loop_cfg_node *bfs_order;
01703 struct iv_info *iv_info;
01704 };
01705
01709 static void
01710 init_analyze_loop_node_info(struct analyze_loop_node_info *dest,
01711 struct ctrump_cfg *cfg,
01712 struct ctrump_loop_info_node *node,
01713 struct ctrump_mempool *loop_alloc,
01714 struct ctrump_mempool *tmp_alloc)
01715
01716 {
01717 struct ctrump_loop_cfg_info *loop_cfg_info = node->cfg_info;
01718 struct ctrump_intmap bb_info_map;
01719 struct loop_cfg_node *bfs_order;
01720 int num_bb, num_var = cfg->num_var;
01721 ctrump_bitmap_t use, modify, is_conditional_bb;
01722 int i;
01723 struct iv_info *iv_info;
01724
01725 is_conditional_bb = zero_bmp(num_var, tmp_alloc);
01726 num_bb = build_bfs_order(&bb_info_map,
01727 loop_cfg_info->cond_bb,
01728 loop_cfg_info->body_start_bb, &bfs_order,
01729 tmp_alloc);
01730
01731
01732
01733
01734
01735
01736
01737
01738 use = zero_bmp(num_var, loop_alloc);
01739 modify = zero_bmp(num_var, loop_alloc);
01740
01741 iv_info = ctrump_mempool_alloc(tmp_alloc, sizeof(struct iv_info)*num_var);
01742 for (i=0; i<num_var; i++) {
01743 iv_info[i].incr = 0;
01744 }
01745
01746
01747 for (i=0; i<num_bb; i++) {
01748 struct loop_cfg_node *node = &bfs_order[i];
01749 struct ctrump_bb *bb;
01750
01751 switch (node->code) {
01752 case LOOP_CFG_NODE_INNER_LOOP:
01753 ctrump_bitmap_or(use, use, node->u.loop->use, num_var);
01754 ctrump_bitmap_or(modify, modify, node->u.loop->modify, num_var);
01755 break;
01756
01757 case LOOP_CFG_NODE_BB:
01758 bb = node->u.bb;
01759 ctrump_bitmap_or(use, use, bb->use, num_var);
01760 ctrump_bitmap_or(modify, modify, bb->kill, num_var);
01761 if (bb->num_dfs == 1 &&
01762 bb->dfs[0] == loop_cfg_info->cond_bb) {
01763
01764
01765
01766 } else {
01767 ctrump_bitmap_set(is_conditional_bb, i);
01768 }
01769 break;
01770 }
01771 }
01772
01773 dest->num_bb = num_bb;
01774 dest->bfs_order = bfs_order;
01775
01776 dest->is_conditional_bb = is_conditional_bb;
01777 dest->iv_info = iv_info;
01778 dest->tmp_bmp = zero_bmp(num_var, tmp_alloc);
01779
01780 node->use = use;
01781 node->modify = modify;
01782
01783 loop_cfg_info->u.loop_info_node = node;
01784 loop_cfg_info->cond_bb->loop_belong_to = loop_cfg_info;
01785
01786 loop_cfg_info->code = CTRUMP_LOOP;
01787 }
01788
01789
01793 static int
01794 ctrump_find_iv_node(struct ctrump_loop_info *loop_info,
01795 struct ctrump_loop_info_node *loop_info_node,
01796 struct analyze_loop_node_info *analyze_info,
01797 struct ctrump_cfg *cfg,
01798 struct ctrump_intmap *var_info_map,
01799 struct ctrump_mempool *loop_alloc,
01800 struct ctrump_mempool *tmp_alloc,
01801 int id)
01802 {
01803 struct ctrump_loop_cfg_info *loop_cfg_info = loop_info_node->cfg_info;
01804 struct loop_cfg_node *bfs_order;
01805 int num_bb, num_var = cfg->num_var;
01806 ctrump_bitmap_t invariant, use, modify, inductive, is_conditional_bb;
01807 ctrump_bitmap_t modified_and_live_after_loop, carry_dep;
01808 ctrump_bitmap_t tmp_bmp;
01809 struct iv_info *iv_info;
01810
01811 num_bb = analyze_info->num_bb;
01812 bfs_order = analyze_info->bfs_order;
01813 tmp_bmp = analyze_info->tmp_bmp;
01814 is_conditional_bb = analyze_info->is_conditional_bb;
01815 use = loop_info_node->use;
01816 modify = loop_info_node->modify;
01817
01818 iv_info = analyze_info->iv_info;
01819
01820 carry_dep = zero_bmp(num_var, loop_alloc);
01821 inductive = zero_bmp(num_var, loop_alloc);
01822 invariant = zero_bmp(num_var, loop_alloc);
01823
01824 modified_and_live_after_loop = zero_bmp(num_var, loop_alloc);
01825
01826 find_invariant(cfg, invariant, use, modify);
01827 find_iv(inductive, bfs_order, loop_cfg_info,
01828 is_conditional_bb, num_bb, num_var,
01829 iv_info, var_info_map);
01830
01831 ctrump_bitmap_copy(tmp_bmp, inductive, num_var);
01832
01833 find_loop_carry_dependency(modified_and_live_after_loop,
01834 carry_dep,
01835 loop_cfg_info->cond_bb,
01836 loop_cfg_info->break_bb,
01837 num_bb,
01838 use, modify,
01839 inductive,
01840 num_var);
01841
01842 loop_info_node->num_var = num_var;
01843 loop_info_node->inductive_var = inductive;
01844 loop_info_node->invariant_var = invariant;
01845 loop_info_node->modified_and_live_after_loop = modified_and_live_after_loop;
01846 loop_info_node->carry_dep = carry_dep;
01847
01848 return id;
01849 }
01850
01854 static int
01855 assign_nest_level(struct ctrump_loop_info_node *loop_tree_node,
01856 int max_nest_level,
01857 int nest_level)
01858 {
01859 int i, n;
01860 loop_tree_node->nest_level = nest_level;
01861
01862 n = loop_tree_node->num_child;
01863 for (i=0; i<n; i++) {
01864 max_nest_level = assign_nest_level(loop_tree_node->children[i],
01865 max_nest_level,
01866 nest_level+1);
01867 loop_tree_node->children[i]->parent = loop_tree_node;
01868 }
01869
01870 if (nest_level > max_nest_level) {
01871 return nest_level;
01872 }
01873 return max_nest_level;
01874 }
01875
01879 static int
01880 analyze_loop_iv(struct ctrump_loop_info *dest,
01881 struct ctrump_loop_info_node **dfs_order,
01882 int num_node,
01883 struct analyze_loop_node_info *analyze_info,
01884 struct ctrump_cfg *cfg,
01885 struct ctrump_intmap *var_info_map,
01886 struct ctrump_mempool *loop_alloc,
01887 struct ctrump_mempool *tmp_alloc,
01888 int id)
01889 {
01890 int i;
01891
01892 for (i=0; i<num_node; i++) {
01893 id = ctrump_find_iv_node(dest,
01894 dfs_order[i],
01895 &analyze_info[i],
01896 cfg, var_info_map, loop_alloc, tmp_alloc, id);
01897 }
01898
01899 return id;
01900 }
01901
01902
01906 static void
01907 analyze_memory_access_level(struct ctrump_loop_info *dest,
01908 struct ctrump_loop_info_node *loop_tree_node,
01909 struct analyze_loop_node_info *analyze_info,
01910 struct ctrump_cfg *cfg,
01911 struct ctrump_intmap *var_info_map,
01912 const struct ctrump_abi *abi,
01913 struct ctrump_mempool *loop_alloc,
01914 struct ctrump_mempool *tmp_alloc,
01915 struct iv_info **iv_level,
01916 ctrump_bitmap_t *ivbmp_level,
01917 ctrump_bitmap_t invariant_on_loop_top,
01918 struct ctrump_loop_exit **exit_level,
01919 int *id)
01920 {
01921 int i, n;
01922 int level = loop_tree_node->nest_level;
01923 struct analyze_loop_node_info *self_info;
01924 self_info = &analyze_info[loop_tree_node->dfs_order];
01925 n = loop_tree_node->num_child;
01926 iv_level[level] = self_info->iv_info;
01927 ivbmp_level[level] = loop_tree_node->inductive_var;
01928 exit_level[level] = &loop_tree_node->exit_info;
01929
01930 for (i=0; i<n; i++) {
01931 struct ctrump_loop_info_node *child = loop_tree_node->children[i];
01932 analyze_memory_access_level(dest, child,
01933 analyze_info,
01934 cfg, var_info_map, abi,
01935 loop_alloc, tmp_alloc, iv_level,
01936 ivbmp_level, invariant_on_loop_top, exit_level, id);
01937 }
01938
01939 classify_memop(loop_tree_node, self_info->bfs_order,
01940 self_info->is_conditional_bb,
01941 self_info->num_bb,
01942 cfg->num_bb,
01943 loop_tree_node->invariant_var,
01944 invariant_on_loop_top,
01945 ivbmp_level,
01946 cfg->num_var,
01947 var_info_map,
01948 iv_level, exit_level, abi, loop_alloc, tmp_alloc, id);
01949 }
01950
01954 static void
01955 analyze_memory_access(struct ctrump_loop_info *dest,
01956 struct ctrump_loop_info_node *loop_tree_node,
01957 struct analyze_loop_node_info *analyze_info,
01958 struct ctrump_cfg *cfg,
01959 struct ctrump_intmap *var_info_map,
01960 const struct ctrump_abi *abi,
01961 struct ctrump_mempool *loop_alloc,
01962 struct ctrump_mempool *tmp_alloc,
01963 int *id)
01964 {
01965 int max_level = dest->nest_level;
01966 struct iv_info **iv_level = malloc(sizeof(struct iv_info*)*max_level);
01967 ctrump_bitmap_t *iv_bitmap = malloc(sizeof(ctrump_bitmap_t)*max_level);
01968 struct ctrump_loop_exit **exit_level = malloc(sizeof(struct ctrump_loop_exit)*max_level);
01969 ctrump_bitmap_t invariant_on_loop_top = loop_tree_node->invariant_var;
01970 analyze_memory_access_level(dest, loop_tree_node,
01971 analyze_info, cfg, var_info_map,
01972 abi, loop_alloc, tmp_alloc, iv_level, iv_bitmap, invariant_on_loop_top, exit_level, id);
01973 free(iv_level);
01974 }
01975
01976
01980 static int
01981 count_loop_node(struct ctrump_loop_info_node *node, int dfs_order_index)
01982 {
01983 int i,n = node->num_child;
01984
01985 for (i=0; i<n; i++) {
01986 dfs_order_index = count_loop_node(node->children[i], dfs_order_index);
01987 }
01988
01989 node->dfs_order = dfs_order_index;
01990
01991 return dfs_order_index + 1;
01992 }
01993
01997 static int
01998 build_dfs_order(struct ctrump_loop_info_node **dfs_order, struct ctrump_loop_info_node *node,
01999 int dfs_order_index)
02000 {
02001 int i, n = node->num_child;
02002 for (i=0; i<n; i++) {
02003 dfs_order_index = build_dfs_order(dfs_order, node->children[i], dfs_order_index);
02004 }
02005 dfs_order[dfs_order_index] = node;
02006
02007 return dfs_order_index+1;
02008 }
02009
02010
02014 static void
02015 init_analyze_info(struct analyze_loop_node_info *info,
02016 struct ctrump_loop_info_node **node_dfs_order,
02017 int num_node,
02018 struct ctrump_cfg *cfg,
02019 struct ctrump_mempool *loop_alloc,
02020 struct ctrump_mempool *tmp_alloc)
02021 {
02022 int i;
02023
02024 for (i=0; i<num_node; i++) {
02025 init_analyze_loop_node_info(&info[i], cfg, node_dfs_order[i],
02026 loop_alloc, tmp_alloc);
02027 }
02028 }
02029
02033 static void
02034 find_loop_entry(struct ctrump_loop_info_node **nodes,
02035 int num_node)
02036 {
02037 int i, j;
02038 for (i=0; i<num_node; i++) {
02039 struct ctrump_loop_info_node *node = nodes[i];
02040 struct ctrump_loop_cfg_info *cfg_info = node->cfg_info;
02041 struct ctrump_bb *cond_bb = cfg_info->cond_bb;
02042 int num_pred = cond_bb->num_preds;
02043 struct ctrump_bb **preds = cond_bb->preds;
02044
02045 int bb_before_loop = -1;
02046
02047 for (j=0; j<num_pred; j++) {
02048 struct ctrump_bb *p = preds[j];
02049
02050 if ((p->num_dfs == 1) &&
02051 (p->dfs[0] == cond_bb)) {
02052
02053 } else {
02054
02055 assert(bb_before_loop == -1);
02056
02057 bb_before_loop = j;
02058 }
02059 }
02060
02061
02062 assert(bb_before_loop != -1);
02063 }
02064 }
02065
02069 static void
02070 iv_canonicalize(struct ctrump_loop_info_node **dfs_order,
02071 struct analyze_loop_node_info *info,
02072 int num_node)
02073 {
02074 int i;
02075 int change = 0;
02076 do {
02077 for (i=0; i<num_node; i++) {
02078
02079 }
02080 } while (change);
02081 }
02082
02083
02087 static void
02088 loop_exit_analyze(struct ctrump_loop_info_node **nodes,
02089 struct analyze_loop_node_info *info,
02090 int num_node,
02091 struct ctrump_intmap *var_info_map)
02092 {
02093 int i;
02094 for (i=0; i<num_node; i++) {
02095 struct ctrump_loop_info_node *node = nodes[i];
02096
02097 recog_exit_cond(&node->exit_info,
02098 node->cfg_info->exit_cond,
02099 node->inductive_var, node->invariant_var,
02100 var_info_map, info[i].iv_info);
02101 }
02102 }
02103
02107 struct recog_reduction_info {
02108 enum ctrump_reductive_expr_code code;
02109 struct ctrump_stmt *at;
02110 };
02111
02116 static void
02117 merge_reduc_op(ctrump_bitmap_t is_reduc,
02118 ctrump_bitmap_t is_initial,
02119 struct recog_reduction_info *reduc_list,
02120 int var_idx,
02121 enum ctrump_reductive_expr_code merge_op,
02122 struct ctrump_stmt *at)
02123 {
02124 if (ctrump_bitmap_p(is_reduc, var_idx)) {
02125 if (ctrump_bitmap_p(is_initial, var_idx)) {
02126
02127 ctrump_bitmap_clear(is_initial, var_idx);
02128 reduc_list[var_idx].code = merge_op;
02129 reduc_list[var_idx].at = at;
02130 } else {
02131 if (reduc_list[var_idx].code != merge_op) {
02132 ctrump_bitmap_clear(is_reduc, var_idx);
02133 }
02134 }
02135 }
02136 }
02137
02138 static enum ctrump_reductive_expr_code incr_op_to_reduc_op[] = {
02139 [CTRUMP_PDG_INC] = CTRUMP_REDUCTIVE_ADD,
02140 [CTRUMP_PDG_DEC] = CTRUMP_REDUCTIVE_SUB,
02141 [CTRUMP_PDG_FINC] = CTRUMP_REDUCTIVE_FADD,
02142 [CTRUMP_PDG_FDEC] = CTRUMP_REDUCTIVE_FSUB,
02143 };
02144
02148 static void
02149 recog_reduction_domtree(struct ctrump_bb *bb,
02150 struct ctrump_bb *exit_bb,
02151 struct ctrump_intmap *var_info_map,
02152 int num_var,
02153 ctrump_bitmap_t is_reduc,
02154 ctrump_bitmap_t is_initial,
02155 struct recog_reduction_info *reduc_list)
02156 {
02157 int i, n = bb->num_dom_children;
02158 struct ctrump_var_store *st;
02159 for (i=0; i<n; i++) {
02160 struct ctrump_bb *c = bb->dom_children[i];
02161
02162 if (c != exit_bb) {
02163 recog_reduction_domtree(c, exit_bb, var_info_map, num_var,
02164 is_reduc, is_initial, reduc_list);
02165 }
02166 }
02167
02168 n = bb->load_store.num_store;
02169 st = bb->load_store.stores;
02170 for (i=0; i<n; i++) {
02171 struct ctrump_var_store *s = &st[i];
02172 struct ctrump_pdg_node *store_val = s->new_val;
02173 struct ctrump_var *var = s->var;
02174 int found;
02175 struct ctrump_cfg_var_info *vi = ctrump_intmap_lookup(var_info_map, &found, var->id);
02176 int var_idx = vi->index;
02177
02178 assert(found);
02179
02180 switch (store_val->code) {
02181 case CTRUMP_PDG_VALUE_REDUCTION:
02182 merge_reduc_op(is_reduc, is_initial, reduc_list,
02183 var_idx, store_val->u.reduction.op, store_val->at);
02184 break;
02185
02186 case CTRUMP_PDG_VALUE_INCREMENTAL:
02187 merge_reduc_op(is_reduc, is_initial, reduc_list,
02188 var_idx, incr_op_to_reduc_op[store_val->u.incr.op], store_val->at);
02189 break;
02190
02191 default:
02192
02193 ctrump_bitmap_clear(is_reduc, var_idx);
02194 break;
02195 }
02196 }
02197 }
02198
02202 static void
02203 recog_reduction(struct ctrump_loop_info_node **loops,
02204 int num_loop_node,
02205 struct analyze_loop_node_info *info_list,
02206 struct ctrump_intmap *var_info_map,
02207 struct ctrump_cfg_var_info *var_info_list,
02208 int num_var,
02209 struct ctrump_mempool *loop_alloc,
02210 struct ctrump_mempool *tmp_alloc)
02211 {
02212 int i, j;
02213
02214 ctrump_bitmap_t is_reduc = ctrump_bmp_alloc(num_var, tmp_alloc);
02215 ctrump_bitmap_t is_initial = ctrump_bmp_alloc(num_var, tmp_alloc);
02216 struct recog_reduction_info *reduc_list = ctrump_mempool_alloc(tmp_alloc, num_var*sizeof(*reduc_list));
02217
02218 for (i=0; i<num_loop_node; i++) {
02219 struct ctrump_loop_info_node *n = loops[i];
02220 int num_reduc;
02221
02222 ctrump_bitmap_set_all(is_reduc, num_var);
02223 ctrump_bitmap_set_all(is_initial, num_var);
02224
02225 recog_reduction_domtree(n->cfg_info->cond_bb, n->cfg_info->break_bb,
02226 var_info_map, num_var, is_reduc, is_initial, reduc_list);
02227
02228
02229 ctrump_bitmap_nand(is_reduc, is_reduc, is_initial, num_var);
02230
02231
02232 ctrump_bitmap_and(is_reduc, is_reduc, n->modified_and_live_after_loop, num_var);
02233
02234 num_reduc = ctrump_bitmap_popcnt(is_reduc, num_var);
02235
02236
02237 ctrump_bitmap_nand(n->carry_dep, n->carry_dep, is_reduc, num_var);
02238
02239 n->num_reduc_op = num_reduc;
02240 n->reductions = ctrump_mempool_alloc(loop_alloc, num_reduc*sizeof(*n->reductions));
02241
02242 for (j=0; j<num_reduc; j++) {
02243 int pos = ctrump_bitmap_ffs(is_reduc, num_var);
02244 assert(pos >= 0);
02245
02246 ctrump_bitmap_clear(is_reduc, pos);
02247
02248 n->reductions[j].op = reduc_list[pos].code;
02249 n->reductions[j].var = var_info_list[pos].var;
02250 n->reductions[j].at = reduc_list[pos].at;
02251 }
02252 }
02253 }
02254
02255 int
02256 ctrump_analyze_loop(struct ctrump_loop_info *dest,
02257 int *level_depth,
02258 struct ctrump_loop_info_node *loop_tree_node,
02259 struct ctrump_cfg *cfg,
02260 struct ctrump_intmap *var_info_map,
02261 const struct ctrump_abi *abi,
02262 struct ctrump_mempool *loop_alloc,
02263 int id)
02264 {
02265 struct ctrump_mempool tmp_alloc;
02266 struct analyze_loop_node_info *analyze_info_dfs;
02267 int num_loop_node;
02268 struct ctrump_loop_info_node **dfs_order;
02269
02270 ctrump_mempool_init(&tmp_alloc, 1024);
02271
02272 num_loop_node = count_loop_node(loop_tree_node, 0);
02273 dfs_order = ctrump_mempool_alloc(&tmp_alloc, num_loop_node*sizeof(*dfs_order));
02274 build_dfs_order(dfs_order, loop_tree_node, 0);
02275 analyze_info_dfs = ctrump_mempool_alloc(&tmp_alloc, num_loop_node*sizeof(*analyze_info_dfs));
02276
02277 init_analyze_info(analyze_info_dfs, dfs_order, num_loop_node, cfg, loop_alloc, &tmp_alloc);
02278
02279 *level_depth = assign_nest_level(loop_tree_node, 0, 0) + 1;
02280 find_loop_entry(dfs_order, num_loop_node);
02281
02282 id = analyze_loop_iv(dest, dfs_order, num_loop_node, analyze_info_dfs, cfg, var_info_map, loop_alloc, &tmp_alloc, id);
02283 iv_canonicalize(dfs_order, analyze_info_dfs, num_loop_node);
02284 loop_exit_analyze(dfs_order, analyze_info_dfs, num_loop_node, var_info_map);
02285
02286 analyze_memory_access(dest, loop_tree_node, analyze_info_dfs, cfg, var_info_map, abi, loop_alloc, &tmp_alloc, &id);
02287
02288 recog_reduction(dfs_order, num_loop_node, analyze_info_dfs, var_info_map, cfg->var_info, cfg->num_var, loop_alloc, &tmp_alloc);
02289
02290 ctrump_mempool_destroy(&tmp_alloc);
02291
02292 ctrump_get_loop_depinfo(dest, &id, loop_alloc);
02293
02294 return id;
02295 }