kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 572b0bbeeaeb5e30e79c8fcb5cdacd298a1de11a
parent 5b2748e99c18922f08cc6fcb285526a119f441f9
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Thu, 21 May 2026 12:53:55 -0700

Implement scalar GVN

Diffstat:
Mdoc/OPT.md | 4++--
Msrc/opt/pass_combine.c | 39++++++---------------------------------
Msrc/opt/pass_hard_live.c | 6++++++
Msrc/opt/pass_o2.c | 716++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
Mtest/opt/opt_test.c | 22+++++++++++-----------
5 files changed, 740 insertions(+), 47 deletions(-)

diff --git a/doc/OPT.md b/doc/OPT.md @@ -662,7 +662,7 @@ Implement in order: 2. [x] `opt_copy_cleanup` 3. [x] `opt_block_cloning` 4. [x] `opt_addr_xform` -5. [ ] scalar-only `opt_gvn` +5. [x] scalar-only `opt_gvn` 6. [ ] `opt_copy_prop` 7. [ ] memory-aware `opt_gvn` / redundant load elimination design and tests 8. [ ] `opt_dse` @@ -702,7 +702,7 @@ Completed pre-GVN slice exit criteria: Remaining Phase D exit criteria: -- [ ] Scalar GVN pass-local dump tests prove redundant pure expressions are +- [x] Scalar GVN pass-local dump tests prove redundant pure expressions are replaced and constant branches fold. - [ ] Memory numbering and alias invalidation are documented before any memory GVN or redundant-load rewrite lands. diff --git a/src/opt/pass_combine.c b/src/opt/pass_combine.c @@ -242,6 +242,9 @@ static int inst_defines_phys_reg(const Inst* in, const Operand* r) { if (!aux) return 0; for (u32 i = 0; i < aux->nout; ++i) if (same_phys_reg(&aux->out_ops[i], r)) return 1; + if (r->cls < OPT_REG_CLASSES && r->v.reg < 32 && + (aux->clobber_mask[r->cls] & (1u << r->v.reg))) + return 1; return 0; } case IR_INTRINSIC: { @@ -315,15 +318,6 @@ static int binop_is_commutative(BinOp op) { } } -static int no_intervening_phys_access(Block* bl, u32 first, u32 last, - const Operand* r) { - for (u32 i = first; i < last; ++i) { - Inst* in = &bl->insts[i]; - if (inst_uses_phys_reg(in, r) || inst_defines_phys_reg(in, r)) return 0; - } - return 1; -} - static int retarget_producer_legal(Inst* producer, const Operand* copy_dst, int* swap_binop) { *swap_binop = 0; @@ -458,30 +452,9 @@ static void opt_combine_fold_block(Func* f, Block* bl, } } - if (f->opt_rewritten && - ((IROp)in->op == IR_BINOP || (IROp)in->op == IR_UNOP) && - in->nopnds >= 1 && in->opnds[0].kind == OPK_REG && - find_single_direct_use(f, bl, hard_live, i, &in->opnds[0], NULL, 0, 0, - 0, &use_i, &op_i)) { - Inst* copy = &bl->insts[use_i]; - int swap_binop = 0; - if ((IROp)copy->op == IR_COPY && op_i == 1 && copy->nopnds >= 2 && - copy->opnds[0].kind == OPK_REG && - same_phys_reg(&copy->opnds[1], &in->opnds[0]) && - !same_phys_reg(&copy->opnds[0], &in->opnds[0]) && - no_intervening_phys_access(bl, i + 1u, use_i, &copy->opnds[0]) && - retarget_producer_legal(in, &copy->opnds[0], &swap_binop)) { - if (swap_binop) { - Operand tmp = in->opnds[1]; - in->opnds[1] = in->opnds[2]; - in->opnds[2] = tmp; - } - in->opnds[0] = copy->opnds[0]; - copy->opnds[1] = copy->opnds[0]; - continue; - } - } - + /* Producer-to-copy retargeting is unsafe after rewrite: hard registers are + * mutable storage, and the original producer register may remain live into + * successor blocks even when the local copy looks like the only use. */ if ((IROp)in->op == IR_COPY && in->nopnds >= 2 && in->opnds[0].kind == OPK_REG && in->opnds[1].kind == OPK_REG && !same_phys_reg(&in->opnds[0], &in->opnds[1]) && diff --git a/src/opt/pass_hard_live.c b/src/opt/pass_hard_live.c @@ -64,6 +64,11 @@ static void hard_def_operand(OptHardRegSet* s, const Operand* op) { if (op && op->kind == OPK_REG) hard_add(s, op->cls, op->v.reg); } +static void hard_def_clobber_mask(OptHardRegSet* s, const u32* masks) { + if (!masks) return; + for (u32 c = 0; c < OPT_REG_CLASSES; ++c) s->cls[c] |= masks[c]; +} + static void hard_use_abivalue(OptHardRegSet* use, const CGABIValue* v) { if (!v) return; hard_use_operand(use, &v->storage); @@ -187,6 +192,7 @@ void opt_hard_inst_use_def(Func* f, const Inst* in, OptHardRegSet* use, for (u32 i = 0; i < aux->nin; ++i) hard_use_operand(use, &aux->in_ops[i]); for (u32 i = 0; i < aux->nout; ++i) hard_def_operand(def, &aux->out_ops[i]); + hard_def_clobber_mask(def, aux->clobber_mask); break; } case IR_INTRINSIC: { diff --git a/src/opt/pass_o2.c b/src/opt/pass_o2.c @@ -1,17 +1,73 @@ #include <string.h> +#include <cfree/cg.h> + #include "opt/opt_internal.h" #define OPT_BLOCK_NONE 0xffffffffu #define OPT_CLONE_MAX_BLOCK_INSTS 4u #define OPT_CLONE_MAX_PREDS 4u #define OPT_CLONE_ABS_GROWTH_LIMIT 32u +#define GVN_ENTRY_NONE 0xffffffffu typedef struct CloneValMap { Val old_val; Val new_val; } CloneValMap; +typedef struct GvnConst { + i64 value; + u8 valid; +} GvnConst; + +typedef struct GvnOperandKey { + u8 kind; /* OPK_REG or OPK_IMM */ + u8 cls; + u16 pad; + CfreeCgTypeId type; + union { + Val reg; + i64 imm; + } v; +} GvnOperandKey; + +typedef struct GvnKey { + u16 op; + u16 nops; + CfreeCgTypeId type; + u8 cls; + u8 pad[3]; + i64 tag; + GvnOperandKey ops[2]; +} GvnKey; + +typedef struct GvnEntry { + GvnKey key; + Val val; + u32 block; + u32 inst; + u32 next; +} GvnEntry; + +typedef struct GvnTable { + Func* f; + u32* buckets; + u32 nbuckets; + GvnEntry* entries; + u32 nentries; + u32 cap; +} GvnTable; + +typedef struct GvnCtx { + Func* f; + OptAnalysis* analysis; + Val* parent; + GvnConst* constants; + GvnTable table; + int changed; + int cfg_changed; +} GvnCtx; + static u32 opt_inst_count(Func* f) { u32 n = 0; for (u32 b = 0; b < f->nblocks; ++b) n += f->blocks[b].ninsts; @@ -372,8 +428,666 @@ void opt_addr_xform(Func* f) { opt_rebuild_def_use(f); } +static u64 gvn_width_mask(u32 width) { + if (width >= 64u) return ~0ull; + return (1ull << width) - 1ull; +} + +static u64 gvn_mask_width(u64 v, u32 width) { + return v & gvn_width_mask(width); +} + +static i64 gvn_sign_extend_width(u64 v, u32 width) { + v = gvn_mask_width(v, width); + if (!width || width >= 64u) return (i64)v; + u64 sign = 1ull << (width - 1u); + return (i64)((v ^ sign) - sign); +} + +static int gvn_int_like_width(Func* f, CfreeCgTypeId ty, u32* width_out) { + u32 width = cfree_cg_type_int_width((CfreeCompiler*)f->c, ty); + if (!width || width > 64u) return 0; + *width_out = width; + return 1; +} + +static i64 gvn_fold_result(u64 v, u32 width) { + return (i64)gvn_mask_width(v, width); +} + +static int gvn_fold_binop(Func* f, BinOp op, CfreeCgTypeId ty, i64 a, i64 b, + i64* out) { + u32 width; + u64 ua, ub, r; + if (!gvn_int_like_width(f, ty, &width)) return 0; + ua = gvn_mask_width((u64)a, width); + ub = gvn_mask_width((u64)b, width); + r = 0; + switch (op) { + case BO_IADD: + r = ua + ub; + break; + case BO_ISUB: + r = ua - ub; + break; + case BO_IMUL: + r = ua * ub; + break; + case BO_AND: + r = ua & ub; + break; + case BO_OR: + r = ua | ub; + break; + case BO_XOR: + r = ua ^ ub; + break; + case BO_SHL: + r = ua << (u32)(ub & (u64)(width - 1u)); + break; + case BO_SHR_U: + r = ua >> (u32)(ub & (u64)(width - 1u)); + break; + case BO_SHR_S: { + u32 sh = (u32)(ub & (u64)(width - 1u)); + if (!sh) { + r = ua; + } else { + u64 sign = 1ull << (width - 1u); + r = ua >> sh; + if (ua & sign) r |= gvn_width_mask(width) << (width - sh); + } + break; + } + default: + return 0; + } + *out = gvn_fold_result(r, width); + return 1; +} + +static int gvn_fold_unop(Func* f, UnOp op, CfreeCgTypeId ty, i64 a, + i64* out) { + u32 width; + u64 ua, r; + if (!gvn_int_like_width(f, ty, &width)) return 0; + ua = gvn_mask_width((u64)a, width); + switch (op) { + case UO_NEG: + r = 0u - ua; + break; + case UO_NOT: + r = ua == 0; + break; + case UO_BNOT: + r = ~ua; + break; + default: + return 0; + } + *out = gvn_fold_result(r, width); + return 1; +} + +static int gvn_fold_cmp(Func* f, CmpOp op, CfreeCgTypeId ty, i64 a, i64 b, + i64* out) { + u32 width; + u64 ua, ub; + i64 sa, sb; + int r; + if (!gvn_int_like_width(f, ty, &width)) return 0; + ua = gvn_mask_width((u64)a, width); + ub = gvn_mask_width((u64)b, width); + sa = gvn_sign_extend_width(ua, width); + sb = gvn_sign_extend_width(ub, width); + switch (op) { + case CMP_EQ: + r = ua == ub; + break; + case CMP_NE: + r = ua != ub; + break; + case CMP_LT_S: + r = sa < sb; + break; + case CMP_LE_S: + r = sa <= sb; + break; + case CMP_GT_S: + r = sa > sb; + break; + case CMP_GE_S: + r = sa >= sb; + break; + case CMP_LT_U: + r = ua < ub; + break; + case CMP_LE_U: + r = ua <= ub; + break; + case CMP_GT_U: + r = ua > ub; + break; + case CMP_GE_U: + r = ua >= ub; + break; + default: + return 0; + } + *out = r ? 1 : 0; + return 1; +} + +static int gvn_fold_convert(Func* f, ConvKind k, CfreeCgTypeId dst_ty, + CfreeCgTypeId src_ty, i64 src, i64* out) { + u32 sw, dw; + u64 v; + if (!gvn_int_like_width(f, src_ty, &sw) || + !gvn_int_like_width(f, dst_ty, &dw)) + return 0; + switch (k) { + case CV_TRUNC: + case CV_ZEXT: + v = gvn_mask_width((u64)src, sw); + break; + case CV_SEXT: + v = (u64)gvn_sign_extend_width((u64)src, sw); + break; + case CV_BITCAST: + if (sw != dw) return 0; + v = gvn_mask_width((u64)src, sw); + break; + default: + return 0; + } + *out = gvn_fold_result(v, dw); + return 1; +} + +static Val gvn_find(GvnCtx* ctx, Val v) { + if (v == VAL_NONE || v >= ctx->f->nvals) return VAL_NONE; + Val p = ctx->parent[v]; + if (p == VAL_NONE || p == v) return v; + ctx->parent[v] = gvn_find(ctx, p); + return ctx->parent[v]; +} + +static int gvn_same_shape(Func* f, Val a, Val b) { + if (a == VAL_NONE || b == VAL_NONE || a >= f->nvals || b >= f->nvals) + return 0; + return f->val_type[a] == f->val_type[b] && f->val_cls[a] == f->val_cls[b]; +} + +static int gvn_const_for_operand(GvnCtx* ctx, const Operand* op, i64* out) { + if (!op) return 0; + if (op->kind == OPK_IMM) { + *out = op->v.imm; + return 1; + } + if (op->kind != OPK_REG) return 0; + Val v = gvn_find(ctx, (Val)op->v.reg); + if (v == VAL_NONE || v >= ctx->f->nvals || !ctx->constants[v].valid) + return 0; + *out = ctx->constants[v].value; + return 1; +} + +static void gvn_make_load_imm(Func* f, Inst* in, i64 value) { + Val dst = in->def; + CfreeCgTypeId ty = dst != VAL_NONE && dst < f->nvals ? f->val_type[dst] + : in->type; + u8 cls = dst != VAL_NONE && dst < f->nvals ? f->val_cls[dst] : RC_INT; + Operand* opnds = in->opnds; + if (!opnds) opnds = arena_array(f->arena, Operand, 1); + memset(&opnds[0], 0, sizeof opnds[0]); + opnds[0].kind = OPK_REG; + opnds[0].type = ty; + opnds[0].cls = cls; + opnds[0].v.reg = (Reg)dst; + in->op = IR_LOAD_IMM; + in->type = ty; + in->opnds = opnds; + in->nopnds = 1; + in->extra.imm = value; +} + +static int gvn_try_fold_inst(GvnCtx* ctx, Inst* in) { + i64 a, b, out; + switch ((IROp)in->op) { + case IR_BINOP: + if (in->nopnds < 3) return 0; + if (!gvn_const_for_operand(ctx, &in->opnds[1], &a) || + !gvn_const_for_operand(ctx, &in->opnds[2], &b)) + return 0; + if (!gvn_fold_binop(ctx->f, (BinOp)in->extra.imm, in->type, a, b, &out)) + return 0; + gvn_make_load_imm(ctx->f, in, out); + return 1; + case IR_UNOP: + if (in->nopnds < 2) return 0; + if (!gvn_const_for_operand(ctx, &in->opnds[1], &a)) return 0; + if (!gvn_fold_unop(ctx->f, (UnOp)in->extra.imm, in->type, a, &out)) + return 0; + gvn_make_load_imm(ctx->f, in, out); + return 1; + case IR_CMP: + if (in->nopnds < 3) return 0; + if (!gvn_const_for_operand(ctx, &in->opnds[1], &a) || + !gvn_const_for_operand(ctx, &in->opnds[2], &b)) + return 0; + if (!gvn_fold_cmp(ctx->f, (CmpOp)in->extra.imm, in->opnds[1].type, a, b, + &out)) + return 0; + gvn_make_load_imm(ctx->f, in, out); + return 1; + case IR_CONVERT: + if (in->nopnds < 2) return 0; + if (!gvn_const_for_operand(ctx, &in->opnds[1], &a)) return 0; + if (!gvn_fold_convert(ctx->f, (ConvKind)in->extra.imm, in->opnds[0].type, + in->opnds[1].type, a, &out)) + return 0; + gvn_make_load_imm(ctx->f, in, out); + return 1; + default: + return 0; + } +} + +static u64 gvn_mix_u64(u64 h, u64 v) { + h ^= v + 0x9e3779b97f4a7c15ull + (h << 6) + (h >> 2); + return h; +} + +static u64 gvn_key_hash(const GvnKey* k) { + u64 h = 1469598103934665603ull; + h = gvn_mix_u64(h, k->op); + h = gvn_mix_u64(h, k->nops); + h = gvn_mix_u64(h, k->type); + h = gvn_mix_u64(h, k->cls); + h = gvn_mix_u64(h, (u64)k->tag); + for (u32 i = 0; i < k->nops; ++i) { + h = gvn_mix_u64(h, k->ops[i].kind); + h = gvn_mix_u64(h, k->ops[i].cls); + h = gvn_mix_u64(h, k->ops[i].type); + h = gvn_mix_u64(h, k->ops[i].kind == OPK_REG ? k->ops[i].v.reg + : (u64)k->ops[i].v.imm); + } + return h; +} + +static int gvn_operand_key_equal(const GvnOperandKey* a, + const GvnOperandKey* b) { + if (a->kind != b->kind || a->cls != b->cls || a->type != b->type) return 0; + if (a->kind == OPK_REG) return a->v.reg == b->v.reg; + return a->v.imm == b->v.imm; +} + +static int gvn_key_equal(const GvnKey* a, const GvnKey* b) { + if (a->op != b->op || a->nops != b->nops || a->type != b->type || + a->cls != b->cls || a->tag != b->tag) + return 0; + for (u32 i = 0; i < a->nops; ++i) + if (!gvn_operand_key_equal(&a->ops[i], &b->ops[i])) return 0; + return 1; +} + +static void gvn_table_init(GvnTable* t, Func* f) { + u32 ninsts = opt_inst_count(f); + u32 nbuckets = 16u; + while (nbuckets < ninsts * 2u + 1u) nbuckets *= 2u; + t->f = f; + t->nbuckets = nbuckets; + t->buckets = arena_array(f->arena, u32, nbuckets); + for (u32 i = 0; i < nbuckets; ++i) t->buckets[i] = GVN_ENTRY_NONE; + t->entries = NULL; + t->nentries = 0; + t->cap = 0; +} + +static void gvn_table_add(GvnTable* t, const GvnKey* key, Val v, u32 b, + u32 i) { + if (t->nentries == t->cap) { + u32 ncap = t->cap ? t->cap * 2u : 32u; + GvnEntry* entries = arena_array(t->f->arena, GvnEntry, ncap); + if (t->entries) + memcpy(entries, t->entries, sizeof(entries[0]) * t->nentries); + t->entries = entries; + t->cap = ncap; + } + u32 bucket = (u32)(gvn_key_hash(key) & (u64)(t->nbuckets - 1u)); + GvnEntry* e = &t->entries[t->nentries]; + e->key = *key; + e->val = v; + e->block = b; + e->inst = i; + e->next = t->buckets[bucket]; + t->buckets[bucket] = t->nentries++; +} + +static int gvn_leader_dominates_site(const OptAnalysis* a, u32 leader_b, + u32 leader_i, u32 use_b, u32 use_i) { + if (leader_b == use_b) return leader_i < use_i; + return opt_analysis_dominates(a, leader_b, use_b); +} + +static int gvn_leader_dominates_use(GvnCtx* ctx, u32 leader_b, u32 leader_i, + const OptUse* use) { + if (use->kind == OPT_USE_PHI_INPUT) { + Inst* in = &ctx->f->blocks[use->block].insts[use->inst]; + IRPhiAux* aux = (IRPhiAux*)in->extra.aux; + if (!aux || use->phi_pred_index >= aux->npreds) return 0; + u32 pred = aux->pred_blocks[use->phi_pred_index]; + if (leader_b == pred) return 1; + return opt_analysis_dominates(ctx->analysis, leader_b, pred); + } + return gvn_leader_dominates_site(ctx->analysis, leader_b, leader_i, + use->block, use->inst); +} + +static int gvn_table_find(GvnCtx* ctx, const GvnKey* key, u32 b, u32 i, + Val* out) { + GvnTable* t = &ctx->table; + u32 bucket = (u32)(gvn_key_hash(key) & (u64)(t->nbuckets - 1u)); + for (u32 e = t->buckets[bucket]; e != GVN_ENTRY_NONE; + e = t->entries[e].next) { + GvnEntry* ent = &t->entries[e]; + if (!gvn_key_equal(&ent->key, key)) continue; + if (!gvn_leader_dominates_site(ctx->analysis, ent->block, ent->inst, b, i)) + continue; + *out = ent->val; + return 1; + } + return 0; +} + +static int gvn_make_operand_key(GvnCtx* ctx, const Operand* op, + GvnOperandKey* out) { + memset(out, 0, sizeof *out); + if (!op || (op->kind != OPK_REG && op->kind != OPK_IMM)) return 0; + out->kind = op->kind; + out->cls = op->cls; + out->type = op->type; + if (op->kind == OPK_REG) { + Val v = gvn_find(ctx, (Val)op->v.reg); + if (v == VAL_NONE || v >= ctx->f->nvals) return 0; + out->v.reg = v; + } else { + out->v.imm = op->v.imm; + } + return 1; +} + +static int gvn_operand_key_less(const GvnOperandKey* a, + const GvnOperandKey* b) { + if (a->kind != b->kind) return a->kind < b->kind; + if (a->type != b->type) return a->type < b->type; + if (a->cls != b->cls) return a->cls < b->cls; + if (a->kind == OPK_REG) return a->v.reg < b->v.reg; + return a->v.imm < b->v.imm; +} + +static int gvn_commutative_binop(BinOp op) { + switch (op) { + case BO_IADD: + case BO_IMUL: + case BO_AND: + case BO_OR: + case BO_XOR: + return 1; + default: + return 0; + } +} + +static int gvn_commutative_cmp(CmpOp op) { + return op == CMP_EQ || op == CMP_NE; +} + +static int gvn_make_key(GvnCtx* ctx, const Inst* in, GvnKey* key) { + memset(key, 0, sizeof *key); + if (in->def == VAL_NONE || in->def >= ctx->f->nvals) return 0; + key->op = in->op; + key->type = ctx->f->val_type[in->def]; + key->cls = ctx->f->val_cls[in->def]; + key->tag = in->extra.imm; + + switch ((IROp)in->op) { + case IR_LOAD_IMM: + case IR_CONST_I: + key->nops = 0; + key->tag = in->extra.imm; + return 1; + case IR_COPY: + if (in->nopnds < 2) return 0; + key->nops = 1; + return gvn_make_operand_key(ctx, &in->opnds[1], &key->ops[0]); + case IR_UNOP: + case IR_CONVERT: + if (in->nopnds < 2) return 0; + key->nops = 1; + return gvn_make_operand_key(ctx, &in->opnds[1], &key->ops[0]); + case IR_BINOP: + case IR_CMP: + if (in->nopnds < 3) return 0; + key->nops = 2; + if (!gvn_make_operand_key(ctx, &in->opnds[1], &key->ops[0]) || + !gvn_make_operand_key(ctx, &in->opnds[2], &key->ops[1])) + return 0; + if (((IROp)in->op == IR_BINOP && + gvn_commutative_binop((BinOp)in->extra.imm)) || + ((IROp)in->op == IR_CMP && + gvn_commutative_cmp((CmpOp)in->extra.imm))) { + if (gvn_operand_key_less(&key->ops[1], &key->ops[0])) { + GvnOperandKey tmp = key->ops[0]; + key->ops[0] = key->ops[1]; + key->ops[1] = tmp; + } + } + return 1; + default: + return 0; + } +} + +static void gvn_replace_one_use(Func* f, const OptUse* use, Val repl) { + Inst* in = &f->blocks[use->block].insts[use->inst]; + switch ((OptUseKind)use->kind) { + case OPT_USE_OPERAND: + use->operand->v.reg = (Reg)repl; + use->operand->type = f->val_type[repl]; + use->operand->cls = f->val_cls[repl]; + break; + case OPT_USE_INDIRECT_BASE: + use->operand->v.ind.base = (Reg)repl; + break; + case OPT_USE_PHI_INPUT: { + IRPhiAux* aux = (IRPhiAux*)in->extra.aux; + if (aux && use->phi_pred_index < aux->npreds) + aux->pred_vals[use->phi_pred_index] = repl; + break; + } + default: + break; + } +} + +static int gvn_replace_dominated_uses(GvnCtx* ctx, Val old, Val repl, u32 rb, + u32 ri) { + Func* f = ctx->f; + int changed = 0; + if (old == VAL_NONE || old >= f->nvals || repl == VAL_NONE || + repl >= f->nvals || old == repl || !gvn_same_shape(f, old, repl)) + return 0; + for (u32 u = f->opt_first_use_by_val[old]; u != OPT_USE_NONE; + u = f->opt_uses[u].next_for_val) { + OptUse* use = &f->opt_uses[u]; + if (!gvn_leader_dominates_use(ctx, rb, ri, use)) continue; + gvn_replace_one_use(f, use, repl); + changed = 1; + } + if (changed) { + ctx->parent[old] = repl; + ctx->constants[old] = ctx->constants[gvn_find(ctx, repl)]; + } + return changed; +} + +static void gvn_note_const(GvnCtx* ctx, Inst* in) { + if (in->def == VAL_NONE || in->def >= ctx->f->nvals) return; + if ((IROp)in->op == IR_LOAD_IMM || (IROp)in->op == IR_CONST_I) { + ctx->constants[in->def].valid = 1; + ctx->constants[in->def].value = in->extra.imm; + } +} + +static int gvn_scalar_candidate(const Inst* in) { + if (!in || in->def == VAL_NONE || in->ndefs) return 0; + switch ((IROp)in->op) { + case IR_CONST_I: + case IR_LOAD_IMM: + case IR_COPY: + case IR_BINOP: + case IR_UNOP: + case IR_CMP: + case IR_CONVERT: + return 1; + default: + return 0; + } +} + +static int gvn_fold_branch(GvnCtx* ctx, u32 b, Inst* in) { + Block* bl = &ctx->f->blocks[b]; + i64 a, c, taken; + u32 target; + switch ((IROp)in->op) { + case IR_CONDBR: + if (in->nopnds < 1 || bl->nsucc < 2) return 0; + if (in->opnds[0].kind != OPK_REG) return 0; + if (!gvn_const_for_operand(ctx, &in->opnds[0], &a)) return 0; + target = bl->succ[a != 0 ? 0u : 1u]; + break; + case IR_CMP_BRANCH: + if (in->nopnds < 2 || bl->nsucc < 2) return 0; + if (in->opnds[0].kind != OPK_REG && in->opnds[1].kind != OPK_REG) + return 0; + if (!gvn_const_for_operand(ctx, &in->opnds[0], &a) || + !gvn_const_for_operand(ctx, &in->opnds[1], &c)) + return 0; + if (!gvn_fold_cmp(ctx->f, (CmpOp)in->extra.imm, in->opnds[0].type, a, c, + &taken)) + return 0; + target = bl->succ[taken ? 0u : 1u]; + break; + default: + return 0; + } + + in->op = IR_BR; + in->def = VAL_NONE; + in->ndefs = 0; + in->defs = NULL; + in->nopnds = 0; + in->opnds = NULL; + bl->succ[0] = target; + bl->nsucc = 1; + return 1; +} + +static void gvn_visit_inst(GvnCtx* ctx, u32 b, u32 i) { + Inst* in = &ctx->f->blocks[b].insts[i]; + if (gvn_fold_branch(ctx, b, in)) { + ctx->changed = 1; + ctx->cfg_changed = 1; + return; + } + if (!gvn_scalar_candidate(in)) return; + + if (gvn_try_fold_inst(ctx, in)) ctx->changed = 1; + gvn_note_const(ctx, in); + + GvnKey key; + if (!gvn_make_key(ctx, in, &key)) return; + + Val leader = VAL_NONE; + if (gvn_table_find(ctx, &key, b, i, &leader)) { + if (gvn_replace_dominated_uses(ctx, in->def, leader, + ctx->f->val_def_block[leader], + ctx->f->val_def_inst[leader])) + ctx->changed = 1; + return; + } + gvn_table_add(&ctx->table, &key, in->def, b, i); +} + +static void gvn_realign_phi_preds(Func* f) { + for (u32 b = 0; b < f->nblocks; ++b) { + Block* bl = &f->blocks[b]; + for (u32 i = 0; i < bl->ninsts; ++i) { + Inst* phi = &bl->insts[i]; + if ((IROp)phi->op != IR_PHI) break; + IRPhiAux* aux = (IRPhiAux*)phi->extra.aux; + if (!aux) continue; + u32 old_n = aux->npreds; + u32* old_blocks = aux->pred_blocks; + Val* old_vals = aux->pred_vals; + u32* pred_blocks = + bl->npreds ? arena_array(f->arena, u32, bl->npreds) : NULL; + Val* pred_vals = + bl->npreds ? arena_zarray(f->arena, Val, bl->npreds) : NULL; + for (u32 p = 0; p < bl->npreds; ++p) { + pred_blocks[p] = bl->preds[p]; + pred_vals[p] = phi->def; + for (u32 old = 0; old < old_n; ++old) { + if (old_blocks && old_blocks[old] == bl->preds[p]) { + pred_vals[p] = old_vals ? old_vals[old] : VAL_NONE; + break; + } + } + } + aux->npreds = bl->npreds; + aux->pred_blocks = pred_blocks; + aux->pred_vals = pred_vals; + } + } +} + void opt_gvn(Func* f) { - if (f) opt_rebuild_def_use(f); + if (!f || f->opt_rewritten) return; + if (!f->opt_reg_ssa && f->npregs > 1) { + opt_rebuild_def_use(f); + return; + } + + opt_rebuild_def_use(f); + OptAnalysis a; + memset(&a, 0, sizeof a); + opt_analysis_build_dominators(f, &a); + + GvnCtx ctx; + memset(&ctx, 0, sizeof ctx); + ctx.f = f; + ctx.analysis = &a; + ctx.parent = arena_array(f->arena, Val, f->nvals ? f->nvals : 1u); + ctx.constants = arena_zarray(f->arena, GvnConst, f->nvals ? f->nvals : 1u); + for (Val v = 0; v < f->nvals; ++v) ctx.parent[v] = v; + gvn_table_init(&ctx.table, f); + + for (u32 ri = 0; ri < a.nrpo; ++ri) { + u32 b = a.rpo[ri]; + if (b >= f->nblocks || !a.reachable[b]) continue; + for (u32 i = 0; i < f->blocks[b].ninsts; ++i) gvn_visit_inst(&ctx, b, i); + } + + if (ctx.cfg_changed) { + opt_analysis_invalidate(f, OPT_ANALYSIS_CFG | OPT_ANALYSIS_DEF_USE | + OPT_ANALYSIS_DOM | OPT_ANALYSIS_LOOP); + opt_build_cfg(f); + gvn_realign_phi_preds(f); + } else if (ctx.changed) { + opt_analysis_invalidate(f, OPT_ANALYSIS_DEF_USE); + } + opt_rebuild_def_use(f); } void opt_dse(Func* f) { diff --git a/test/opt/opt_test.c b/test/opt/opt_test.c @@ -3565,7 +3565,7 @@ static void opt_combine_single_use_copy_and_imm(void) { tc_fini(&tc); } -static void opt_combine_retargets_single_use_producer_copy(void) { +static void opt_combine_preserves_producer_copy_after_rewrite(void) { TestCtx tc; tc_init(&tc); @@ -3576,11 +3576,11 @@ static void opt_combine_retargets_single_use_producer_copy(void) { emit_ret_val(f, f->entry, 22, tc.i32); opt_combine(f); - EXPECT(count_op(f, IR_BINOP) == 1 && count_op(f, IR_COPY) == 0, - "single-use binop-copy should retarget producer and remove copy"); + EXPECT(count_op(f, IR_BINOP) == 1 && count_op(f, IR_COPY) == 1, + "combine should preserve producer-copy pairs after rewrite"); Inst* add = &f->blocks[f->entry].insts[0]; - EXPECT(add->opnds[0].v.reg == 22, - "retargeted binop should define the copy destination"); + EXPECT(add->opnds[0].v.reg == 21, + "rewritten producer should keep its original destination"); Func* lhs = new_func(&tc); lhs->opt_rewritten = 1; @@ -3590,9 +3590,9 @@ static void opt_combine_retargets_single_use_producer_copy(void) { opt_combine(lhs); add = &lhs->blocks[lhs->entry].insts[0]; - EXPECT(count_op(lhs, IR_COPY) == 0 && add->opnds[0].v.reg == 20 && + EXPECT(count_op(lhs, IR_COPY) == 1 && add->opnds[0].v.reg == 21 && add->opnds[1].v.reg == 20, - "producer may retarget to its lhs operand"); + "producer-copy preservation should keep lhs overlap unchanged"); Func* rhs = new_func(&tc); rhs->opt_rewritten = 1; @@ -3602,9 +3602,9 @@ static void opt_combine_retargets_single_use_producer_copy(void) { opt_combine(rhs); add = &rhs->blocks[rhs->entry].insts[0]; - EXPECT(count_op(rhs, IR_COPY) == 0 && add->opnds[0].v.reg == 20 && - add->opnds[1].v.reg == 20 && add->opnds[2].v.reg == 19, - "commutative rhs overlap should swap operands before retargeting"); + EXPECT(count_op(rhs, IR_COPY) == 1 && add->opnds[0].v.reg == 21 && + add->opnds[1].v.reg == 19 && add->opnds[2].v.reg == 20, + "producer-copy preservation should keep rhs overlap unchanged"); Func* retreg = new_func(&tc); retreg->opt_rewritten = 1; @@ -4724,7 +4724,7 @@ int main(void) { opt_regalloc_spill_requires_scratch(); opt_combine_spill_peeps(); opt_combine_single_use_copy_and_imm(); - opt_combine_retargets_single_use_producer_copy(); + opt_combine_preserves_producer_copy_after_rewrite(); opt_combine_keeps_unsafe_and_multiuse_defs(); opt_combine_copy_chains_and_convert_pairs(); opt_dce_physical_dead_defs();