commit 572b0bbeeaeb5e30e79c8fcb5cdacd298a1de11a
parent 5b2748e99c18922f08cc6fcb285526a119f441f9
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Thu, 21 May 2026 12:53:55 -0700
Implement scalar GVN
Diffstat:
5 files changed, 740 insertions(+), 47 deletions(-)
diff --git a/doc/OPT.md b/doc/OPT.md
@@ -662,7 +662,7 @@ Implement in order:
2. [x] `opt_copy_cleanup`
3. [x] `opt_block_cloning`
4. [x] `opt_addr_xform`
-5. [ ] scalar-only `opt_gvn`
+5. [x] scalar-only `opt_gvn`
6. [ ] `opt_copy_prop`
7. [ ] memory-aware `opt_gvn` / redundant load elimination design and tests
8. [ ] `opt_dse`
@@ -702,7 +702,7 @@ Completed pre-GVN slice exit criteria:
Remaining Phase D exit criteria:
-- [ ] Scalar GVN pass-local dump tests prove redundant pure expressions are
+- [x] Scalar GVN pass-local dump tests prove redundant pure expressions are
replaced and constant branches fold.
- [ ] Memory numbering and alias invalidation are documented before any memory
GVN or redundant-load rewrite lands.
diff --git a/src/opt/pass_combine.c b/src/opt/pass_combine.c
@@ -242,6 +242,9 @@ static int inst_defines_phys_reg(const Inst* in, const Operand* r) {
if (!aux) return 0;
for (u32 i = 0; i < aux->nout; ++i)
if (same_phys_reg(&aux->out_ops[i], r)) return 1;
+ if (r->cls < OPT_REG_CLASSES && r->v.reg < 32 &&
+ (aux->clobber_mask[r->cls] & (1u << r->v.reg)))
+ return 1;
return 0;
}
case IR_INTRINSIC: {
@@ -315,15 +318,6 @@ static int binop_is_commutative(BinOp op) {
}
}
-static int no_intervening_phys_access(Block* bl, u32 first, u32 last,
- const Operand* r) {
- for (u32 i = first; i < last; ++i) {
- Inst* in = &bl->insts[i];
- if (inst_uses_phys_reg(in, r) || inst_defines_phys_reg(in, r)) return 0;
- }
- return 1;
-}
-
static int retarget_producer_legal(Inst* producer, const Operand* copy_dst,
int* swap_binop) {
*swap_binop = 0;
@@ -458,30 +452,9 @@ static void opt_combine_fold_block(Func* f, Block* bl,
}
}
- if (f->opt_rewritten &&
- ((IROp)in->op == IR_BINOP || (IROp)in->op == IR_UNOP) &&
- in->nopnds >= 1 && in->opnds[0].kind == OPK_REG &&
- find_single_direct_use(f, bl, hard_live, i, &in->opnds[0], NULL, 0, 0,
- 0, &use_i, &op_i)) {
- Inst* copy = &bl->insts[use_i];
- int swap_binop = 0;
- if ((IROp)copy->op == IR_COPY && op_i == 1 && copy->nopnds >= 2 &&
- copy->opnds[0].kind == OPK_REG &&
- same_phys_reg(©->opnds[1], &in->opnds[0]) &&
- !same_phys_reg(©->opnds[0], &in->opnds[0]) &&
- no_intervening_phys_access(bl, i + 1u, use_i, ©->opnds[0]) &&
- retarget_producer_legal(in, ©->opnds[0], &swap_binop)) {
- if (swap_binop) {
- Operand tmp = in->opnds[1];
- in->opnds[1] = in->opnds[2];
- in->opnds[2] = tmp;
- }
- in->opnds[0] = copy->opnds[0];
- copy->opnds[1] = copy->opnds[0];
- continue;
- }
- }
-
+ /* Producer-to-copy retargeting is unsafe after rewrite: hard registers are
+ * mutable storage, and the original producer register may remain live into
+ * successor blocks even when the local copy looks like the only use. */
if ((IROp)in->op == IR_COPY && in->nopnds >= 2 &&
in->opnds[0].kind == OPK_REG && in->opnds[1].kind == OPK_REG &&
!same_phys_reg(&in->opnds[0], &in->opnds[1]) &&
diff --git a/src/opt/pass_hard_live.c b/src/opt/pass_hard_live.c
@@ -64,6 +64,11 @@ static void hard_def_operand(OptHardRegSet* s, const Operand* op) {
if (op && op->kind == OPK_REG) hard_add(s, op->cls, op->v.reg);
}
+static void hard_def_clobber_mask(OptHardRegSet* s, const u32* masks) {
+ if (!masks) return;
+ for (u32 c = 0; c < OPT_REG_CLASSES; ++c) s->cls[c] |= masks[c];
+}
+
static void hard_use_abivalue(OptHardRegSet* use, const CGABIValue* v) {
if (!v) return;
hard_use_operand(use, &v->storage);
@@ -187,6 +192,7 @@ void opt_hard_inst_use_def(Func* f, const Inst* in, OptHardRegSet* use,
for (u32 i = 0; i < aux->nin; ++i) hard_use_operand(use, &aux->in_ops[i]);
for (u32 i = 0; i < aux->nout; ++i)
hard_def_operand(def, &aux->out_ops[i]);
+ hard_def_clobber_mask(def, aux->clobber_mask);
break;
}
case IR_INTRINSIC: {
diff --git a/src/opt/pass_o2.c b/src/opt/pass_o2.c
@@ -1,17 +1,73 @@
#include <string.h>
+#include <cfree/cg.h>
+
#include "opt/opt_internal.h"
#define OPT_BLOCK_NONE 0xffffffffu
#define OPT_CLONE_MAX_BLOCK_INSTS 4u
#define OPT_CLONE_MAX_PREDS 4u
#define OPT_CLONE_ABS_GROWTH_LIMIT 32u
+#define GVN_ENTRY_NONE 0xffffffffu
typedef struct CloneValMap {
Val old_val;
Val new_val;
} CloneValMap;
+typedef struct GvnConst {
+ i64 value;
+ u8 valid;
+} GvnConst;
+
+typedef struct GvnOperandKey {
+ u8 kind; /* OPK_REG or OPK_IMM */
+ u8 cls;
+ u16 pad;
+ CfreeCgTypeId type;
+ union {
+ Val reg;
+ i64 imm;
+ } v;
+} GvnOperandKey;
+
+typedef struct GvnKey {
+ u16 op;
+ u16 nops;
+ CfreeCgTypeId type;
+ u8 cls;
+ u8 pad[3];
+ i64 tag;
+ GvnOperandKey ops[2];
+} GvnKey;
+
+typedef struct GvnEntry {
+ GvnKey key;
+ Val val;
+ u32 block;
+ u32 inst;
+ u32 next;
+} GvnEntry;
+
+typedef struct GvnTable {
+ Func* f;
+ u32* buckets;
+ u32 nbuckets;
+ GvnEntry* entries;
+ u32 nentries;
+ u32 cap;
+} GvnTable;
+
+typedef struct GvnCtx {
+ Func* f;
+ OptAnalysis* analysis;
+ Val* parent;
+ GvnConst* constants;
+ GvnTable table;
+ int changed;
+ int cfg_changed;
+} GvnCtx;
+
static u32 opt_inst_count(Func* f) {
u32 n = 0;
for (u32 b = 0; b < f->nblocks; ++b) n += f->blocks[b].ninsts;
@@ -372,8 +428,666 @@ void opt_addr_xform(Func* f) {
opt_rebuild_def_use(f);
}
+static u64 gvn_width_mask(u32 width) {
+ if (width >= 64u) return ~0ull;
+ return (1ull << width) - 1ull;
+}
+
+static u64 gvn_mask_width(u64 v, u32 width) {
+ return v & gvn_width_mask(width);
+}
+
+static i64 gvn_sign_extend_width(u64 v, u32 width) {
+ v = gvn_mask_width(v, width);
+ if (!width || width >= 64u) return (i64)v;
+ u64 sign = 1ull << (width - 1u);
+ return (i64)((v ^ sign) - sign);
+}
+
+static int gvn_int_like_width(Func* f, CfreeCgTypeId ty, u32* width_out) {
+ u32 width = cfree_cg_type_int_width((CfreeCompiler*)f->c, ty);
+ if (!width || width > 64u) return 0;
+ *width_out = width;
+ return 1;
+}
+
+static i64 gvn_fold_result(u64 v, u32 width) {
+ return (i64)gvn_mask_width(v, width);
+}
+
+static int gvn_fold_binop(Func* f, BinOp op, CfreeCgTypeId ty, i64 a, i64 b,
+ i64* out) {
+ u32 width;
+ u64 ua, ub, r;
+ if (!gvn_int_like_width(f, ty, &width)) return 0;
+ ua = gvn_mask_width((u64)a, width);
+ ub = gvn_mask_width((u64)b, width);
+ r = 0;
+ switch (op) {
+ case BO_IADD:
+ r = ua + ub;
+ break;
+ case BO_ISUB:
+ r = ua - ub;
+ break;
+ case BO_IMUL:
+ r = ua * ub;
+ break;
+ case BO_AND:
+ r = ua & ub;
+ break;
+ case BO_OR:
+ r = ua | ub;
+ break;
+ case BO_XOR:
+ r = ua ^ ub;
+ break;
+ case BO_SHL:
+ r = ua << (u32)(ub & (u64)(width - 1u));
+ break;
+ case BO_SHR_U:
+ r = ua >> (u32)(ub & (u64)(width - 1u));
+ break;
+ case BO_SHR_S: {
+ u32 sh = (u32)(ub & (u64)(width - 1u));
+ if (!sh) {
+ r = ua;
+ } else {
+ u64 sign = 1ull << (width - 1u);
+ r = ua >> sh;
+ if (ua & sign) r |= gvn_width_mask(width) << (width - sh);
+ }
+ break;
+ }
+ default:
+ return 0;
+ }
+ *out = gvn_fold_result(r, width);
+ return 1;
+}
+
+static int gvn_fold_unop(Func* f, UnOp op, CfreeCgTypeId ty, i64 a,
+ i64* out) {
+ u32 width;
+ u64 ua, r;
+ if (!gvn_int_like_width(f, ty, &width)) return 0;
+ ua = gvn_mask_width((u64)a, width);
+ switch (op) {
+ case UO_NEG:
+ r = 0u - ua;
+ break;
+ case UO_NOT:
+ r = ua == 0;
+ break;
+ case UO_BNOT:
+ r = ~ua;
+ break;
+ default:
+ return 0;
+ }
+ *out = gvn_fold_result(r, width);
+ return 1;
+}
+
+static int gvn_fold_cmp(Func* f, CmpOp op, CfreeCgTypeId ty, i64 a, i64 b,
+ i64* out) {
+ u32 width;
+ u64 ua, ub;
+ i64 sa, sb;
+ int r;
+ if (!gvn_int_like_width(f, ty, &width)) return 0;
+ ua = gvn_mask_width((u64)a, width);
+ ub = gvn_mask_width((u64)b, width);
+ sa = gvn_sign_extend_width(ua, width);
+ sb = gvn_sign_extend_width(ub, width);
+ switch (op) {
+ case CMP_EQ:
+ r = ua == ub;
+ break;
+ case CMP_NE:
+ r = ua != ub;
+ break;
+ case CMP_LT_S:
+ r = sa < sb;
+ break;
+ case CMP_LE_S:
+ r = sa <= sb;
+ break;
+ case CMP_GT_S:
+ r = sa > sb;
+ break;
+ case CMP_GE_S:
+ r = sa >= sb;
+ break;
+ case CMP_LT_U:
+ r = ua < ub;
+ break;
+ case CMP_LE_U:
+ r = ua <= ub;
+ break;
+ case CMP_GT_U:
+ r = ua > ub;
+ break;
+ case CMP_GE_U:
+ r = ua >= ub;
+ break;
+ default:
+ return 0;
+ }
+ *out = r ? 1 : 0;
+ return 1;
+}
+
+static int gvn_fold_convert(Func* f, ConvKind k, CfreeCgTypeId dst_ty,
+ CfreeCgTypeId src_ty, i64 src, i64* out) {
+ u32 sw, dw;
+ u64 v;
+ if (!gvn_int_like_width(f, src_ty, &sw) ||
+ !gvn_int_like_width(f, dst_ty, &dw))
+ return 0;
+ switch (k) {
+ case CV_TRUNC:
+ case CV_ZEXT:
+ v = gvn_mask_width((u64)src, sw);
+ break;
+ case CV_SEXT:
+ v = (u64)gvn_sign_extend_width((u64)src, sw);
+ break;
+ case CV_BITCAST:
+ if (sw != dw) return 0;
+ v = gvn_mask_width((u64)src, sw);
+ break;
+ default:
+ return 0;
+ }
+ *out = gvn_fold_result(v, dw);
+ return 1;
+}
+
+static Val gvn_find(GvnCtx* ctx, Val v) {
+ if (v == VAL_NONE || v >= ctx->f->nvals) return VAL_NONE;
+ Val p = ctx->parent[v];
+ if (p == VAL_NONE || p == v) return v;
+ ctx->parent[v] = gvn_find(ctx, p);
+ return ctx->parent[v];
+}
+
+static int gvn_same_shape(Func* f, Val a, Val b) {
+ if (a == VAL_NONE || b == VAL_NONE || a >= f->nvals || b >= f->nvals)
+ return 0;
+ return f->val_type[a] == f->val_type[b] && f->val_cls[a] == f->val_cls[b];
+}
+
+static int gvn_const_for_operand(GvnCtx* ctx, const Operand* op, i64* out) {
+ if (!op) return 0;
+ if (op->kind == OPK_IMM) {
+ *out = op->v.imm;
+ return 1;
+ }
+ if (op->kind != OPK_REG) return 0;
+ Val v = gvn_find(ctx, (Val)op->v.reg);
+ if (v == VAL_NONE || v >= ctx->f->nvals || !ctx->constants[v].valid)
+ return 0;
+ *out = ctx->constants[v].value;
+ return 1;
+}
+
+static void gvn_make_load_imm(Func* f, Inst* in, i64 value) {
+ Val dst = in->def;
+ CfreeCgTypeId ty = dst != VAL_NONE && dst < f->nvals ? f->val_type[dst]
+ : in->type;
+ u8 cls = dst != VAL_NONE && dst < f->nvals ? f->val_cls[dst] : RC_INT;
+ Operand* opnds = in->opnds;
+ if (!opnds) opnds = arena_array(f->arena, Operand, 1);
+ memset(&opnds[0], 0, sizeof opnds[0]);
+ opnds[0].kind = OPK_REG;
+ opnds[0].type = ty;
+ opnds[0].cls = cls;
+ opnds[0].v.reg = (Reg)dst;
+ in->op = IR_LOAD_IMM;
+ in->type = ty;
+ in->opnds = opnds;
+ in->nopnds = 1;
+ in->extra.imm = value;
+}
+
+static int gvn_try_fold_inst(GvnCtx* ctx, Inst* in) {
+ i64 a, b, out;
+ switch ((IROp)in->op) {
+ case IR_BINOP:
+ if (in->nopnds < 3) return 0;
+ if (!gvn_const_for_operand(ctx, &in->opnds[1], &a) ||
+ !gvn_const_for_operand(ctx, &in->opnds[2], &b))
+ return 0;
+ if (!gvn_fold_binop(ctx->f, (BinOp)in->extra.imm, in->type, a, b, &out))
+ return 0;
+ gvn_make_load_imm(ctx->f, in, out);
+ return 1;
+ case IR_UNOP:
+ if (in->nopnds < 2) return 0;
+ if (!gvn_const_for_operand(ctx, &in->opnds[1], &a)) return 0;
+ if (!gvn_fold_unop(ctx->f, (UnOp)in->extra.imm, in->type, a, &out))
+ return 0;
+ gvn_make_load_imm(ctx->f, in, out);
+ return 1;
+ case IR_CMP:
+ if (in->nopnds < 3) return 0;
+ if (!gvn_const_for_operand(ctx, &in->opnds[1], &a) ||
+ !gvn_const_for_operand(ctx, &in->opnds[2], &b))
+ return 0;
+ if (!gvn_fold_cmp(ctx->f, (CmpOp)in->extra.imm, in->opnds[1].type, a, b,
+ &out))
+ return 0;
+ gvn_make_load_imm(ctx->f, in, out);
+ return 1;
+ case IR_CONVERT:
+ if (in->nopnds < 2) return 0;
+ if (!gvn_const_for_operand(ctx, &in->opnds[1], &a)) return 0;
+ if (!gvn_fold_convert(ctx->f, (ConvKind)in->extra.imm, in->opnds[0].type,
+ in->opnds[1].type, a, &out))
+ return 0;
+ gvn_make_load_imm(ctx->f, in, out);
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+static u64 gvn_mix_u64(u64 h, u64 v) {
+ h ^= v + 0x9e3779b97f4a7c15ull + (h << 6) + (h >> 2);
+ return h;
+}
+
+static u64 gvn_key_hash(const GvnKey* k) {
+ u64 h = 1469598103934665603ull;
+ h = gvn_mix_u64(h, k->op);
+ h = gvn_mix_u64(h, k->nops);
+ h = gvn_mix_u64(h, k->type);
+ h = gvn_mix_u64(h, k->cls);
+ h = gvn_mix_u64(h, (u64)k->tag);
+ for (u32 i = 0; i < k->nops; ++i) {
+ h = gvn_mix_u64(h, k->ops[i].kind);
+ h = gvn_mix_u64(h, k->ops[i].cls);
+ h = gvn_mix_u64(h, k->ops[i].type);
+ h = gvn_mix_u64(h, k->ops[i].kind == OPK_REG ? k->ops[i].v.reg
+ : (u64)k->ops[i].v.imm);
+ }
+ return h;
+}
+
+static int gvn_operand_key_equal(const GvnOperandKey* a,
+ const GvnOperandKey* b) {
+ if (a->kind != b->kind || a->cls != b->cls || a->type != b->type) return 0;
+ if (a->kind == OPK_REG) return a->v.reg == b->v.reg;
+ return a->v.imm == b->v.imm;
+}
+
+static int gvn_key_equal(const GvnKey* a, const GvnKey* b) {
+ if (a->op != b->op || a->nops != b->nops || a->type != b->type ||
+ a->cls != b->cls || a->tag != b->tag)
+ return 0;
+ for (u32 i = 0; i < a->nops; ++i)
+ if (!gvn_operand_key_equal(&a->ops[i], &b->ops[i])) return 0;
+ return 1;
+}
+
+static void gvn_table_init(GvnTable* t, Func* f) {
+ u32 ninsts = opt_inst_count(f);
+ u32 nbuckets = 16u;
+ while (nbuckets < ninsts * 2u + 1u) nbuckets *= 2u;
+ t->f = f;
+ t->nbuckets = nbuckets;
+ t->buckets = arena_array(f->arena, u32, nbuckets);
+ for (u32 i = 0; i < nbuckets; ++i) t->buckets[i] = GVN_ENTRY_NONE;
+ t->entries = NULL;
+ t->nentries = 0;
+ t->cap = 0;
+}
+
+static void gvn_table_add(GvnTable* t, const GvnKey* key, Val v, u32 b,
+ u32 i) {
+ if (t->nentries == t->cap) {
+ u32 ncap = t->cap ? t->cap * 2u : 32u;
+ GvnEntry* entries = arena_array(t->f->arena, GvnEntry, ncap);
+ if (t->entries)
+ memcpy(entries, t->entries, sizeof(entries[0]) * t->nentries);
+ t->entries = entries;
+ t->cap = ncap;
+ }
+ u32 bucket = (u32)(gvn_key_hash(key) & (u64)(t->nbuckets - 1u));
+ GvnEntry* e = &t->entries[t->nentries];
+ e->key = *key;
+ e->val = v;
+ e->block = b;
+ e->inst = i;
+ e->next = t->buckets[bucket];
+ t->buckets[bucket] = t->nentries++;
+}
+
+static int gvn_leader_dominates_site(const OptAnalysis* a, u32 leader_b,
+ u32 leader_i, u32 use_b, u32 use_i) {
+ if (leader_b == use_b) return leader_i < use_i;
+ return opt_analysis_dominates(a, leader_b, use_b);
+}
+
+static int gvn_leader_dominates_use(GvnCtx* ctx, u32 leader_b, u32 leader_i,
+ const OptUse* use) {
+ if (use->kind == OPT_USE_PHI_INPUT) {
+ Inst* in = &ctx->f->blocks[use->block].insts[use->inst];
+ IRPhiAux* aux = (IRPhiAux*)in->extra.aux;
+ if (!aux || use->phi_pred_index >= aux->npreds) return 0;
+ u32 pred = aux->pred_blocks[use->phi_pred_index];
+ if (leader_b == pred) return 1;
+ return opt_analysis_dominates(ctx->analysis, leader_b, pred);
+ }
+ return gvn_leader_dominates_site(ctx->analysis, leader_b, leader_i,
+ use->block, use->inst);
+}
+
+static int gvn_table_find(GvnCtx* ctx, const GvnKey* key, u32 b, u32 i,
+ Val* out) {
+ GvnTable* t = &ctx->table;
+ u32 bucket = (u32)(gvn_key_hash(key) & (u64)(t->nbuckets - 1u));
+ for (u32 e = t->buckets[bucket]; e != GVN_ENTRY_NONE;
+ e = t->entries[e].next) {
+ GvnEntry* ent = &t->entries[e];
+ if (!gvn_key_equal(&ent->key, key)) continue;
+ if (!gvn_leader_dominates_site(ctx->analysis, ent->block, ent->inst, b, i))
+ continue;
+ *out = ent->val;
+ return 1;
+ }
+ return 0;
+}
+
+static int gvn_make_operand_key(GvnCtx* ctx, const Operand* op,
+ GvnOperandKey* out) {
+ memset(out, 0, sizeof *out);
+ if (!op || (op->kind != OPK_REG && op->kind != OPK_IMM)) return 0;
+ out->kind = op->kind;
+ out->cls = op->cls;
+ out->type = op->type;
+ if (op->kind == OPK_REG) {
+ Val v = gvn_find(ctx, (Val)op->v.reg);
+ if (v == VAL_NONE || v >= ctx->f->nvals) return 0;
+ out->v.reg = v;
+ } else {
+ out->v.imm = op->v.imm;
+ }
+ return 1;
+}
+
+static int gvn_operand_key_less(const GvnOperandKey* a,
+ const GvnOperandKey* b) {
+ if (a->kind != b->kind) return a->kind < b->kind;
+ if (a->type != b->type) return a->type < b->type;
+ if (a->cls != b->cls) return a->cls < b->cls;
+ if (a->kind == OPK_REG) return a->v.reg < b->v.reg;
+ return a->v.imm < b->v.imm;
+}
+
+static int gvn_commutative_binop(BinOp op) {
+ switch (op) {
+ case BO_IADD:
+ case BO_IMUL:
+ case BO_AND:
+ case BO_OR:
+ case BO_XOR:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+static int gvn_commutative_cmp(CmpOp op) {
+ return op == CMP_EQ || op == CMP_NE;
+}
+
+static int gvn_make_key(GvnCtx* ctx, const Inst* in, GvnKey* key) {
+ memset(key, 0, sizeof *key);
+ if (in->def == VAL_NONE || in->def >= ctx->f->nvals) return 0;
+ key->op = in->op;
+ key->type = ctx->f->val_type[in->def];
+ key->cls = ctx->f->val_cls[in->def];
+ key->tag = in->extra.imm;
+
+ switch ((IROp)in->op) {
+ case IR_LOAD_IMM:
+ case IR_CONST_I:
+ key->nops = 0;
+ key->tag = in->extra.imm;
+ return 1;
+ case IR_COPY:
+ if (in->nopnds < 2) return 0;
+ key->nops = 1;
+ return gvn_make_operand_key(ctx, &in->opnds[1], &key->ops[0]);
+ case IR_UNOP:
+ case IR_CONVERT:
+ if (in->nopnds < 2) return 0;
+ key->nops = 1;
+ return gvn_make_operand_key(ctx, &in->opnds[1], &key->ops[0]);
+ case IR_BINOP:
+ case IR_CMP:
+ if (in->nopnds < 3) return 0;
+ key->nops = 2;
+ if (!gvn_make_operand_key(ctx, &in->opnds[1], &key->ops[0]) ||
+ !gvn_make_operand_key(ctx, &in->opnds[2], &key->ops[1]))
+ return 0;
+ if (((IROp)in->op == IR_BINOP &&
+ gvn_commutative_binop((BinOp)in->extra.imm)) ||
+ ((IROp)in->op == IR_CMP &&
+ gvn_commutative_cmp((CmpOp)in->extra.imm))) {
+ if (gvn_operand_key_less(&key->ops[1], &key->ops[0])) {
+ GvnOperandKey tmp = key->ops[0];
+ key->ops[0] = key->ops[1];
+ key->ops[1] = tmp;
+ }
+ }
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+static void gvn_replace_one_use(Func* f, const OptUse* use, Val repl) {
+ Inst* in = &f->blocks[use->block].insts[use->inst];
+ switch ((OptUseKind)use->kind) {
+ case OPT_USE_OPERAND:
+ use->operand->v.reg = (Reg)repl;
+ use->operand->type = f->val_type[repl];
+ use->operand->cls = f->val_cls[repl];
+ break;
+ case OPT_USE_INDIRECT_BASE:
+ use->operand->v.ind.base = (Reg)repl;
+ break;
+ case OPT_USE_PHI_INPUT: {
+ IRPhiAux* aux = (IRPhiAux*)in->extra.aux;
+ if (aux && use->phi_pred_index < aux->npreds)
+ aux->pred_vals[use->phi_pred_index] = repl;
+ break;
+ }
+ default:
+ break;
+ }
+}
+
+static int gvn_replace_dominated_uses(GvnCtx* ctx, Val old, Val repl, u32 rb,
+ u32 ri) {
+ Func* f = ctx->f;
+ int changed = 0;
+ if (old == VAL_NONE || old >= f->nvals || repl == VAL_NONE ||
+ repl >= f->nvals || old == repl || !gvn_same_shape(f, old, repl))
+ return 0;
+ for (u32 u = f->opt_first_use_by_val[old]; u != OPT_USE_NONE;
+ u = f->opt_uses[u].next_for_val) {
+ OptUse* use = &f->opt_uses[u];
+ if (!gvn_leader_dominates_use(ctx, rb, ri, use)) continue;
+ gvn_replace_one_use(f, use, repl);
+ changed = 1;
+ }
+ if (changed) {
+ ctx->parent[old] = repl;
+ ctx->constants[old] = ctx->constants[gvn_find(ctx, repl)];
+ }
+ return changed;
+}
+
+static void gvn_note_const(GvnCtx* ctx, Inst* in) {
+ if (in->def == VAL_NONE || in->def >= ctx->f->nvals) return;
+ if ((IROp)in->op == IR_LOAD_IMM || (IROp)in->op == IR_CONST_I) {
+ ctx->constants[in->def].valid = 1;
+ ctx->constants[in->def].value = in->extra.imm;
+ }
+}
+
+static int gvn_scalar_candidate(const Inst* in) {
+ if (!in || in->def == VAL_NONE || in->ndefs) return 0;
+ switch ((IROp)in->op) {
+ case IR_CONST_I:
+ case IR_LOAD_IMM:
+ case IR_COPY:
+ case IR_BINOP:
+ case IR_UNOP:
+ case IR_CMP:
+ case IR_CONVERT:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+static int gvn_fold_branch(GvnCtx* ctx, u32 b, Inst* in) {
+ Block* bl = &ctx->f->blocks[b];
+ i64 a, c, taken;
+ u32 target;
+ switch ((IROp)in->op) {
+ case IR_CONDBR:
+ if (in->nopnds < 1 || bl->nsucc < 2) return 0;
+ if (in->opnds[0].kind != OPK_REG) return 0;
+ if (!gvn_const_for_operand(ctx, &in->opnds[0], &a)) return 0;
+ target = bl->succ[a != 0 ? 0u : 1u];
+ break;
+ case IR_CMP_BRANCH:
+ if (in->nopnds < 2 || bl->nsucc < 2) return 0;
+ if (in->opnds[0].kind != OPK_REG && in->opnds[1].kind != OPK_REG)
+ return 0;
+ if (!gvn_const_for_operand(ctx, &in->opnds[0], &a) ||
+ !gvn_const_for_operand(ctx, &in->opnds[1], &c))
+ return 0;
+ if (!gvn_fold_cmp(ctx->f, (CmpOp)in->extra.imm, in->opnds[0].type, a, c,
+ &taken))
+ return 0;
+ target = bl->succ[taken ? 0u : 1u];
+ break;
+ default:
+ return 0;
+ }
+
+ in->op = IR_BR;
+ in->def = VAL_NONE;
+ in->ndefs = 0;
+ in->defs = NULL;
+ in->nopnds = 0;
+ in->opnds = NULL;
+ bl->succ[0] = target;
+ bl->nsucc = 1;
+ return 1;
+}
+
+static void gvn_visit_inst(GvnCtx* ctx, u32 b, u32 i) {
+ Inst* in = &ctx->f->blocks[b].insts[i];
+ if (gvn_fold_branch(ctx, b, in)) {
+ ctx->changed = 1;
+ ctx->cfg_changed = 1;
+ return;
+ }
+ if (!gvn_scalar_candidate(in)) return;
+
+ if (gvn_try_fold_inst(ctx, in)) ctx->changed = 1;
+ gvn_note_const(ctx, in);
+
+ GvnKey key;
+ if (!gvn_make_key(ctx, in, &key)) return;
+
+ Val leader = VAL_NONE;
+ if (gvn_table_find(ctx, &key, b, i, &leader)) {
+ if (gvn_replace_dominated_uses(ctx, in->def, leader,
+ ctx->f->val_def_block[leader],
+ ctx->f->val_def_inst[leader]))
+ ctx->changed = 1;
+ return;
+ }
+ gvn_table_add(&ctx->table, &key, in->def, b, i);
+}
+
+static void gvn_realign_phi_preds(Func* f) {
+ for (u32 b = 0; b < f->nblocks; ++b) {
+ Block* bl = &f->blocks[b];
+ for (u32 i = 0; i < bl->ninsts; ++i) {
+ Inst* phi = &bl->insts[i];
+ if ((IROp)phi->op != IR_PHI) break;
+ IRPhiAux* aux = (IRPhiAux*)phi->extra.aux;
+ if (!aux) continue;
+ u32 old_n = aux->npreds;
+ u32* old_blocks = aux->pred_blocks;
+ Val* old_vals = aux->pred_vals;
+ u32* pred_blocks =
+ bl->npreds ? arena_array(f->arena, u32, bl->npreds) : NULL;
+ Val* pred_vals =
+ bl->npreds ? arena_zarray(f->arena, Val, bl->npreds) : NULL;
+ for (u32 p = 0; p < bl->npreds; ++p) {
+ pred_blocks[p] = bl->preds[p];
+ pred_vals[p] = phi->def;
+ for (u32 old = 0; old < old_n; ++old) {
+ if (old_blocks && old_blocks[old] == bl->preds[p]) {
+ pred_vals[p] = old_vals ? old_vals[old] : VAL_NONE;
+ break;
+ }
+ }
+ }
+ aux->npreds = bl->npreds;
+ aux->pred_blocks = pred_blocks;
+ aux->pred_vals = pred_vals;
+ }
+ }
+}
+
void opt_gvn(Func* f) {
- if (f) opt_rebuild_def_use(f);
+ if (!f || f->opt_rewritten) return;
+ if (!f->opt_reg_ssa && f->npregs > 1) {
+ opt_rebuild_def_use(f);
+ return;
+ }
+
+ opt_rebuild_def_use(f);
+ OptAnalysis a;
+ memset(&a, 0, sizeof a);
+ opt_analysis_build_dominators(f, &a);
+
+ GvnCtx ctx;
+ memset(&ctx, 0, sizeof ctx);
+ ctx.f = f;
+ ctx.analysis = &a;
+ ctx.parent = arena_array(f->arena, Val, f->nvals ? f->nvals : 1u);
+ ctx.constants = arena_zarray(f->arena, GvnConst, f->nvals ? f->nvals : 1u);
+ for (Val v = 0; v < f->nvals; ++v) ctx.parent[v] = v;
+ gvn_table_init(&ctx.table, f);
+
+ for (u32 ri = 0; ri < a.nrpo; ++ri) {
+ u32 b = a.rpo[ri];
+ if (b >= f->nblocks || !a.reachable[b]) continue;
+ for (u32 i = 0; i < f->blocks[b].ninsts; ++i) gvn_visit_inst(&ctx, b, i);
+ }
+
+ if (ctx.cfg_changed) {
+ opt_analysis_invalidate(f, OPT_ANALYSIS_CFG | OPT_ANALYSIS_DEF_USE |
+ OPT_ANALYSIS_DOM | OPT_ANALYSIS_LOOP);
+ opt_build_cfg(f);
+ gvn_realign_phi_preds(f);
+ } else if (ctx.changed) {
+ opt_analysis_invalidate(f, OPT_ANALYSIS_DEF_USE);
+ }
+ opt_rebuild_def_use(f);
}
void opt_dse(Func* f) {
diff --git a/test/opt/opt_test.c b/test/opt/opt_test.c
@@ -3565,7 +3565,7 @@ static void opt_combine_single_use_copy_and_imm(void) {
tc_fini(&tc);
}
-static void opt_combine_retargets_single_use_producer_copy(void) {
+static void opt_combine_preserves_producer_copy_after_rewrite(void) {
TestCtx tc;
tc_init(&tc);
@@ -3576,11 +3576,11 @@ static void opt_combine_retargets_single_use_producer_copy(void) {
emit_ret_val(f, f->entry, 22, tc.i32);
opt_combine(f);
- EXPECT(count_op(f, IR_BINOP) == 1 && count_op(f, IR_COPY) == 0,
- "single-use binop-copy should retarget producer and remove copy");
+ EXPECT(count_op(f, IR_BINOP) == 1 && count_op(f, IR_COPY) == 1,
+ "combine should preserve producer-copy pairs after rewrite");
Inst* add = &f->blocks[f->entry].insts[0];
- EXPECT(add->opnds[0].v.reg == 22,
- "retargeted binop should define the copy destination");
+ EXPECT(add->opnds[0].v.reg == 21,
+ "rewritten producer should keep its original destination");
Func* lhs = new_func(&tc);
lhs->opt_rewritten = 1;
@@ -3590,9 +3590,9 @@ static void opt_combine_retargets_single_use_producer_copy(void) {
opt_combine(lhs);
add = &lhs->blocks[lhs->entry].insts[0];
- EXPECT(count_op(lhs, IR_COPY) == 0 && add->opnds[0].v.reg == 20 &&
+ EXPECT(count_op(lhs, IR_COPY) == 1 && add->opnds[0].v.reg == 21 &&
add->opnds[1].v.reg == 20,
- "producer may retarget to its lhs operand");
+ "producer-copy preservation should keep lhs overlap unchanged");
Func* rhs = new_func(&tc);
rhs->opt_rewritten = 1;
@@ -3602,9 +3602,9 @@ static void opt_combine_retargets_single_use_producer_copy(void) {
opt_combine(rhs);
add = &rhs->blocks[rhs->entry].insts[0];
- EXPECT(count_op(rhs, IR_COPY) == 0 && add->opnds[0].v.reg == 20 &&
- add->opnds[1].v.reg == 20 && add->opnds[2].v.reg == 19,
- "commutative rhs overlap should swap operands before retargeting");
+ EXPECT(count_op(rhs, IR_COPY) == 1 && add->opnds[0].v.reg == 21 &&
+ add->opnds[1].v.reg == 19 && add->opnds[2].v.reg == 20,
+ "producer-copy preservation should keep rhs overlap unchanged");
Func* retreg = new_func(&tc);
retreg->opt_rewritten = 1;
@@ -4724,7 +4724,7 @@ int main(void) {
opt_regalloc_spill_requires_scratch();
opt_combine_spill_peeps();
opt_combine_single_use_copy_and_imm();
- opt_combine_retargets_single_use_producer_copy();
+ opt_combine_preserves_producer_copy_after_rewrite();
opt_combine_keeps_unsafe_and_multiuse_defs();
opt_combine_copy_chains_and_convert_pairs();
opt_dce_physical_dead_defs();