commit fabd386acadaae35207001867c0fefd28ab1aaf3
parent c9c0ce1781de35f3fe8b8c62610ca60e2f7eb097
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Thu, 21 May 2026 17:26:46 -0700
opt: add identity simplification pass
Diffstat:
8 files changed, 748 insertions(+), 43 deletions(-)
diff --git a/doc/OPT.md b/doc/OPT.md
@@ -433,6 +433,11 @@ ssa_dce
verify(o2-addr-xform-dce)
copy_cleanup
verify(o2-addr-xform-copy-cleanup)
+simplify
+verify(o2-simplify-ssa)
+ssa_dce
+copy_cleanup
+verify(o2-simplify-cleanup)
gvn
verify(o2-gvn-ssa)
copy_prop
@@ -452,7 +457,93 @@ shared O1 lowering pipeline (machinize ... jump_cleanup ... opt_emit)
Every CFG or value-use mutation above is followed by the relevant verifier
checkpoint in `opt_run_o2_pipeline`.
-### 4.3 Transformations We Do Not Take
+### 4.3 Identity Simplification
+
+Identity simplification is implemented in three homes, depending on how much
+context the rewrite needs.
+
+1. CG-time stack/value simplification, before IR recording.
+ This is the earliest and best place for identities where the operation can
+ simply avoid materializing a result. It benefits `-O0`, `-O1`, and `-O2`,
+ and it prevents avoidable temporaries from ever entering opt IR. Keep this
+ limited to rewrites that are local to the two values currently on the CG
+ stack, do not require CFG/use-def information, and are valid for every
+ target backend.
+
+ Current CG folds immediate integer binops, collapses delayed integer
+ arithmetic chains, collapses integer identities through
+ `api_try_collapse_binop_identity`, and `api_cg_convert` elides same-type
+ conversions. The identity set now includes safe unflagged integer cases such
+ as `x * 1`, `1 * x`, `x / 1`, `x % 1 -> 0`, `x << 0`, `x >> 0`, zero
+ annihilators for multiply/and, and all-ones annihilators for or.
+
+ Do not put target-dependent legality here. Do not fold floating-point
+ identities here: signed zero, NaNs, exceptions, contraction, and rounding
+ flags make algebraic FP identities nontrivial. Also keep the existing
+ `!flags` rule for integer folding unless a flag's exact semantics are
+ explicitly modeled.
+
+2. A tiny non-SSA `opt_simplify_local` in the shared lowering pipeline.
+ This runs after the initial `build_cfg`/`jump_cleanup`/`build_cfg` cleanup
+ and before `machinize`, so it is available to `-O1` and to the final lowering
+ leg of `-O2` after SSA has been undone:
+
+ ```
+ build_cfg
+ jump_cleanup(CFG)
+ build_cfg
+ simplify_local
+ machinize
+ ...
+ ```
+
+ This pass stays local and cheap. It rewrites one instruction at a time:
+ neutral/annihilator integer binops, `x - x -> 0`, `x ^ x -> 0`, `x & x` and
+ `x | x` to copies, same-value integer compares, and exact no-op converts.
+ It does not chase definitions, compute dominance, rewrite arbitrary uses, or
+ become a substitute for SSA value passes. Its job is to reduce obvious work
+ before liveness/RA without changing the meaning of O1 as the cheap backend
+ tier.
+
+ O1 should not grow a general value optimizer. If an O1 simplification needs
+ def-use, dominance, copy propagation, memory reasoning, or loop information,
+ it belongs in O2 instead. Post-RA `combine` remains the target-aware cleanup
+ point for no-op moves, immediate folding into legal machine operands, and
+ backend artifacts exposed only after register assignment.
+
+3. An SSA `opt_simplify` in the O2 pre-lowering value pipeline.
+ This is the primary home for scalar and address identities that require
+ use-def information. It runs after mem2reg/address cleanup has exposed SSA
+ values and before GVN builds value-number keys:
+
+ ```
+ build_ssa
+ ssa_dce
+ copy_cleanup
+ addr_xform
+ ssa_dce
+ copy_cleanup
+ simplify
+ ssa_dce
+ copy_cleanup
+ gvn
+ ```
+
+ This pass rewrites the defining instruction to `IR_COPY` or `IR_LOAD_IMM`
+ and relies on `opt_ssa_dce`/`opt_copy_cleanup` to remove leftovers. It
+ shares the local rules and adds SSA-only rules for double bitwise-not,
+ add-zero address/value chains fed by SSA constants, and no-op bitcast chains
+ that reduce to a copy. GVN still owns constant folding, leader selection,
+ redundant expression/load elimination, and branch folding; `opt_simplify`
+ canonicalizes cheap identities so GVN has fewer equivalent shapes to learn.
+
+The practical rule is: if CG can avoid emitting IR without looking beyond the
+top CG values, do it in CG. If the rewrite is a single recorded instruction and
+does not need SSA, put it in shared `simplify_local` so O1 benefits. If the
+rewrite needs def-use, dominance, memory/address canonicalization, or cleanup
+after inlining, put it in O2 `opt_simplify`.
+
+### 4.4 Transformations We Do Not Take
`doc/DESIGN.md` is still binding: no UB-exploiting transforms. Do not assume
signed overflow, shift-by-width, division by zero, or null dereference are
@@ -479,6 +570,9 @@ The optimizer is no longer just a stub:
conflict analysis, and conservative coalescing.
- `src/opt/pass_analysis.c` owns shared CFG order, dominators, dominance
frontiers, dominator children, def-use rebuilding, and verifier guardrails.
+- `src/opt/pass_simplify.c` implements CG-adjacent local identity cleanup for
+ the shared lowering path and SSA-aware identity canonicalization for O2 before
+ GVN.
- `cfree cc` forwards `-O0`/`-O1`/`-O2` into `CfreeCodeOptions`, so native
frontends exercise the optimizer through the normal compile driver.
- `test/toy/run.sh` supports `CFREE_TOY_OPT_LEVELS`; default is `0 1 2`, so
@@ -530,14 +624,13 @@ Current behavior:
atomic, global, TLS, non-zero indirect-offset, and otherwise non-local address
cases remain materialized. Both passes rebuild/invalidate analysis state and
are bracketed by `opt_ssa_dce`, `opt_copy_cleanup`, and verifier checkpoints.
-- The first post-address value passes are in the O2 path: scalar GVN rewrites
+- The first post-address value passes are in the O2 path: `opt_simplify`
+ canonicalizes cheap scalar/address identities before GVN; scalar GVN rewrites
dominated duplicate pure expressions and folds safe integer
constants/branches; the first memory-aware GVN slice rewrites exact
straight-line redundant loads and store/load reuse through the documented
alias/version model; and `opt_copy_prop` propagates SSA copy chains plus
- collapses redundant integer extension chains. Memory GVN is still local to
- the current dominating value table and does not yet model path-specific memory
- states at join blocks.
+ collapses redundant integer extension chains.
Temporary defensive/pessimizing checks to lift:
@@ -704,8 +797,9 @@ Implement in order:
5. [x] scalar-only `opt_gvn`
6. [x] `opt_copy_prop`
7. [x] first memory-aware `opt_gvn` / redundant load elimination tests
-8. [ ] scalar/address identity simplification before memory GVN
-9. [ ] extended memory tracker for joins/path state
+8. [x] CG identity expansion, shared `opt_simplify_local`, and O2
+ `opt_simplify` as described in Section 4.3
+9. [x] extended memory tracker for joins/path state
10. [ ] `opt_dse`
11. [ ] `opt_licm`
12. [ ] `opt_pressure_relief`
@@ -722,11 +816,11 @@ cleanup, block cloning, and address folding have made CFG mutation,
memory/address operand mutation, verifier coverage, and analysis invalidation
boring and reliable.
-The first memory GVN step is now landed as a straight-line, dominance-table
-slice. Keep later memory rewrites tied to the documented alias and memory
-numbering model: do not expand `IR_LOAD` rewrites or reason across `IR_STORE`,
-calls, atomics, fences, volatile operations, TLS, globals, or unknown memory
-without an explicit invalidation rule and pass-local tests.
+The path-aware memory GVN slice is now landed. Keep later memory rewrites tied
+to the documented alias and memory numbering model: do not expand `IR_LOAD`
+rewrites or reason across `IR_STORE`, calls, atomics, fences, volatile
+operations, TLS, globals, or unknown memory without an explicit invalidation
+rule and pass-local tests.
Completed pre-GVN slice exit criteria:
@@ -760,10 +854,11 @@ Memory numbering and alias invalidation model for memory GVN:
forwarded from or to, and also acts as an observable memory barrier for other
volatile operations.
- Calls increment the unknown token and invalidate all roots that can escape:
- globals, params, heap, TLS/global address roots, address-taken locals, and
- any local whose address has flowed to unknown memory. Non-address-taken
- promoted locals may keep their root version across a call. Later call attrs
- can narrow this, but the first memory GVN slice should assume calls clobber.
+ globals, params, heap, TLS/global address roots, and any local whose address
+ has flowed to a call argument, return value, unknown/global store, aggregate
+ memory op, inline asm, intrinsic, or unsupported address use. Non-escaping
+ local roots keep their root version across ordinary calls. Later call attrs
+ can narrow this further.
- Atomics, fences, inline asm with memory effects, `IR_AGG_COPY`,
`IR_AGG_SET`, bitfield stores, varargs memory operations, and runtime memory
intrinsics are full memory barriers until each has a precise alias rule.
@@ -778,36 +873,38 @@ Memory numbering and alias invalidation model for memory GVN:
invalidating barrier lies between the two through the version model. Store/load
reuse uses the same key and additionally requires the stored operand's value
to dominate the load.
+- At block joins, memory availability is intersected across reachable
+ predecessors. An entry survives only when every reachable predecessor has the
+ same memory key available with the same `Val`; otherwise the joined load is
+ preserved. This slice does not invent memory phis.
Current memory GVN slice:
- [x] Pass-local tests cover repeated exact local loads, store/load reuse for
- the same precise local, unknown-store barriers, call barriers, and
- volatile/atomic exclusions.
+ the same precise local, path-aware branch joins, loop-header backedge
+ conservatism, unknown-store barriers, call preservation for non-escaped
+ locals, escaped-local call clobbers, and volatile/atomic exclusions.
- [x] The pass maintains per-root plus unknown versions, treats calls and
conservative memory ops as barriers, and can see through simple `addr_of` plus
- zero-index address materialization when both memory operations refer to the
- same precise root.
+ zero-index/address-minus-zero materialization when both memory operations
+ refer to the same precise root.
- [x] AArch64 disassembly sanity check: a direct address-taken store followed by
a load of the same address reuses the stored value, removing the post-store
`ldur`.
-- [ ] Extend the tracker from a single RPO walk table to memory state snapshots
- at block boundaries. Join blocks need a memory-phi/equivalent availability
- model so stores from all predecessors to the same root/version can make a
- joined load available. This is why `128_o2_branch_join_addr_mem` still keeps
- the final stack load after the branch join.
-- [ ] Record root escape information precisely enough that calls can preserve
- non-escaping address-taken locals, while globals/params/heap/TLS and unknown
- roots remain conservatively clobbered.
-
-Scalar/address identity simplification should run before memory GVN, and the
-memory key builder should also tolerate the normalized forms. Red-green cases
-should include integer `x + 0`, `0 + x`, `x - 0`, `x * 1`, `1 * x`, `x / 1`,
-bitwise `x | 0`, `x ^ 0`, `x & -1`, shifts by zero, copies through identity
-conversions, and pointer/index forms such as `base + 0`, `gep/index 0`, and
-address arithmetic that only changes representation. These rewrites should be
-typed and legality-aware: do not erase operations with trapping, overflow, FP
-rounding/NaN/sign-zero semantics, volatile/atomic memory effects, or target
+- [x] The tracker uses memory state snapshots at block boundaries. Join blocks
+ reuse memory only when all reachable predecessors agree on the same available
+ key/value pair; otherwise no memory phi is invented and the load stays.
+- [x] Root escape information is precise enough for calls to preserve
+ non-escaping address-taken locals, while globals/params/heap/TLS, escaped
+ locals, and unknown roots remain conservatively clobbered.
+
+Scalar/address identity simplification now runs before memory GVN. Red-green
+coverage includes integer neutral/annihilator identities, same-register
+integer identities, same-value integer compares, exact no-op conversions,
+double bitwise-not, add-zero address chains, and a toy end-to-end identity
+chain at `-O0`, `-O1`, and `-O2`. These rewrites stay typed and
+legality-aware: they do not erase operations with trapping, flagged overflow,
+FP rounding/NaN/sign-zero semantics, volatile/atomic memory effects, or target
addressing constraints that have not been modeled.
Remaining Phase D exit criteria:
@@ -820,11 +917,13 @@ Remaining Phase D exit criteria:
extension-chain cleanup, with a focused toy O2 fixture.
- [x] AArch64 O2 disassembly sanity checks show `130_o2_copy_prop_ext`
collapsing the widening/copy chain to a single byte load/zero-extend path,
- and the first memory GVN slice removing a straight-line post-store stack load.
- `128_o2_branch_join_addr_mem` still keeps the join load pending extended
- memory-state tracking.
-- [ ] Identity simplification, extended memory tracking, DSE, LICM, pressure
- relief, SSA combine, and full O2 jump optimization
+ and memory GVN removing redundant post-store stack loads in covered cases.
+ `128_o2_branch_join_addr_mem` now covers both all-predecessors-agree and
+ predecessor-disagreement branch stores.
+- [x] Identity simplification has focused pass-local tests, a toy end-to-end
+ fixture, and an AArch64 disassembly sanity check showing identity arithmetic
+ removed from the generated code.
+- [ ] DSE, LICM, pressure relief, SSA combine, and full O2 jump optimization
each have focused red-green tests before they enter the default O2 schedule.
### Phase E - Inlining and Cleanup
@@ -891,6 +990,7 @@ src/opt/
pass_cfg.c CFG, unreachable cleanup
pass_o2.c early O2 transforms: block cloning, address transform,
scalar GVN, temporary DSE/cleanup stubs
+ pass_simplify.c local and SSA-aware scalar/address identity simplification
pass_clone.c block cloning (split out when pass size warrants it)
pass_ssa.c SSA build, conventional SSA, undo SSA
pass_addr.c address transformation (split out when pass size warrants it)
diff --git a/src/cg/value.c b/src/cg/value.c
@@ -1295,7 +1295,14 @@ int api_op_is_int_identity(CfreeCg* g, BinOp op, CfreeCgTypeId ty, i64 imm) {
case BO_ISUB:
case BO_OR:
case BO_XOR:
+ case BO_SHL:
+ case BO_SHR_S:
+ case BO_SHR_U:
return v == 0;
+ case BO_IMUL:
+ case BO_SDIV:
+ case BO_UDIV:
+ return v == 1;
case BO_AND:
return v == api_width_mask(width);
default:
@@ -1306,6 +1313,15 @@ int api_op_is_int_identity(CfreeCg* g, BinOp op, CfreeCgTypeId ty, i64 imm) {
int api_try_collapse_binop_identity(CfreeCg* g, BinOp op, CfreeCgTypeId ty,
ApiSValue* a, ApiSValue* b,
ApiSValue* out) {
+ u32 width;
+ u64 av = 0;
+ u64 bv = 0;
+ if (!api_foldable_int_type(g->c, ty, &width)) return 0;
+ if (a->kind == SV_OPERAND && a->op.kind == OPK_IMM)
+ av = api_mask_width((u64)a->op.v.imm, width);
+ if (b->kind == SV_OPERAND && b->op.kind == OPK_IMM)
+ bv = api_mask_width((u64)b->op.v.imm, width);
+
if (b->kind == SV_OPERAND && b->op.kind == OPK_IMM && a->kind == SV_OPERAND &&
a->op.kind != OPK_IMM && api_op_is_int_identity(g, op, ty, b->op.v.imm)) {
*out = api_make_sv_with_reg_ownership(a->op, ty,
@@ -1313,15 +1329,47 @@ int api_try_collapse_binop_identity(CfreeCg* g, BinOp op, CfreeCgTypeId ty,
a->res = RES_INHERENT;
return 1;
}
+ if (b->kind == SV_OPERAND && b->op.kind == OPK_IMM && a->kind == SV_OPERAND &&
+ a->op.kind != OPK_IMM &&
+ (op == BO_SREM || op == BO_UREM || op == BO_IMUL || op == BO_AND ||
+ op == BO_OR)) {
+ if ((op == BO_SREM || op == BO_UREM) && bv == 1) {
+ *out = api_make_sv(api_op_imm(0, ty), ty);
+ return 1;
+ }
+ if ((op == BO_IMUL || op == BO_AND) && bv == 0) {
+ *out = api_make_sv(api_op_imm(0, ty), ty);
+ return 1;
+ }
+ if (op == BO_OR && bv == api_width_mask(width)) {
+ *out = api_make_sv(api_op_imm(api_fold_result(g->c, ty, bv, width), ty),
+ ty);
+ return 1;
+ }
+ }
if (a->kind == SV_OPERAND && a->op.kind == OPK_IMM && b->kind == SV_OPERAND &&
b->op.kind != OPK_IMM &&
- (op == BO_IADD || op == BO_OR || op == BO_XOR || op == BO_AND) &&
+ (op == BO_IADD || op == BO_IMUL || op == BO_OR || op == BO_XOR ||
+ op == BO_AND) &&
api_op_is_int_identity(g, op, ty, a->op.v.imm)) {
*out = api_make_sv_with_reg_ownership(b->op, ty,
api_sv_owns_operand_reg(b, &b->op));
b->res = RES_INHERENT;
return 1;
}
+ if (a->kind == SV_OPERAND && a->op.kind == OPK_IMM && b->kind == SV_OPERAND &&
+ b->op.kind != OPK_IMM &&
+ (op == BO_IMUL || op == BO_AND || op == BO_OR)) {
+ if ((op == BO_IMUL || op == BO_AND) && av == 0) {
+ *out = api_make_sv(api_op_imm(0, ty), ty);
+ return 1;
+ }
+ if (op == BO_OR && av == api_width_mask(width)) {
+ *out = api_make_sv(api_op_imm(api_fold_result(g->c, ty, av, width), ty),
+ ty);
+ return 1;
+ }
+ }
return 0;
}
diff --git a/src/opt/opt.c b/src/opt/opt.c
@@ -1329,6 +1329,7 @@ static void opt_run_lowering_pipeline(OptImpl* o, const char* total_scope,
opt_build_cfg(o->f);
opt_jump_cleanup(o->f, OPT_JUMP_CLEANUP_CFG);
opt_build_cfg(o->f);
+ opt_simplify_local(o->f);
opt_verify(o->f, "lowering-cfg");
metrics_scope_end(o->c, "opt.build_cfg");
metrics_scope_begin(o->c, "opt.machinize");
@@ -1427,6 +1428,11 @@ static void opt_run_o2_pipeline(OptImpl* o) {
opt_verify(o->f, "o2-addr-xform-dce");
opt_copy_cleanup(o->f);
opt_verify(o->f, "o2-addr-xform-copy-cleanup");
+ opt_simplify(o->f);
+ opt_verify(o->f, "o2-simplify-ssa");
+ opt_ssa_dce(o->f);
+ opt_copy_cleanup(o->f);
+ opt_verify(o->f, "o2-simplify-cleanup");
opt_gvn(o->f);
opt_verify(o->f, "o2-gvn-ssa");
opt_copy_prop(o->f);
diff --git a/src/opt/opt.h b/src/opt/opt.h
@@ -38,6 +38,8 @@ void opt_block_cloning(Func*);
void opt_build_reg_ssa(Func*);
void opt_build_ssa(Func*);
void opt_addr_xform(Func*);
+void opt_simplify_local(Func*);
+void opt_simplify(Func*);
void opt_gvn(Func*); /* incl. constprop, redundant-load elim */
void opt_copy_cleanup(Func*);
void opt_copy_prop(Func*); /* incl. redundant-extension elim */
diff --git a/src/opt/pass_simplify.c b/src/opt/pass_simplify.c
@@ -0,0 +1,352 @@
+#include <string.h>
+
+#include <cfree/cg.h>
+
+#include "opt/opt_internal.h"
+
+static u32 simplify_width(Func* f, CfreeCgTypeId ty) {
+ u32 width = cfree_cg_type_int_width((CfreeCompiler*)f->c, ty);
+ if (width && width <= 64u) return width;
+ if (cfree_cg_type_kind((CfreeCompiler*)f->c, ty) == CFREE_CG_TYPE_PTR) {
+ u64 size = cfree_cg_type_size((CfreeCompiler*)f->c, ty);
+ if (size && size <= 8u) return (u32)(size * 8u);
+ }
+ return 0;
+}
+
+static u64 width_mask(u32 width) {
+ return width >= 64u ? UINT64_MAX : ((1ull << width) - 1ull);
+}
+
+static int imm_value(Func* f, const Operand* op, u32 width, i64* out) {
+ (void)f;
+ if (!op || op->kind != OPK_IMM) return 0;
+ *out = (i64)((u64)op->v.imm & width_mask(width));
+ return 1;
+}
+
+static Inst* def_inst(Func* f, Val v) {
+ if (!f || v == VAL_NONE || v >= f->nvals) return NULL;
+ u32 b = f->val_def_block[v];
+ u32 i = f->val_def_inst[v];
+ if (b >= f->nblocks || i >= f->blocks[b].ninsts) return NULL;
+ Inst* in = &f->blocks[b].insts[i];
+ return in->def == v ? in : NULL;
+}
+
+static int val_load_imm(Func* f, Val v, i64* out) {
+ Inst* in = def_inst(f, v);
+ if (!in || ((IROp)in->op != IR_LOAD_IMM && (IROp)in->op != IR_CONST_I))
+ return 0;
+ *out = in->extra.imm;
+ return 1;
+}
+
+static int operand_const(Func* f, const Operand* op, u32 width, i64* out) {
+ if (imm_value(f, op, width, out)) return 1;
+ if (!f->opt_reg_ssa || !op || op->kind != OPK_REG) return 0;
+ if (!val_load_imm(f, (Val)op->v.reg, out)) return 0;
+ *out = (i64)((u64)*out & width_mask(width));
+ return 1;
+}
+
+static int same_reg(const Operand* a, const Operand* b) {
+ return a && b && a->kind == OPK_REG && b->kind == OPK_REG &&
+ a->v.reg == b->v.reg;
+}
+
+static int same_shape(Func* f, Val dst, Val src) {
+ if (dst == src) return 1;
+ if (dst == VAL_NONE || src == VAL_NONE || dst >= f->nvals || src >= f->nvals)
+ return 0;
+ return f->val_type[dst] == f->val_type[src] &&
+ f->val_cls[dst] == f->val_cls[src];
+}
+
+static void make_load_imm(Func* f, Inst* in, i64 value) {
+ Operand* opnds = in->opnds;
+ Val dst = in->def;
+ CfreeCgTypeId ty = in->type;
+ u8 cls = RC_INT;
+ if (dst != VAL_NONE && dst < f->nvals) {
+ ty = f->val_type[dst] ? f->val_type[dst] : ty;
+ cls = f->val_cls[dst];
+ } else if (in->nopnds >= 1) {
+ ty = in->opnds[0].type;
+ cls = in->opnds[0].cls;
+ }
+ if (!opnds) opnds = arena_array(f->arena, Operand, 1);
+ memset(&opnds[0], 0, sizeof opnds[0]);
+ opnds[0].kind = OPK_REG;
+ opnds[0].type = ty;
+ opnds[0].cls = cls;
+ opnds[0].v.reg = (Reg)(dst != VAL_NONE ? dst : in->opnds[0].v.reg);
+ in->op = IR_LOAD_IMM;
+ in->type = ty;
+ in->opnds = opnds;
+ in->nopnds = 1;
+ in->extra.imm = value;
+}
+
+static void make_copy(Func* f, Inst* in, const Operand* src) {
+ Operand* opnds = in->opnds;
+ Val dst = in->def;
+ CfreeCgTypeId dst_ty = in->type;
+ u8 dst_cls = RC_INT;
+ if (dst != VAL_NONE && dst < f->nvals) {
+ dst_ty = f->val_type[dst] ? f->val_type[dst] : dst_ty;
+ dst_cls = f->val_cls[dst];
+ } else if (in->nopnds >= 1) {
+ dst_ty = in->opnds[0].type;
+ dst_cls = in->opnds[0].cls;
+ }
+ if (!opnds) opnds = arena_array(f->arena, Operand, 2);
+ memset(&opnds[0], 0, sizeof opnds[0]);
+ opnds[0].kind = OPK_REG;
+ opnds[0].type = dst_ty;
+ opnds[0].cls = dst_cls;
+ opnds[0].v.reg = (Reg)(dst != VAL_NONE ? dst : in->opnds[0].v.reg);
+ opnds[1] = *src;
+ in->op = IR_COPY;
+ in->type = dst_ty;
+ in->opnds = opnds;
+ in->nopnds = 2;
+}
+
+static int convert_noop(Func* f, const Inst* in) {
+ if (!in || (IROp)in->op != IR_CONVERT || in->nopnds < 2) return 0;
+ CfreeCgTypeId dst_ty = in->opnds[0].type;
+ CfreeCgTypeId src_ty = in->opnds[1].type;
+ if (dst_ty != src_ty) return 0;
+ if (simplify_width(f, dst_ty) != simplify_width(f, src_ty)) return 0;
+ switch ((ConvKind)in->extra.imm) {
+ case CV_ZEXT:
+ case CV_SEXT:
+ case CV_TRUNC:
+ case CV_BITCAST:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+static int simplify_binop(Func* f, Inst* in, int ssa) {
+ if (!in || (IROp)in->op != IR_BINOP || in->flags || in->nopnds < 3)
+ return 0;
+ if (in->opnds[0].kind != OPK_REG) return 0;
+ u32 width = simplify_width(f, in->type ? in->type : in->opnds[0].type);
+ if (!width) return 0;
+
+ Operand* a = &in->opnds[1];
+ Operand* b = &in->opnds[2];
+ i64 av = 0;
+ i64 bv = 0;
+ int ac = ssa ? operand_const(f, a, width, &av) : imm_value(f, a, width, &av);
+ int bc = ssa ? operand_const(f, b, width, &bv) : imm_value(f, b, width, &bv);
+ u64 all = width_mask(width);
+
+ switch ((BinOp)in->extra.imm) {
+ case BO_IADD:
+ if (bc && bv == 0 && a->kind == OPK_REG) {
+ make_copy(f, in, a);
+ return 1;
+ }
+ if (ac && av == 0 && b->kind == OPK_REG) {
+ make_copy(f, in, b);
+ return 1;
+ }
+ break;
+ case BO_ISUB:
+ if (bc && bv == 0 && a->kind == OPK_REG) {
+ make_copy(f, in, a);
+ return 1;
+ }
+ if (same_reg(a, b)) {
+ make_load_imm(f, in, 0);
+ return 1;
+ }
+ break;
+ case BO_IMUL:
+ if (bc && bv == 1 && a->kind == OPK_REG) {
+ make_copy(f, in, a);
+ return 1;
+ }
+ if (ac && av == 1 && b->kind == OPK_REG) {
+ make_copy(f, in, b);
+ return 1;
+ }
+ if ((bc && bv == 0) || (ac && av == 0)) {
+ make_load_imm(f, in, 0);
+ return 1;
+ }
+ break;
+ case BO_SDIV:
+ case BO_UDIV:
+ if (bc && bv == 1 && a->kind == OPK_REG) {
+ make_copy(f, in, a);
+ return 1;
+ }
+ break;
+ case BO_SREM:
+ case BO_UREM:
+ if (bc && bv == 1) {
+ make_load_imm(f, in, 0);
+ return 1;
+ }
+ break;
+ case BO_AND:
+ if (bc && (u64)bv == all && a->kind == OPK_REG) {
+ make_copy(f, in, a);
+ return 1;
+ }
+ if (ac && av == (i64)all && b->kind == OPK_REG) {
+ make_copy(f, in, b);
+ return 1;
+ }
+ if ((bc && bv == 0) || (ac && av == 0)) {
+ make_load_imm(f, in, 0);
+ return 1;
+ }
+ if (same_reg(a, b)) {
+ make_copy(f, in, a);
+ return 1;
+ }
+ break;
+ case BO_OR:
+ if (bc && bv == 0 && a->kind == OPK_REG) {
+ make_copy(f, in, a);
+ return 1;
+ }
+ if (ac && av == 0 && b->kind == OPK_REG) {
+ make_copy(f, in, b);
+ return 1;
+ }
+ if ((bc && (u64)bv == all) || (ac && (u64)av == all)) {
+ make_load_imm(f, in, (i64)all);
+ return 1;
+ }
+ if (same_reg(a, b)) {
+ make_copy(f, in, a);
+ return 1;
+ }
+ break;
+ case BO_XOR:
+ if (bc && bv == 0 && a->kind == OPK_REG) {
+ make_copy(f, in, a);
+ return 1;
+ }
+ if (ac && av == 0 && b->kind == OPK_REG) {
+ make_copy(f, in, b);
+ return 1;
+ }
+ if (same_reg(a, b)) {
+ make_load_imm(f, in, 0);
+ return 1;
+ }
+ break;
+ case BO_SHL:
+ case BO_SHR_S:
+ case BO_SHR_U:
+ if (bc && bv == 0 && a->kind == OPK_REG) {
+ make_copy(f, in, a);
+ return 1;
+ }
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int simplify_cmp(Func* f, Inst* in) {
+ if (!in || (IROp)in->op != IR_CMP || in->nopnds < 3) return 0;
+ if (!same_reg(&in->opnds[1], &in->opnds[2])) return 0;
+ if (!simplify_width(f, in->opnds[1].type)) return 0;
+ switch ((CmpOp)in->extra.imm) {
+ case CMP_EQ:
+ case CMP_LE_S:
+ case CMP_LE_U:
+ case CMP_GE_S:
+ case CMP_GE_U:
+ make_load_imm(f, in, 1);
+ return 1;
+ case CMP_NE:
+ case CMP_LT_S:
+ case CMP_LT_U:
+ case CMP_GT_S:
+ case CMP_GT_U:
+ make_load_imm(f, in, 0);
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+static int simplify_unop_ssa(Func* f, Inst* in) {
+ if (!f->opt_reg_ssa || !in || (IROp)in->op != IR_UNOP || in->nopnds < 2 ||
+ (UnOp)in->extra.imm != UO_BNOT || in->opnds[1].kind != OPK_REG)
+ return 0;
+ Inst* inner = def_inst(f, (Val)in->opnds[1].v.reg);
+ if (!inner || (IROp)inner->op != IR_UNOP || inner->nopnds < 2 ||
+ (UnOp)inner->extra.imm != UO_BNOT || inner->opnds[1].kind != OPK_REG)
+ return 0;
+ if (!same_shape(f, in->def, (Val)inner->opnds[1].v.reg)) return 0;
+ make_copy(f, in, &inner->opnds[1]);
+ return 1;
+}
+
+static int simplify_convert_chain_ssa(Func* f, Inst* in) {
+ if (!f->opt_reg_ssa || !in || (IROp)in->op != IR_CONVERT || in->nopnds < 2 ||
+ in->opnds[1].kind != OPK_REG)
+ return 0;
+ Inst* inner = def_inst(f, (Val)in->opnds[1].v.reg);
+ if (!inner || (IROp)inner->op != IR_CONVERT || inner->nopnds < 2 ||
+ inner->opnds[1].kind != OPK_REG)
+ return 0;
+ if ((ConvKind)in->extra.imm != CV_BITCAST ||
+ (ConvKind)inner->extra.imm != CV_BITCAST)
+ return 0;
+ if (in->opnds[0].type != inner->opnds[1].type) return 0;
+ if (!same_shape(f, in->def, (Val)inner->opnds[1].v.reg)) return 0;
+ u32 dw = simplify_width(f, in->opnds[0].type);
+ u32 sw = simplify_width(f, inner->opnds[1].type);
+ if (!dw || dw != sw) return 0;
+ make_copy(f, in, &inner->opnds[1]);
+ return 1;
+}
+
+static int simplify_one(Func* f, Inst* in, int ssa) {
+ switch ((IROp)in->op) {
+ case IR_BINOP:
+ return simplify_binop(f, in, ssa);
+ case IR_CMP:
+ return simplify_cmp(f, in);
+ case IR_CONVERT:
+ if (convert_noop(f, in)) {
+ make_copy(f, in, &in->opnds[1]);
+ return 1;
+ }
+ return ssa ? simplify_convert_chain_ssa(f, in) : 0;
+ case IR_UNOP:
+ return ssa ? simplify_unop_ssa(f, in) : 0;
+ default:
+ return 0;
+ }
+}
+
+static void simplify_run(Func* f, int ssa) {
+ if (!f || f->opt_rewritten) return;
+ if (ssa) opt_rebuild_def_use(f);
+ int changed = 0;
+ for (u32 b = 0; b < f->nblocks; ++b) {
+ Block* bl = &f->blocks[b];
+ for (u32 i = 0; i < bl->ninsts; ++i)
+ if (simplify_one(f, &bl->insts[i], ssa)) changed = 1;
+ }
+ if (changed) opt_analysis_invalidate(f, OPT_ANALYSIS_DEF_USE);
+ if (ssa || changed) opt_rebuild_def_use(f);
+}
+
+void opt_simplify_local(Func* f) { simplify_run(f, 0); }
+
+void opt_simplify(Func* f) { simplify_run(f, 1); }
diff --git a/test/opt/opt_test.c b/test/opt/opt_test.c
@@ -2571,6 +2571,177 @@ static void opt_addr_xform_preserves_volatile_and_globals(void) {
tc_fini(&tc);
}
+static void opt_simplify_local_rewrites_integer_identities(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ Func* f = new_func(&tc);
+ Val x = add_val(f, tc.i32);
+ Val mul = add_val(f, tc.i32);
+ Val rem = add_val(f, tc.i32);
+ Val self_xor = add_val(f, tc.i32);
+ Val self_or = add_val(f, tc.i32);
+ Val cmp = add_val(f, tc.i32);
+ Val cv = add_val(f, tc.i32);
+ emit_scalar_input(f, f->entry, x, tc.i32);
+ Inst* in = emit_binop(f, f->entry, mul, x, x, tc.i32);
+ in->extra.imm = BO_IMUL;
+ in->opnds[2] = op_imm_(1, tc.i32);
+ in = emit_binop(f, f->entry, rem, x, x, tc.i32);
+ in->extra.imm = BO_SREM;
+ in->opnds[2] = op_imm_(1, tc.i32);
+ in = emit_binop(f, f->entry, self_xor, x, x, tc.i32);
+ in->extra.imm = BO_XOR;
+ in = emit_binop(f, f->entry, self_or, x, x, tc.i32);
+ in->extra.imm = BO_OR;
+ emit_cmp(f, f->entry, cmp, x, x, tc.i32, CMP_EQ);
+ emit_convert(f, f->entry, cv, x, tc.i32, CV_ZEXT);
+ emit_ret_val(f, f->entry, self_or, tc.i32);
+
+ opt_build_cfg(f);
+ opt_simplify_local(f);
+ opt_verify(f, "test-simplify-local");
+ EXPECT((IROp)def_inst(f, mul)->op == IR_COPY,
+ "x * 1 should become a copy");
+ EXPECT(val_is_load_imm(f, rem, 0), "x %% 1 should become zero");
+ EXPECT(val_is_load_imm(f, self_xor, 0), "x ^ x should become zero");
+ EXPECT((IROp)def_inst(f, self_or)->op == IR_COPY,
+ "x | x should become a copy");
+ EXPECT(val_is_load_imm(f, cmp, 1), "x == x should become true");
+ EXPECT((IROp)def_inst(f, cv)->op == IR_COPY,
+ "exact no-op convert should become a copy");
+ tc_fini(&tc);
+}
+
+static void opt_simplify_local_preserves_unsafe_cases(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ Func* f = new_func(&tc);
+ Val x = add_val(f, tc.i32);
+ Val flagged = add_val(f, tc.i32);
+ Val div_self = add_val(f, tc.i32);
+ Val fp = add_val_cls(f, tc.f64, RC_FP);
+ Val fp_add = add_val_cls(f, tc.f64, RC_FP);
+ emit_scalar_input(f, f->entry, x, tc.i32);
+ emit_scalar_input(f, f->entry, fp, tc.f64);
+ Inst* in = emit_binop(f, f->entry, flagged, x, x, tc.i32);
+ in->extra.imm = BO_IMUL;
+ in->opnds[2] = op_imm_(1, tc.i32);
+ in->flags = 1;
+ in = emit_binop(f, f->entry, div_self, x, x, tc.i32);
+ in->extra.imm = BO_SDIV;
+ in = emit_binop(f, f->entry, fp_add, fp, fp, tc.f64);
+ in->extra.imm = BO_FADD;
+ in->opnds[2].kind = OPK_IMM;
+ in->opnds[2].cls = RC_FP;
+ in->opnds[2].type = tc.f64;
+ in->opnds[2].v.imm = 0;
+ emit_ret_val(f, f->entry, div_self, tc.i32);
+
+ opt_build_cfg(f);
+ opt_simplify_local(f);
+ opt_verify(f, "test-simplify-local-unsafe");
+ EXPECT((IROp)def_inst(f, flagged)->op == IR_BINOP,
+ "flagged binop should not be simplified");
+ EXPECT((IROp)def_inst(f, div_self)->op == IR_BINOP,
+ "x / x should not be simplified");
+ EXPECT((IROp)def_inst(f, fp_add)->op == IR_BINOP,
+ "FP identity should not be simplified");
+ tc_fini(&tc);
+}
+
+static void opt_simplify_rewrites_ssa_nested_identities(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ Func* f = new_func(&tc);
+ f->opt_reg_ssa = 1;
+ Val x = add_val(f, tc.i32);
+ Val n = add_val(f, tc.i32);
+ Val nn = add_val(f, tc.i32);
+ Val z = add_val(f, tc.i32);
+ Val add = add_val(f, tc.i32);
+ emit_scalar_input(f, f->entry, x, tc.i32);
+ emit_unop(f, f->entry, n, x, tc.i32, UO_BNOT);
+ emit_unop(f, f->entry, nn, n, tc.i32, UO_BNOT);
+ emit_load_imm(f, f->entry, z, tc.i32, 0);
+ Inst* in = emit_binop(f, f->entry, add, nn, z, tc.i32);
+ in->extra.imm = BO_IADD;
+ emit_ret_val(f, f->entry, add, tc.i32);
+
+ opt_build_cfg(f);
+ opt_simplify(f);
+ opt_verify(f, "test-simplify-ssa-nested");
+ EXPECT((IROp)def_inst(f, nn)->op == IR_COPY &&
+ def_inst(f, nn)->opnds[1].v.reg == (Reg)x,
+ "double bitwise-not should become copy of original source");
+ EXPECT((IROp)def_inst(f, add)->op == IR_COPY &&
+ def_inst(f, add)->opnds[1].v.reg == (Reg)nn,
+ "SSA add-zero value should become a copy");
+ tc_fini(&tc);
+}
+
+static void opt_simplify_canonicalizes_add_zero_address_chain(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ CfreeCgTypeId ptr_ty = cfree_cg_type_ptr(tc.c, tc.i32, 0);
+ Func* f = new_func(&tc);
+ f->opt_reg_ssa = 1;
+ FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, FSF_ADDR_TAKEN);
+ Val addr = add_val(f, ptr_ty);
+ Val zero = add_val(f, ptr_ty);
+ Val addr2 = add_val(f, ptr_ty);
+ Val out = add_val(f, tc.i32);
+ emit_addr_of_local(f, f->entry, addr, fs, ptr_ty, tc.i32);
+ emit_load_imm(f, f->entry, zero, ptr_ty, 0);
+ Inst* in = emit_binop(f, f->entry, addr2, addr, zero, ptr_ty);
+ in->extra.imm = BO_IADD;
+ emit_load_indirect(f, f->entry, out, addr2, tc.i32, 0);
+ emit_ret_val(f, f->entry, out, tc.i32);
+
+ opt_build_cfg(f);
+ opt_simplify(f);
+ opt_copy_cleanup(f);
+ opt_verify(f, "test-simplify-addr-zero");
+ EXPECT(f->blocks[f->entry].insts[2].op != IR_BINOP,
+ "add-zero address chain should be canonicalized away");
+ EXPECT(f->blocks[f->entry].insts[2].op == IR_LOAD &&
+ f->blocks[f->entry].insts[2].opnds[1].v.ind.base == (Reg)addr,
+ "load should use the canonical address value");
+ tc_fini(&tc);
+}
+
+static void opt_simplify_feeds_gvn_with_canonical_shape(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ Func* f = new_func(&tc);
+ f->opt_reg_ssa = 1;
+ FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, 0);
+ Val a = add_val(f, tc.i32);
+ Val b = add_val(f, tc.i32);
+ Val zero = add_val(f, tc.i32);
+ Val a0 = add_val(f, tc.i32);
+ Val first = add_val(f, tc.i32);
+ Val second = add_val(f, tc.i32);
+ emit_scalar_input(f, f->entry, a, tc.i32);
+ emit_scalar_input(f, f->entry, b, tc.i32);
+ emit_load_imm(f, f->entry, zero, tc.i32, 0);
+ Inst* in = emit_binop(f, f->entry, a0, a, zero, tc.i32);
+ in->extra.imm = BO_IADD;
+ emit_binop(f, f->entry, first, a, b, tc.i32);
+ emit_store_local(f, f->entry, fs, first, tc.i32, 0);
+ emit_binop(f, f->entry, second, a0, b, tc.i32);
+ emit_ret_val(f, f->entry, second, tc.i32);
+
+ opt_build_cfg(f);
+ opt_simplify(f);
+ opt_ssa_dce(f);
+ opt_copy_cleanup(f);
+ opt_gvn(f);
+ opt_verify(f, "test-simplify-feeds-gvn");
+ EXPECT(ret_val(f, f->entry) == first,
+ "simplify should expose duplicate scalar shape to GVN");
+ tc_fini(&tc);
+}
+
static void opt_gvn_rewrites_same_block_scalar_duplicate(void) {
TestCtx tc;
tc_init(&tc);
@@ -5390,6 +5561,11 @@ int main(void) {
opt_block_cloning_skips_loop_backedges();
opt_addr_xform_folds_local_addr_into_memory_operand();
opt_addr_xform_preserves_volatile_and_globals();
+ opt_simplify_local_rewrites_integer_identities();
+ opt_simplify_local_preserves_unsafe_cases();
+ opt_simplify_rewrites_ssa_nested_identities();
+ opt_simplify_canonicalizes_add_zero_address_chain();
+ opt_simplify_feeds_gvn_with_canonical_shape();
opt_gvn_rewrites_same_block_scalar_duplicate();
opt_gvn_rewrites_dominated_scalar_duplicate();
opt_gvn_preserves_nondominated_scalar_duplicates();
diff --git a/test/toy/cases/132_identity_simplify.expected b/test/toy/cases/132_identity_simplify.expected
@@ -0,0 +1 @@
+37
diff --git a/test/toy/cases/132_identity_simplify.toy b/test/toy/cases/132_identity_simplify.toy
@@ -0,0 +1,20 @@
+fn identity_chain(x: i64): i64 {
+ var y: i64 = x;
+ y = y * 1;
+ y = y + 0;
+ y = y ^ 0;
+ y = y | 0;
+ y = y & -1;
+ y = y << 0;
+ y = y >> 0;
+ let z: i64 = ~~y;
+ var slot: i64 = z % 1;
+ let p: *i64 = &slot;
+ return z + p[0];
+}
+
+fn __user_main(): i64 {
+ return identity_chain(37);
+}
+
+fn main(): i32 { return __user_main() as i32; }