commit cfe4be02ba8c5cb7e022bb550458fbcd40ef7d14
parent 892d1268979e7a1df6d56de09516d0de77dc140b
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Thu, 21 May 2026 07:08:45 -0700
opt: add O2 analysis substrate
Diffstat:
| M | doc/OPT.md | | | 66 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++----------- |
| M | src/opt/opt.c | | | 186 | ++++++++++++++++++++++++++++++++++++++++++------------------------------------- |
| M | src/opt/opt_internal.h | | | 44 | ++++++++++++++++++++++++++++++++++++++++++++ |
| M | src/opt/opt_util.c | | | 97 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | src/opt/pass_analysis.c | | | 267 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| M | src/opt/pass_live.c | | | 126 | +++++++++---------------------------------------------------------------------- |
| M | src/opt/pass_lower.c | | | 108 | +++++-------------------------------------------------------------------------- |
| M | test/opt/opt_test.c | | | 57 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
8 files changed, 639 insertions(+), 312 deletions(-)
diff --git a/doc/OPT.md b/doc/OPT.md
@@ -421,13 +421,22 @@ The optimizer is no longer just a stub:
Current behavior:
-- Level `1` records and replays 1:1 into the wrapped target.
-- Level `2` runs `opt_build_cfg + opt_build_ssa` as a dry run, discards that
- result, then replays 1:1.
-- No real optimized lowering path exists yet.
-
-The next implementation work should replace replay with the MIR-style lowering
-pipeline, first at `-O1`, then at `-O2`.
+- Level `1` records IR, builds CFG, machinizes, builds loop frequencies,
+ computes liveness, runs the current priority allocator/rewrite path,
+ combines, runs post-RA DCE, cleans jumps/layout, and emits through the
+ wrapped target.
+- Level `2` has its own scheduler entry point, but until production SSA/value
+ passes land it deliberately routes through the same proven lowering path as
+ `-O1`.
+- `opt_regalloc(..., allow_live_range_split)` accepts the O2 policy bit, but
+ live-range splitting is not implemented yet.
+- The current `opt_build_ssa` remains a shape-checking mem2reg prototype: it
+ computes dominators/frontiers and phi placement, but phis do not yet receive
+ real `Val` defs and uses are not rewritten for downstream value passes.
+
+The next implementation work should build the shared O2 analysis/data-flow
+substrate, make SSA production-quality, and then enable the MIR-style
+pre-lowering pass sequence behind the level-2 scheduler.
---
@@ -438,6 +447,10 @@ the exact pass being introduced, then expand through the CG corpus.
### Phase A - Production `-O1` Lowering
+Status: implemented as the public level-1 path. Keep extending tests and
+target-specific legality checks here, but do not make O1 depend on SSA/value
+passes.
+
Goal: make `-O1` stop replaying and emit through the optimized backend path
without SSA value passes.
@@ -466,7 +479,38 @@ Exit criteria:
- Add focused allocation cases for call-clobber saves, stack spills, tied
hard regs from inline asm, and values live through branches.
-### Phase B - Full Allocation Infrastructure
+### Phase B - O2 Analysis Substrate
+
+Status: started. `OptPassCtx`, the shared operand/reference walker,
+`OptAnalysis` CFG order/dominator/frontier data, and `opt_verify` CFG/value-id
+guardrails are in place. Remaining work is stable instruction ids, pass
+invalidation flags, stricter def-use/type verification after production SSA,
+and replacing pass-local dominator implementations with the shared analysis.
+
+Goal: create the durable data structures and invalidation model that all O2
+passes will share before implementing transformations.
+
+Implement:
+
+- A small pass context for per-pass state, stage names, metrics, and temporary
+ arenas.
+- A shared operand/reference walker for every pass that needs uses/defs,
+ including `CGABIValue`, calls, returns, inline asm, intrinsics, and indirect
+ bases.
+- First-class analysis results for RPO/PO, dominators, dominance frontiers,
+ dominator children, loop metadata, block frequencies, and CFG invalidation.
+- Stable instruction references or ids so long-lived analysis never depends on
+ raw `Inst*` addresses across block array rewrites.
+- `opt_verify(Func*, stage)` gates for CFG consistency, value type/class
+ consistency, phi arity, def-use consistency, and pass invalidation mistakes.
+
+Exit criteria:
+
+- `-O1` stays green.
+- `-O2` can run the substrate and verifier, then lower through the current O1
+ backend path with no observable behavior change.
+
+### Phase C - Full Allocation Infrastructure
Goal: bring `-O2` allocation quality up to MIR's model before value passes
start changing code aggressively.
@@ -485,7 +529,7 @@ Exit criteria:
- `-O2` can use the same lowering path with coalescing/splitting enabled, even
if all SSA value passes are still disabled.
-### Phase C - SSA Value and Memory Passes
+### Phase D - Production SSA, Value, and Memory Passes
Goal: enable the MIR full pre-lowering schedule for `-O2`.
@@ -512,7 +556,7 @@ Exit criteria:
- `CFREE_OPT_LEVELS="0 1 2" make test-cg` passes for the relevant arch.
- Pass-local dump tests prove the intended rewrite fires.
-### Phase D - Inlining and Cleanup
+### Phase E - Inlining and Cleanup
Goal: add MIR-style small direct-call inlining for `-O2`.
@@ -535,7 +579,7 @@ Exit criteria:
- Small wrapper-call cases optimize after inline+cleanup.
- Recursive and mutually recursive cases are unchanged and correct.
-### Phase E - Benchmark Closure
+### Phase F - Benchmark Closure
Goal: tune by measurement, not by adding passes speculatively.
diff --git a/src/opt/opt.c b/src/opt/opt.c
@@ -642,8 +642,8 @@ static void w_switch_(CGTarget* t, const CGSwitchDesc* d) {
}
}
-static void w_indirect_branch(CGTarget* t, Operand addr,
- const Label* targets, u32 ntargets) {
+static void w_indirect_branch(CGTarget* t, Operand addr, const Label* targets,
+ u32 ntargets) {
OptImpl* o = impl_of(t);
Inst* in = rec(o, IR_INDIRECT_BRANCH);
IRIndirectAux* aux = arena_znew(o->f->arena, IRIndirectAux);
@@ -1311,6 +1311,100 @@ static u64 func_spill_store_count(Func* f) {
return n;
}
+static void opt_run_lowering_pipeline(OptImpl* o, const char* total_scope,
+ int allow_live_range_split) {
+ metrics_scope_begin(o->c, total_scope);
+ metrics_count(o->c, "opt.funcs", 1);
+ metrics_count(o->c, "opt.blocks", o->f->nblocks);
+ metrics_count(o->c, "opt.insts", func_inst_count(o->f));
+ metrics_count(o->c, "opt.vals", o->f->nvals);
+ metrics_scope_begin(o->c, "opt.build_cfg");
+ opt_build_cfg(o->f);
+ opt_jump_cleanup(o->f, OPT_JUMP_CLEANUP_CFG);
+ opt_build_cfg(o->f);
+ opt_verify(o->f, "lowering-cfg");
+ metrics_scope_end(o->c, "opt.build_cfg");
+ metrics_scope_begin(o->c, "opt.machinize");
+ opt_machinize(o->f, o->target);
+ metrics_scope_end(o->c, "opt.machinize");
+ metrics_scope_begin(o->c, "opt.build_loop_tree");
+ opt_build_loop_tree(o->f);
+ metrics_scope_end(o->c, "opt.build_loop_tree");
+ metrics_scope_begin(o->c, "opt.live_blocks.pre_dde");
+ OptLiveInfo live;
+ opt_live_blocks(o->f, &live);
+ metrics_count(o->c, "opt.live_words", o->f->opt_live_words);
+ metrics_count(o->c, "opt.live.blocks", o->f->nblocks);
+ metrics_count(o->c, "opt.live.active_words", live.active_words);
+ metrics_count(o->c, "opt.live.block_bytes", live.block_bytes);
+ metrics_count(o->c, "opt.live.set_bit_scans", live.set_bit_scans);
+ metrics_count(o->c, "opt.live.bitset_words_touched",
+ live.bitset_words_touched);
+ metrics_count(o->c, "opt.live.dataflow_iterations", live.dataflow_iterations);
+ metrics_count(o->c, "opt.live.dataflow_block_visits",
+ live.dataflow_block_visits);
+ metrics_count(o->c, "opt.conflict_bytes", 0);
+ metrics_scope_end(o->c, "opt.live_blocks.pre_dde");
+ metrics_scope_begin(o->c, "opt.dead_def_elim");
+ opt_dead_def_elim_with_live(o->f, &live);
+ metrics_count(o->c, "opt.dde.live_words_touched",
+ o->f->opt_dde_live_words_touched);
+ metrics_scope_end(o->c, "opt.dead_def_elim");
+ metrics_scope_begin(o->c, "opt.regalloc");
+ opt_regalloc(o->f, allow_live_range_split);
+ metrics_count(o->c, "opt.alloc.used_loc_words", o->f->opt_used_loc_words);
+ metrics_count(o->c, "opt.alloc.hard_loc_words",
+ o->f->opt_alloc_hard_loc_words);
+ metrics_count(o->c, "opt.alloc.stack_loc_words",
+ o->f->opt_alloc_stack_loc_words);
+ metrics_count(o->c, "opt.alloc.stack_slots", o->f->opt_alloc_stack_slots);
+ metrics_count(o->c, "opt.alloc.hard_point_visits",
+ o->f->opt_alloc_hard_point_visits);
+ metrics_count(o->c, "opt.alloc.stack_point_visits",
+ o->f->opt_alloc_stack_point_visits);
+ metrics_count(o->c, "opt.alloc.hard_word_ors", o->f->opt_alloc_hard_word_ors);
+ metrics_count(o->c, "opt.alloc.stack_word_ors",
+ o->f->opt_alloc_stack_word_ors);
+ metrics_count(o->c, "opt.alloc.hard_mark_points",
+ o->f->opt_alloc_hard_mark_points);
+ metrics_count(o->c, "opt.alloc.stack_mark_points",
+ o->f->opt_alloc_stack_mark_points);
+ metrics_count(o->c, "opt.alloc.spills", func_spill_alloc_count(o->f));
+ metrics_count(o->c, "opt.rewrite.reloads", func_spill_load_count(o->f));
+ metrics_count(o->c, "opt.rewrite.stores", func_spill_store_count(o->f));
+ metrics_count(o->c, "opt.rewrite.inserted_insts",
+ o->f->opt_rewrite_inserted_insts);
+ metrics_count(o->c, "opt.rewrite.live_words_touched",
+ o->f->opt_rewrite_live_words_touched);
+ metrics_scope_end(o->c, "opt.regalloc");
+ metrics_scope_begin(o->c, "opt.combine");
+ opt_combine(o->f);
+ metrics_scope_end(o->c, "opt.combine");
+ metrics_scope_begin(o->c, "opt.dce");
+ opt_dce(o->f);
+ metrics_scope_end(o->c, "opt.dce");
+ metrics_scope_begin(o->c, "opt.jump_cleanup");
+ opt_jump_cleanup(o->f, OPT_JUMP_CLEANUP_CFG);
+ opt_build_cfg(o->f);
+ opt_verify(o->f, "post-ra-jump-cfg");
+ opt_jump_cleanup(o->f, OPT_JUMP_CLEANUP_LAYOUT);
+ metrics_scope_end(o->c, "opt.jump_cleanup");
+ metrics_scope_begin(o->c, "opt.emit");
+ opt_emit(o->c, o->f, o->target);
+ metrics_scope_end(o->c, "opt.emit");
+ metrics_scope_end(o->c, total_scope);
+}
+
+static void opt_run_o1_pipeline(OptImpl* o) {
+ opt_run_lowering_pipeline(o, "opt.o1.total", 0);
+}
+
+static void opt_run_o2_pipeline(OptImpl* o) {
+ /* O2 has a distinct scheduler now, but its value/SSA pre-lowering passes
+ * are still disabled until their shared analysis substrate lands. */
+ opt_run_lowering_pipeline(o, "opt.o2.total", 1);
+}
+
/* ---- func_end: optionally run dry-run passes; replay; reset ---- */
static void w_func_end(CGTarget* t) {
@@ -1318,90 +1412,10 @@ static void w_func_end(CGTarget* t) {
if (!o->f) return;
opt_frame_home_addr_taken_locals(o->f);
- if (o->level >= 1) {
- /* Full -O2 is still reserved design space. Until the SSA/value pipeline has
- * a real lowering allocator, route public -O2 through the implemented O1
- * schedule instead of the incomplete SSA replay path. */
- metrics_scope_begin(o->c, "opt.o1.total");
- metrics_count(o->c, "opt.funcs", 1);
- metrics_count(o->c, "opt.blocks", o->f->nblocks);
- metrics_count(o->c, "opt.insts", func_inst_count(o->f));
- metrics_count(o->c, "opt.vals", o->f->nvals);
- metrics_scope_begin(o->c, "opt.build_cfg");
- opt_build_cfg(o->f);
- opt_jump_cleanup(o->f, OPT_JUMP_CLEANUP_CFG);
- opt_build_cfg(o->f);
- metrics_scope_end(o->c, "opt.build_cfg");
- metrics_scope_begin(o->c, "opt.machinize");
- opt_machinize(o->f, o->target);
- metrics_scope_end(o->c, "opt.machinize");
- metrics_scope_begin(o->c, "opt.build_loop_tree");
- opt_build_loop_tree(o->f);
- metrics_scope_end(o->c, "opt.build_loop_tree");
- metrics_scope_begin(o->c, "opt.live_blocks.pre_dde");
- OptLiveInfo live;
- opt_live_blocks(o->f, &live);
- metrics_count(o->c, "opt.live_words", o->f->opt_live_words);
- metrics_count(o->c, "opt.live.blocks", o->f->nblocks);
- metrics_count(o->c, "opt.live.active_words", live.active_words);
- metrics_count(o->c, "opt.live.block_bytes", live.block_bytes);
- metrics_count(o->c, "opt.live.set_bit_scans", live.set_bit_scans);
- metrics_count(o->c, "opt.live.bitset_words_touched",
- live.bitset_words_touched);
- metrics_count(o->c, "opt.live.dataflow_iterations",
- live.dataflow_iterations);
- metrics_count(o->c, "opt.live.dataflow_block_visits",
- live.dataflow_block_visits);
- metrics_count(o->c, "opt.conflict_bytes", 0);
- metrics_scope_end(o->c, "opt.live_blocks.pre_dde");
- metrics_scope_begin(o->c, "opt.dead_def_elim");
- opt_dead_def_elim_with_live(o->f, &live);
- metrics_count(o->c, "opt.dde.live_words_touched",
- o->f->opt_dde_live_words_touched);
- metrics_scope_end(o->c, "opt.dead_def_elim");
- metrics_scope_begin(o->c, "opt.regalloc");
- opt_regalloc(o->f, 0);
- metrics_count(o->c, "opt.alloc.used_loc_words", o->f->opt_used_loc_words);
- metrics_count(o->c, "opt.alloc.hard_loc_words",
- o->f->opt_alloc_hard_loc_words);
- metrics_count(o->c, "opt.alloc.stack_loc_words",
- o->f->opt_alloc_stack_loc_words);
- metrics_count(o->c, "opt.alloc.stack_slots", o->f->opt_alloc_stack_slots);
- metrics_count(o->c, "opt.alloc.hard_point_visits",
- o->f->opt_alloc_hard_point_visits);
- metrics_count(o->c, "opt.alloc.stack_point_visits",
- o->f->opt_alloc_stack_point_visits);
- metrics_count(o->c, "opt.alloc.hard_word_ors",
- o->f->opt_alloc_hard_word_ors);
- metrics_count(o->c, "opt.alloc.stack_word_ors",
- o->f->opt_alloc_stack_word_ors);
- metrics_count(o->c, "opt.alloc.hard_mark_points",
- o->f->opt_alloc_hard_mark_points);
- metrics_count(o->c, "opt.alloc.stack_mark_points",
- o->f->opt_alloc_stack_mark_points);
- metrics_count(o->c, "opt.alloc.spills", func_spill_alloc_count(o->f));
- metrics_count(o->c, "opt.rewrite.reloads", func_spill_load_count(o->f));
- metrics_count(o->c, "opt.rewrite.stores", func_spill_store_count(o->f));
- metrics_count(o->c, "opt.rewrite.inserted_insts",
- o->f->opt_rewrite_inserted_insts);
- metrics_count(o->c, "opt.rewrite.live_words_touched",
- o->f->opt_rewrite_live_words_touched);
- metrics_scope_end(o->c, "opt.regalloc");
- metrics_scope_begin(o->c, "opt.combine");
- opt_combine(o->f);
- metrics_scope_end(o->c, "opt.combine");
- metrics_scope_begin(o->c, "opt.dce");
- opt_dce(o->f);
- metrics_scope_end(o->c, "opt.dce");
- metrics_scope_begin(o->c, "opt.jump_cleanup");
- opt_jump_cleanup(o->f, OPT_JUMP_CLEANUP_CFG);
- opt_build_cfg(o->f);
- opt_jump_cleanup(o->f, OPT_JUMP_CLEANUP_LAYOUT);
- metrics_scope_end(o->c, "opt.jump_cleanup");
- metrics_scope_begin(o->c, "opt.emit");
- opt_emit(o->c, o->f, o->target);
- metrics_scope_end(o->c, "opt.emit");
- metrics_scope_end(o->c, "opt.o1.total");
+ if (o->level >= 2) {
+ opt_run_o2_pipeline(o);
+ } else if (o->level >= 1) {
+ opt_run_o1_pipeline(o);
} else {
opt_replay(o->c, o->f, o->target);
}
diff --git a/src/opt/opt_internal.h b/src/opt/opt_internal.h
@@ -14,6 +14,50 @@ typedef struct OptHardBlockLive {
OptHardRegSet live_def;
} OptHardBlockLive;
+typedef struct OptPassCtx {
+ Compiler* c;
+ Func* f;
+ Arena* arena;
+ const char* stage;
+} OptPassCtx;
+
+typedef struct OptBlockList {
+ u32* items;
+ u32 n;
+ u32 cap;
+} OptBlockList;
+
+typedef struct OptAnalysis {
+ Arena* arena;
+ Func* f;
+ u32 nblocks;
+ u32 entry;
+ u32* po;
+ u32* rpo;
+ u32* po_index;
+ u8* reachable;
+ u32 npo;
+ u32 nrpo;
+ u32* idom;
+ OptBlockList* dom_children;
+ OptBlockList* dom_frontier;
+} OptAnalysis;
+
+typedef void (*OptOperandWalkFn)(Func*, Inst*, Operand*, int is_def, void*);
+
+void opt_analysis_build_order(Func*, OptAnalysis*);
+void opt_analysis_build_dominators(Func*, OptAnalysis*);
+void opt_analysis_build_dom_frontier(Func*, OptAnalysis*);
+int opt_analysis_dominates(const OptAnalysis*, u32 dom, u32 node);
+void opt_verify(Func*, const char* stage);
+
+int opt_val_in_inst_defs(const Inst*, Val);
+void opt_walk_operand(Func*, Inst*, Operand*, int is_def, OptOperandWalkFn,
+ void*);
+void opt_walk_abivalue(Func*, Inst*, CGABIValue*, int storage_def,
+ OptOperandWalkFn, void*);
+void opt_walk_inst_operands(Func*, Inst*, OptOperandWalkFn, void*);
+
int opt_mem_observable(const MemAccess*);
u32 opt_call_clobber_mask_for(Func*, const Inst*, u8 cls);
diff --git a/src/opt/opt_util.c b/src/opt/opt_util.c
@@ -1,5 +1,102 @@
#include "opt/opt_internal.h"
+int opt_val_in_inst_defs(const Inst* in, Val v) {
+ if (!in || v == VAL_NONE) return 0;
+ if (in->def == v) return 1;
+ for (u32 i = 0; i < in->ndefs; ++i)
+ if (in->defs[i] == v) return 1;
+ return 0;
+}
+
+void opt_walk_operand(Func* f, Inst* in, Operand* op, int is_def,
+ OptOperandWalkFn fn, void* ctx) {
+ if (!op || !fn) return;
+ if (op->kind == OPK_REG) {
+ fn(f, in, op, is_def, ctx);
+ } else if (op->kind == OPK_INDIRECT) {
+ Operand base = *op;
+ base.kind = OPK_REG;
+ base.cls = RC_INT;
+ base.v.reg = op->v.ind.base;
+ fn(f, in, &base, 0, ctx);
+ op->v.ind.base = base.v.reg;
+ }
+}
+
+void opt_walk_abivalue(Func* f, Inst* in, CGABIValue* v, int storage_def,
+ OptOperandWalkFn fn, void* ctx) {
+ if (!v) return;
+ opt_walk_operand(f, in, &v->storage, storage_def, fn, ctx);
+ for (u32 i = 0; i < v->nparts; ++i)
+ opt_walk_operand(f, in, (Operand*)&v->parts[i].op, storage_def, fn, ctx);
+}
+
+void opt_walk_inst_operands(Func* f, Inst* in, OptOperandWalkFn fn, void* ctx) {
+ if (!in || !fn) return;
+ for (u32 i = 0; i < in->nopnds; ++i) {
+ int is_def = 0;
+ if (in->opnds[i].kind == OPK_REG)
+ is_def = i == 0 && opt_val_in_inst_defs(in, (Val)in->opnds[i].v.reg);
+ if (!is_def) opt_walk_operand(f, in, &in->opnds[i], 0, fn, ctx);
+ }
+ for (u32 i = 0; i < in->nopnds; ++i) {
+ int is_def = 0;
+ if (in->opnds[i].kind == OPK_REG)
+ is_def = i == 0 && opt_val_in_inst_defs(in, (Val)in->opnds[i].v.reg);
+ if (is_def) opt_walk_operand(f, in, &in->opnds[i], 1, fn, ctx);
+ }
+
+ switch ((IROp)in->op) {
+ case IR_CALL: {
+ IRCallAux* aux = (IRCallAux*)in->extra.aux;
+ if (!aux) break;
+ if (aux->use_plan_replay) {
+ opt_walk_operand(f, in, &aux->plan.callee, 0, fn, ctx);
+ for (u32 i = 0; i < aux->plan.nargs; ++i)
+ opt_walk_operand(f, in, &aux->plan.args[i].src, 0, fn, ctx);
+ for (u32 i = 0; i < aux->plan.nrets; ++i)
+ opt_walk_operand(f, in, &aux->plan.rets[i].dst, 1, fn, ctx);
+ } else {
+ opt_walk_operand(f, in, &aux->desc.callee, 0, fn, ctx);
+ for (u32 i = 0; i < aux->desc.nargs; ++i)
+ opt_walk_abivalue(f, in, (CGABIValue*)&aux->desc.args[i], 0, fn, ctx);
+ opt_walk_abivalue(f, in, &aux->desc.ret, 1, fn, ctx);
+ }
+ break;
+ }
+ case IR_RET: {
+ IRRetAux* aux = (IRRetAux*)in->extra.aux;
+ if (aux && aux->present) opt_walk_abivalue(f, in, &aux->val, 0, fn, ctx);
+ break;
+ }
+ case IR_SCOPE_BEGIN: {
+ IRScopeAux* aux = (IRScopeAux*)in->extra.aux;
+ if (aux) opt_walk_operand(f, in, &aux->desc.cond, 0, fn, ctx);
+ break;
+ }
+ case IR_ASM_BLOCK: {
+ IRAsmAux* aux = (IRAsmAux*)in->extra.aux;
+ if (!aux) break;
+ for (u32 i = 0; i < aux->nin; ++i)
+ opt_walk_operand(f, in, &aux->in_ops[i], 0, fn, ctx);
+ for (u32 i = 0; i < aux->nout; ++i)
+ opt_walk_operand(f, in, &aux->out_ops[i], 1, fn, ctx);
+ break;
+ }
+ case IR_INTRINSIC: {
+ IRIntrinAux* aux = (IRIntrinAux*)in->extra.aux;
+ if (!aux) break;
+ for (u32 i = 0; i < aux->narg; ++i)
+ opt_walk_operand(f, in, &aux->args[i], 0, fn, ctx);
+ for (u32 i = 0; i < aux->ndst; ++i)
+ opt_walk_operand(f, in, &aux->dsts[i], 1, fn, ctx);
+ break;
+ }
+ default:
+ break;
+ }
+}
+
int opt_mem_observable(const MemAccess* m) {
return (m->flags & (MF_VOLATILE | MF_ATOMIC)) != 0;
}
diff --git a/src/opt/pass_analysis.c b/src/opt/pass_analysis.c
@@ -0,0 +1,267 @@
+#include <string.h>
+
+#include "core/arena.h"
+#include "core/core.h"
+#include "opt/opt_internal.h"
+
+#define OPT_BLK_NONE 0xffffffffu
+
+static SrcLoc opt_no_loc(void) {
+ SrcLoc loc = {0, 0, 0};
+ return loc;
+}
+
+static void opt_fail(Func* f, const char* stage, const char* msg, u32 a,
+ u32 b) {
+ compiler_panic(f->c, opt_no_loc(), "opt verify[%s]: %s (%u, %u)",
+ stage ? stage : "?", msg, (unsigned)a, (unsigned)b);
+}
+
+static void block_list_add(Arena* arena, OptBlockList* list, u32 block) {
+ if (list->n == list->cap) {
+ u32 ncap = list->cap ? list->cap * 2u : 4u;
+ u32* items = arena_array(arena, u32, ncap);
+ if (list->items) memcpy(items, list->items, sizeof(items[0]) * list->n);
+ list->items = items;
+ list->cap = ncap;
+ }
+ list->items[list->n++] = block;
+}
+
+static void block_list_add_unique(Arena* arena, OptBlockList* list, u32 block) {
+ for (u32 i = 0; i < list->n; ++i)
+ if (list->items[i] == block) return;
+ block_list_add(arena, list, block);
+}
+
+static void order_dfs(OptAnalysis* a, u32 block) {
+ if (block >= a->nblocks || a->reachable[block]) return;
+ a->reachable[block] = 1;
+ Block* bl = &a->f->blocks[block];
+ for (u32 i = 0; i < bl->nsucc; ++i) order_dfs(a, bl->succ[i]);
+ a->po[a->npo] = block;
+ a->po_index[block] = a->npo;
+ ++a->npo;
+}
+
+void opt_analysis_build_order(Func* f, OptAnalysis* a) {
+ memset(a, 0, sizeof *a);
+ a->arena = f->arena;
+ a->f = f;
+ a->nblocks = f->nblocks;
+ a->entry = f->entry;
+ u32 n = f->nblocks ? f->nblocks : 1u;
+ a->po = arena_array(f->arena, u32, n);
+ a->rpo = arena_array(f->arena, u32, n);
+ a->po_index = arena_array(f->arena, u32, n);
+ a->reachable = arena_zarray(f->arena, u8, n);
+ for (u32 i = 0; i < n; ++i) a->po_index[i] = OPT_BLK_NONE;
+ if (f->entry < f->nblocks) order_dfs(a, f->entry);
+ a->nrpo = a->npo;
+ for (u32 i = 0; i < a->npo; ++i) a->rpo[i] = a->po[a->npo - 1u - i];
+}
+
+static u32 dom_intersect(u32 b1, u32 b2, const u32* idom, const u32* po_index) {
+ while (b1 != b2) {
+ while (po_index[b1] < po_index[b2]) b1 = idom[b1];
+ while (po_index[b2] < po_index[b1]) b2 = idom[b2];
+ }
+ return b1;
+}
+
+void opt_analysis_build_dominators(Func* f, OptAnalysis* a) {
+ if (!a->reachable) opt_analysis_build_order(f, a);
+ u32 n = f->nblocks ? f->nblocks : 1u;
+ a->idom = arena_array(f->arena, u32, n);
+ a->dom_children = arena_zarray(f->arena, OptBlockList, n);
+ for (u32 i = 0; i < n; ++i) a->idom[i] = OPT_BLK_NONE;
+ if (f->entry >= f->nblocks || !a->reachable[f->entry]) return;
+ a->idom[f->entry] = f->entry;
+
+ int changed = 1;
+ while (changed) {
+ changed = 0;
+ for (u32 ri = 0; ri < a->nrpo; ++ri) {
+ u32 b = a->rpo[ri];
+ if (b == f->entry) continue;
+ Block* bl = &f->blocks[b];
+ u32 new_idom = OPT_BLK_NONE;
+ for (u32 p = 0; p < bl->npreds; ++p) {
+ u32 pred = bl->preds[p];
+ if (pred >= f->nblocks || !a->reachable[pred]) continue;
+ if (a->idom[pred] == OPT_BLK_NONE) continue;
+ new_idom = new_idom == OPT_BLK_NONE
+ ? pred
+ : dom_intersect(pred, new_idom, a->idom, a->po_index);
+ }
+ if (new_idom != OPT_BLK_NONE && a->idom[b] != new_idom) {
+ a->idom[b] = new_idom;
+ changed = 1;
+ }
+ }
+ }
+
+ for (u32 i = 0; i < a->npo; ++i) {
+ u32 b = a->po[i];
+ u32 idom = a->idom[b];
+ if (idom == OPT_BLK_NONE || idom == b) continue;
+ block_list_add(f->arena, &a->dom_children[idom], b);
+ }
+}
+
+void opt_analysis_build_dom_frontier(Func* f, OptAnalysis* a) {
+ if (!a->idom) opt_analysis_build_dominators(f, a);
+ u32 n = f->nblocks ? f->nblocks : 1u;
+ a->dom_frontier = arena_zarray(f->arena, OptBlockList, n);
+ for (u32 i = 0; i < a->npo; ++i) {
+ u32 b = a->po[i];
+ Block* bl = &f->blocks[b];
+ if (bl->npreds < 2 || a->idom[b] == OPT_BLK_NONE) continue;
+ for (u32 p = 0; p < bl->npreds; ++p) {
+ u32 runner = bl->preds[p];
+ while (runner != OPT_BLK_NONE && runner != a->idom[b]) {
+ block_list_add_unique(f->arena, &a->dom_frontier[runner], b);
+ runner = a->idom[runner];
+ }
+ }
+ }
+}
+
+int opt_analysis_dominates(const OptAnalysis* a, u32 dom, u32 node) {
+ if (!a || !a->idom || dom >= a->nblocks || node >= a->nblocks) return 0;
+ if (!a->reachable || !a->reachable[dom] || !a->reachable[node]) return 0;
+ u32 cur = node;
+ while (cur != OPT_BLK_NONE) {
+ if (cur == dom) return 1;
+ if (cur == a->entry) break;
+ cur = a->idom[cur];
+ }
+ return 0;
+}
+
+static int block_has_pred(const Block* bl, u32 pred) {
+ for (u32 i = 0; i < bl->npreds; ++i)
+ if (bl->preds[i] == pred) return 1;
+ return 0;
+}
+
+static int block_has_succ(const Block* bl, u32 succ) {
+ for (u32 i = 0; i < bl->nsucc; ++i)
+ if (bl->succ[i] == succ) return 1;
+ return 0;
+}
+
+static int fixed_terminator_succ_count(const Inst* in, u32* count_out) {
+ switch ((IROp)in->op) {
+ case IR_RET:
+ *count_out = 0;
+ return 1;
+ case IR_BR:
+ case IR_BREAK_TO:
+ case IR_CONTINUE_TO:
+ *count_out = 1;
+ return 1;
+ case IR_CONDBR:
+ case IR_CMP_BRANCH:
+ *count_out = 2;
+ return 1;
+ case IR_INTRINSIC: {
+ IRIntrinAux* aux = (IRIntrinAux*)in->extra.aux;
+ if (aux && (aux->kind == INTRIN_LONGJMP || aux->kind == INTRIN_TRAP ||
+ aux->kind == INTRIN_UNREACHABLE)) {
+ *count_out = 0;
+ return 1;
+ }
+ return 0;
+ }
+ default:
+ return 0;
+ }
+}
+
+static void verify_operand(Func* f, Inst* in, Operand* op, int is_def,
+ void* arg) {
+ (void)in;
+ const char* stage = (const char*)arg;
+ if (op->kind != OPK_REG) return;
+ Val v = (Val)op->v.reg;
+ if (v == VAL_NONE || v >= f->nvals)
+ opt_fail(f, stage, is_def ? "bad def val" : "bad use val", v, f->nvals);
+}
+
+static void verify_values(Func* f, const char* stage) {
+ if (f->opt_rewritten) return;
+ for (u32 b = 0; b < f->nblocks; ++b) {
+ Block* bl = &f->blocks[b];
+ for (u32 i = 0; i < bl->ninsts; ++i) {
+ Inst* in = &bl->insts[i];
+ if (in->def != VAL_NONE) {
+ if (in->def >= f->nvals) opt_fail(f, stage, "bad inst def", b, i);
+ }
+ for (u32 d = 0; d < in->ndefs; ++d) {
+ Val v = in->defs[d];
+ if (v == VAL_NONE || v >= f->nvals)
+ opt_fail(f, stage, "bad inst multi-def", b, d);
+ }
+ opt_walk_inst_operands(f, in, verify_operand, (void*)stage);
+ if ((IROp)in->op == IR_PHI) {
+ IRPhiAux* aux = (IRPhiAux*)in->extra.aux;
+ if (!aux) opt_fail(f, stage, "phi missing aux", b, i);
+ if (aux->npreds != bl->npreds)
+ opt_fail(f, stage, "phi pred count mismatch", aux->npreds,
+ bl->npreds);
+ for (u32 p = 0; p < aux->npreds; ++p) {
+ if (p >= bl->npreds || aux->pred_blocks[p] != bl->preds[p])
+ opt_fail(f, stage, "phi pred block mismatch", b, p);
+ if (aux->pred_vals[p] != VAL_NONE && aux->pred_vals[p] >= f->nvals)
+ opt_fail(f, stage, "phi bad pred value", aux->pred_vals[p],
+ f->nvals);
+ }
+ }
+ }
+ }
+}
+
+void opt_verify(Func* f, const char* stage) {
+ if (!f) return;
+ if (f->nblocks && f->entry >= f->nblocks)
+ opt_fail(f, stage, "entry out of range", f->entry, f->nblocks);
+ OptAnalysis a;
+ opt_analysis_build_order(f, &a);
+ for (u32 b = 0; b < f->nblocks; ++b) {
+ Block* bl = &f->blocks[b];
+ if (bl->id != b) opt_fail(f, stage, "block id mismatch", bl->id, b);
+ if (!a.reachable[b] && (bl->ninsts || bl->nsucc || bl->npreds))
+ opt_fail(f, stage, "unreachable block still connected", b, bl->ninsts);
+ if (bl->ninsts == 0 && bl->nsucc != 0)
+ opt_fail(f, stage, "empty block has successors", b, bl->nsucc);
+ if (bl->ninsts) {
+ u32 expected = 0;
+ if (fixed_terminator_succ_count(&bl->insts[bl->ninsts - 1], &expected) &&
+ bl->nsucc != expected)
+ opt_fail(f, stage, "terminator successor count mismatch", b, bl->nsucc);
+ }
+ for (u32 s = 0; s < bl->nsucc; ++s) {
+ u32 succ = bl->succ[s];
+ if (succ >= f->nblocks)
+ opt_fail(f, stage, "successor out of range", b, s);
+ if (!block_has_pred(&f->blocks[succ], b))
+ opt_fail(f, stage, "successor missing reciprocal predecessor", b, succ);
+ }
+ for (u32 p = 0; p < bl->npreds; ++p) {
+ u32 pred = bl->preds[p];
+ if (pred >= f->nblocks)
+ opt_fail(f, stage, "predecessor out of range", b, p);
+ if (!block_has_succ(&f->blocks[pred], b))
+ opt_fail(f, stage, "predecessor missing reciprocal successor", b, pred);
+ }
+ }
+ u8* seen_emit = arena_zarray(f->arena, u8, f->nblocks ? f->nblocks : 1u);
+ for (u32 i = 0; i < f->emit_order_n; ++i) {
+ u32 b = f->emit_order[i];
+ if (b >= f->nblocks) opt_fail(f, stage, "emit block out of range", b, i);
+ if (seen_emit[b]) opt_fail(f, stage, "duplicate emit block", b, i);
+ seen_emit[b] = 1;
+ }
+ verify_values(f, stage);
+}
diff --git a/src/opt/pass_live.c b/src/opt/pass_live.c
@@ -4,6 +4,7 @@
#include "core/arena.h"
#include "opt/opt.h"
+#include "opt/opt_internal.h"
static u32 opt_bit_words(u32 nvals) { return (nvals + 63u) / 64u; }
@@ -111,105 +112,6 @@ void opt_bitset_iter_set(const OptBitset* bs, OptBitsetIterFn fn, void* arg) {
}
}
-typedef void (*LiveOperandWalkFn)(Func*, Inst*, Operand*, int is_def, void*);
-
-static int live_val_in_inst_defs(const Inst* in, Val v) {
- if (v == VAL_NONE) return 0;
- if (in->def == v) return 1;
- for (u32 i = 0; i < in->ndefs; ++i)
- if (in->defs[i] == v) return 1;
- return 0;
-}
-
-static void live_walk_operand(Func* f, Inst* in, Operand* op, int is_def,
- LiveOperandWalkFn fn, void* ctx) {
- if (!op) return;
- if (op->kind == OPK_REG) {
- fn(f, in, op, is_def, ctx);
- } else if (op->kind == OPK_INDIRECT) {
- Operand base = *op;
- base.kind = OPK_REG;
- base.cls = RC_INT;
- base.v.reg = op->v.ind.base;
- fn(f, in, &base, 0, ctx);
- }
-}
-
-static void live_walk_abivalue(Func* f, Inst* in, CGABIValue* v,
- int storage_def, LiveOperandWalkFn fn,
- void* ctx) {
- live_walk_operand(f, in, &v->storage, storage_def, fn, ctx);
- for (u32 i = 0; i < v->nparts; ++i)
- live_walk_operand(f, in, (Operand*)&v->parts[i].op, storage_def, fn, ctx);
-}
-
-static void live_walk_inst_operands(Func* f, Inst* in, LiveOperandWalkFn fn,
- void* ctx) {
- for (u32 i = 0; i < in->nopnds; ++i) {
- int is_def = 0;
- if (in->opnds[i].kind == OPK_REG)
- is_def = i == 0 && live_val_in_inst_defs(in, (Val)in->opnds[i].v.reg);
- if (!is_def) live_walk_operand(f, in, &in->opnds[i], 0, fn, ctx);
- }
- for (u32 i = 0; i < in->nopnds; ++i) {
- int is_def = 0;
- if (in->opnds[i].kind == OPK_REG)
- is_def = i == 0 && live_val_in_inst_defs(in, (Val)in->opnds[i].v.reg);
- if (is_def) live_walk_operand(f, in, &in->opnds[i], 1, fn, ctx);
- }
-
- switch ((IROp)in->op) {
- case IR_CALL: {
- IRCallAux* aux = (IRCallAux*)in->extra.aux;
- if (!aux) break;
- if (aux->use_plan_replay) {
- live_walk_operand(f, in, &aux->plan.callee, 0, fn, ctx);
- for (u32 i = 0; i < aux->plan.nargs; ++i)
- live_walk_operand(f, in, &aux->plan.args[i].src, 0, fn, ctx);
- for (u32 i = 0; i < aux->plan.nrets; ++i)
- live_walk_operand(f, in, &aux->plan.rets[i].dst, 1, fn, ctx);
- } else {
- live_walk_operand(f, in, &aux->desc.callee, 0, fn, ctx);
- for (u32 i = 0; i < aux->desc.nargs; ++i)
- live_walk_abivalue(f, in, (CGABIValue*)&aux->desc.args[i], 0, fn,
- ctx);
- live_walk_abivalue(f, in, &aux->desc.ret, 1, fn, ctx);
- }
- break;
- }
- case IR_RET: {
- IRRetAux* aux = (IRRetAux*)in->extra.aux;
- if (aux && aux->present) live_walk_abivalue(f, in, &aux->val, 0, fn, ctx);
- break;
- }
- case IR_SCOPE_BEGIN: {
- IRScopeAux* aux = (IRScopeAux*)in->extra.aux;
- if (aux) live_walk_operand(f, in, &aux->desc.cond, 0, fn, ctx);
- break;
- }
- case IR_ASM_BLOCK: {
- IRAsmAux* aux = (IRAsmAux*)in->extra.aux;
- if (!aux) break;
- for (u32 i = 0; i < aux->nin; ++i)
- live_walk_operand(f, in, &aux->in_ops[i], 0, fn, ctx);
- for (u32 i = 0; i < aux->nout; ++i)
- live_walk_operand(f, in, &aux->out_ops[i], 1, fn, ctx);
- break;
- }
- case IR_INTRINSIC: {
- IRIntrinAux* aux = (IRIntrinAux*)in->extra.aux;
- if (!aux) break;
- for (u32 i = 0; i < aux->narg; ++i)
- live_walk_operand(f, in, &aux->args[i], 0, fn, ctx);
- for (u32 i = 0; i < aux->ndst; ++i)
- live_walk_operand(f, in, &aux->dsts[i], 1, fn, ctx);
- break;
- }
- default:
- break;
- }
-}
-
typedef struct LiveUseDefCtx {
OptBitset* use;
OptBitset* def;
@@ -259,8 +161,8 @@ static int live_metric_copy(OptLiveInfo* live, OptBitset* dst,
static int live_metric_union(OptLiveInfo* live, OptBitset* dst,
const OptBitset* src) {
u32 n = 0;
- if (dst && src) n = dst->nwords < src->active_words ? dst->nwords
- : src->active_words;
+ if (dst && src)
+ n = dst->nwords < src->active_words ? dst->nwords : src->active_words;
if (live) live->bitset_words_touched += n;
return opt_bitset_union(dst, src);
}
@@ -270,8 +172,8 @@ static int live_metric_union_and_not(OptLiveInfo* live, OptBitset* dst,
const OptBitset* not_bits) {
u32 n = 0;
(void)not_bits;
- if (dst && src) n = dst->nwords < src->active_words ? dst->nwords
- : src->active_words;
+ if (dst && src)
+ n = dst->nwords < src->active_words ? dst->nwords : src->active_words;
if (live) live->bitset_words_touched += n;
return opt_bitset_union_and_not(dst, src, not_bits);
}
@@ -316,7 +218,7 @@ void opt_live_blocks(Func* f, OptLiveInfo* live) {
ctx.use = &lb->live_use;
ctx.def = &lb->live_def;
for (u32 i = 0; i < bl->ninsts; ++i)
- live_walk_inst_operands(f, &bl->insts[i], live_collect_use_def, &ctx);
+ opt_walk_inst_operands(f, &bl->insts[i], live_collect_use_def, &ctx);
}
OptBitset new_out;
@@ -433,10 +335,10 @@ static void range_refs_reset(Func* f, RangeInstRefs* refs) {
refs->ndefs = 0;
++refs->gen;
if (refs->gen) return;
- memset(refs->use_mark, 0, sizeof(refs->use_mark[0]) *
- (f->nvals ? f->nvals : 1u));
- memset(refs->def_mark, 0, sizeof(refs->def_mark[0]) *
- (f->nvals ? f->nvals : 1u));
+ memset(refs->use_mark, 0,
+ sizeof(refs->use_mark[0]) * (f->nvals ? f->nvals : 1u));
+ memset(refs->def_mark, 0,
+ sizeof(refs->def_mark[0]) * (f->nvals ? f->nvals : 1u));
refs->gen = 1;
}
@@ -477,8 +379,7 @@ static void range_live_vals_init(Func* f, RangeLiveVals* live) {
}
static void range_live_vals_clear(RangeLiveVals* live) {
- for (u32 i = 0; i < live->nvals; ++i)
- live->pos_by_val[live->vals[i]] = 0;
+ for (u32 i = 0; i < live->nvals; ++i) live->pos_by_val[live->vals[i]] = 0;
live->nvals = 0;
}
@@ -489,8 +390,7 @@ static void range_live_vals_add(RangeLiveVals* live, Val v) {
if (live->nvals == live->cap) {
u32 ncap = live->cap ? live->cap * 2u : 64u;
Val* nv = arena_array(f->arena, Val, ncap);
- if (live->vals)
- memcpy(nv, live->vals, sizeof(live->vals[0]) * live->nvals);
+ if (live->vals) memcpy(nv, live->vals, sizeof(live->vals[0]) * live->nvals);
live->vals = nv;
live->cap = ncap;
}
@@ -828,7 +728,7 @@ void opt_live_ranges_build(Func* f, const OptLiveInfo* live,
u32 i = ri - 1u;
Inst* in = &bl->insts[i];
range_refs_reset(f, &refs);
- live_walk_inst_operands(f, in, range_collect_bits, &refs);
+ opt_walk_inst_operands(f, in, range_collect_bits, &refs);
if ((IROp)in->op == IR_CALL) {
RangeCallCtx call_ctx = {f, ranges, &refs, bl->frequency};
diff --git a/src/opt/pass_lower.c b/src/opt/pass_lower.c
@@ -54,103 +54,6 @@ static int bit_has(const u64* bits, Val v) {
return (bits[v / 64u] & (1ull << (v % 64u))) != 0;
}
-typedef void (*OperandWalkFn)(Func*, Inst*, Operand*, int is_def, void*);
-
-static int val_in_inst_defs(const Inst* in, Val v) {
- if (v == VAL_NONE) return 0;
- if (in->def == v) return 1;
- for (u32 i = 0; i < in->ndefs; ++i)
- if (in->defs[i] == v) return 1;
- return 0;
-}
-
-static void walk_operand(Func* f, Inst* in, Operand* op, int is_def,
- OperandWalkFn fn, void* ctx) {
- if (!op) return;
- if (op->kind == OPK_REG) {
- fn(f, in, op, is_def, ctx);
- } else if (op->kind == OPK_INDIRECT) {
- Operand base = *op;
- base.kind = OPK_REG;
- base.cls = RC_INT;
- base.v.reg = op->v.ind.base;
- fn(f, in, &base, 0, ctx);
- op->v.ind.base = base.v.reg;
- }
-}
-
-static void walk_abivalue(Func* f, Inst* in, CGABIValue* v, int storage_def,
- OperandWalkFn fn, void* ctx) {
- walk_operand(f, in, &v->storage, storage_def, fn, ctx);
- for (u32 i = 0; i < v->nparts; ++i)
- walk_operand(f, in, (Operand*)&v->parts[i].op, storage_def, fn, ctx);
-}
-
-static void walk_inst_operands(Func* f, Inst* in, OperandWalkFn fn, void* ctx) {
- for (u32 i = 0; i < in->nopnds; ++i) {
- int is_def = 0;
- if (in->opnds[i].kind == OPK_REG)
- is_def = i == 0 && val_in_inst_defs(in, (Val)in->opnds[i].v.reg);
- if (!is_def) walk_operand(f, in, &in->opnds[i], 0, fn, ctx);
- }
- for (u32 i = 0; i < in->nopnds; ++i) {
- int is_def = 0;
- if (in->opnds[i].kind == OPK_REG)
- is_def = i == 0 && val_in_inst_defs(in, (Val)in->opnds[i].v.reg);
- if (is_def) walk_operand(f, in, &in->opnds[i], 1, fn, ctx);
- }
-
- switch ((IROp)in->op) {
- case IR_CALL: {
- IRCallAux* aux = (IRCallAux*)in->extra.aux;
- if (!aux) break;
- if (aux->use_plan_replay) {
- walk_operand(f, in, &aux->plan.callee, 0, fn, ctx);
- for (u32 i = 0; i < aux->plan.nargs; ++i)
- walk_operand(f, in, &aux->plan.args[i].src, 0, fn, ctx);
- for (u32 i = 0; i < aux->plan.nrets; ++i)
- walk_operand(f, in, &aux->plan.rets[i].dst, 1, fn, ctx);
- } else {
- walk_operand(f, in, &aux->desc.callee, 0, fn, ctx);
- for (u32 i = 0; i < aux->desc.nargs; ++i)
- walk_abivalue(f, in, (CGABIValue*)&aux->desc.args[i], 0, fn, ctx);
- walk_abivalue(f, in, &aux->desc.ret, 1, fn, ctx);
- }
- break;
- }
- case IR_RET: {
- IRRetAux* aux = (IRRetAux*)in->extra.aux;
- if (aux && aux->present) walk_abivalue(f, in, &aux->val, 0, fn, ctx);
- break;
- }
- case IR_SCOPE_BEGIN: {
- IRScopeAux* aux = (IRScopeAux*)in->extra.aux;
- if (aux) walk_operand(f, in, &aux->desc.cond, 0, fn, ctx);
- break;
- }
- case IR_ASM_BLOCK: {
- IRAsmAux* aux = (IRAsmAux*)in->extra.aux;
- if (!aux) break;
- for (u32 i = 0; i < aux->nin; ++i)
- walk_operand(f, in, &aux->in_ops[i], 0, fn, ctx);
- for (u32 i = 0; i < aux->nout; ++i)
- walk_operand(f, in, &aux->out_ops[i], 1, fn, ctx);
- break;
- }
- case IR_INTRINSIC: {
- IRIntrinAux* aux = (IRIntrinAux*)in->extra.aux;
- if (!aux) break;
- for (u32 i = 0; i < aux->narg; ++i)
- walk_operand(f, in, &aux->args[i], 0, fn, ctx);
- for (u32 i = 0; i < aux->ndst; ++i)
- walk_operand(f, in, &aux->dsts[i], 1, fn, ctx);
- break;
- }
- default:
- break;
- }
-}
-
typedef struct BitsCtx {
u64* use;
u64* def;
@@ -680,7 +583,7 @@ static void opt_apply_asm_constraints_from_live(Func* f,
bits_clear(use, words);
bits_clear(def, words);
BitsCtx bc = {use, def};
- walk_inst_operands(f, in, collect_bits, &bc);
+ opt_walk_inst_operands(f, in, collect_bits, &bc);
if ((IROp)in->op == IR_ASM_BLOCK)
apply_asm_register_constraints(f, in, use, def, live);
live_update_before(live, use, def, words);
@@ -1176,7 +1079,7 @@ static void rewrite_func(Func* f, const OptLiveInfo* live_info) {
continue;
}
refs_reset(&refs);
- walk_inst_operands(f, &in, refs_collect, &refs);
+ opt_walk_inst_operands(f, &in, refs_collect, &refs);
list_reset(&before);
list_reset(&after);
list_reset(&call_saves);
@@ -1202,11 +1105,12 @@ static void rewrite_func(Func* f, const OptLiveInfo* live_info) {
for (u32 k = 0; k < aux->desc.nargs; ++k)
rewrite_call_arg_value(f, &in, (CGABIValue*)&aux->desc.args[k],
&ctx);
- walk_abivalue(f, &in, &aux->desc.ret, 1, rewrite_one_operand, &ctx);
+ opt_walk_abivalue(f, &in, &aux->desc.ret, 1, rewrite_one_operand,
+ &ctx);
}
}
} else {
- walk_inst_operands(f, &in, rewrite_one_operand, &ctx);
+ opt_walk_inst_operands(f, &in, rewrite_one_operand, &ctx);
}
if ((IROp)in.op == IR_CALL) {
append_live_call_saves(f, &call_saves, &in, live, live_active_words,
@@ -1290,7 +1194,7 @@ void opt_dead_def_elim_with_live(Func* f, const OptLiveInfo* live_info) {
new_insts[w++] = *in;
refs_reset(&refs);
- walk_inst_operands(f, in, refs_collect, &refs);
+ opt_walk_inst_operands(f, in, refs_collect, &refs);
live_update_refs_before(live, &refs);
f->opt_dde_live_words_touched += refs.nuses + refs.ndefs;
}
diff --git a/test/opt/opt_test.c b/test/opt/opt_test.c
@@ -16,6 +16,7 @@
#include "core/heap.h"
#include "core/pool.h"
#include "opt/ir.h"
+#include "opt/opt_internal.h"
static void* h_alloc(CfreeHeap* h, size_t n, size_t a) {
(void)h;
@@ -1819,6 +1820,61 @@ static void opt_cfg_preserves_scope_edges(void) {
tc_fini(&tc);
}
+static int block_list_has(const OptBlockList* list, u32 block) {
+ for (u32 i = 0; i < list->n; ++i)
+ if (list->items[i] == block) return 1;
+ return 0;
+}
+
+static void opt_analysis_dominators_and_frontier(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ Func* f = new_func(&tc);
+ u32 entry = f->entry;
+ u32 then_b = ir_block_new(f);
+ u32 else_b = ir_block_new(f);
+ u32 join = ir_block_new(f);
+ u32 dead = ir_block_new(f);
+ ir_note_emit(f, then_b);
+ ir_note_emit(f, else_b);
+ ir_note_emit(f, join);
+ ir_note_emit(f, dead);
+
+ Val v = add_val(f, tc.i32);
+ emit_test_branch(f, entry, then_b, else_b, tc.i32);
+ emit_br_to(f, then_b, join);
+ emit_br_to(f, else_b, join);
+ emit_load_imm(f, join, v, tc.i32, 17);
+ emit_ret_val(f, join, v, tc.i32);
+ emit_ret_val(f, dead, v, tc.i32);
+
+ opt_build_cfg(f);
+ opt_verify(f, "analysis-test");
+ OptAnalysis a;
+ memset(&a, 0, sizeof a);
+ opt_analysis_build_dom_frontier(f, &a);
+
+ EXPECT(a.npo == 4, "analysis should visit four reachable blocks, got %u",
+ (unsigned)a.npo);
+ EXPECT(a.reachable[entry] && a.reachable[then_b] && a.reachable[else_b] &&
+ a.reachable[join],
+ "analysis should mark diamond reachable");
+ EXPECT(!a.reachable[dead], "analysis should leave pruned block unreachable");
+ EXPECT(a.idom[entry] == entry, "entry should dominate itself");
+ EXPECT(a.idom[then_b] == entry, "then idom should be entry");
+ EXPECT(a.idom[else_b] == entry, "else idom should be entry");
+ EXPECT(a.idom[join] == entry, "join idom should be entry");
+ EXPECT(opt_analysis_dominates(&a, entry, join),
+ "entry should dominate join");
+ EXPECT(!opt_analysis_dominates(&a, then_b, else_b),
+ "then should not dominate else");
+ EXPECT(block_list_has(&a.dom_frontier[then_b], join),
+ "then dominance frontier should contain join");
+ EXPECT(block_list_has(&a.dom_frontier[else_b], join),
+ "else dominance frontier should contain join");
+ tc_fini(&tc);
+}
+
static void opt_jump_cleanup_forwards_branch_targets(void) {
TestCtx tc;
tc_init(&tc);
@@ -3854,6 +3910,7 @@ int main(void) {
opt_call_plan_drives_call_specific_preservation();
opt_cfg_prunes_unreachable();
opt_cfg_preserves_scope_edges();
+ opt_analysis_dominators_and_frontier();
opt_jump_cleanup_forwards_branch_targets();
opt_jump_cleanup_inverts_to_remove_jump_block();
opt_jump_cleanup_keeps_conditional_fallthrough_block();