commit 8ae3ca5059ac45fe43b64e8e770842d3775522b1
parent 9380df48697dc0c4091e58e2bf7958cbcf44cfb3
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Thu, 21 May 2026 08:23:20 -0700
opt: complete O2 SSA substrate
Diffstat:
12 files changed, 296 insertions(+), 140 deletions(-)
diff --git a/doc/OPT.md b/doc/OPT.md
@@ -415,7 +415,10 @@ The optimizer is no longer just a stub:
- `src/opt/opt.c` implements the `CGTarget` wrapper.
- `src/opt/ir.c` and `src/opt/ir.h` implement the recorded IR container.
- `src/opt/pass_cfg.c` implements CFG construction.
-- `src/opt/pass_ssa.c` implements current SSA construction.
+- `src/opt/pass_ssa.c` implements mem2reg SSA construction, conventional SSA
+ edge-copy lowering, SSA undo, and stable SSA dumps for pass tests.
+- `src/opt/pass_analysis.c` owns shared CFG order, dominators, dominance
+ frontiers, dominator children, def-use rebuilding, and verifier guardrails.
- `cfree cc` forwards `-O0`/`-O1`/`-O2` into `CfreeCodeOptions`, so native
frontends exercise the optimizer through the normal compile driver.
- `test/toy/run.sh` supports `CFREE_TOY_OPT_LEVELS`; default is `0 1 2`, so
@@ -427,18 +430,23 @@ Current behavior:
computes liveness, runs the current priority allocator/rewrite path,
combines, runs post-RA DCE, cleans jumps/layout, and emits through the
wrapped target.
-- Level `2` has its own scheduler entry point, but until production SSA/value
- passes land it only runs the current SSA build/conventionalize/undo verifier
- path before routing through the same proven lowering path as `-O1`.
+- Level `2` has its own scheduler entry point. It now builds real mem2reg SSA,
+ verifies phi/use-def invariants, lowers phis through conventional SSA, undoes
+ SSA, and then routes through the same proven lowering path as `-O1`. It still
+ has no value-changing SSA transformations enabled.
- `opt_regalloc(..., allow_live_range_split)` accepts the O2 policy bit, but
live-range splitting is not implemented yet.
-- The current `opt_build_ssa` remains a shape-checking mem2reg prototype: it
- computes dominators/frontiers and phi placement, but phis do not yet receive
- real `Val` defs and uses are not rewritten for downstream value passes.
-
-The next implementation work should build the shared O2 analysis/data-flow
-substrate, make SSA production-quality, and then enable the MIR-style
-pre-lowering pass sequence behind the level-2 scheduler.
+- `opt_build_ssa` is no longer just a shape check: promoted loads/stores are
+ rewritten to SSA values, phis have real `Val` defs and predecessor inputs,
+ and def-use chains are rebuilt for downstream SSA passes.
+- Instruction ids are stable across block-array rewrites, so analysis and
+ dumps no longer depend on raw `Inst*` addresses or positional references.
+- Loop tree construction uses shared `OptAnalysis` dominators instead of a
+ pass-local dominator implementation.
+
+The next implementation work should finish the remaining analysis invalidation
+model and then land the first small SSA-backed transformation, preferably SSA
+DCE plus copy cleanup, before attempting GVN/DSE/LICM.
---
@@ -484,34 +492,46 @@ Exit criteria:
### Phase B - O2 Analysis Substrate
-Status: started. `OptPassCtx`, the shared operand/reference walker,
-`OptAnalysis` CFG order/dominator/frontier data, and `opt_verify` CFG/value-id
-guardrails are in place. Remaining work is stable instruction ids, pass
-invalidation flags, stricter def-use/type verification after production SSA,
-and replacing pass-local dominator implementations with the shared analysis.
+Status: mostly implemented. `OptPassCtx`, the shared operand/reference walker,
+`OptAnalysis` CFG order/dominator/frontier data, stable instruction ids,
+production mem2reg SSA, def-use rebuilding, SSA dumps, and stronger
+`opt_verify` guardrails are in place. Loop tree construction now uses shared
+dominators. Remaining work is a formal pass invalidation model and any
+additional shared analysis fields required by the first SSA value passes.
Goal: create the durable data structures and invalidation model that all O2
passes will share before implementing transformations.
-Implement:
+Checklist:
-- A small pass context for per-pass state, stage names, metrics, and temporary
- arenas.
-- A shared operand/reference walker for every pass that needs uses/defs,
+- [x] A small pass context for per-pass state, stage names, metrics, and
+ temporary arenas.
+- [x] A shared operand/reference walker for every pass that needs uses/defs,
including `CGABIValue`, calls, returns, inline asm, intrinsics, and indirect
bases.
-- First-class analysis results for RPO/PO, dominators, dominance frontiers,
- dominator children, loop metadata, block frequencies, and CFG invalidation.
-- Stable instruction references or ids so long-lived analysis never depends on
- raw `Inst*` addresses across block array rewrites.
-- `opt_verify(Func*, stage)` gates for CFG consistency, value type/class
- consistency, phi arity, def-use consistency, and pass invalidation mistakes.
+- [x] Shared analysis results for RPO/PO, dominators, dominance frontiers, and
+ dominator children.
+- [x] Loop tree construction moved onto shared dominator analysis.
+- [x] Stable instruction ids so long-lived analysis never depends on raw
+ `Inst*` addresses across block array rewrites.
+- [x] Real mem2reg phi `Val` definitions and rewritten promoted uses.
+- [x] Def-use chain rebuilding, including phi inputs and indirect bases.
+- [x] SSA dump coverage for phis and rewritten uses.
+- [x] `opt_verify(Func*, stage)` gates for CFG consistency, instruction id
+ consistency, phi arity, def-use consistency, and SSA value class checks.
+- [ ] Formal analysis invalidation flags for passes that preserve, rebuild, or
+ invalidate CFG/value/use-def state.
+- [ ] Additional shared analysis fields demanded by first value passes.
Exit criteria:
-- `-O1` stays green.
-- `-O2` can run the substrate and verifier, then lower through the current O1
- backend path with no observable behavior change.
+- [x] `-O1` stays green.
+- [x] `-O2` can run the substrate and verifier, then lower through the current
+ O1 backend path with no observable behavior change.
+- [x] SSA dump tests prove phis and rewritten uses on branch/loop locals.
+- [x] `make test-opt`
+- [x] Focused `CFREE_TOY_OPT_LEVELS="0 1 2" make test-toy` branch/loop subset.
+- [x] Full `CFREE_TOY_OPT_LEVELS="0 1 2" make test-toy`
### Phase C - Full Allocation Infrastructure
@@ -532,28 +552,35 @@ Exit criteria:
- `-O2` can use the same lowering path with coalescing/splitting enabled, even
if all SSA value passes are still disabled.
-### Phase D - Production SSA, Value, and Memory Passes
+### Phase D - SSA-Backed Value and Memory Passes
-Goal: enable the MIR full pre-lowering schedule for `-O2`.
+Goal: enable the MIR full pre-lowering schedule for `-O2`, starting with small
+SSA-backed passes that prove the use-def substrate before heavier value and
+memory optimizations.
Implement in order:
-1. `opt_block_cloning`
-2. `opt_addr_xform`
-3. `opt_gvn`
-4. `opt_copy_prop`
-5. `opt_dse`
-6. `opt_ssa_dce`
-7. `opt_licm`
-8. `opt_pressure_relief`
-9. `opt_make_conventional_ssa`
-10. `opt_ssa_combine`
-11. `opt_undo_ssa`
-12. `opt_jump_opt`
+1. `opt_ssa_dce`
+2. `opt_copy_cleanup`
+3. `opt_block_cloning`
+4. `opt_addr_xform`
+5. `opt_gvn`
+6. `opt_copy_prop`
+7. `opt_dse`
+8. `opt_licm`
+9. `opt_pressure_relief`
+10. `opt_make_conventional_ssa`
+11. `opt_ssa_combine`
+12. `opt_undo_ssa`
+13. `opt_jump_opt`
Do not batch these into one landing. Each pass needs a pass-local corpus case
that fails red without the pass or its bug fix.
+GVN is deliberately not the next milestone. It should wait until SSA DCE and
+copy cleanup have exercised ordinary use-def mutation, verifier coverage, and
+analysis invalidation on small transformations.
+
Exit criteria:
- `CFREE_TOY_OPT_LEVELS="0 1 2" make test-toy` passes for the relevant arch.
diff --git a/src/opt/ir.c b/src/opt/ir.c
@@ -147,9 +147,20 @@ Inst* ir_emit(Func* f, u32 block, IROp op) {
in = &b->insts[b->ninsts++];
memset(in, 0, sizeof *in);
in->op = (u16)op;
+ ir_assign_inst_id(f, in);
return in;
}
+InstId ir_inst_id_new(Func* f) {
+ if (!f->next_inst_id) f->next_inst_id = 1;
+ return f->next_inst_id++;
+}
+
+void ir_assign_inst_id(Func* f, Inst* in) {
+ if (!f || !in || in->id != INST_ID_NONE) return;
+ in->id = ir_inst_id_new(f);
+}
+
/* ---- frame slots / params ---- */
FrameSlot ir_frame_slot_new(Func* f, const FrameSlotDesc* d) {
diff --git a/src/opt/ir.h b/src/opt/ir.h
@@ -11,6 +11,9 @@
typedef u32 Val;
#define VAL_NONE 0u
+typedef u32 InstId;
+#define INST_ID_NONE 0u
+
/* IROps mirror the CGTarget surface 1:1 plus a handful of SSA-only ops
* (IR_PHI, IR_CONST_I, IR_CONST_BYTES). Each CGTarget method records as
* exactly one Inst, so level-1 replay is a flat walk that re-issues
@@ -264,6 +267,7 @@ typedef struct IRLocal {
typedef struct Inst {
u16 op;
u16 flags;
+ InstId id;
SrcLoc loc; /* sticky from CGTarget.set_loc at recording */
CfreeCgTypeId type;
Val def; /* primary SSA def, or VAL_NONE */
@@ -341,6 +345,7 @@ typedef struct OptUse {
Val val;
u32 block;
u32 inst;
+ InstId inst_id;
u32 next_for_val;
u32 operand_index;
u32 phi_pred_index;
@@ -408,6 +413,7 @@ typedef struct Func {
u64 opt_rewrite_inserted_insts;
u64 opt_rewrite_live_words_touched;
u64 opt_dde_live_words_touched;
+ InstId next_inst_id;
OptValInfo* val_info; /* indexed by Val, length nvals */
OptUse* opt_uses;
@@ -440,6 +446,8 @@ Val ir_alloc_val(Func*, CfreeCgTypeId, u8 cls);
void ir_ensure_val(Func*, Val, CfreeCgTypeId, u8 cls);
Inst* ir_emit(Func*, u32 block, IROp);
+InstId ir_inst_id_new(Func*);
+void ir_assign_inst_id(Func*, Inst*);
/* Resize a block's successor array. Used by ops with >2 successors
* (IR_SWITCH, IR_INDIRECT_BRANCH). Always sets nsucc to n. */
diff --git a/src/opt/opt.c b/src/opt/opt.c
@@ -316,6 +316,7 @@ static int inst_uses_local_reg(const Inst* in, Reg reg) {
static void opt_make_local_load(Func* f, Inst* out, IRLocal* l, SrcLoc loc) {
memset(out, 0, sizeof *out);
out->op = IR_LOAD;
+ ir_assign_inst_id(f, out);
out->loc = loc;
out->type = l->desc.type;
out->def = (Val)l->storage.v.reg;
@@ -332,6 +333,7 @@ static void opt_make_local_load(Func* f, Inst* out, IRLocal* l, SrcLoc loc) {
static void opt_make_local_store(Func* f, Inst* out, IRLocal* l, SrcLoc loc) {
memset(out, 0, sizeof *out);
out->op = IR_STORE;
+ ir_assign_inst_id(f, out);
out->loc = loc;
out->opnds = arena_array(f->arena, Operand, 2);
out->opnds[0] = opt_local_addr_operand(l);
diff --git a/src/opt/opt.h b/src/opt/opt.h
@@ -162,6 +162,7 @@ void opt_live_blocks(Func*, OptLiveInfo*);
void opt_live_dump_blocks(Func*, const OptLiveInfo*, Writer*);
void opt_live_ranges_build(Func*, const OptLiveInfo*, OptLiveRangeSet*);
void opt_live_dump_ranges(Func*, const OptLiveRangeSet*, Writer*);
+void opt_ssa_dump(Func*, Writer*);
void opt_rewrite_dump(Func*, Writer*);
void opt_coalesce(Func*);
void opt_regalloc(Func*, int allow_live_range_split);
diff --git a/src/opt/opt_util.c b/src/opt/opt_util.c
@@ -18,6 +18,8 @@ void opt_walk_operand(Func* f, Inst* in, Operand* op, int is_def,
base.kind = OPK_REG;
base.cls = RC_INT;
base.v.reg = op->v.ind.base;
+ if ((Val)base.v.reg < f->nvals && f->val_type[(Val)base.v.reg])
+ base.type = f->val_type[(Val)base.v.reg];
fn(f, in, &base, 0, ctx);
op->v.ind.base = base.v.reg;
}
diff --git a/src/opt/pass_analysis.c b/src/opt/pass_analysis.c
@@ -17,6 +17,11 @@ static void opt_fail(Func* f, const char* stage, const char* msg, u32 a,
stage ? stage : "?", msg, (unsigned)a, (unsigned)b);
}
+static int verify_stage_is_ssa(const char* stage) {
+ return stage && strstr(stage, "ssa") != NULL &&
+ strstr(stage, "pre-ssa") == NULL;
+}
+
static void block_list_add(Arena* arena, OptBlockList* list, u32 block) {
if (list->n == list->cap) {
u32 ncap = list->cap ? list->cap * 2u : 4u;
@@ -195,6 +200,7 @@ static void opt_use_add(Func* f, Val v, u32 b, u32 i, u8 kind, u32 op_idx,
u->val = v;
u->block = b;
u->inst = i;
+ u->inst_id = f->blocks[b].insts[i].id;
u->kind = kind;
u->operand_index = op_idx;
u->phi_pred_index = pred_idx;
@@ -304,16 +310,31 @@ static void verify_operand(Func* f, Inst* in, Operand* op, int is_def,
Val v = (Val)op->v.reg;
if (v == VAL_NONE || v >= f->nvals)
opt_fail(f, stage, is_def ? "bad def val" : "bad use val", v, f->nvals);
+ if (!verify_stage_is_ssa(stage)) return;
+ if (op->cls != f->val_cls[v])
+ opt_fail(f, stage, is_def ? "def class mismatch" : "use class mismatch",
+ v, op->cls);
}
static void verify_values(Func* f, const char* stage) {
if (f->opt_rewritten) return;
+ u32 inst_cap = f->next_inst_id ? f->next_inst_id : 1u;
+ u8* seen_inst = arena_zarray(f->arena, u8, inst_cap);
for (u32 b = 0; b < f->nblocks; ++b) {
Block* bl = &f->blocks[b];
for (u32 i = 0; i < bl->ninsts; ++i) {
Inst* in = &bl->insts[i];
+ if (in->id == INST_ID_NONE || in->id >= f->next_inst_id)
+ opt_fail(f, stage, "bad inst id", b, i);
+ if (seen_inst[in->id])
+ opt_fail(f, stage, "duplicate inst id", in->id, b);
+ seen_inst[in->id] = 1;
if (in->def != VAL_NONE) {
if (in->def >= f->nvals) opt_fail(f, stage, "bad inst def", b, i);
+ if (verify_stage_is_ssa(stage) && (IROp)in->op == IR_PHI &&
+ in->type && f->val_type[in->def] &&
+ in->type != f->val_type[in->def])
+ opt_fail(f, stage, "inst def type mismatch", in->def, b);
}
for (u32 d = 0; d < in->ndefs; ++d) {
Val v = in->defs[d];
@@ -339,12 +360,51 @@ static void verify_values(Func* f, const char* stage) {
if (aux->pred_vals[p] != VAL_NONE && aux->pred_vals[p] >= f->nvals)
opt_fail(f, stage, "phi bad pred value", aux->pred_vals[p],
f->nvals);
+ if (aux->pred_vals[p] != VAL_NONE &&
+ f->val_type[aux->pred_vals[p]] &&
+ f->val_type[aux->pred_vals[p]] != in->type)
+ opt_fail(f, stage, "phi input type mismatch", b, p);
}
}
}
}
}
+static void verify_use_site(Func* f, const char* stage, const OptUse* use) {
+ Inst* in = &f->blocks[use->block].insts[use->inst];
+ if (in->id != use->inst_id)
+ opt_fail(f, stage, "def-use stale inst id", use->inst_id, in->id);
+ switch ((OptUseKind)use->kind) {
+ case OPT_USE_OPERAND:
+ if (!use->operand)
+ opt_fail(f, stage, "def-use missing operand", use->block, use->inst);
+ if (use->operand->kind != OPK_REG ||
+ (Val)use->operand->v.reg != use->val)
+ opt_fail(f, stage, "def-use operand mismatch", use->val, use->kind);
+ break;
+ case OPT_USE_INDIRECT_BASE:
+ if (!use->operand || use->operand->kind != OPK_INDIRECT ||
+ (Val)use->operand->v.ind.base != use->val)
+ opt_fail(f, stage, "def-use indirect mismatch", use->val, use->kind);
+ break;
+ case OPT_USE_PHI_INPUT: {
+ if ((IROp)in->op != IR_PHI)
+ opt_fail(f, stage, "def-use phi site mismatch", use->block,
+ use->inst);
+ IRPhiAux* aux = (IRPhiAux*)in->extra.aux;
+ if (!aux || use->phi_pred_index >= aux->npreds)
+ opt_fail(f, stage, "def-use phi pred mismatch", use->val,
+ use->phi_pred_index);
+ if (aux->pred_vals[use->phi_pred_index] != use->val)
+ opt_fail(f, stage, "def-use phi value mismatch", use->val,
+ use->phi_pred_index);
+ break;
+ }
+ default:
+ opt_fail(f, stage, "def-use bad kind", use->kind, use->val);
+ }
+}
+
static void verify_def_use(Func* f, const char* stage) {
if (f->opt_rewritten) return;
opt_rebuild_def_use(f);
@@ -357,6 +417,7 @@ static void verify_def_use(Func* f, const char* stage) {
if (use->inst >= f->blocks[use->block].ninsts)
opt_fail(f, stage, "def-use bad inst", use->inst,
f->blocks[use->block].ninsts);
+ verify_use_site(f, stage, use);
if (f->val_def_block[use->val] >= f->nblocks)
opt_fail(f, stage, "def-use bad def block", use->val,
f->val_def_block[use->val]);
diff --git a/src/opt/pass_emit.c b/src/opt/pass_emit.c
@@ -308,29 +308,42 @@ static void replay_parallel_moves(ReplayCtx* r, ReplayParallelMove* moves,
}
}
-static int replay_plan_supported(CGTarget* w, const CGCallPlan* p) {
- if (!p) return 0;
+static int replay_plan_supported(CGTarget* w, const CGCallPlan* p,
+ const char** reason) {
+ if (reason) *reason = NULL;
+ if (!p) {
+ if (reason) *reason = "missing plan";
+ return 0;
+ }
for (u32 i = 0; i < p->nargs; ++i) {
if ((p->args[i].dst_kind == CG_CALL_PLAN_STACK ||
p->args[i].dst_kind == CG_CALL_PLAN_TAIL_STACK) &&
- !w->store_call_arg)
+ !w->store_call_arg) {
+ if (reason) *reason = "stack arg without store_call_arg";
return 0;
+ }
if (p->args[i].dst_kind == CG_CALL_PLAN_REG &&
(p->args[i].src_kind == CG_CALL_PLAN_SRC_ADDR ||
p->args[i].src_offset) &&
- !w->load_call_arg)
+ !w->load_call_arg) {
+ if (reason) *reason = "reg arg without load_call_arg";
return 0;
+ }
}
for (u32 i = 0; i < p->nrets; ++i)
if (p->rets[i].dst.kind != OPK_REG && p->rets[i].dst.kind != OPK_LOCAL &&
- p->rets[i].dst.kind != OPK_INDIRECT)
+ p->rets[i].dst.kind != OPK_INDIRECT) {
+ if (reason) *reason = "unsupported ret destination";
return 0;
+ }
for (u32 i = 0; i < p->nrets; ++i)
if (p->rets[i].dst_offset &&
(p->rets[i].dst.kind == OPK_LOCAL ||
p->rets[i].dst.kind == OPK_INDIRECT) &&
- !w->store_call_ret)
+ !w->store_call_ret) {
+ if (reason) *reason = "ret offset without store_call_ret";
return 0;
+ }
return 1;
}
@@ -578,13 +591,14 @@ static void replay_inst(ReplayCtx* r, u32 b, Inst* in) {
}
case IR_CALL: {
IRCallAux* aux = (IRCallAux*)in->extra.aux;
+ const char* plan_reason = NULL;
if (aux && aux->use_plan_replay && w->emit_call_plan &&
- replay_plan_supported(w, &aux->plan)) {
+ replay_plan_supported(w, &aux->plan, &plan_reason)) {
replay_planned_call(r, aux);
break;
}
- compiler_panic(r->c, in->loc,
- "opt replay: call has no supported call plan");
+ compiler_panic(r->c, in->loc, "opt replay: call has no supported call plan%s%s",
+ plan_reason ? ": " : "", plan_reason ? plan_reason : "");
break;
}
case IR_BR: {
diff --git a/src/opt/pass_loop.c b/src/opt/pass_loop.c
@@ -1,80 +1,7 @@
#include <string.h>
#include "core/arena.h"
-#include "opt/opt.h"
-
-#define OPT_BLK_NONE 0xffffffffu
-
-typedef struct LoopPostorderCtx {
- Func* f;
- u32* po;
- u32* po_idx;
- u8* visited;
- u32 count;
-} LoopPostorderCtx;
-
-static void loop_postorder_dfs(LoopPostorderCtx* ctx, u32 b) {
- if (b >= ctx->f->nblocks || ctx->visited[b]) return;
- ctx->visited[b] = 1;
- Block* bl = &ctx->f->blocks[b];
- for (u32 s = 0; s < bl->nsucc; ++s) {
- u32 t = bl->succ[s];
- if (t < ctx->f->nblocks) loop_postorder_dfs(ctx, t);
- }
- ctx->po[ctx->count] = b;
- ctx->po_idx[b] = ctx->count;
- ctx->count++;
-}
-
-static u32 loop_dom_intersect(u32 b1, u32 b2, const u32* idom,
- const u32* po_idx) {
- while (b1 != b2) {
- while (po_idx[b1] < po_idx[b2]) b1 = idom[b1];
- while (po_idx[b2] < po_idx[b1]) b2 = idom[b2];
- }
- return b1;
-}
-
-static u32* loop_compute_idom(Func* f, const u8* visited, const u32* po,
- const u32* po_idx, u32 npo, u32 entry) {
- u32* idom = arena_array(f->arena, u32, f->nblocks ? f->nblocks : 1u);
- for (u32 b = 0; b < f->nblocks; ++b) idom[b] = OPT_BLK_NONE;
- idom[entry] = entry;
-
- int changed = 1;
- while (changed) {
- changed = 0;
- for (u32 ri = npo; ri > 0; --ri) {
- u32 b = po[ri - 1u];
- if (b == entry) continue;
- Block* bl = &f->blocks[b];
- u32 new_idom = OPT_BLK_NONE;
- for (u32 p = 0; p < bl->npreds; ++p) {
- u32 pp = bl->preds[p];
- if (pp >= f->nblocks || !visited[pp]) continue;
- if (idom[pp] == OPT_BLK_NONE) continue;
- new_idom = (new_idom == OPT_BLK_NONE)
- ? pp
- : loop_dom_intersect(pp, new_idom, idom, po_idx);
- }
- if (new_idom != OPT_BLK_NONE && idom[b] != new_idom) {
- idom[b] = new_idom;
- changed = 1;
- }
- }
- }
- return idom;
-}
-
-static int loop_dominates(const u32* idom, u32 entry, u32 dom, u32 node) {
- u32 cur = node;
- while (cur != OPT_BLK_NONE) {
- if (cur == dom) return 1;
- if (cur == entry) break;
- cur = idom[cur];
- }
- return 0;
-}
+#include "opt/opt_internal.h"
static void loop_mark_body(Func* f, const u8* visited, u32 header, u32 latch,
u8* body, u32* stack) {
@@ -110,33 +37,27 @@ void opt_build_loop_tree(Func* f) {
}
if (f->nblocks == 0 || f->entry >= f->nblocks) return;
- LoopPostorderCtx pctx;
- memset(&pctx, 0, sizeof pctx);
- pctx.f = f;
- pctx.po = arena_array(f->arena, u32, f->nblocks);
- pctx.po_idx = arena_array(f->arena, u32, f->nblocks);
- pctx.visited = arena_zarray(f->arena, u8, f->nblocks);
- loop_postorder_dfs(&pctx, f->entry);
- if (pctx.count == 0) return;
+ OptAnalysis a;
+ memset(&a, 0, sizeof a);
+ opt_analysis_build_dominators(f, &a);
+ if (a.npo == 0) return;
- u32* idom = loop_compute_idom(f, pctx.visited, pctx.po, pctx.po_idx,
- pctx.count, f->entry);
u8* body = arena_zarray(f->arena, u8, f->nblocks);
u32* stack = arena_array(f->arena, u32, f->nblocks);
for (u32 header = 0; header < f->nblocks; ++header) {
- if (!pctx.visited[header]) continue;
+ if (!a.reachable[header]) continue;
memset(body, 0, f->nblocks * sizeof body[0]);
int has_loop = 0;
for (u32 latch = 0; latch < f->nblocks; ++latch) {
- if (!pctx.visited[latch]) continue;
+ if (!a.reachable[latch]) continue;
Block* lb = &f->blocks[latch];
for (u32 s = 0; s < lb->nsucc; ++s) {
if (lb->succ[s] != header) continue;
- if (!loop_dominates(idom, f->entry, header, latch)) continue;
+ if (!opt_analysis_dominates(&a, header, latch)) continue;
has_loop = 1;
- loop_mark_body(f, pctx.visited, header, latch, body, stack);
+ loop_mark_body(f, a.reachable, header, latch, body, stack);
}
}
diff --git a/src/opt/pass_lower.c b/src/opt/pass_lower.c
@@ -805,6 +805,7 @@ static Inst* list_push(Func* f, RewriteList* l, IROp op) {
Inst* in = &l->data[l->n++];
memset(in, 0, sizeof *in);
in->op = (u16)op;
+ ir_assign_inst_id(f, in);
++f->opt_rewrite_inserted_insts;
return in;
}
@@ -833,6 +834,7 @@ static Inst* out_push_front(Func* f, RewriteOut* out, IROp op) {
Inst* in = &out->data[--out->start];
memset(in, 0, sizeof *in);
in->op = (u16)op;
+ ir_assign_inst_id(f, in);
return in;
}
diff --git a/src/opt/pass_ssa.c b/src/opt/pass_ssa.c
@@ -2,6 +2,7 @@
#include "opt/opt_internal.h"
+#include <stdio.h>
#include <string.h>
#include "core/arena.h"
@@ -197,6 +198,7 @@ static void insert_phis(Func* f, u32 b, const u8* needs_phi) {
const IRFrameSlot* slot = &f->frame_slots[sid - 1u];
Inst* in = &insts[w++];
in->op = IR_PHI;
+ ir_assign_inst_id(f, in);
in->type = slot->type;
in->def = ir_alloc_val(f, slot->type, ssa_type_class(f, slot->type));
f->val_def_block[in->def] = b;
@@ -443,6 +445,7 @@ static Inst make_copy_inst(Func* f, Val dst, Val src, CfreeCgTypeId ty, u8 cls)
Inst in;
memset(&in, 0, sizeof in);
in.op = IR_COPY;
+ ir_assign_inst_id(f, &in);
in.type = ty;
in.def = dst;
in.nopnds = 2;
@@ -663,3 +666,72 @@ void opt_undo_ssa(Func* f) {
}
opt_rebuild_def_use(f);
}
+
+static void ssa_dump_write(Writer* w, const char* s) {
+ cfree_writer_write(w, s, strlen(s));
+}
+
+typedef struct SsaDumpUseCtx {
+ Writer* w;
+ int any;
+} SsaDumpUseCtx;
+
+static void ssa_dump_use(Func* f, Inst* in, Operand* op, int is_def,
+ void* arg) {
+ (void)f;
+ (void)in;
+ if (is_def || op->kind != OPK_REG) return;
+ SsaDumpUseCtx* ctx = (SsaDumpUseCtx*)arg;
+ char buf[32];
+ snprintf(buf, sizeof buf, "%sv%u", ctx->any ? "," : "", (unsigned)op->v.reg);
+ ssa_dump_write(ctx->w, buf);
+ ctx->any = 1;
+}
+
+void opt_ssa_dump(Func* f, Writer* w) {
+ if (!f || !w) return;
+ opt_rebuild_def_use(f);
+ char buf[160];
+ snprintf(buf, sizeof buf, "ssa blocks=%u vals=%u uses=%u\n",
+ (unsigned)f->nblocks, (unsigned)f->nvals,
+ (unsigned)f->opt_nuses);
+ ssa_dump_write(w, buf);
+ for (u32 b = 0; b < f->nblocks; ++b) {
+ Block* bl = &f->blocks[b];
+ snprintf(buf, sizeof buf, "block %u preds=%u succs=%u\n", (unsigned)b,
+ (unsigned)bl->npreds, (unsigned)bl->nsucc);
+ ssa_dump_write(w, buf);
+ for (u32 i = 0; i < bl->ninsts; ++i) {
+ Inst* in = &bl->insts[i];
+ snprintf(buf, sizeof buf, " i%u op=%u", (unsigned)in->id,
+ (unsigned)in->op);
+ ssa_dump_write(w, buf);
+ if (in->def != VAL_NONE) {
+ snprintf(buf, sizeof buf, " def=v%u", (unsigned)in->def);
+ ssa_dump_write(w, buf);
+ }
+ if ((IROp)in->op == IR_PHI) {
+ IRPhiAux* aux = (IRPhiAux*)in->extra.aux;
+ snprintf(buf, sizeof buf, " phi slot=%u preds=",
+ aux ? (unsigned)aux->slot_id : 0u);
+ ssa_dump_write(w, buf);
+ if (aux) {
+ for (u32 p = 0; p < aux->npreds; ++p) {
+ snprintf(buf, sizeof buf, "%sb%u:v%u", p ? "," : "",
+ (unsigned)aux->pred_blocks[p],
+ (unsigned)aux->pred_vals[p]);
+ ssa_dump_write(w, buf);
+ }
+ }
+ } else {
+ SsaDumpUseCtx ctx;
+ ctx.w = w;
+ ctx.any = 0;
+ ssa_dump_write(w, " uses=");
+ opt_walk_inst_operands(f, in, ssa_dump_use, &ctx);
+ if (!ctx.any) ssa_dump_write(w, "-");
+ }
+ ssa_dump_write(w, "\n");
+ }
+ }
+}
diff --git a/test/opt/opt_test.c b/test/opt/opt_test.c
@@ -420,6 +420,15 @@ static int count_op(Func* f, IROp op) {
return n;
}
+static u32 count_uses_of(Func* f, Val v) {
+ opt_rebuild_def_use(f);
+ u32 n = 0;
+ for (u32 u = f->opt_first_use_by_val[v]; u != OPT_USE_NONE;
+ u = f->opt_uses[u].next_for_val)
+ ++n;
+ return n;
+}
+
static u32 live_range_count_for(const OptLiveRangeSet* ranges, Val v) {
u32 n = 0;
for (u32 r = ranges->first_range_by_val[v]; r != OPT_RANGE_NONE;
@@ -1925,6 +1934,27 @@ static void opt_ssa_diamond_mem2reg_phi(void) {
EXPECT(aux && aux->slot_id == (u32)fs, "phi should carry slot id in aux");
EXPECT(aux && aux->npreds == f->blocks[join].npreds,
"phi predecessor count should match block preds");
+ EXPECT(aux && aux->pred_vals[0] == tv && aux->pred_vals[1] == ev,
+ "phi inputs should be renamed to branch-local defs");
+ Inst* ret = &f->blocks[join].insts[f->blocks[join].ninsts - 1u];
+ IRRetAux* ret_aux = (IRRetAux*)ret->extra.aux;
+ EXPECT(ret_aux && ret_aux->val.storage.v.reg == (Reg)phi->def,
+ "promoted load use should be rewritten to phi def");
+ EXPECT(count_uses_of(f, phi->def) >= 1,
+ "def-use chains should include phi def users");
+
+ CfreeWriter* w = NULL;
+ (void)cfree_writer_mem(&g_heap, &w);
+ opt_ssa_dump(f, w);
+ size_t len = 0;
+ const unsigned char* bytes = cfree_writer_mem_bytes(w, &len);
+ EXPECT(bytes_contains(bytes, len, " phi slot="),
+ "SSA dump should include phi records");
+ EXPECT(bytes_contains(bytes, len, "preds=b"),
+ "SSA dump should include phi predecessor inputs");
+ EXPECT(bytes_contains(bytes, len, "uses=v"),
+ "SSA dump should include rewritten operand uses");
+ cfree_writer_close(w);
opt_make_conventional_ssa(f);
opt_verify(f, "test-ssa-diamond-conventional");
@@ -1971,6 +2001,11 @@ static void opt_ssa_loop_carried_phi(void) {
EXPECT(count_op(f, IR_PHI) == 1, "loop should insert one carried phi");
EXPECT((IROp)f->blocks[header].insts[0].op == IR_PHI,
"loop-carried phi should be in the header");
+ Inst* phi = &f->blocks[header].insts[0];
+ IRPhiAux* aux = (IRPhiAux*)phi->extra.aux;
+ EXPECT(aux && aux->npreds == 2 && aux->pred_vals[0] == zero &&
+ aux->pred_vals[1] == next,
+ "loop phi should carry entry and backedge defs");
EXPECT(count_op(f, IR_STORE) == 0, "loop promoted stores should be removed");
EXPECT(count_op(f, IR_LOAD) == 0, "loop promoted loads should be removed");
opt_make_conventional_ssa(f);