commit d43cbf6fc02269ac31d3fb6859ce514a86d48b5d
parent 8ae3ca5059ac45fe43b64e8e770842d3775522b1
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Thu, 21 May 2026 08:39:12 -0700
opt: add SSA DCE and copy cleanup
Diffstat:
14 files changed, 571 insertions(+), 199 deletions(-)
diff --git a/doc/OPT.md b/doc/OPT.md
@@ -443,10 +443,16 @@ Current behavior:
dumps no longer depend on raw `Inst*` addresses or positional references.
- Loop tree construction uses shared `OptAnalysis` dominators instead of a
pass-local dominator implementation.
+- `Func.opt_valid_analyses` now tracks CFG, def-use, dominator, and loop-info
+ validity. Mutating passes invalidate the affected bits, rebuild passes mark
+ their products valid, and `opt_verify` checks cached def-use before rebuilding
+ it so stale use-site assumptions are caught.
+- The first SSA-backed value passes are in the O2 path: `opt_ssa_dce` removes
+ unused pure SSA defs/phis, and `opt_copy_cleanup` rewrites safe SSA copy users
+ through def-use chains while avoiding multi-def conventional-SSA edge copies.
-The next implementation work should finish the remaining analysis invalidation
-model and then land the first small SSA-backed transformation, preferably SSA
-DCE plus copy cleanup, before attempting GVN/DSE/LICM.
+The next implementation work should extend this small-pass path incrementally,
+before attempting GVN/DSE/LICM.
---
@@ -492,12 +498,14 @@ Exit criteria:
### Phase B - O2 Analysis Substrate
-Status: mostly implemented. `OptPassCtx`, the shared operand/reference walker,
+Status: implemented for the first O2 value-pass slice. `OptPassCtx`, the
+shared operand/reference walker,
`OptAnalysis` CFG order/dominator/frontier data, stable instruction ids,
production mem2reg SSA, def-use rebuilding, SSA dumps, and stronger
`opt_verify` guardrails are in place. Loop tree construction now uses shared
-dominators. Remaining work is a formal pass invalidation model and any
-additional shared analysis fields required by the first SSA value passes.
+dominators. Passes now have explicit CFG, def-use, dominator, and loop-info
+validity bits, and the first SSA value passes did not require additional shared
+analysis fields.
Goal: create the durable data structures and invalidation model that all O2
passes will share before implementing transformations.
@@ -519,9 +527,9 @@ Checklist:
- [x] SSA dump coverage for phis and rewritten uses.
- [x] `opt_verify(Func*, stage)` gates for CFG consistency, instruction id
consistency, phi arity, def-use consistency, and SSA value class checks.
-- [ ] Formal analysis invalidation flags for passes that preserve, rebuild, or
+- [x] Formal analysis invalidation flags for passes that preserve, rebuild, or
invalidate CFG/value/use-def state.
-- [ ] Additional shared analysis fields demanded by first value passes.
+- [x] Additional shared analysis fields demanded by first value passes.
Exit criteria:
@@ -560,8 +568,8 @@ memory optimizations.
Implement in order:
-1. `opt_ssa_dce`
-2. `opt_copy_cleanup`
+1. [x] `opt_ssa_dce`
+2. [x] `opt_copy_cleanup`
3. `opt_block_cloning`
4. `opt_addr_xform`
5. `opt_gvn`
diff --git a/src/opt/ir.h b/src/opt/ir.h
@@ -31,15 +31,15 @@ typedef enum IROp {
IR_PARAM_DECL,
/* Address-bearing data movement. */
- IR_LOAD_IMM, /* opnds[0] dst REG; extra.imm = imm */
- IR_LOAD_CONST, /* opnds[0] dst REG; extra.cbytes */
- IR_COPY, /* opnds[0] dst REG, opnds[1] src REG */
- IR_LOAD, /* opnds[0] dst REG, opnds[1] addr; extra.mem */
- IR_STORE, /* opnds[0] addr, opnds[1] src REG|IMM; extra.mem */
- IR_ADDR_OF, /* opnds[0] dst REG, opnds[1] lv (LOCAL|GLOBAL|INDIRECT) */
- IR_TLS_ADDR_OF, /* opnds[0] dst REG; extra.aux = IRTlsAux */
- IR_AGG_COPY, /* opnds[0] dst, opnds[1] src; extra.aux = IRAggAux */
- IR_AGG_SET, /* opnds[0] dst, opnds[1] byte; extra.aux = IRAggAux */
+ IR_LOAD_IMM, /* opnds[0] dst REG; extra.imm = imm */
+ IR_LOAD_CONST, /* opnds[0] dst REG; extra.cbytes */
+ IR_COPY, /* opnds[0] dst REG, opnds[1] src REG */
+ IR_LOAD, /* opnds[0] dst REG, opnds[1] addr; extra.mem */
+ IR_STORE, /* opnds[0] addr, opnds[1] src REG|IMM; extra.mem */
+ IR_ADDR_OF, /* opnds[0] dst REG, opnds[1] lv (LOCAL|GLOBAL|INDIRECT) */
+ IR_TLS_ADDR_OF, /* opnds[0] dst REG; extra.aux = IRTlsAux */
+ IR_AGG_COPY, /* opnds[0] dst, opnds[1] src; extra.aux = IRAggAux */
+ IR_AGG_SET, /* opnds[0] dst, opnds[1] byte; extra.aux = IRAggAux */
IR_BITFIELD_LOAD, /* opnds[0] dst REG, opnds[1] record; extra.aux */
IR_BITFIELD_STORE, /* opnds[0] record, opnds[1] src; extra.aux */
@@ -57,36 +57,36 @@ typedef enum IROp {
IR_PHI,
/* Control flow / scopes. */
- IR_BR, /* unconditional. block.succ[0] = target block id. */
- IR_CONDBR, /* opnds[0] cond REG; succ[0] = true, succ[1] = false. */
- IR_CMP_BRANCH, /* fused. opnds = [a, b]; extra.imm = CmpOp;
- succ[0] = taken, succ[1] = fallthrough. */
- IR_SWITCH, /* multi-target structured switch. opnds[0] = selector;
- extra.aux = IRSwitchAux; succ[0..ncases) = case
- blocks, succ[ncases] = default block. */
+ IR_BR, /* unconditional. block.succ[0] = target block id. */
+ IR_CONDBR, /* opnds[0] cond REG; succ[0] = true, succ[1] = false. */
+ IR_CMP_BRANCH, /* fused. opnds = [a, b]; extra.imm = CmpOp;
+ succ[0] = taken, succ[1] = fallthrough. */
+ IR_SWITCH, /* multi-target structured switch. opnds[0] = selector;
+ extra.aux = IRSwitchAux; succ[0..ncases) = case
+ blocks, succ[ncases] = default block. */
IR_INDIRECT_BRANCH, /* opnds[0] = addr REG; extra.aux = IRIndirectAux.
succ[0..nvalid) = the valid target blocks. */
IR_LOAD_LABEL_ADDR, /* opnds[0] dst REG; extra.imm = target block id. */
- IR_RET, /* extra.aux = IRRetAux* (NULL for void). */
- IR_SCOPE_BEGIN, /* extra.aux = IRScopeAux. defs[0] = scope id Val. */
- IR_SCOPE_ELSE, /* extra.imm = scope id (Val). */
- IR_SCOPE_END, /* extra.imm = scope id (Val). */
- IR_BREAK_TO, /* extra.imm = scope id (Val). */
- IR_CONTINUE_TO, /* extra.imm = scope id (Val). */
+ IR_RET, /* extra.aux = IRRetAux* (NULL for void). */
+ IR_SCOPE_BEGIN, /* extra.aux = IRScopeAux. defs[0] = scope id Val. */
+ IR_SCOPE_ELSE, /* extra.imm = scope id (Val). */
+ IR_SCOPE_END, /* extra.imm = scope id (Val). */
+ IR_BREAK_TO, /* extra.imm = scope id (Val). */
+ IR_CONTINUE_TO, /* extra.imm = scope id (Val). */
/* alloca / variadics. */
- IR_ALLOCA, /* opnds = [dst REG, size]; extra.imm = align */
- IR_VA_START, /* opnds = [ap] */
- IR_VA_ARG, /* opnds = [dst REG, ap]; extra.aux = CfreeCgTypeId*/
- IR_VA_END, /* opnds = [ap] */
- IR_VA_COPY, /* opnds = [dst, src] */
+ IR_ALLOCA, /* opnds = [dst REG, size]; extra.imm = align */
+ IR_VA_START, /* opnds = [ap] */
+ IR_VA_ARG, /* opnds = [dst REG, ap]; extra.aux = CfreeCgTypeId*/
+ IR_VA_END, /* opnds = [ap] */
+ IR_VA_COPY, /* opnds = [dst, src] */
/* Atomics. */
- IR_ATOMIC_LOAD, /* opnds = [dst, addr]; extra.aux = IRAtomicAux */
- IR_ATOMIC_STORE, /* opnds = [addr, src]; extra.aux = IRAtomicAux */
- IR_ATOMIC_RMW, /* opnds = [dst, addr, val]; extra.aux */
- IR_ATOMIC_CAS, /* defs = [prior, ok]; extra.aux = IRCasAux */
- IR_FENCE, /* extra.imm = MemOrder */
+ IR_ATOMIC_LOAD, /* opnds = [dst, addr]; extra.aux = IRAtomicAux */
+ IR_ATOMIC_STORE, /* opnds = [addr, src]; extra.aux = IRAtomicAux */
+ IR_ATOMIC_RMW, /* opnds = [dst, addr, val]; extra.aux */
+ IR_ATOMIC_CAS, /* defs = [prior, ok]; extra.aux = IRCasAux */
+ IR_FENCE, /* extra.imm = MemOrder */
/* Inline asm. extra.aux = IRAsmAux. */
IR_ASM_BLOCK,
@@ -204,8 +204,9 @@ typedef struct IRAsmAux {
AsmConstraint* outs;
AsmConstraint* ins;
Sym* clobbers;
- Operand* out_ops; /* nout slots; the wrapped target may fill in REG location */
- Operand* in_ops; /* nin slots; recorded by w_asm_block, xlat'd at replay */
+ Operand*
+ out_ops; /* nout slots; the wrapped target may fill in REG location */
+ Operand* in_ops; /* nin slots; recorded by w_asm_block, xlat'd at replay */
u32 nout, nin, nclob;
/* Filled by opt_machinize from backend register-name resolution. */
u32 clobber_mask[OPT_REG_CLASSES];
@@ -217,8 +218,8 @@ typedef struct IRAsmAux {
typedef struct IRIntrinAux {
IntrinKind kind;
- Operand* dsts; /* ndst */
- Operand* args; /* narg */
+ Operand* dsts; /* ndst */
+ Operand* args; /* narg */
Val* result_vals; /* one per dst that's a REG, parallel to dsts */
u32 ndst, narg;
} IRIntrinAux;
@@ -270,8 +271,8 @@ typedef struct Inst {
InstId id;
SrcLoc loc; /* sticky from CGTarget.set_loc at recording */
CfreeCgTypeId type;
- Val def; /* primary SSA def, or VAL_NONE */
- u32 ndefs; /* multi-result */
+ Val def; /* primary SSA def, or VAL_NONE */
+ u32 ndefs; /* multi-result */
Val* defs;
/* Operands. We use Operand instead of Val so that the original
* CGTarget call shape (IMM / LOCAL / GLOBAL / INDIRECT in addition to
@@ -375,7 +376,8 @@ typedef struct Func {
u32* val_def_block;
u32* val_def_inst;
CfreeCgTypeId* val_type;
- u8* val_cls; /* RegClass per Val, used by replay to reconstruct REG operands */
+ u8* val_cls; /* RegClass per Val, used by replay to reconstruct REG operands
+ */
u32 nvals, vals_cap;
/* Scope id table. Indexed by scope_id (1-based). Values map to
@@ -419,6 +421,7 @@ typedef struct Func {
OptUse* opt_uses;
u32 opt_nuses, opt_uses_cap;
u32* opt_first_use_by_val; /* indexed by Val, OPT_USE_NONE if no uses */
+ u32 opt_valid_analyses;
Reg opt_hard_regs[OPT_REG_CLASSES][OPT_MAX_HARD_REGS];
u32 opt_hard_reg_count[OPT_REG_CLASSES];
@@ -426,7 +429,8 @@ typedef struct Func {
u32 opt_phys_reg_count[OPT_REG_CLASSES];
Reg opt_scratch_regs[OPT_REG_CLASSES][OPT_MAX_SCRATCH_REGS];
u32 opt_scratch_reg_count[OPT_REG_CLASSES];
- u32 opt_caller_saved[OPT_REG_CLASSES]; /* bit r set if hard reg r is caller-saved */
+ u32 opt_caller_saved[OPT_REG_CLASSES]; /* bit r set if hard reg r is
+ caller-saved */
u32 opt_callee_saved[OPT_REG_CLASSES];
u32 opt_reserved_regs[OPT_REG_CLASSES];
u32 opt_arg_regs[OPT_REG_CLASSES];
diff --git a/src/opt/opt.c b/src/opt/opt.c
@@ -1409,9 +1409,17 @@ static void opt_run_o2_pipeline(OptImpl* o) {
opt_verify(o->f, "o2-pre-ssa-cfg");
opt_build_ssa(o->f);
opt_verify(o->f, "o2-ssa");
+ opt_ssa_dce(o->f);
+ opt_verify(o->f, "o2-ssa-dce");
+ opt_copy_cleanup(o->f);
+ opt_verify(o->f, "o2-copy-cleanup");
+ opt_ssa_dce(o->f);
+ opt_verify(o->f, "o2-copy-dce");
opt_make_conventional_ssa(o->f);
opt_verify(o->f, "o2-conventional-ssa");
opt_undo_ssa(o->f);
+ opt_copy_cleanup(o->f);
+ opt_verify(o->f, "o2-undo-copy-cleanup");
opt_build_cfg(o->f);
opt_verify(o->f, "o2-undo-ssa");
metrics_scope_end(o->c, "opt.o2.ssa");
diff --git a/src/opt/opt.h b/src/opt/opt.h
@@ -37,7 +37,8 @@ void opt_jump_cleanup(Func*, OptJumpCleanupStage);
void opt_block_cloning(Func*);
void opt_build_ssa(Func*);
void opt_addr_xform(Func*);
-void opt_gvn(Func*); /* incl. constprop, redundant-load elim */
+void opt_gvn(Func*); /* incl. constprop, redundant-load elim */
+void opt_copy_cleanup(Func*);
void opt_copy_prop(Func*); /* incl. redundant-extension elim */
void opt_dse(Func*); /* dead store elimination */
void opt_ssa_dce(Func*);
diff --git a/src/opt/opt_internal.h b/src/opt/opt_internal.h
@@ -45,8 +45,18 @@ typedef struct OptAnalysis {
OptBlockList* dom_frontier;
} OptAnalysis;
+typedef enum OptAnalysisFlag {
+ OPT_ANALYSIS_CFG = 1u << 0,
+ OPT_ANALYSIS_DEF_USE = 1u << 1,
+ OPT_ANALYSIS_DOM = 1u << 2,
+ OPT_ANALYSIS_LOOP = 1u << 3,
+} OptAnalysisFlag;
+
typedef void (*OptOperandWalkFn)(Func*, Inst*, Operand*, int is_def, void*);
+void opt_analysis_mark_valid(Func*, u32 flags);
+void opt_analysis_invalidate(Func*, u32 flags);
+int opt_analysis_has(Func*, u32 flags);
void opt_analysis_build_order(Func*, OptAnalysis*);
void opt_analysis_build_dominators(Func*, OptAnalysis*);
void opt_analysis_build_dom_frontier(Func*, OptAnalysis*);
diff --git a/src/opt/pass_analysis.c b/src/opt/pass_analysis.c
@@ -22,6 +22,20 @@ static int verify_stage_is_ssa(const char* stage) {
strstr(stage, "pre-ssa") == NULL;
}
+void opt_analysis_mark_valid(Func* f, u32 flags) {
+ if (!f) return;
+ f->opt_valid_analyses |= flags;
+}
+
+void opt_analysis_invalidate(Func* f, u32 flags) {
+ if (!f) return;
+ f->opt_valid_analyses &= ~flags;
+}
+
+int opt_analysis_has(Func* f, u32 flags) {
+ return f && (f->opt_valid_analyses & flags) == flags;
+}
+
static void block_list_add(Arena* arena, OptBlockList* list, u32 block) {
if (list->n == list->cap) {
u32 ncap = list->cap ? list->cap * 2u : 4u;
@@ -80,7 +94,10 @@ void opt_analysis_build_dominators(Func* f, OptAnalysis* a) {
a->idom = arena_array(f->arena, u32, n);
a->dom_children = arena_zarray(f->arena, OptBlockList, n);
for (u32 i = 0; i < n; ++i) a->idom[i] = OPT_BLK_NONE;
- if (f->entry >= f->nblocks || !a->reachable[f->entry]) return;
+ if (f->entry >= f->nblocks || !a->reachable[f->entry]) {
+ opt_analysis_mark_valid(f, OPT_ANALYSIS_DOM);
+ return;
+ }
a->idom[f->entry] = f->entry;
int changed = 1;
@@ -112,6 +129,7 @@ void opt_analysis_build_dominators(Func* f, OptAnalysis* a) {
if (idom == OPT_BLK_NONE || idom == b) continue;
block_list_add(f->arena, &a->dom_children[idom], b);
}
+ opt_analysis_mark_valid(f, OPT_ANALYSIS_DOM);
}
void opt_analysis_build_dom_frontier(Func* f, OptAnalysis* a) {
@@ -190,8 +208,7 @@ static void opt_use_add(Func* f, Val v, u32 b, u32 i, u8 kind, u32 op_idx,
if (f->opt_nuses == f->opt_uses_cap) {
u32 ncap = f->opt_uses_cap ? f->opt_uses_cap * 2u : 32u;
OptUse* uses = arena_zarray(f->arena, OptUse, ncap);
- if (f->opt_uses)
- memcpy(uses, f->opt_uses, sizeof(uses[0]) * f->opt_nuses);
+ if (f->opt_uses) memcpy(uses, f->opt_uses, sizeof(uses[0]) * f->opt_nuses);
f->opt_uses = uses;
f->opt_uses_cap = ncap;
}
@@ -209,12 +226,12 @@ static void opt_use_add(Func* f, Val v, u32 b, u32 i, u8 kind, u32 op_idx,
f->opt_first_use_by_val[v] = id;
}
-static void opt_use_add_operand(Func* f, u32 b, u32 i, u32 op_idx,
- Operand* op, int is_def) {
+static void opt_use_add_operand(Func* f, u32 b, u32 i, u32 op_idx, Operand* op,
+ int is_def) {
if (!op || is_def) return;
if (op->kind == OPK_REG) {
- opt_use_add(f, (Val)op->v.reg, b, i, OPT_USE_OPERAND, op_idx,
- OPT_USE_NONE, op);
+ opt_use_add(f, (Val)op->v.reg, b, i, OPT_USE_OPERAND, op_idx, OPT_USE_NONE,
+ op);
} else if (op->kind == OPK_INDIRECT) {
opt_use_add(f, (Val)op->v.ind.base, b, i, OPT_USE_INDIRECT_BASE, op_idx,
OPT_USE_NONE, op);
@@ -242,8 +259,8 @@ static void opt_collect_inst_uses(Func* f, u32 b, u32 i, Inst* in) {
IRPhiAux* aux = (IRPhiAux*)in->extra.aux;
if (!aux) break;
for (u32 p = 0; p < aux->npreds; ++p)
- opt_use_add(f, aux->pred_vals[p], b, i, OPT_USE_PHI_INPUT,
- OPT_USE_NONE, p, NULL);
+ opt_use_add(f, aux->pred_vals[p], b, i, OPT_USE_PHI_INPUT, OPT_USE_NONE,
+ p, NULL);
break;
}
case IR_CALL: {
@@ -300,6 +317,7 @@ void opt_rebuild_def_use(Func* f) {
for (u32 i = 0; i < bl->ninsts; ++i)
opt_collect_inst_uses(f, b, i, &bl->insts[i]);
}
+ opt_analysis_mark_valid(f, OPT_ANALYSIS_DEF_USE);
}
static void verify_operand(Func* f, Inst* in, Operand* op, int is_def,
@@ -312,8 +330,8 @@ static void verify_operand(Func* f, Inst* in, Operand* op, int is_def,
opt_fail(f, stage, is_def ? "bad def val" : "bad use val", v, f->nvals);
if (!verify_stage_is_ssa(stage)) return;
if (op->cls != f->val_cls[v])
- opt_fail(f, stage, is_def ? "def class mismatch" : "use class mismatch",
- v, op->cls);
+ opt_fail(f, stage, is_def ? "def class mismatch" : "use class mismatch", v,
+ op->cls);
}
static void verify_values(Func* f, const char* stage) {
@@ -326,14 +344,12 @@ static void verify_values(Func* f, const char* stage) {
Inst* in = &bl->insts[i];
if (in->id == INST_ID_NONE || in->id >= f->next_inst_id)
opt_fail(f, stage, "bad inst id", b, i);
- if (seen_inst[in->id])
- opt_fail(f, stage, "duplicate inst id", in->id, b);
+ if (seen_inst[in->id]) opt_fail(f, stage, "duplicate inst id", in->id, b);
seen_inst[in->id] = 1;
if (in->def != VAL_NONE) {
if (in->def >= f->nvals) opt_fail(f, stage, "bad inst def", b, i);
- if (verify_stage_is_ssa(stage) && (IROp)in->op == IR_PHI &&
- in->type && f->val_type[in->def] &&
- in->type != f->val_type[in->def])
+ if (verify_stage_is_ssa(stage) && (IROp)in->op == IR_PHI && in->type &&
+ f->val_type[in->def] && in->type != f->val_type[in->def])
opt_fail(f, stage, "inst def type mismatch", in->def, b);
}
for (u32 d = 0; d < in->ndefs; ++d) {
@@ -349,8 +365,7 @@ static void verify_values(Func* f, const char* stage) {
if (in->nopnds || in->opnds)
opt_fail(f, stage, "phi should not carry operands", b, i);
if (aux->slot_id > f->nframe_slots)
- opt_fail(f, stage, "phi bad slot id", aux->slot_id,
- f->nframe_slots);
+ opt_fail(f, stage, "phi bad slot id", aux->slot_id, f->nframe_slots);
if (aux->npreds != bl->npreds)
opt_fail(f, stage, "phi pred count mismatch", aux->npreds,
bl->npreds);
@@ -360,8 +375,7 @@ static void verify_values(Func* f, const char* stage) {
if (aux->pred_vals[p] != VAL_NONE && aux->pred_vals[p] >= f->nvals)
opt_fail(f, stage, "phi bad pred value", aux->pred_vals[p],
f->nvals);
- if (aux->pred_vals[p] != VAL_NONE &&
- f->val_type[aux->pred_vals[p]] &&
+ if (aux->pred_vals[p] != VAL_NONE && f->val_type[aux->pred_vals[p]] &&
f->val_type[aux->pred_vals[p]] != in->type)
opt_fail(f, stage, "phi input type mismatch", b, p);
}
@@ -378,8 +392,7 @@ static void verify_use_site(Func* f, const char* stage, const OptUse* use) {
case OPT_USE_OPERAND:
if (!use->operand)
opt_fail(f, stage, "def-use missing operand", use->block, use->inst);
- if (use->operand->kind != OPK_REG ||
- (Val)use->operand->v.reg != use->val)
+ if (use->operand->kind != OPK_REG || (Val)use->operand->v.reg != use->val)
opt_fail(f, stage, "def-use operand mismatch", use->val, use->kind);
break;
case OPT_USE_INDIRECT_BASE:
@@ -389,8 +402,7 @@ static void verify_use_site(Func* f, const char* stage, const OptUse* use) {
break;
case OPT_USE_PHI_INPUT: {
if ((IROp)in->op != IR_PHI)
- opt_fail(f, stage, "def-use phi site mismatch", use->block,
- use->inst);
+ opt_fail(f, stage, "def-use phi site mismatch", use->block, use->inst);
IRPhiAux* aux = (IRPhiAux*)in->extra.aux;
if (!aux || use->phi_pred_index >= aux->npreds)
opt_fail(f, stage, "def-use phi pred mismatch", use->val,
@@ -407,6 +419,19 @@ static void verify_use_site(Func* f, const char* stage, const OptUse* use) {
static void verify_def_use(Func* f, const char* stage) {
if (f->opt_rewritten) return;
+ if (opt_analysis_has(f, OPT_ANALYSIS_DEF_USE)) {
+ for (u32 u = 0; u < f->opt_nuses; ++u) {
+ OptUse* use = &f->opt_uses[u];
+ if (use->val == VAL_NONE || use->val >= f->nvals)
+ opt_fail(f, stage, "def-use bad cached val", use->val, f->nvals);
+ if (use->block >= f->nblocks)
+ opt_fail(f, stage, "def-use bad cached block", use->block, f->nblocks);
+ if (use->inst >= f->blocks[use->block].ninsts)
+ opt_fail(f, stage, "def-use bad cached inst", use->inst,
+ f->blocks[use->block].ninsts);
+ verify_use_site(f, stage, use);
+ }
+ }
opt_rebuild_def_use(f);
for (u32 u = 0; u < f->opt_nuses; ++u) {
OptUse* use = &f->opt_uses[u];
diff --git a/src/opt/pass_cfg.c b/src/opt/pass_cfg.c
@@ -20,13 +20,11 @@
* and only repopulates from the trailing terminator inst when one is
* present. */
-#include "opt/ir.h"
-#include "opt/opt.h"
-
#include <string.h>
#include "core/arena.h"
#include "core/core.h"
+#include "opt/opt_internal.h"
static int is_terminator(const Inst* in) {
switch ((IROp)in->op) {
@@ -41,8 +39,7 @@ static int is_terminator(const Inst* in) {
return 1;
case IR_INTRINSIC: {
IRIntrinAux* aux = (IRIntrinAux*)in->extra.aux;
- return aux && (aux->kind == INTRIN_LONGJMP ||
- aux->kind == INTRIN_TRAP ||
+ return aux && (aux->kind == INTRIN_LONGJMP || aux->kind == INTRIN_TRAP ||
aux->kind == INTRIN_UNREACHABLE);
}
default:
@@ -117,6 +114,9 @@ static void prune_unreachable(Func* f, const u8* reachable) {
}
void opt_build_cfg(Func* f) {
+ if (!f) return;
+ opt_analysis_invalidate(
+ f, OPT_ANALYSIS_DEF_USE | OPT_ANALYSIS_DOM | OPT_ANALYSIS_LOOP);
for (u32 b = 0; b < f->nblocks; ++b) {
f->blocks[b].preds = NULL;
f->blocks[b].npreds = 0;
@@ -192,4 +192,5 @@ void opt_build_cfg(Func* f) {
f->blocks[t].preds[f->blocks[t].npreds++] = b;
}
}
+ opt_analysis_mark_valid(f, OPT_ANALYSIS_CFG);
}
diff --git a/src/opt/pass_combine.c b/src/opt/pass_combine.c
@@ -512,6 +512,8 @@ static void opt_combine_fold_block(Func* f, Block* bl,
}
void opt_combine(Func* f) {
+ if (!f) return;
+ opt_analysis_invalidate(f, OPT_ANALYSIS_DEF_USE);
OptHardBlockLive* hard_live = opt_maybe_build_hard_live(f);
for (u32 b = 0; b < f->nblocks; ++b) {
Block* bl = &f->blocks[b];
diff --git a/src/opt/pass_copy.c b/src/opt/pass_copy.c
@@ -0,0 +1,116 @@
+#include "opt/opt_internal.h"
+
+static int same_val_shape(Func* f, Val a, Val b) {
+ if (a == VAL_NONE || b == VAL_NONE || a >= f->nvals || b >= f->nvals)
+ return 0;
+ return f->val_cls[a] == f->val_cls[b] && f->val_type[a] == f->val_type[b];
+}
+
+static int copy_values(const Inst* in, Val* dst, Val* src) {
+ if (!in || (IROp)in->op != IR_COPY || in->nopnds < 2) return 0;
+ if (in->opnds[0].kind != OPK_REG || in->opnds[1].kind != OPK_REG) return 0;
+ *dst = (Val)in->opnds[0].v.reg;
+ *src = (Val)in->opnds[1].v.reg;
+ if (in->def != VAL_NONE && in->def != *dst) return 0;
+ return 1;
+}
+
+static u32 def_count(Func* f, Val v) {
+ u32 n = 0;
+ if (v == VAL_NONE) return 0;
+ for (u32 b = 0; b < f->nblocks; ++b) {
+ Block* bl = &f->blocks[b];
+ for (u32 i = 0; i < bl->ninsts; ++i) {
+ Inst* in = &bl->insts[i];
+ if (in->def == v) ++n;
+ for (u32 d = 0; d < in->ndefs; ++d)
+ if (in->defs[d] == v) ++n;
+ }
+ }
+ return n;
+}
+
+static void replace_one_use(Func* f, const OptUse* use, Val src) {
+ Inst* in = &f->blocks[use->block].insts[use->inst];
+ switch ((OptUseKind)use->kind) {
+ case OPT_USE_OPERAND:
+ use->operand->v.reg = (Reg)src;
+ use->operand->type = f->val_type[src];
+ use->operand->cls = f->val_cls[src];
+ break;
+ case OPT_USE_INDIRECT_BASE:
+ use->operand->v.ind.base = (Reg)src;
+ break;
+ case OPT_USE_PHI_INPUT: {
+ IRPhiAux* aux = (IRPhiAux*)in->extra.aux;
+ if (aux && use->phi_pred_index < aux->npreds)
+ aux->pred_vals[use->phi_pred_index] = src;
+ break;
+ }
+ default:
+ break;
+ }
+}
+
+static void remove_copy_inst(Inst* in) {
+ in->op = IR_NOP;
+ in->def = VAL_NONE;
+ in->ndefs = 0;
+ in->defs = NULL;
+ in->nopnds = 0;
+ in->opnds = NULL;
+}
+
+static void compact_copies(Func* f) {
+ for (u32 b = 0; b < f->nblocks; ++b) {
+ Block* bl = &f->blocks[b];
+ u32 w = 0;
+ for (u32 i = 0; i < bl->ninsts; ++i) {
+ if ((IROp)bl->insts[i].op == IR_NOP) continue;
+ bl->insts[w] = bl->insts[i];
+ if (bl->insts[w].def != VAL_NONE && bl->insts[w].def < f->nvals) {
+ f->val_def_block[bl->insts[w].def] = b;
+ f->val_def_inst[bl->insts[w].def] = w;
+ }
+ for (u32 d = 0; d < bl->insts[w].ndefs; ++d) {
+ Val v = bl->insts[w].defs[d];
+ if (v != VAL_NONE && v < f->nvals) {
+ f->val_def_block[v] = b;
+ f->val_def_inst[v] = w;
+ }
+ }
+ ++w;
+ }
+ bl->ninsts = w;
+ }
+}
+
+static int cleanup_one_copy(Func* f) {
+ opt_rebuild_def_use(f);
+ for (u32 b = 0; b < f->nblocks; ++b) {
+ Block* bl = &f->blocks[b];
+ for (u32 i = 0; i < bl->ninsts; ++i) {
+ Inst* in = &bl->insts[i];
+ Val dst = VAL_NONE;
+ Val src = VAL_NONE;
+ if (!copy_values(in, &dst, &src)) continue;
+ if (dst != src && !same_val_shape(f, dst, src)) continue;
+ if (dst != src && def_count(f, dst) != 1) continue;
+ for (u32 u = f->opt_first_use_by_val[dst]; u != OPT_USE_NONE;
+ u = f->opt_uses[u].next_for_val)
+ replace_one_use(f, &f->opt_uses[u], src);
+ remove_copy_inst(in);
+ opt_analysis_invalidate(f, OPT_ANALYSIS_DEF_USE);
+ compact_copies(f);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+void opt_copy_cleanup(Func* f) {
+ if (!f || f->opt_rewritten) return;
+ while (cleanup_one_copy(f)) {
+ }
+ opt_rebuild_def_use(f);
+}
diff --git a/src/opt/pass_dce.c b/src/opt/pass_dce.c
@@ -44,7 +44,105 @@ int opt_inst_has_side_effect(Func* f, const Inst* in) {
return 0;
}
}
+
+static int val_has_uses(Func* f, Val v) {
+ return v != VAL_NONE && v < f->nvals && f->opt_first_use_by_val &&
+ f->opt_first_use_by_val[v] != OPT_USE_NONE;
+}
+
+static int ssa_dce_candidate(const Inst* in) {
+ switch ((IROp)in->op) {
+ case IR_CONST_I:
+ case IR_CONST_BYTES:
+ case IR_LOAD_IMM:
+ case IR_LOAD_CONST:
+ case IR_LOAD_LABEL_ADDR:
+ case IR_COPY:
+ case IR_BINOP:
+ case IR_UNOP:
+ case IR_CMP:
+ case IR_CONVERT:
+ case IR_PHI:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+static int inst_all_defs_unused(Func* f, const Inst* in) {
+ if (in->def != VAL_NONE && val_has_uses(f, in->def)) return 0;
+ for (u32 i = 0; i < in->ndefs; ++i)
+ if (val_has_uses(f, in->defs[i])) return 0;
+ return in->def != VAL_NONE || in->ndefs != 0;
+}
+
+static void refresh_def_locations(Func* f) {
+ for (u32 b = 0; b < f->nblocks; ++b) {
+ Block* bl = &f->blocks[b];
+ for (u32 i = 0; i < bl->ninsts; ++i) {
+ Inst* in = &bl->insts[i];
+ if (in->def != VAL_NONE && in->def < f->nvals) {
+ f->val_def_block[in->def] = b;
+ f->val_def_inst[in->def] = i;
+ }
+ for (u32 d = 0; d < in->ndefs; ++d) {
+ Val v = in->defs[d];
+ if (v == VAL_NONE || v >= f->nvals) continue;
+ f->val_def_block[v] = b;
+ f->val_def_inst[v] = i;
+ }
+ }
+ }
+}
+
+static void compact_nops(Func* f) {
+ for (u32 b = 0; b < f->nblocks; ++b) {
+ Block* bl = &f->blocks[b];
+ u32 w = 0;
+ for (u32 i = 0; i < bl->ninsts; ++i) {
+ if ((IROp)bl->insts[i].op == IR_NOP) continue;
+ bl->insts[w++] = bl->insts[i];
+ }
+ bl->ninsts = w;
+ }
+ refresh_def_locations(f);
+}
+
+void opt_ssa_dce(Func* f) {
+ if (!f || f->opt_rewritten) return;
+ int changed = 0;
+ int again = 1;
+ opt_rebuild_def_use(f);
+ while (again) {
+ again = 0;
+ for (u32 b = 0; b < f->nblocks; ++b) {
+ Block* bl = &f->blocks[b];
+ for (u32 i = 0; i < bl->ninsts; ++i) {
+ Inst* in = &bl->insts[i];
+ if (!ssa_dce_candidate(in)) continue;
+ if (opt_inst_has_side_effect(f, in)) continue;
+ if (!inst_all_defs_unused(f, in)) continue;
+ in->op = IR_NOP;
+ in->def = VAL_NONE;
+ in->ndefs = 0;
+ in->defs = NULL;
+ in->nopnds = 0;
+ in->opnds = NULL;
+ changed = 1;
+ again = 1;
+ }
+ }
+ if (again) opt_rebuild_def_use(f);
+ }
+ if (changed) {
+ opt_analysis_invalidate(f, OPT_ANALYSIS_DEF_USE);
+ compact_nops(f);
+ }
+ opt_rebuild_def_use(f);
+}
+
void opt_dce(Func* f) {
+ opt_analysis_invalidate(f, OPT_ANALYSIS_DEF_USE);
OptHardBlockLive* hard_live = opt_maybe_build_hard_live(f);
for (u32 b = 0; b < f->nblocks; ++b) {
Block* bl = &f->blocks[b];
@@ -92,4 +190,5 @@ void opt_dce(Func* f) {
}
bl->ninsts = w;
}
+ refresh_def_locations(f);
}
diff --git a/src/opt/pass_jump.c b/src/opt/pass_jump.c
@@ -1,6 +1,6 @@
#include <string.h>
-#include "opt/opt.h"
+#include "opt/opt_internal.h"
#define BLOCK_NONE ((u32)~0u)
@@ -199,6 +199,8 @@ static void cleanup_layout_fallthrough_branches(const JumpCleanupCtx* c) {
void opt_jump_cleanup(Func* f, OptJumpCleanupStage stage) {
if (!f) return;
+ opt_analysis_invalidate(f, OPT_ANALYSIS_CFG | OPT_ANALYSIS_DEF_USE |
+ OPT_ANALYSIS_DOM | OPT_ANALYSIS_LOOP);
JumpCleanupCtx c = jump_cleanup_ctx(f);
if (stage == OPT_JUMP_CLEANUP_CFG) {
cleanup_invert_jump_fallthrough(&c);
diff --git a/src/opt/pass_loop.c b/src/opt/pass_loop.c
@@ -31,16 +31,24 @@ static u32 loop_frequency(u8 depth) {
}
void opt_build_loop_tree(Func* f) {
+ if (!f) return;
+ opt_analysis_invalidate(f, OPT_ANALYSIS_LOOP);
for (u32 b = 0; b < f->nblocks; ++b) {
f->blocks[b].loop_depth = 0;
f->blocks[b].frequency = 1;
}
- if (f->nblocks == 0 || f->entry >= f->nblocks) return;
+ if (f->nblocks == 0 || f->entry >= f->nblocks) {
+ opt_analysis_mark_valid(f, OPT_ANALYSIS_LOOP);
+ return;
+ }
OptAnalysis a;
memset(&a, 0, sizeof a);
opt_analysis_build_dominators(f, &a);
- if (a.npo == 0) return;
+ if (a.npo == 0) {
+ opt_analysis_mark_valid(f, OPT_ANALYSIS_LOOP);
+ return;
+ }
u8* body = arena_zarray(f->arena, u8, f->nblocks);
u32* stack = arena_array(f->arena, u32, f->nblocks);
@@ -68,4 +76,5 @@ void opt_build_loop_tree(Func* f) {
for (u32 b = 0; b < f->nblocks; ++b)
f->blocks[b].frequency = loop_frequency(f->blocks[b].loop_depth);
+ opt_analysis_mark_valid(f, OPT_ANALYSIS_LOOP);
}
diff --git a/src/opt/pass_ssa.c b/src/opt/pass_ssa.c
@@ -1,12 +1,11 @@
/* pass_ssa.c - mem2reg SSA construction and phi destruction for O2. */
-#include "opt/opt_internal.h"
-
#include <stdio.h>
#include <string.h>
#include "core/arena.h"
#include "core/core.h"
+#include "opt/opt_internal.h"
typedef struct SlotStack {
Val* vals;
@@ -235,8 +234,8 @@ static void mark_def_blocks(Func* f, const u8* promoted, u8* def_blocks) {
static void compute_phi_sites(Func* f, OptAnalysis* a, const u8* promoted,
u8* needs_phi) {
- u8* def_blocks = arena_zarray(f->arena, u8,
- (f->nframe_slots + 1u) * f->nblocks);
+ u8* def_blocks =
+ arena_zarray(f->arena, u8, (f->nframe_slots + 1u) * f->nblocks);
mark_def_blocks(f, promoted, def_blocks);
u32* work = arena_array(f->arena, u32, f->nblocks ? f->nblocks : 1u);
u8* queued = arena_zarray(f->arena, u8, f->nblocks ? f->nblocks : 1u);
@@ -384,6 +383,7 @@ static void replace_phi_inputs(Func* f, RenameCtx* ctx) {
}
void opt_build_ssa(Func* f) {
+ if (f) opt_analysis_invalidate(f, OPT_ANALYSIS_DEF_USE);
if (!f || f->nblocks == 0 || f->nframe_slots == 0) {
if (f) opt_rebuild_def_use(f);
return;
@@ -432,8 +432,7 @@ static int ssa_is_terminator(const Inst* in) {
return 1;
case IR_INTRINSIC: {
IRIntrinAux* aux = (IRIntrinAux*)in->extra.aux;
- return aux && (aux->kind == INTRIN_LONGJMP ||
- aux->kind == INTRIN_TRAP ||
+ return aux && (aux->kind == INTRIN_LONGJMP || aux->kind == INTRIN_TRAP ||
aux->kind == INTRIN_UNREACHABLE);
}
default:
@@ -441,7 +440,8 @@ static int ssa_is_terminator(const Inst* in) {
}
}
-static Inst make_copy_inst(Func* f, Val dst, Val src, CfreeCgTypeId ty, u8 cls) {
+static Inst make_copy_inst(Func* f, Val dst, Val src, CfreeCgTypeId ty,
+ u8 cls) {
Inst in;
memset(&in, 0, sizeof in);
in.op = IR_COPY;
@@ -598,10 +598,10 @@ static void realign_phi_preds(Func* f) {
u32 old_n = aux->npreds;
u32* old_blocks = aux->pred_blocks;
Val* old_vals = aux->pred_vals;
- u32* pred_blocks = bl->npreds ? arena_array(f->arena, u32, bl->npreds)
- : NULL;
- Val* pred_vals = bl->npreds ? arena_zarray(f->arena, Val, bl->npreds)
- : NULL;
+ u32* pred_blocks =
+ bl->npreds ? arena_array(f->arena, u32, bl->npreds) : NULL;
+ Val* pred_vals =
+ bl->npreds ? arena_zarray(f->arena, Val, bl->npreds) : NULL;
for (u32 p = 0; p < bl->npreds; ++p) {
pred_blocks[p] = bl->preds[p];
pred_vals[p] = phi->def;
@@ -621,6 +621,8 @@ static void realign_phi_preds(Func* f) {
void opt_make_conventional_ssa(Func* f) {
if (!f) return;
+ opt_analysis_invalidate(f, OPT_ANALYSIS_CFG | OPT_ANALYSIS_DEF_USE |
+ OPT_ANALYSIS_DOM | OPT_ANALYSIS_LOOP);
for (u32 b = 0; b < f->nblocks; ++b) {
Block* bl = &f->blocks[b];
if (!bl->ninsts || (IROp)bl->insts[0].op != IR_PHI) continue;
@@ -655,6 +657,7 @@ void opt_make_conventional_ssa(Func* f) {
void opt_undo_ssa(Func* f) {
if (!f) return;
+ opt_analysis_invalidate(f, OPT_ANALYSIS_DEF_USE);
for (u32 b = 0; b < f->nblocks; ++b) {
Block* bl = &f->blocks[b];
u32 w = 0;
@@ -693,8 +696,7 @@ void opt_ssa_dump(Func* f, Writer* w) {
opt_rebuild_def_use(f);
char buf[160];
snprintf(buf, sizeof buf, "ssa blocks=%u vals=%u uses=%u\n",
- (unsigned)f->nblocks, (unsigned)f->nvals,
- (unsigned)f->opt_nuses);
+ (unsigned)f->nblocks, (unsigned)f->nvals, (unsigned)f->opt_nuses);
ssa_dump_write(w, buf);
for (u32 b = 0; b < f->nblocks; ++b) {
Block* bl = &f->blocks[b];
@@ -712,8 +714,8 @@ void opt_ssa_dump(Func* f, Writer* w) {
}
if ((IROp)in->op == IR_PHI) {
IRPhiAux* aux = (IRPhiAux*)in->extra.aux;
- snprintf(buf, sizeof buf, " phi slot=%u preds=",
- aux ? (unsigned)aux->slot_id : 0u);
+ snprintf(buf, sizeof buf,
+ " phi slot=%u preds=", aux ? (unsigned)aux->slot_id : 0u);
ssa_dump_write(w, buf);
if (aux) {
for (u32 p = 0; p < aux->npreds; ++p) {
diff --git a/test/opt/opt_test.c b/test/opt/opt_test.c
@@ -568,8 +568,7 @@ static u32 mock_return_reg_mask(CGTarget* t, const ABIFuncInfo* abi,
return 0;
}
-static void mock_plan_call(CGTarget* t, const CGCallDesc* d,
- CGCallPlan* out) {
+static void mock_plan_call(CGTarget* t, const CGCallDesc* d, CGCallPlan* out) {
MockCGTarget* m = (MockCGTarget*)t;
memset(out, 0, sizeof *out);
out->callee = d->callee;
@@ -583,7 +582,8 @@ static void mock_plan_call(CGTarget* t, const CGCallDesc* d,
pm->src = d->args[i].storage;
pm->dst_kind = m->planned_stack_arg ? CG_CALL_PLAN_STACK : CG_CALL_PLAN_REG;
pm->cls = RC_INT;
- pm->dst_reg = m->planned_arg_regs[i] ? m->planned_arg_regs[i] : (Reg)(i + 1u);
+ pm->dst_reg =
+ m->planned_arg_regs[i] ? m->planned_arg_regs[i] : (Reg)(i + 1u);
pm->stack_offset = i * 8u;
pm->mem.type = d->args[i].type;
pm->mem.size = 8;
@@ -613,8 +613,7 @@ static void mock_call(CGTarget* t, const CGCallDesc* d) {
static void mock_emit_call_plan(CGTarget* t, const CGCallPlan* p) {
MockCGTarget* m = (MockCGTarget*)t;
++m->emit_call_plan_calls;
- if (m->event_count < (int)sizeof m->events)
- m->events[m->event_count++] = 'p';
+ if (m->event_count < (int)sizeof m->events) m->events[m->event_count++] = 'p';
m->last_plan_callee = p->callee;
}
@@ -671,8 +670,7 @@ static void mock_load_const(CGTarget* t, Operand dst, ConstBytes cb) {
static void mock_copy(CGTarget* t, Operand dst, Operand src) {
MockCGTarget* m = (MockCGTarget*)t;
- if (m->event_count < (int)sizeof m->events)
- m->events[m->event_count++] = 'c';
+ if (m->event_count < (int)sizeof m->events) m->events[m->event_count++] = 'c';
if (m->copy_calls < (int)(sizeof m->copy_dst / sizeof m->copy_dst[0])) {
m->copy_dst[m->copy_calls] = dst;
m->copy_src[m->copy_calls] = src;
@@ -698,8 +696,7 @@ static void mock_store(CGTarget* t, Operand addr, Operand src, MemAccess macc) {
static void mock_store_call_arg(CGTarget* t, const CGCallPlanMove* move) {
MockCGTarget* m = (MockCGTarget*)t;
- if (m->event_count < (int)sizeof m->events)
- m->events[m->event_count++] = 's';
+ if (m->event_count < (int)sizeof m->events) m->events[m->event_count++] = 's';
++m->store_call_arg_calls;
m->last_stack_arg = *move;
}
@@ -710,8 +707,10 @@ static void mock_load_call_arg(CGTarget* t, Operand dst,
(void)dst;
++m->load_call_arg_calls;
m->last_load_arg = *move;
- if (move->src_kind == CG_CALL_PLAN_SRC_ADDR) ++m->addr_of_calls;
- else ++m->load_calls;
+ if (move->src_kind == CG_CALL_PLAN_SRC_ADDR)
+ ++m->addr_of_calls;
+ else
+ ++m->load_calls;
}
static void mock_store_call_ret(CGTarget* t, const CGCallPlanRet* ret,
@@ -879,22 +878,40 @@ typedef struct RealArchExpect {
} RealArchExpect;
static const RealArchExpect g_real_arch[] = {
- {"x64", CFREE_ARCH_X86_64,
+ {"x64",
+ CFREE_ARCH_X86_64,
{X64_RDI, X64_RSI, X64_RDX, X64_RCX, X64_R8, X64_R9, 0, 0},
- {X64_XMM0, X64_XMM1, X64_XMM2, X64_XMM3, X64_XMM4, X64_XMM5,
- X64_XMM6, X64_XMM7},
- {X64_RAX, X64_RDX}, {X64_XMM0, X64_XMM1}, X64_RDI, 6, 1, 0},
- {"aa64", CFREE_ARCH_ARM_64,
- {0, 1, 2, 3, 4, 5, 6, 7}, {0, 1, 2, 3, 4, 5, 6, 7},
- {0, 1}, {0, 1}, 8, 8, 0, 0},
- {"rv64", CFREE_ARCH_RV64,
+ {X64_XMM0, X64_XMM1, X64_XMM2, X64_XMM3, X64_XMM4, X64_XMM5, X64_XMM6,
+ X64_XMM7},
+ {X64_RAX, X64_RDX},
+ {X64_XMM0, X64_XMM1},
+ X64_RDI,
+ 6,
+ 1,
+ 0},
+ {"aa64",
+ CFREE_ARCH_ARM_64,
+ {0, 1, 2, 3, 4, 5, 6, 7},
+ {0, 1, 2, 3, 4, 5, 6, 7},
+ {0, 1},
+ {0, 1},
+ 8,
+ 8,
+ 0,
+ 0},
+ {"rv64",
+ CFREE_ARCH_RV64,
{RV_A0, RV_A1, RV_A2, RV_A3, RV_A4, RV_A5, RV_A6, RV_A7},
- {10, 11, 12, 13, 14, 15, 16, 17}, {RV_A0, RV_A1}, {10, 11},
- RV_A0, 8, 0, 1},
+ {10, 11, 12, 13, 14, 15, 16, 17},
+ {RV_A0, RV_A1},
+ {10, 11},
+ RV_A0,
+ 8,
+ 0,
+ 1},
};
-static CfreeCgTypeId record_of_i64s(TestCtx* tc, const char* name,
- u64 count) {
+static CfreeCgTypeId record_of_i64s(TestCtx* tc, const char* name, u64 count) {
CfreeCgTypeId arr = cfree_cg_type_array(tc->c, tc->i64, count);
CfreeCgField field;
memset(&field, 0, sizeof field);
@@ -948,9 +965,8 @@ static void expect_plan_arg(const CGCallPlan* p, u32 i, u8 dst_kind, u8 cls,
Reg reg, u32 stack_offset, const char* ctx) {
EXPECT(i < p->nargs, "%s missing arg %u", ctx, (unsigned)i);
if (i >= p->nargs) return;
- EXPECT(p->args[i].dst_kind == dst_kind,
- "%s arg %u dst kind got %u want %u", ctx, (unsigned)i,
- (unsigned)p->args[i].dst_kind, (unsigned)dst_kind);
+ EXPECT(p->args[i].dst_kind == dst_kind, "%s arg %u dst kind got %u want %u",
+ ctx, (unsigned)i, (unsigned)p->args[i].dst_kind, (unsigned)dst_kind);
EXPECT(p->args[i].cls == cls, "%s arg %u class got %u want %u", ctx,
(unsigned)i, (unsigned)p->args[i].cls, (unsigned)cls);
if (dst_kind == CG_CALL_PLAN_REG) {
@@ -987,10 +1003,10 @@ static void opt_machinize_uses_phys_reg_metadata(void) {
static const Reg legacy_pool[] = {7};
static const Reg scratch[] = {9, 10};
static const CGPhysRegInfo phys[] = {
- {13, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
- CG_REG_ARG, 0, 1},
- {19, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED |
- CG_REG_RET, 50, 4},
+ {13, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | CG_REG_ARG, 0,
+ 1},
+ {19, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED | CG_REG_RET,
+ 50, 4},
{29, RC_INT, 0xff, CG_REG_RESERVED, 0, 0},
};
mock_set_pool(&mock, RC_INT, legacy_pool, 1, scratch, 2, 0);
@@ -1000,8 +1016,7 @@ static void opt_machinize_uses_phys_reg_metadata(void) {
opt_machinize(f, &mock.base);
EXPECT(f->opt_hard_reg_count[RC_INT] == 2,
"phys metadata should replace legacy allocable pool");
- EXPECT(f->opt_hard_regs[RC_INT][0] == 13 &&
- f->opt_hard_regs[RC_INT][1] == 19,
+ EXPECT(f->opt_hard_regs[RC_INT][0] == 13 && f->opt_hard_regs[RC_INT][1] == 19,
"phys allocable order should be preserved");
EXPECT((f->opt_caller_saved[RC_INT] & (1u << 13)) != 0,
"caller-saved phys flag should be recorded");
@@ -1023,10 +1038,8 @@ static void opt_machinize_keeps_abi_regs_without_legacy_call_fallback(void) {
mock_init(&mock, tc.c);
static const Reg scratch[] = {9, 10};
static const CGPhysRegInfo phys[] = {
- {2, RC_INT, 0, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
- CG_REG_ARG, 0, 0},
- {3, RC_INT, 0, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
- CG_REG_RET, 0, 0},
+ {2, RC_INT, 0, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | CG_REG_ARG, 0, 0},
+ {3, RC_INT, 0, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | CG_REG_RET, 0, 0},
{12, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED, 0, 0},
{19, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4},
};
@@ -1049,12 +1062,10 @@ static void opt_machinize_keeps_abi_regs_without_legacy_call_fallback(void) {
opt_machinize(f, &mock.base);
EXPECT(mock.plan_call_count == 1, "call should be planned before filtering");
- EXPECT(aux->use_plan_replay,
- "stack-arg call should use planned replay");
+ EXPECT(aux->use_plan_replay, "stack-arg call should use planned replay");
EXPECT(f->opt_hard_reg_count[RC_INT] == 4,
"planned stack calls should keep ABI arg/ret regs allocable");
- EXPECT(f->opt_hard_regs[RC_INT][0] == 2 &&
- f->opt_hard_regs[RC_INT][1] == 3 &&
+ EXPECT(f->opt_hard_regs[RC_INT][0] == 2 && f->opt_hard_regs[RC_INT][1] == 3 &&
f->opt_hard_regs[RC_INT][2] == 12 &&
f->opt_hard_regs[RC_INT][3] == 19,
"all non-reserved regs should remain allocable under planned calls");
@@ -1073,10 +1084,8 @@ static void opt_machinize_keeps_abi_regs_for_incoming_params(void) {
mock_init(&mock, tc.c);
static const Reg scratch[] = {9, 10};
static const CGPhysRegInfo phys[] = {
- {2, RC_INT, 0, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
- CG_REG_ARG, 0, 0},
- {3, RC_INT, 0, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
- CG_REG_RET, 0, 0},
+ {2, RC_INT, 0, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | CG_REG_ARG, 0, 0},
+ {3, RC_INT, 0, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | CG_REG_RET, 0, 0},
{12, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED, 0, 0},
{19, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4},
};
@@ -1088,8 +1097,7 @@ static void opt_machinize_keeps_abi_regs_for_incoming_params(void) {
opt_machinize(f, &mock.base);
EXPECT(f->opt_hard_reg_count[RC_INT] == 4,
"incoming params should not suppress ABI arg/ret regs");
- EXPECT(f->opt_hard_regs[RC_INT][0] == 2 &&
- f->opt_hard_regs[RC_INT][1] == 3 &&
+ EXPECT(f->opt_hard_regs[RC_INT][0] == 2 && f->opt_hard_regs[RC_INT][1] == 3 &&
f->opt_hard_regs[RC_INT][2] == 12 &&
f->opt_hard_regs[RC_INT][3] == 19,
"param functions should keep all non-reserved regs allocable");
@@ -1113,8 +1121,7 @@ static void real_arch_call_plan_layout_one(const RealArchExpect* ex) {
CGCallDesc d = make_call_desc(&tc, fn, args, 2, ret);
CGCallPlan p;
target->plan_call(target, &d, &p);
- EXPECT(p.nargs == 2, "%s scalar nargs got %u", ex->name,
- (unsigned)p.nargs);
+ EXPECT(p.nargs == 2, "%s scalar nargs got %u", ex->name, (unsigned)p.nargs);
expect_plan_arg(&p, 0, CG_CALL_PLAN_REG, RC_INT, ex->int_arg[0], 0,
ex->name);
expect_plan_arg(&p, 1, CG_CALL_PLAN_REG, RC_INT, ex->int_arg[1], 0,
@@ -1131,22 +1138,17 @@ static void real_arch_call_plan_layout_one(const RealArchExpect* ex) {
const ABIFuncInfo* abi = abi_cg_func_info(tc.c->abi, fn);
CGABIValue args[2];
args[0] =
- call_arg_value(tc.f64, &abi->params[0],
- op_reg_cls_(4, tc.f64, RC_FP));
+ call_arg_value(tc.f64, &abi->params[0], op_reg_cls_(4, tc.f64, RC_FP));
args[1] =
- call_arg_value(tc.f64, &abi->params[1],
- op_reg_cls_(5, tc.f64, RC_FP));
+ call_arg_value(tc.f64, &abi->params[1], op_reg_cls_(5, tc.f64, RC_FP));
CGABIValue ret =
call_arg_value(tc.f64, &abi->ret, op_reg_cls_(6, tc.f64, RC_FP));
CGCallDesc d = make_call_desc(&tc, fn, args, 2, ret);
CGCallPlan p;
target->plan_call(target, &d, &p);
- EXPECT(p.nargs == 2, "%s fp nargs got %u", ex->name,
- (unsigned)p.nargs);
- expect_plan_arg(&p, 0, CG_CALL_PLAN_REG, RC_FP, ex->fp_arg[0], 0,
- ex->name);
- expect_plan_arg(&p, 1, CG_CALL_PLAN_REG, RC_FP, ex->fp_arg[1], 0,
- ex->name);
+ EXPECT(p.nargs == 2, "%s fp nargs got %u", ex->name, (unsigned)p.nargs);
+ expect_plan_arg(&p, 0, CG_CALL_PLAN_REG, RC_FP, ex->fp_arg[0], 0, ex->name);
+ expect_plan_arg(&p, 1, CG_CALL_PLAN_REG, RC_FP, ex->fp_arg[1], 0, ex->name);
expect_plan_ret(&p, 0, RC_FP, ex->fp_ret[0], ex->name);
}
@@ -1157,24 +1159,20 @@ static void real_arch_call_plan_layout_one(const RealArchExpect* ex) {
CGABIValue args[4];
args[0] = call_arg_value(tc.i64, &abi->params[0], op_reg_(7, tc.i64));
args[1] =
- call_arg_value(tc.f64, &abi->params[1],
- op_reg_cls_(8, tc.f64, RC_FP));
+ call_arg_value(tc.f64, &abi->params[1], op_reg_cls_(8, tc.f64, RC_FP));
args[2] = call_arg_value(tc.i64, &abi->params[2], op_reg_(9, tc.i64));
args[3] =
- call_arg_value(tc.f64, &abi->params[3],
- op_reg_cls_(10, tc.f64, RC_FP));
+ call_arg_value(tc.f64, &abi->params[3], op_reg_cls_(10, tc.f64, RC_FP));
CGABIValue ret = call_arg_value(tc.i64, &abi->ret, op_reg_(11, tc.i64));
CGCallDesc d = make_call_desc(&tc, fn, args, 4, ret);
CGCallPlan p;
target->plan_call(target, &d, &p);
expect_plan_arg(&p, 0, CG_CALL_PLAN_REG, RC_INT, ex->int_arg[0], 0,
ex->name);
- expect_plan_arg(&p, 1, CG_CALL_PLAN_REG, RC_FP, ex->fp_arg[0], 0,
- ex->name);
+ expect_plan_arg(&p, 1, CG_CALL_PLAN_REG, RC_FP, ex->fp_arg[0], 0, ex->name);
expect_plan_arg(&p, 2, CG_CALL_PLAN_REG, RC_INT, ex->int_arg[1], 0,
ex->name);
- expect_plan_arg(&p, 3, CG_CALL_PLAN_REG, RC_FP, ex->fp_arg[1], 0,
- ex->name);
+ expect_plan_arg(&p, 3, CG_CALL_PLAN_REG, RC_FP, ex->fp_arg[1], 0, ex->name);
}
{
@@ -1188,14 +1186,13 @@ static void real_arch_call_plan_layout_one(const RealArchExpect* ex) {
CGCallPlan p;
target->plan_call(target, &d, &p);
EXPECT(p.has_sret, "%s sret plan should be marked", ex->name);
- expect_plan_arg(&p, 0, CG_CALL_PLAN_REG, RC_INT, ex->sret_reg, 0,
- ex->name);
+ expect_plan_arg(&p, 0, CG_CALL_PLAN_REG, RC_INT, ex->sret_reg, 0, ex->name);
EXPECT(p.args[0].src_kind == CG_CALL_PLAN_SRC_ADDR,
"%s sret hidden pointer should materialize an address", ex->name);
- expect_plan_arg(&p, 1, CG_CALL_PLAN_REG, RC_INT,
- ex->arch == CFREE_ARCH_ARM_64 ? ex->int_arg[0]
- : ex->int_arg[1],
- 0, ex->name);
+ expect_plan_arg(
+ &p, 1, CG_CALL_PLAN_REG, RC_INT,
+ ex->arch == CFREE_ARCH_ARM_64 ? ex->int_arg[0] : ex->int_arg[1], 0,
+ ex->name);
}
{
@@ -1225,9 +1222,8 @@ static void real_arch_call_plan_layout_one(const RealArchExpect* ex) {
ex->name);
}
if (ex->variadic_fp_counted)
- EXPECT(p.variadic_fp_count == 1,
- "%s variadic FP count got %u want 1", ex->name,
- (unsigned)p.variadic_fp_count);
+ EXPECT(p.variadic_fp_count == 1, "%s variadic FP count got %u want 1",
+ ex->name, (unsigned)p.variadic_fp_count);
}
{
@@ -1258,13 +1254,11 @@ static void real_arch_call_plan_layout_one(const RealArchExpect* ex) {
for (u32 i = 0; i < 10; ++i)
args[i] = call_arg_value(tc.f128, &abi->params[i],
op_local_((FrameSlot)(20 + i), tc.f128));
- CGABIValue ret = call_arg_value(tc.f128, &abi->ret,
- op_local_(30, tc.f128));
+ CGABIValue ret = call_arg_value(tc.f128, &abi->ret, op_local_(30, tc.f128));
CGCallDesc d = make_call_desc(&tc, fn, args, 10, ret);
CGCallPlan p;
target->plan_call(target, &d, &p);
- EXPECT(p.stack_arg_size == 32,
- "aa64 f128 stack-arg size got %u want 32",
+ EXPECT(p.stack_arg_size == 32, "aa64 f128 stack-arg size got %u want 32",
(unsigned)p.stack_arg_size);
expect_plan_arg(&p, 8, CG_CALL_PLAN_STACK, RC_FP, 0, 0, "aa64 f128");
expect_plan_arg(&p, 9, CG_CALL_PLAN_STACK, RC_FP, 0, 16, "aa64 f128");
@@ -1285,8 +1279,7 @@ static void opt_regalloc_prefers_caller_saved_for_non_call_value(void) {
mock_init(&mock, tc.c);
static const Reg scratch[] = {9, 10};
static const CGPhysRegInfo phys[] = {
- {2, RC_INT, 0, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
- CG_REG_ARG, 0, 0},
+ {2, RC_INT, 0, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | CG_REG_ARG, 0, 0},
{19, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4},
};
mock_set_pool(&mock, RC_INT, NULL, 0, scratch, 2, 0);
@@ -1380,10 +1373,10 @@ static void opt_liveness_branch(void) {
EXPECT(opt_bitset_has(&live.blocks[b0].live_out, v),
"v%u live_out of branch block", v);
- EXPECT(opt_bitset_has(&live.blocks[b1].live_in, v),
- "v%u live_in true block", v);
- EXPECT(opt_bitset_has(&live.blocks[b2].live_in, v),
- "v%u live_in false block", v);
+ EXPECT(opt_bitset_has(&live.blocks[b1].live_in, v), "v%u live_in true block",
+ v);
+ EXPECT(opt_bitset_has(&live.blocks[b2].live_in, v), "v%u live_in false block",
+ v);
EXPECT(!opt_bitset_has(&live.blocks[b1].live_out, v),
"v%u dies in true block", v);
tc_fini(&tc);
@@ -1675,8 +1668,7 @@ static void opt_loop_frequency_weights_ranges(void) {
EXPECT(ranges.use_freq_by_val[loop_v] > ranges.use_freq_by_val[exit_v],
"loop-used value should have higher weighted use frequency");
- EXPECT(ranges.spill_cost_by_val[loop_v] >
- ranges.spill_cost_by_val[exit_v],
+ EXPECT(ranges.spill_cost_by_val[loop_v] > ranges.spill_cost_by_val[exit_v],
"loop-used value should have higher spill cost");
tc_fini(&tc);
}
@@ -1869,10 +1861,14 @@ static void opt_analysis_dominators_and_frontier(void) {
emit_ret_val(f, dead, v, tc.i32);
opt_build_cfg(f);
+ EXPECT(opt_analysis_has(f, OPT_ANALYSIS_CFG),
+ "build_cfg should mark CFG analysis valid");
opt_verify(f, "analysis-test");
OptAnalysis a;
memset(&a, 0, sizeof a);
opt_analysis_build_dom_frontier(f, &a);
+ EXPECT(opt_analysis_has(f, OPT_ANALYSIS_DOM),
+ "dominator build should mark dominators valid");
EXPECT(a.npo == 4, "analysis should visit four reachable blocks, got %u",
(unsigned)a.npo);
@@ -1884,8 +1880,7 @@ static void opt_analysis_dominators_and_frontier(void) {
EXPECT(a.idom[then_b] == entry, "then idom should be entry");
EXPECT(a.idom[else_b] == entry, "else idom should be entry");
EXPECT(a.idom[join] == entry, "join idom should be entry");
- EXPECT(opt_analysis_dominates(&a, entry, join),
- "entry should dominate join");
+ EXPECT(opt_analysis_dominates(&a, entry, join), "entry should dominate join");
EXPECT(!opt_analysis_dominates(&a, then_b, else_b),
"then should not dominate else");
EXPECT(block_list_has(&a.dom_frontier[then_b], join),
@@ -2087,6 +2082,92 @@ static void opt_ssa_conventional_splits_critical_edge(void) {
tc_fini(&tc);
}
+static void stale_verify_arg(void* arg) {
+ opt_verify((Func*)arg, "stale-def-use-test");
+}
+
+static void opt_verify_catches_stale_def_use(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ Func* f = new_func(&tc);
+ Val a = add_val(f, tc.i32);
+ Val b = add_val(f, tc.i32);
+ emit_load_imm(f, f->entry, a, tc.i32, 1);
+ emit_load_imm(f, f->entry, b, tc.i32, 2);
+ emit_ret_val(f, f->entry, a, tc.i32);
+ opt_build_cfg(f);
+ opt_rebuild_def_use(f);
+ Inst* ret = &f->blocks[f->entry].insts[f->blocks[f->entry].ninsts - 1u];
+ IRRetAux* aux = (IRRetAux*)ret->extra.aux;
+ aux->val.storage.v.reg = (Reg)b;
+ EXPECT(expect_panic(tc.c, stale_verify_arg, f),
+ "verifier should catch stale cached def-use after mutation");
+ tc_fini(&tc);
+}
+
+static void opt_ssa_dce_removes_dead_defs_and_phi(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ Func* f = new_func(&tc);
+ FrameSlot fs = add_frame_slot(f, tc.i32, FS_LOCAL, 4, 0);
+ u32 entry = f->entry;
+ u32 then_b = ir_block_new(f);
+ u32 else_b = ir_block_new(f);
+ u32 join = ir_block_new(f);
+ ir_note_emit(f, then_b);
+ ir_note_emit(f, else_b);
+ ir_note_emit(f, join);
+
+ Val keep = add_val(f, tc.i32);
+ Val dead = add_val(f, tc.i32);
+ Val tv = add_val(f, tc.i32);
+ Val ev = add_val(f, tc.i32);
+ emit_load_imm(f, entry, keep, tc.i32, 42);
+ emit_load_imm(f, entry, dead, tc.i32, 99);
+ emit_test_branch(f, entry, then_b, else_b, tc.i32);
+ emit_load_imm(f, then_b, tv, tc.i32, 11);
+ emit_store_local(f, then_b, fs, tv, tc.i32, 0);
+ emit_br_to(f, then_b, join);
+ emit_load_imm(f, else_b, ev, tc.i32, 22);
+ emit_store_local(f, else_b, fs, ev, tc.i32, 0);
+ emit_br_to(f, else_b, join);
+ emit_ret_val(f, join, keep, tc.i32);
+
+ opt_build_cfg(f);
+ opt_build_ssa(f);
+ EXPECT(count_op(f, IR_PHI) == 1, "test should build an unused phi");
+ opt_ssa_dce(f);
+ opt_verify(f, "test-ssa-dce");
+ EXPECT(count_op(f, IR_PHI) == 0, "SSA DCE should remove unused phi");
+ EXPECT(count_uses_of(f, dead) == 0, "dead value should have no users");
+ EXPECT(count_op(f, IR_LOAD_IMM) == 1,
+ "SSA DCE should remove dead pure load_imm defs");
+ tc_fini(&tc);
+}
+
+static void opt_copy_cleanup_rewrites_users(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ Func* f = new_func(&tc);
+ Val a = add_val(f, tc.i32);
+ Val b = add_val(f, tc.i32);
+ Val c = add_val(f, tc.i32);
+ emit_load_imm(f, f->entry, a, tc.i32, 42);
+ emit_copy(f, f->entry, b, a, tc.i32);
+ emit_copy(f, f->entry, c, b, tc.i32);
+ emit_ret_val(f, f->entry, c, tc.i32);
+ opt_build_cfg(f);
+ opt_rebuild_def_use(f);
+ opt_copy_cleanup(f);
+ opt_verify(f, "test-copy-cleanup");
+ Inst* ret = &f->blocks[f->entry].insts[f->blocks[f->entry].ninsts - 1u];
+ IRRetAux* aux = (IRRetAux*)ret->extra.aux;
+ EXPECT(count_op(f, IR_COPY) == 0, "copy cleanup should remove copy chain");
+ EXPECT(aux && aux->val.storage.v.reg == (Reg)a,
+ "copy cleanup should rewrite final user to original value");
+ tc_fini(&tc);
+}
+
static void opt_jump_cleanup_forwards_branch_targets(void) {
TestCtx tc;
tc_init(&tc);
@@ -3432,9 +3513,9 @@ static void opt_planned_call_replay_preserves_indirect_callee_arg_reg(void) {
"callee-in-arg-register hazard should copy callee plus arg");
EXPECT(mock.copy_dst[0].v.reg == 9 && mock.copy_src[0].v.reg == 1,
"callee should be saved before arg setup overwrites its register");
- EXPECT(mock.last_plan_callee.kind == OPK_REG &&
- mock.last_plan_callee.v.reg == 9,
- "emit_call_plan should receive scratch callee register");
+ EXPECT(
+ mock.last_plan_callee.kind == OPK_REG && mock.last_plan_callee.v.reg == 9,
+ "emit_call_plan should receive scratch callee register");
tc_fini(&tc);
}
@@ -3506,8 +3587,7 @@ static void opt_planned_call_replay_materializes_address_args(void) {
"sret-shaped plans should use planned replay");
EXPECT(mock.addr_of_calls == 1,
"register address arg should materialize with addr_of");
- EXPECT(mock.load_calls == 0,
- "address args should not be loaded as values");
+ EXPECT(mock.load_calls == 0, "address args should not be loaded as values");
EXPECT(mock.store_call_arg_calls == 1 &&
mock.last_stack_arg.src_kind == CG_CALL_PLAN_SRC_ADDR,
"stack address arg should stay marked as an address");
@@ -3548,9 +3628,9 @@ static void opt_planned_call_replay_resolves_return_reg_collision(void) {
EXPECT(mock.copy_calls == 3,
"two-register return collision should need three copies, got %d",
mock.copy_calls);
- EXPECT(mock.event_count >= 4 && mock.events[0] == 'p' &&
- mock.events[1] == 'c',
- "return copies should occur after the planned call branch");
+ EXPECT(
+ mock.event_count >= 4 && mock.events[0] == 'p' && mock.events[1] == 'c',
+ "return copies should occur after the planned call branch");
EXPECT(mock.copy_dst[0].v.reg == 9 && mock.copy_src[0].v.reg == 1,
"return cycle should save first return register to scratch");
EXPECT(mock.copy_dst[1].v.reg == 1 && mock.copy_src[1].v.reg == 2,
@@ -3592,9 +3672,10 @@ static void opt_planned_call_replay_stores_stack_sources_before_clobber(void) {
"stack-source hazard call should use emit_call_plan");
EXPECT(mock.store_call_arg_calls == 1 && mock.copy_calls == 1,
"expected one stack store and one register arg copy");
- EXPECT(mock.event_count >= 3 && mock.events[0] == 's' &&
- mock.events[1] == 'c' && mock.events[2] == 'p',
- "stack arg source should be stored before its source reg is clobbered");
+ EXPECT(
+ mock.event_count >= 3 && mock.events[0] == 's' && mock.events[1] == 'c' &&
+ mock.events[2] == 'p',
+ "stack arg source should be stored before its source reg is clobbered");
EXPECT(mock.last_stack_arg.src.kind == OPK_REG &&
mock.last_stack_arg.src.v.reg == 1,
"stack arg should use the original source register");
@@ -3889,7 +3970,8 @@ static void opt_known_frame_keeps_frame_for_slot_call_and_alloca(void) {
mock_set_pool(&alloca_mock, RC_INT, pool, 3, scratch, 2, 0x4007FFFFu);
CGTarget* alloca_opt = opt_cgtarget_new(tc.c, &alloca_mock.base, 1);
begin_mock_opt_func(&tc, alloca_opt, tc.i32);
- alloca_opt->alloca_(alloca_opt, op_reg_(1, cfree_cg_type_ptr(tc.c, tc.i32, 0)),
+ alloca_opt->alloca_(alloca_opt,
+ op_reg_(1, cfree_cg_type_ptr(tc.c, tc.i32, 0)),
op_imm_(16, tc.i32), 16);
alloca_opt->load_imm(alloca_opt, op_reg_(2, tc.i32), 5);
CGABIValue aret = {0};
@@ -4127,6 +4209,9 @@ int main(void) {
opt_ssa_loop_carried_phi();
opt_ssa_non_promotable_slots_stay_memory();
opt_ssa_conventional_splits_critical_edge();
+ opt_verify_catches_stale_def_use();
+ opt_ssa_dce_removes_dead_defs_and_phi();
+ opt_copy_cleanup_rewrites_users();
opt_jump_cleanup_forwards_branch_targets();
opt_jump_cleanup_inverts_to_remove_jump_block();
opt_jump_cleanup_keeps_conditional_fallthrough_block();