commit 627d98f8eb8716464ededb979972b695f94a4579
parent 7d47b9fc98c761d38925c34d547866bcf426d908
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Fri, 22 May 2026 03:08:42 -0700
test O2 jump and inline cleanup quality
Diffstat:
7 files changed, 234 insertions(+), 11 deletions(-)
diff --git a/doc/OPT.md b/doc/OPT.md
@@ -970,11 +970,13 @@ Remaining Phase D exit criteria:
default O2 schedule.
- [x] `opt_ssa_combine` has focused red-green tests before it enters the
default O2 schedule.
-- [ ] Full O2 jump optimization has focused red-green tests before it enters
- the default O2 schedule.
-- [ ] Post-inline cleanup quality: after Phase E inlines a simple Toy wrapper,
+- [x] Full O2 jump optimization has focused red-green tests before it enters
+ the default O2 schedule, covering switch target forwarding, empty
+ fallthrough-chain forwarding, repeated branch forwarding, and same-target
+ conditional collapse.
+- [x] Post-inline cleanup quality: after Phase E inlines a simple Toy wrapper,
the existing Phase D value passes should collapse the exposed constant/copy
- chain and remove frame traffic. Current example:
+ chain and remove frame traffic. Covered example:
```
fn add1(x: i32): i32 {
@@ -990,10 +992,9 @@ Remaining Phase D exit criteria:
}
```
- `-O2` now removes the `bl _add1`, but AArch64 still shows materialized
- parameter/local frame traffic and a runtime compare/branch in `main`. The
- desired Phase D follow-up is for inlining plus cleanup to fold the call
- result and branch to the constant return path.
+ `-O2` removes the wrapper call leftovers and AArch64 now lowers the focused
+ quality fixture's `main` to a constant return with no call, prologue, spill,
+ or reload traffic.
### Phase E - Inlining and Cleanup
diff --git a/src/opt/pass_emit.c b/src/opt/pass_emit.c
@@ -942,10 +942,86 @@ static u32 collect_replayed_hard_regs(Func* f, CGTarget* w, RegClass cls,
return nused;
}
+/* Returns 1 when `op` is a non-NULL OPK_LOCAL operand whose frame slot is
+ * something other than FRAME_SLOT_NONE; returns 0 in every other case. */
+static int replay_operand_uses_frame_slot(const Operand* op) {
+  if (!op) return 0;
+  if (op->kind != OPK_LOCAL) return 0;
+  return op->v.frame_slot != FRAME_SLOT_NONE;
+}
+
+/* Returns 1 when the ABI value's storage operand, or the operand of any of
+ * its `nparts` parts, references a frame slot. A NULL `v` yields 0. */
+static int replay_abivalue_uses_frame_slot(const CGABIValue* v) {
+  if (!v) return 0;
+  int hit = replay_operand_uses_frame_slot(&v->storage);
+  /* Stop scanning parts as soon as one reference has been found. */
+  for (u32 part = 0; !hit && part < v->nparts; ++part)
+    hit = replay_operand_uses_frame_slot(&v->parts[part].op);
+  return hit;
+}
+
+/* Reports whether instruction `in` references a frame slot.
+ *
+ * The instruction's direct operands are scanned first. IR_CALL, IR_RET,
+ * IR_SCOPE_BEGIN, IR_ASM_BLOCK and IR_INTRINSIC additionally have the
+ * operands held in their aux payload scanned; a missing aux payload counts
+ * as "no use". Every other opcode is decided by the direct operands alone.
+ * Returns 1 on the first frame-slot reference found, 0 otherwise. */
+static int replay_inst_uses_frame_slot(const Inst* in) {
+  for (u32 k = 0; k < in->nopnds; ++k) {
+    if (replay_operand_uses_frame_slot(&in->opnds[k])) return 1;
+  }
+
+  const IROp opcode = (IROp)in->op;
+
+  if (opcode == IR_CALL) {
+    IRCallAux* call = (IRCallAux*)in->extra.aux;
+    if (!call) return 0;
+    if (!call->use_plan_replay) {
+      /* Descriptor form: callee, each ABI argument, then the return value. */
+      if (replay_operand_uses_frame_slot(&call->desc.callee)) return 1;
+      for (u32 a = 0; a < call->desc.nargs; ++a) {
+        if (replay_abivalue_uses_frame_slot(&call->desc.args[a])) return 1;
+      }
+      return replay_abivalue_uses_frame_slot(&call->desc.ret);
+    }
+    /* Plan form: callee, argument sources, then return destinations. */
+    if (replay_operand_uses_frame_slot(&call->plan.callee)) return 1;
+    for (u32 a = 0; a < call->plan.nargs; ++a) {
+      if (replay_operand_uses_frame_slot(&call->plan.args[a].src)) return 1;
+    }
+    for (u32 r = 0; r < call->plan.nrets; ++r) {
+      if (replay_operand_uses_frame_slot(&call->plan.rets[r].dst)) return 1;
+    }
+    return 0;
+  }
+
+  if (opcode == IR_RET) {
+    IRRetAux* ret_aux = (IRRetAux*)in->extra.aux;
+    if (!ret_aux || !ret_aux->present) return 0;
+    return replay_abivalue_uses_frame_slot(&ret_aux->val);
+  }
+
+  if (opcode == IR_SCOPE_BEGIN) {
+    IRScopeAux* scope = (IRScopeAux*)in->extra.aux;
+    return scope ? replay_operand_uses_frame_slot(&scope->desc.cond) : 0;
+  }
+
+  if (opcode == IR_ASM_BLOCK) {
+    IRAsmAux* asm_aux = (IRAsmAux*)in->extra.aux;
+    if (!asm_aux) return 0;
+    for (u32 n = 0; n < asm_aux->nin; ++n) {
+      if (replay_operand_uses_frame_slot(&asm_aux->in_ops[n])) return 1;
+    }
+    for (u32 n = 0; n < asm_aux->nout; ++n) {
+      if (replay_operand_uses_frame_slot(&asm_aux->out_ops[n])) return 1;
+    }
+    return 0;
+  }
+
+  if (opcode == IR_INTRINSIC) {
+    IRIntrinAux* intrin = (IRIntrinAux*)in->extra.aux;
+    if (!intrin) return 0;
+    for (u32 n = 0; n < intrin->narg; ++n) {
+      if (replay_operand_uses_frame_slot(&intrin->args[n])) return 1;
+    }
+    for (u32 n = 0; n < intrin->ndst; ++n) {
+      if (replay_operand_uses_frame_slot(&intrin->dsts[n])) return 1;
+    }
+    return 0;
+  }
+
+  return 0;
+}
+
+/* Returns 1 when function `f` needs its frame slots: either some
+ * instruction in some block references a frame slot, or some slot carries
+ * the FSF_ADDR_TAKEN or FSF_VOLATILE flag. Returns 0 otherwise. */
+static int replay_func_uses_frame_slot(Func* f) {
+  for (u32 bi = 0; bi < f->nblocks; ++bi) {
+    Block* blk = &f->blocks[bi];
+    for (u32 ii = 0; ii < blk->ninsts; ++ii) {
+      if (replay_inst_uses_frame_slot(&blk->insts[ii])) return 1;
+    }
+  }
+  /* Flagged slots count as used even when no instruction referenced one. */
+  for (u32 si = 0; si < f->nframe_slots; ++si) {
+    if ((f->frame_slots[si].flags & (FSF_ADDR_TAKEN | FSF_VOLATILE)) != 0)
+      return 1;
+  }
+  return 0;
+}
+
static void collect_known_frame(Func* f, CGTarget* w, CGKnownFrameDesc* out) {
memset(out, 0, sizeof(*out));
FrameSlotDesc* slots = NULL;
- if (f->nframe_slots) {
+ int uses_frame_slot = replay_func_uses_frame_slot(f);
+ if (uses_frame_slot && f->nframe_slots) {
slots = arena_zarray(f->arena, FrameSlotDesc, f->nframe_slots);
for (u32 i = 0; i < f->nframe_slots; ++i) {
IRFrameSlot* s = &f->frame_slots[i];
@@ -959,7 +1035,7 @@ static void collect_known_frame(Func* f, CGTarget* w, CGKnownFrameDesc* out) {
}
}
out->slots = slots;
- out->nslots = f->nframe_slots;
+ out->nslots = uses_frame_slot ? f->nframe_slots : 0;
for (u32 b = 0; b < f->nblocks; ++b) {
Block* bl = &f->blocks[b];
diff --git a/src/opt/pass_inline.c b/src/opt/pass_inline.c
@@ -243,7 +243,7 @@ static int build_inline_map(InlineMap* m, Func* caller, Func* callee) {
d.loc = old->loc;
d.size = old->size;
d.align = old->align;
- d.kind = old->kind;
+ d.kind = old->kind == FS_PARAM ? FS_LOCAL : old->kind;
d.flags = old->flags;
m->slot[s] = ir_frame_slot_new(caller, &d);
}
diff --git a/test/opt/opt_test.c b/test/opt/opt_test.c
@@ -629,6 +629,14 @@ static int val_is_load_imm(Func* f, Val v, i64 imm) {
return in && (IROp)in->op == IR_LOAD_IMM && in->extra.imm == imm;
}
+/* Returns 1 when, for at least one block of `f`, ret_val() yields a value
+ * other than VAL_NONE that val_is_load_imm() accepts for `imm`; 0 if no
+ * block qualifies. */
+static int any_ret_load_imm(Func* f, i64 imm) {
+  for (u32 blk = 0; blk < f->nblocks; ++blk) {
+    Val returned = ret_val(f, blk);
+    if (returned == VAL_NONE) continue;
+    if (val_is_load_imm(f, returned, imm)) return 1;
+  }
+  return 0;
+}
+
static u32 count_uses_of(Func* f, Val v) {
opt_rebuild_def_use(f);
u32 n = 0;
@@ -1222,6 +1230,20 @@ static void add_reg_param(Func* f, PReg r, CfreeCgTypeId ty) {
in->type = ty;
}
+/* Test helper: creates an FS_PARAM frame slot for `ty` and registers a
+ * matching frame-stored parameter on `f`, using the current parameter count
+ * as the new parameter's index and size/align 4. Returns the new slot. */
+static FrameSlot add_frame_param(Func* f, CfreeCgTypeId ty) {
+  FrameSlot slot = add_frame_slot(f, ty, FS_PARAM, 4, 0);
+
+  CGParamDesc desc;
+  memset(&desc, 0, sizeof(desc));
+  desc.type = ty;
+  desc.index = f->nparams;
+  desc.align = 4;
+  desc.size = 4;
+  desc.storage.v.frame_slot = slot;
+  desc.storage.kind = CG_LOCAL_STORAGE_FRAME;
+
+  ir_param_add(f, &desc);
+  return slot;
+}
+
static Inst* emit_preg_load_imm(Func* f, u32 b, PReg dst, CfreeCgTypeId ty,
i64 imm) {
Inst* in = ir_emit(f, b, IR_LOAD_IMM);
@@ -1234,6 +1256,19 @@ static Inst* emit_preg_load_imm(Func* f, u32 b, PReg dst, CfreeCgTypeId ty,
return in;
}
+/* Test helper: emits an IR_LOAD into block `b` of `f` that defines `dst`
+ * from frame slot `fs`. Operand 0 is the destination register and operand 1
+ * is the local; the memory descriptor is built by mem_local_() with `flags`.
+ * Returns the emitted instruction. */
+static Inst* emit_preg_load_local(Func* f, u32 b, PReg dst, FrameSlot fs,
+                                  CfreeCgTypeId ty, u16 flags) {
+  Inst* load = ir_emit(f, b, IR_LOAD);
+  Operand* ops = arena_array(f->arena, Operand, 2);
+  ops[0] = op_reg_(dst, ty);
+  ops[1] = op_local_(fs, ty);
+  load->opnds = ops;
+  load->nopnds = 2;
+  load->type = ty;
+  load->def = dst;
+  load->extra.mem = mem_local_(fs, ty, 4, flags);
+  return load;
+}
+
static Inst* emit_preg_binop(Func* f, u32 b, PReg dst, PReg a, PReg c,
CfreeCgTypeId ty) {
Inst* in = ir_emit(f, b, IR_BINOP);
@@ -3711,6 +3746,42 @@ static void opt_jump_opt_forwards_empty_fallthrough_chain(void) {
tc_fini(&tc);
}
+/* Builds entry -> hop0 -> hop1 -> ret, where both hops contain only a
+ * branch, plus a second conditional target. After opt_jump_opt() the entry
+ * block's first successor must be `ret` and both hop blocks must be empty. */
+static void opt_jump_opt_repeatedly_forwards_branch_chain(void) {
+  TestCtx tc;
+  tc_init(&tc);
+  Func* f = new_func(&tc);
+
+  /* Blocks are created in this exact order so their ids stay stable. */
+  u32 b0 = f->entry;
+  u32 hop0 = ir_block_new(f);
+  u32 hop1 = ir_block_new(f);
+  u32 ret = ir_block_new(f);
+  u32 alt = ir_block_new(f);
+  const u32 noted[] = {hop0, hop1, ret, alt};
+  for (u32 i = 0; i < sizeof noted / sizeof noted[0]; ++i)
+    ir_note_emit(f, noted[i]);
+
+  Val sel = add_val(f, tc.i32);
+  Val seven = add_val(f, tc.i32);
+  Val nine = add_val(f, tc.i32);
+
+  /* Entry: branch on an opaque input to the chain head or the alternate. */
+  emit_scalar_input(f, b0, sel, tc.i32);
+  emit_cond_branch(f, b0, sel, hop0, alt, tc.i32);
+
+  /* Two branch-only trampolines in a row. */
+  emit_br_to(f, hop0, hop1);
+  emit_br_to(f, hop1, ret);
+
+  /* Distinct constant returns at the two ends. */
+  emit_load_imm(f, ret, seven, tc.i32, 7);
+  emit_ret_val(f, ret, seven, tc.i32);
+  emit_load_imm(f, alt, nine, tc.i32, 9);
+  emit_ret_val(f, alt, nine, tc.i32);
+
+  opt_jump_opt(f);
+  opt_verify(f, "test-jump-opt-repeated-branch-forward");
+
+  EXPECT(f->blocks[b0].succ[0] == ret,
+         "jump opt should forward repeated branch-only chain");
+  EXPECT(f->blocks[hop0].ninsts == 0 && f->blocks[hop1].ninsts == 0,
+         "repeated branch trampolines should be pruned");
+  tc_fini(&tc);
+}
+
static void opt_jump_opt_collapses_same_target_cond_branch(void) {
TestCtx tc;
tc_init(&tc);
@@ -6389,6 +6460,43 @@ static void opt_inline_caller_growth_cap(void) {
tc_fini(&tc);
}
+/* Inlines a one-parameter wrapper (load frame parameter, add 1, return)
+ * into a caller that passes 41, then runs opt_cleanup() on the caller.
+ * Expects the call to disappear, no IR_LOAD/IR_STORE to remain in the
+ * caller, and some return value to be the immediate 42. */
+static void opt_inline_cleanup_promotes_cloned_param_frame(void) {
+  TestCtx tc;
+  tc_init(&tc);
+
+  /* Callee: x = load(param); sum = x + 1; return sum. */
+  CfreeCgTypeId param_types[1] = {tc.i32};
+  Func* callee = new_named_func(&tc, (ObjSymId)16, tc.i32, param_types, 1, 0);
+  FrameSlot param_slot = add_frame_param(callee, tc.i32);
+  PReg loaded = add_preg(callee, tc.i32);
+  PReg inc = add_preg(callee, tc.i32);
+  PReg total = add_preg(callee, tc.i32);
+  emit_preg_load_local(callee, callee->entry, loaded, param_slot, tc.i32, 0);
+  emit_preg_load_imm(callee, callee->entry, inc, tc.i32, 1);
+  emit_preg_binop(callee, callee->entry, total, loaded, inc, tc.i32);
+  emit_preg_ret(callee, callee->entry, total, tc.i32);
+
+  /* Caller: return callee(41). */
+  Func* caller = new_named_func(&tc, (ObjSymId)17, tc.i32, NULL, 0, 0);
+  PReg actual = add_preg(caller, tc.i32);
+  PReg result = add_preg(caller, tc.i32);
+  emit_preg_load_imm(caller, caller->entry, actual, tc.i32, 41);
+  Operand actual_op = op_reg_(actual, tc.i32);
+  emit_direct_call(&tc, caller, caller->entry, (ObjSymId)16, callee->type,
+                   &actual_op, 1, op_reg_(result, tc.i32));
+  emit_preg_ret(caller, caller->entry, result, tc.i32);
+
+  Func* members[2] = {caller, callee};
+  FuncSet set = {tc.c, tc.c->tu, members, 2, 2};
+  opt_inline(&set, 1);
+  EXPECT(count_op(caller, IR_CALL) == 0, "wrapper call should inline");
+
+  opt_cleanup(caller);
+  opt_verify(caller, "test-inline-cleanup-param-frame");
+
+  EXPECT(count_op(caller, IR_LOAD) == 0 && count_op(caller, IR_STORE) == 0,
+         "cleanup should promote cloned parameter frame traffic");
+  EXPECT(any_ret_load_imm(caller, 42),
+         "cleanup should fold inlined wrapper result to constant 42");
+  tc_fini(&tc);
+}
+
int main(void) {
opt_machinize_uses_phys_reg_metadata();
opt_machinize_keeps_abi_regs_without_legacy_call_fallback();
@@ -6445,6 +6553,7 @@ int main(void) {
opt_jump_cleanup_layout_deletes_fallthrough_branch();
opt_jump_opt_forwards_switch_targets();
opt_jump_opt_forwards_empty_fallthrough_chain();
+ opt_jump_opt_repeatedly_forwards_branch_chain();
opt_jump_opt_collapses_same_target_cond_branch();
opt_loop_tree_excludes_side_exit();
opt_loop_tree_nested_depths();
@@ -6515,6 +6624,7 @@ int main(void) {
opt_inline_bottom_up_chain_single_iter();
opt_inline_refuses_recursive_and_unsupported();
opt_inline_caller_growth_cap();
+ opt_inline_cleanup_promotes_cloned_param_frame();
simple_regalloc_reports_exact_used_regs();
if (g_fails) {
fprintf(stderr, "opt tests: %d failed (%d checks)\n", g_fails, g_checks);
diff --git a/test/opt/run.sh b/test/opt/run.sh
@@ -8,3 +8,29 @@ mkdir -p "$ROOT/build/test/opt"
"$ROOT/build/cfree" cc -target aarch64-linux-gnu \
-O2 -c "$ROOT/test/opt/o2_many_values.c" \
-o "$ROOT/build/test/opt/o2_many_values.o"
+
+# Inline cleanup quality check: compile the Toy fixture at -O2 for AArch64,
+# disassemble it, isolate `main`, and require that `main` is a bare constant
+# return of 42 with no call or stack-frame traffic.
+INLINE_WORK="$ROOT/build/test/opt/inline_cleanup_quality"
+mkdir -p "$INLINE_WORK"
+"$ROOT/build/cfree" cc -target aarch64-linux-gnu -O2 -c \
+  "$ROOT/test/toy/cases/135_inline_cleanup_quality.toy" \
+  -o "$INLINE_WORK/inline_cleanup_quality.o" \
+  > "$INLINE_WORK/cc.out" 2> "$INLINE_WORK/cc.err"
+"$ROOT/build/cfree" objdump -d "$INLINE_WORK/inline_cleanup_quality.o" \
+  > "$INLINE_WORK/objdump.out" 2> "$INLINE_WORK/objdump.err"
+# Keep only the lines from the `<main>:` label up to the next symbol label.
+awk '
+ /^000000000000[0-9a-f]+ <main>:/ { in_main = 1; print; next }
+ /^000000000000[0-9a-f]+ </ { in_main = 0 }
+ in_main { print }
+' "$INLINE_WORK/objdump.out" > "$INLINE_WORK/main.dis"
+# Reject calls, spills/reloads and stack-pointer adjustments. The sp patterns
+# accept any run of whitespace after the mnemonic, matching the tolerance of
+# the positive check below, so a tab-separated `sub<TAB>sp` is caught too.
+if grep -Eq '\bbl\b|stur|ldur|stp|ldp|sub[[:space:]]+sp|add[[:space:]]+sp' \
+  "$INLINE_WORK/main.dis"; then
+  printf 'inline cleanup quality check failed; unexpected call/frame traffic:\n' >&2
+  sed 's/^/ | /' "$INLINE_WORK/main.dis" >&2
+  exit 1
+fi
+# Require a direct materialization of the constant 42 in the return register.
+if ! grep -Eq 'movz[[:space:]]+w0, 0x2a|mov[[:space:]]+w0, #42|li[[:space:]]+a0, 42' \
+  "$INLINE_WORK/main.dis"; then
+  printf 'inline cleanup quality check failed; main is not a constant 42 return:\n' >&2
+  sed 's/^/ | /' "$INLINE_WORK/main.dis" >&2
+  exit 1
+fi
diff --git a/test/toy/cases/135_inline_cleanup_quality.expected b/test/toy/cases/135_inline_cleanup_quality.expected
@@ -0,0 +1 @@
+42
diff --git a/test/toy/cases/135_inline_cleanup_quality.toy b/test/toy/cases/135_inline_cleanup_quality.toy
@@ -0,0 +1,9 @@
+fn add1(x: i64): i64 {
+ return x + 1;
+}
+
+fn __user_main(): i64 {
+ return add1(41);
+}
+
+fn main(): i32 { return __user_main() as i32; }