kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit da2192d8a98985012e6f3532c233f02a4efbaa55
parent d328ef30125bf7b82667e0196b0bcbb90135aaaf
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Fri, 15 May 2026 19:04:15 -0700

Implement guarded Phase 5 register exposure

Diffstat:
M doc/OPT_REGS_CALL_PLAN.md | 36 ++++++++++++++++++++----------------
M src/arch/aa64/opt_coord.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++
M src/arch/rv64/opt_coord.c | 42 ++++++++++++++++++++++++++++++++++++++++++
M src/arch/x64/opt_coord.c | 30 ++++++++++++++++++++++++++----
M src/opt/opt.c | 4 +++-
M src/opt/pass_lower.c | 51 ++++++++++++++++++++++++++++++++++-----------------
M test/opt/opt_test.c | 114 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
7 files changed, 286 insertions(+), 38 deletions(-)

diff --git a/doc/OPT_REGS_CALL_PLAN.md b/doc/OPT_REGS_CALL_PLAN.md @@ -46,8 +46,9 @@ What remains open: than separate first-class IR ops; - stack-argument call plans, sret calls, and tail calls still fall back to the legacy backend `call` hook; -- target `get_phys_regs` tables still expose mostly the old conservative pools, - so ABI argument/return registers are not generally allocable yet; +- target `get_phys_regs` tables expose broader O1 pools, but ABI argument and + return registers are suppressed for functions with incoming parameters or + legacy call fallback until those setup paths are also opt-visible; - direct CG still uses legacy allocation/call hooks; - broader real-architecture call-plan layout tests and code-shape probes remain to be added. @@ -55,7 +56,7 @@ What remains open: In phase terms: Phase 1 and Phase 2 are done, Phase 3 is implemented through call-plan aux visibility plus planned replay for supported call shapes, Phase 4 is implemented for register argument/return moves with stack/sret/tail fallback, -and Phases 5-6 remain open. +Phase 5 has an initial guarded implementation, and Phase 6 remains open. ## Planned Call Replay Boundary @@ -454,17 +455,20 @@ Expected result: ABI arg and return registers can be made allocable safely. ### Phase 5 - Broaden Register Exposure -Status: open except for allocator scoring. O1 now has target-informed scoring -and per-call preservation, but target phys-reg tables still mostly expose the old -conservative pools. Broadening ABI arg/return and additional caller-saved regs -depends on planned/parallel call emission. - -- Expand target `get_phys_regs` tables to include nearly all allocable physical - registers. -- Update opt scoring to prefer caller-saved regs for non-call-crossing values and - callee-saved regs for call-crossing values. -- Keep known backend helper scratch registers reserved until their clobbers are - expressed. +Status: partially implemented. 
O1 has target-informed scoring and per-call +preservation, and the native target phys-reg tables now expose broader O1 pools. +Known backend helper scratch registers remain hidden. ABI arg/return registers +are available only when O1 can avoid the still-sequential setup paths: functions +with incoming parameters or legacy call fallback suppress those ABI registers. + +- done: expand target `get_phys_regs` tables with guarded caller-saved and ABI + registers for x64, AArch64, and RV64; +- done: update opt scoring to prefer caller-saved regs for non-call-crossing + values and callee-saved regs for call-crossing values; +- done: keep known backend helper scratch registers reserved until their + clobbers are expressed; +- still open: remove the ABI-reg suppression after incoming parameter setup and + stack/sret/tail call setup are opt-visible; - Add code-shape tests for direct-call tiny functions and unused-param functions across x64, AArch64, and RV64. @@ -557,8 +561,8 @@ Next patch stack: materialization path, then remove the stack-argument fallback. 3. Add red-green hazard tests for return-register collisions and stack-argument sources once stack materialization is explicit. -4. Broaden register exposure incrementally, keeping helper scratch registers - reserved until their clobbers are explicit. +4. Continue broadening register exposure by removing the current ABI-reg guards + as incoming-parameter and stack/sret/tail-call setup become opt-visible. 5. Migrate direct CG or wrap it with internal call planning, then remove legacy pool semantics. 
diff --git a/src/arch/aa64/opt_coord.c b/src/arch/aa64/opt_coord.c @@ -16,6 +16,32 @@ static const Reg aa_int_scratch[] = {16, 17}; static const Reg aa_fp_scratch[] = {24, 25}; static const CGPhysRegInfo aa_int_phys[] = { + {0, RC_INT, 0, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG | CG_REG_RET, 0, 0}, + {1, RC_INT, 1, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG | CG_REG_RET, 0, 0}, + {2, RC_INT, 2, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG, 0, 0}, + {3, RC_INT, 3, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG, 0, 0}, + {4, RC_INT, 4, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG, 0, 0}, + {5, RC_INT, 5, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG, 0, 0}, + {6, RC_INT, 6, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG, 0, 0}, + {7, RC_INT, 7, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG, 0, 0}, + {8, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG, 0, 0}, + {12, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_TEMP_PREFERRED, 0, 0}, + {13, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_TEMP_PREFERRED, 0, 0}, + {14, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_TEMP_PREFERRED, 0, 0}, + {15, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_TEMP_PREFERRED, 0, 0}, {19, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4}, {20, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4}, {21, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4}, @@ -28,6 +54,22 @@ static const CGPhysRegInfo aa_int_phys[] = { {28, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4}, }; static const CGPhysRegInfo aa_fp_phys[] = { + {0, RC_FP, 0, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG | CG_REG_RET, 0, 0}, + {1, RC_FP, 1, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG | CG_REG_RET, 0, 0}, + {2, RC_FP, 2, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG | CG_REG_RET, 0, 0}, + {3, 
RC_FP, 3, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG | CG_REG_RET, 0, 0}, + {4, RC_FP, 4, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG | CG_REG_RET, 0, 0}, + {5, RC_FP, 5, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG | CG_REG_RET, 0, 0}, + {6, RC_FP, 6, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG | CG_REG_RET, 0, 0}, + {7, RC_FP, 7, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG | CG_REG_RET, 0, 0}, {8, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4}, {9, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4}, {10, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4}, @@ -44,6 +86,11 @@ static const CGPhysRegInfo aa_fp_phys[] = { {21, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED, 0, 0}, {22, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED, 0, 0}, {23, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED, 0, 0}, + {26, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED, 0, 0}, + {27, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED, 0, 0}, + {28, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED, 0, 0}, + {29, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED, 0, 0}, + {30, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED, 0, 0}, }; /* ============================================================ diff --git a/src/arch/rv64/opt_coord.c b/src/arch/rv64/opt_coord.c @@ -12,6 +12,22 @@ static const Reg rv_int_scratch[] = {18, 19}; /* s2, s3; reserved by opt_emit */ static const Reg rv_fp_scratch[] = {18, 19}; /* fs2, fs3; reserved by opt_emit */ static const CGPhysRegInfo rv_int_phys[] = { + {10, RC_INT, 0, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG | CG_REG_RET, 0, 0}, + {11, RC_INT, 1, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG | CG_REG_RET, 0, 0}, + {12, RC_INT, 2, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG, 0, 0}, + {13, RC_INT, 3, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG, 0, 0}, + {14, RC_INT, 4, CG_REG_ALLOCABLE | 
CG_REG_CALLER_SAVED | + CG_REG_ARG, 0, 0}, + {15, RC_INT, 5, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG, 0, 0}, + {16, RC_INT, 6, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG, 0, 0}, + {17, RC_INT, 7, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG, 0, 0}, {20, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4}, {21, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4}, {22, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4}, @@ -20,8 +36,30 @@ static const CGPhysRegInfo rv_int_phys[] = { {25, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4}, {26, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4}, {27, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4}, + {29, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_TEMP_PREFERRED, 0, 0}, + {30, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_TEMP_PREFERRED, 0, 0}, + {31, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_TEMP_PREFERRED, 0, 0}, }; static const CGPhysRegInfo rv_fp_phys[] = { + {10, RC_FP, 0, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG | CG_REG_RET, 0, 0}, + {11, RC_FP, 1, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG | CG_REG_RET, 0, 0}, + {12, RC_FP, 2, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG, 0, 0}, + {13, RC_FP, 3, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG, 0, 0}, + {14, RC_FP, 4, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG, 0, 0}, + {15, RC_FP, 5, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG, 0, 0}, + {16, RC_FP, 6, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG, 0, 0}, + {17, RC_FP, 7, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG, 0, 0}, {20, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4}, {21, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4}, {22, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4}, @@ -30,6 +68,10 @@ static const CGPhysRegInfo rv_fp_phys[] = { 
{25, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4}, {26, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4}, {27, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4}, + {28, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED, 0, 0}, + {29, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED, 0, 0}, + {30, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED, 0, 0}, + {31, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED, 0, 0}, }; /* ============================================================ diff --git a/src/arch/x64/opt_coord.c b/src/arch/x64/opt_coord.c @@ -19,15 +19,37 @@ static const Reg x_int_scratch[] = {X64_RBX, X64_R12}; static const Reg x_fp_scratch[] = {X64_XMM0 + 14, X64_XMM15}; static const CGPhysRegInfo x_int_phys[] = { + {X64_RDI, RC_INT, 0, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG, 0, 0}, + {X64_RSI, RC_INT, 1, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG, 0, 0}, + {X64_R8, RC_INT, 4, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG, 0, 0}, + {X64_R9, RC_INT, 5, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG, 0, 0}, + {X64_R10, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_TEMP_PREFERRED, 0, 0}, {X64_R13, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4}, {X64_R14, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4}, {X64_R15, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4}, - {X64_R10, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | - CG_REG_TEMP_PREFERRED, 0, 0}, }; static const CGPhysRegInfo x_fp_phys[] = { - {X64_XMM6, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED, 0, 0}, - {X64_XMM7, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED, 0, 0}, + {X64_XMM0, RC_FP, 0, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG | CG_REG_RET, 0, 0}, + {X64_XMM0 + 1, RC_FP, 1, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG | CG_REG_RET, 0, 0}, + {X64_XMM0 + 2, RC_FP, 2, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + 
CG_REG_ARG, 0, 0}, + {X64_XMM0 + 3, RC_FP, 3, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG, 0, 0}, + {X64_XMM0 + 4, RC_FP, 4, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG, 0, 0}, + {X64_XMM0 + 5, RC_FP, 5, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG, 0, 0}, + {X64_XMM6, RC_FP, 6, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG, 0, 0}, + {X64_XMM7, RC_FP, 7, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG, 0, 0}, {X64_XMM8, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED, 0, 0}, {X64_XMM0 + 9, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED, 0, 0}, {X64_XMM0 + 10, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED, 0, 0}, diff --git a/src/opt/opt.c b/src/opt/opt.c @@ -1235,13 +1235,15 @@ static Operand xlat_op(ReplayCtx* r, Operand op) { case OPK_GLOBAL: return op; case OPK_REG: + if (r->identity_regs && r->f->opt_rewritten) return op; op.v.reg = val_to_target_reg(r, (Val)op.v.reg); return op; case OPK_LOCAL: op.v.frame_slot = slot_to_target(r, op.v.frame_slot); return op; case OPK_INDIRECT: - op.v.ind.base = val_to_target_reg(r, (Val)op.v.ind.base); + if (!(r->identity_regs && r->f->opt_rewritten)) + op.v.ind.base = val_to_target_reg(r, (Val)op.v.ind.base); return op; } return op; diff --git a/src/opt/pass_lower.c b/src/opt/pass_lower.c @@ -364,6 +364,19 @@ static void asm_prepare_constraints(Func* f, CGTarget* target, IRAsmAux* aux) { static int call_plan_replay_supported(const IRCallAux* aux, const CGTarget* target); +static int func_has_legacy_call_fallback(Func* f) { + for (u32 b = 0; b < f->nblocks; ++b) { + Block* bl = &f->blocks[b]; + for (u32 i = 0; i < bl->ninsts; ++i) { + Inst* in = &bl->insts[i]; + if ((IROp)in->op != IR_CALL) continue; + IRCallAux* aux = (IRCallAux*)in->extra.aux; + if (!aux || !aux->use_plan_replay) return 1; + } + } + return 0; +} + void opt_machinize(Func* f, CGTarget* target) { f->opt_target = target->c->target; f->opt_has_target = 1; @@ -378,6 +391,25 @@ void opt_machinize(Func* f, 
CGTarget* target) { f->opt_ret_regs[c] = 0; } + for (u32 b = 0; b < f->nblocks; ++b) { + Block* bl = &f->blocks[b]; + for (u32 i = 0; i < bl->ninsts; ++i) { + Inst* in = &bl->insts[i]; + if ((IROp)in->op == IR_ASM_BLOCK) { + asm_prepare_constraints(f, target, (IRAsmAux*)in->extra.aux); + } else if ((IROp)in->op == IR_CALL && target->plan_call) { + IRCallAux* aux = (IRCallAux*)in->extra.aux; + if (aux) { + target->plan_call(target, &aux->desc, &aux->plan); + aux->plan_valid = 1; + aux->use_plan_replay = call_plan_replay_supported(aux, target); + } + } + } + } + + int suppress_abi_regs = func_has_legacy_call_fallback(f) || f->nparams != 0; + for (u32 c = 0; c < OPT_REG_CLASSES; ++c) { const CGPhysRegInfo* phys = NULL; u32 nphys = 0; @@ -396,6 +428,8 @@ void opt_machinize(Func* f, CGTarget* target) { } f->opt_phys_regs[c][f->opt_phys_reg_count[c]++] = pi; if ((pi.flags & CG_REG_ALLOCABLE) && + (!suppress_abi_regs || + (pi.flags & (CG_REG_ARG | CG_REG_RET)) == 0) && !(pi.flags & CG_REG_RESERVED)) { f->opt_hard_regs[c][f->opt_hard_reg_count[c]++] = hr; } @@ -441,23 +475,6 @@ void opt_machinize(Func* f, CGTarget* target) { } } } - - for (u32 b = 0; b < f->nblocks; ++b) { - Block* bl = &f->blocks[b]; - for (u32 i = 0; i < bl->ninsts; ++i) { - Inst* in = &bl->insts[i]; - if ((IROp)in->op == IR_ASM_BLOCK) { - asm_prepare_constraints(f, target, (IRAsmAux*)in->extra.aux); - } else if ((IROp)in->op == IR_CALL && target->plan_call) { - IRCallAux* aux = (IRCallAux*)in->extra.aux; - if (aux) { - target->plan_call(target, &aux->desc, &aux->plan); - aux->plan_valid = 1; - aux->use_plan_replay = call_plan_replay_supported(aux, target); - } - } - } - } } static int is_caller_saved(Func* f, u8 cls, Reg r) { diff --git a/test/opt/opt_test.c b/test/opt/opt_test.c @@ -793,6 +793,93 @@ static void opt_machinize_uses_phys_reg_metadata(void) { tc_fini(&tc); } +static void opt_machinize_filters_abi_regs_for_legacy_call_fallback(void) { + TestCtx tc; + tc_init(&tc); + MockCGTarget mock; + 
mock_init(&mock, tc.c); + static const Reg scratch[] = {9, 10}; + static const CGPhysRegInfo phys[] = { + {2, RC_INT, 0, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG, 0, 0}, + {3, RC_INT, 0, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_RET, 0, 0}, + {12, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED, 0, 0}, + {19, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4}, + }; + mock_set_pool(&mock, RC_INT, NULL, 0, scratch, 2, 0); + mock_set_phys(&mock, RC_INT, phys, sizeof phys / sizeof phys[0]); + mock.planned_stack_arg = 1; + + Func* f = new_func(&tc); + Val arg = add_val(f, tc.i64); + emit_load_imm(f, f->entry, arg, tc.i64, 7); + Inst* in = emit_call_void(f, f->entry); + IRCallAux* aux = (IRCallAux*)in->extra.aux; + CGABIValue* args = arena_zarray(f->arena, CGABIValue, 1); + args[0].type = tc.i64; + args[0].storage = op_reg_(arg, tc.i64); + aux->desc.callee = op_reg_(arg, tc.i64); + aux->desc.args = args; + aux->desc.nargs = 1; + + opt_machinize(f, &mock.base); + + EXPECT(mock.plan_call_count == 1, "call should be planned before filtering"); + EXPECT(!aux->use_plan_replay, + "stack-arg call should stay on legacy fallback"); + EXPECT(f->opt_hard_reg_count[RC_INT] == 2, + "legacy fallback should filter ABI arg/ret regs from hard pool"); + EXPECT(f->opt_hard_regs[RC_INT][0] == 12 && + f->opt_hard_regs[RC_INT][1] == 19, + "only non-ABI regs should remain allocable under fallback"); + EXPECT((f->opt_arg_regs[RC_INT] & (1u << 2)) != 0, + "arg metadata should still be recorded"); + EXPECT((f->opt_ret_regs[RC_INT] & (1u << 3)) != 0, + "ret metadata should still be recorded"); + + Func* g = new_func(&tc); + g->nparams = 1; + opt_machinize(g, &mock.base); + EXPECT(g->opt_hard_reg_count[RC_INT] == 2, + "incoming params should also filter ABI arg/ret regs"); + EXPECT(g->opt_hard_regs[RC_INT][0] == 12 && + g->opt_hard_regs[RC_INT][1] == 19, + "param functions should keep only non-ABI regs allocable"); + tc_fini(&tc); +} + +static void 
opt_regalloc_prefers_caller_saved_for_non_call_value(void) { + TestCtx tc; + tc_init(&tc); + MockCGTarget mock; + mock_init(&mock, tc.c); + static const Reg scratch[] = {9, 10}; + static const CGPhysRegInfo phys[] = { + {2, RC_INT, 0, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | + CG_REG_ARG, 0, 0}, + {19, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4}, + }; + mock_set_pool(&mock, RC_INT, NULL, 0, scratch, 2, 0); + mock_set_phys(&mock, RC_INT, phys, sizeof phys / sizeof phys[0]); + + Func* f = new_func(&tc); + Val v = add_val(f, tc.i32); + emit_load_imm(f, f->entry, v, tc.i32, 42); + emit_ret_val(f, f->entry, v, tc.i32); + opt_machinize(f, &mock.base); + opt_build_cfg(f); + opt_build_loop_tree(f); + opt_regalloc(f, 0); + + EXPECT(f->val_info[v].alloc_kind == OPT_ALLOC_HARD, + "single live value should allocate a hard register"); + EXPECT(f->val_info[v].hard_reg == 2, + "non-call-crossing value should prefer caller-saved r2, got r%u", + (unsigned)f->val_info[v].hard_reg); + tc_fini(&tc); +} + static void opt_call_plan_drives_call_specific_preservation(void) { TestCtx tc; tc_init(&tc); @@ -2686,6 +2773,30 @@ static void opt_planned_call_replay_falls_back_for_stack_args(void) { tc_fini(&tc); } +static void opt_emit_preserves_physical_reg_zero(void) { + TestCtx tc; + tc_init(&tc); + MockCGTarget mock; + mock_init(&mock, tc.c); + + Func* f = new_func(&tc); + f->opt_rewritten = 1; + Inst* in = ir_emit(f, f->entry, IR_LOAD_IMM); + in->opnds = arena_array(f->arena, Operand, 1); + in->opnds[0] = op_reg_(0, tc.i32); + in->nopnds = 1; + in->extra.imm = 42; + emit_ret_val(f, f->entry, 0, tc.i32); + + opt_emit(tc.c, f, &mock.base); + + EXPECT(mock.load_imm_calls == 1, "physical r0 load should be emitted"); + EXPECT(mock.last_load_imm_dst == 0, + "identity replay should preserve physical r0, got r%u", + (unsigned)mock.last_load_imm_dst); + tc_fini(&tc); +} + /* ============================================================ * End-to-end test — drive the opt-wrapped 
CGTarget through the * public CGTarget interface, let func_end run the full pipeline, @@ -3031,6 +3142,8 @@ static void simple_regalloc_reports_exact_used_regs(void) { int main(void) { opt_machinize_uses_phys_reg_metadata(); + opt_machinize_filters_abi_regs_for_legacy_call_fallback(); + opt_regalloc_prefers_caller_saved_for_non_call_value(); opt_call_plan_drives_call_specific_preservation(); opt_cfg_prunes_unreachable(); opt_cfg_preserves_scope_edges(); @@ -3072,6 +3185,7 @@ int main(void) { opt_planned_call_replay_resolves_arg_cycle(); opt_planned_call_replay_preserves_indirect_callee_arg_reg(); opt_planned_call_replay_falls_back_for_stack_args(); + opt_emit_preserves_physical_reg_zero(); opt_emit_no_virtual_alloc(); opt_records_const_bytes_by_value(); opt_cmp_branch_keeps_fallthrough_after_block_growth();