commit da2192d8a98985012e6f3532c233f02a4efbaa55
parent d328ef30125bf7b82667e0196b0bcbb90135aaaf
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Fri, 15 May 2026 19:04:15 -0700
Implement guarded Phase 5 register exposure
Diffstat:
7 files changed, 286 insertions(+), 38 deletions(-)
diff --git a/doc/OPT_REGS_CALL_PLAN.md b/doc/OPT_REGS_CALL_PLAN.md
@@ -46,8 +46,9 @@ What remains open:
than separate first-class IR ops;
- stack-argument call plans, sret calls, and tail calls still fall back to the
legacy backend `call` hook;
-- target `get_phys_regs` tables still expose mostly the old conservative pools,
- so ABI argument/return registers are not generally allocable yet;
+- target `get_phys_regs` tables expose broader O1 pools, but ABI argument and
+ return registers are suppressed for functions with incoming parameters or
+ legacy call fallback until those setup paths are also opt-visible;
- direct CG still uses legacy allocation/call hooks;
- broader real-architecture call-plan layout tests and code-shape probes remain
to be added.
@@ -55,7 +56,7 @@ What remains open:
In phase terms: Phase 1 and Phase 2 are done, Phase 3 is implemented through
call-plan aux visibility plus planned replay for supported call shapes, Phase 4
is implemented for register argument/return moves with stack/sret/tail fallback,
-and Phases 5-6 remain open.
+Phase 5 has an initial guarded implementation, and Phase 6 remains open.
## Planned Call Replay Boundary
@@ -454,17 +455,20 @@ Expected result: ABI arg and return registers can be made allocable safely.
### Phase 5 - Broaden Register Exposure
-Status: open except for allocator scoring. O1 now has target-informed scoring
-and per-call preservation, but target phys-reg tables still mostly expose the old
-conservative pools. Broadening ABI arg/return and additional caller-saved regs
-depends on planned/parallel call emission.
-
-- Expand target `get_phys_regs` tables to include nearly all allocable physical
- registers.
-- Update opt scoring to prefer caller-saved regs for non-call-crossing values and
- callee-saved regs for call-crossing values.
-- Keep known backend helper scratch registers reserved until their clobbers are
- expressed.
+Status: partially implemented. O1 has target-informed scoring and per-call
+preservation, and the native target phys-reg tables now expose broader O1 pools.
+Known backend helper scratch registers remain hidden. ABI arg/return registers
+are allocable only when O1 can avoid the still-sequential setup paths: a
+function that has incoming parameters, or that contains any call falling back
+to the legacy `call` hook, keeps those registers out of its allocable pool
+(their arg/ret metadata is still recorded).
+
+- done: expand target `get_phys_regs` tables with guarded caller-saved and ABI
+ registers for x64, AArch64, and RV64;
+- done: update opt scoring to prefer caller-saved regs for non-call-crossing
+ values and callee-saved regs for call-crossing values;
+- done: keep known backend helper scratch registers reserved until their
+ clobbers are expressed;
+- still open: remove the ABI-reg suppression after incoming parameter setup and
+ stack/sret/tail call setup are opt-visible;
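-- Add code-shape tests for direct-call tiny functions and unused-param functions
-  across x64, AArch64, and RV64.
+- still open: add code-shape tests for direct-call tiny functions and
+  unused-param functions across x64, AArch64, and RV64.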
@@ -557,8 +561,8 @@ Next patch stack:
materialization path, then remove the stack-argument fallback.
3. Add red-green hazard tests for return-register collisions and stack-argument
sources once stack materialization is explicit.
-4. Broaden register exposure incrementally, keeping helper scratch registers
- reserved until their clobbers are explicit.
+4. Continue broadening register exposure by removing the current ABI-reg guards
+ as incoming-parameter and stack/sret/tail-call setup become opt-visible.
5. Migrate direct CG or wrap it with internal call planning, then remove legacy
pool semantics.
diff --git a/src/arch/aa64/opt_coord.c b/src/arch/aa64/opt_coord.c
@@ -16,6 +16,32 @@ static const Reg aa_int_scratch[] = {16, 17};
static const Reg aa_fp_scratch[] = {24, 25};
static const CGPhysRegInfo aa_int_phys[] = {
+ {0, RC_INT, 0, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG | CG_REG_RET, 0, 0},
+ {1, RC_INT, 1, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG | CG_REG_RET, 0, 0},
+ {2, RC_INT, 2, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG, 0, 0},
+ {3, RC_INT, 3, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG, 0, 0},
+ {4, RC_INT, 4, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG, 0, 0},
+ {5, RC_INT, 5, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG, 0, 0},
+ {6, RC_INT, 6, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG, 0, 0},
+ {7, RC_INT, 7, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG, 0, 0},
+ {8, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG, 0, 0},
+ {12, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_TEMP_PREFERRED, 0, 0},
+ {13, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_TEMP_PREFERRED, 0, 0},
+ {14, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_TEMP_PREFERRED, 0, 0},
+ {15, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_TEMP_PREFERRED, 0, 0},
{19, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4},
{20, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4},
{21, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4},
@@ -28,6 +54,22 @@ static const CGPhysRegInfo aa_int_phys[] = {
{28, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4},
};
static const CGPhysRegInfo aa_fp_phys[] = {
+ {0, RC_FP, 0, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG | CG_REG_RET, 0, 0},
+ {1, RC_FP, 1, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG | CG_REG_RET, 0, 0},
+ {2, RC_FP, 2, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG | CG_REG_RET, 0, 0},
+ {3, RC_FP, 3, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG | CG_REG_RET, 0, 0},
+ {4, RC_FP, 4, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG | CG_REG_RET, 0, 0},
+ {5, RC_FP, 5, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG | CG_REG_RET, 0, 0},
+ {6, RC_FP, 6, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG | CG_REG_RET, 0, 0},
+ {7, RC_FP, 7, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG | CG_REG_RET, 0, 0},
{8, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4},
{9, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4},
{10, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4},
@@ -44,6 +86,11 @@ static const CGPhysRegInfo aa_fp_phys[] = {
{21, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED, 0, 0},
{22, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED, 0, 0},
{23, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED, 0, 0},
+ {26, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED, 0, 0},
+ {27, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED, 0, 0},
+ {28, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED, 0, 0},
+ {29, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED, 0, 0},
+ {30, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED, 0, 0},
};
/* ============================================================
diff --git a/src/arch/rv64/opt_coord.c b/src/arch/rv64/opt_coord.c
@@ -12,6 +12,22 @@ static const Reg rv_int_scratch[] = {18, 19}; /* s2, s3; reserved by opt_emit */
static const Reg rv_fp_scratch[] = {18, 19}; /* fs2, fs3; reserved by opt_emit */
static const CGPhysRegInfo rv_int_phys[] = {
+ {10, RC_INT, 0, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG | CG_REG_RET, 0, 0},
+ {11, RC_INT, 1, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG | CG_REG_RET, 0, 0},
+ {12, RC_INT, 2, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG, 0, 0},
+ {13, RC_INT, 3, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG, 0, 0},
+ {14, RC_INT, 4, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG, 0, 0},
+ {15, RC_INT, 5, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG, 0, 0},
+ {16, RC_INT, 6, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG, 0, 0},
+ {17, RC_INT, 7, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG, 0, 0},
{20, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4},
{21, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4},
{22, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4},
@@ -20,8 +36,30 @@ static const CGPhysRegInfo rv_int_phys[] = {
{25, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4},
{26, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4},
{27, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4},
+ {29, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_TEMP_PREFERRED, 0, 0},
+ {30, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_TEMP_PREFERRED, 0, 0},
+ {31, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_TEMP_PREFERRED, 0, 0},
};
static const CGPhysRegInfo rv_fp_phys[] = {
+ {10, RC_FP, 0, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG | CG_REG_RET, 0, 0},
+ {11, RC_FP, 1, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG | CG_REG_RET, 0, 0},
+ {12, RC_FP, 2, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG, 0, 0},
+ {13, RC_FP, 3, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG, 0, 0},
+ {14, RC_FP, 4, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG, 0, 0},
+ {15, RC_FP, 5, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG, 0, 0},
+ {16, RC_FP, 6, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG, 0, 0},
+ {17, RC_FP, 7, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG, 0, 0},
{20, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4},
{21, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4},
{22, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4},
@@ -30,6 +68,10 @@ static const CGPhysRegInfo rv_fp_phys[] = {
{25, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4},
{26, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4},
{27, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4},
+ {28, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED, 0, 0},
+ {29, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED, 0, 0},
+ {30, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED, 0, 0},
+ {31, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED, 0, 0},
};
/* ============================================================
diff --git a/src/arch/x64/opt_coord.c b/src/arch/x64/opt_coord.c
@@ -19,15 +19,37 @@ static const Reg x_int_scratch[] = {X64_RBX, X64_R12};
static const Reg x_fp_scratch[] = {X64_XMM0 + 14, X64_XMM15};
static const CGPhysRegInfo x_int_phys[] = {
+ {X64_RDI, RC_INT, 0, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG, 0, 0},
+ {X64_RSI, RC_INT, 1, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG, 0, 0},
+ {X64_R8, RC_INT, 4, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG, 0, 0},
+ {X64_R9, RC_INT, 5, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG, 0, 0},
+ {X64_R10, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_TEMP_PREFERRED, 0, 0},
{X64_R13, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4},
{X64_R14, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4},
{X64_R15, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4},
- {X64_R10, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
- CG_REG_TEMP_PREFERRED, 0, 0},
};
static const CGPhysRegInfo x_fp_phys[] = {
- {X64_XMM6, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED, 0, 0},
- {X64_XMM7, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED, 0, 0},
+ {X64_XMM0, RC_FP, 0, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG | CG_REG_RET, 0, 0},
+ {X64_XMM0 + 1, RC_FP, 1, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG | CG_REG_RET, 0, 0},
+ {X64_XMM0 + 2, RC_FP, 2, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG, 0, 0},
+ {X64_XMM0 + 3, RC_FP, 3, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG, 0, 0},
+ {X64_XMM0 + 4, RC_FP, 4, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG, 0, 0},
+ {X64_XMM0 + 5, RC_FP, 5, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG, 0, 0},
+ {X64_XMM6, RC_FP, 6, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG, 0, 0},
+ {X64_XMM7, RC_FP, 7, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED |
+ CG_REG_ARG, 0, 0},
{X64_XMM8, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED, 0, 0},
{X64_XMM0 + 9, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED, 0, 0},
{X64_XMM0 + 10, RC_FP, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED, 0, 0},
diff --git a/src/opt/opt.c b/src/opt/opt.c
@@ -1235,13 +1235,15 @@ static Operand xlat_op(ReplayCtx* r, Operand op) {
case OPK_GLOBAL:
return op;
case OPK_REG:
+ if (r->identity_regs && r->f->opt_rewritten) return op;
op.v.reg = val_to_target_reg(r, (Val)op.v.reg);
return op;
case OPK_LOCAL:
op.v.frame_slot = slot_to_target(r, op.v.frame_slot);
return op;
case OPK_INDIRECT:
- op.v.ind.base = val_to_target_reg(r, (Val)op.v.ind.base);
+ if (!(r->identity_regs && r->f->opt_rewritten))
+ op.v.ind.base = val_to_target_reg(r, (Val)op.v.ind.base);
return op;
}
return op;
diff --git a/src/opt/pass_lower.c b/src/opt/pass_lower.c
@@ -364,6 +364,19 @@ static void asm_prepare_constraints(Func* f, CGTarget* target, IRAsmAux* aux) {
static int call_plan_replay_supported(const IRCallAux* aux,
const CGTarget* target);
+/* Report whether any IR_CALL in `f` is not marked for planned replay.
+ *
+ * Returns 1 as soon as it finds a call whose aux record is missing or whose
+ * `use_plan_replay` flag is clear, and 0 when every call (or no call at all)
+ * is marked for replay. The scan only reads `f` and trusts the flags as they
+ * currently stand, so the answer is meaningful only after they have been set.
+ */
+static int func_has_legacy_call_fallback(const Func* f) {
+  for (u32 b = 0; b < f->nblocks; ++b) {
+    const Block* bl = &f->blocks[b];
+    for (u32 i = 0; i < bl->ninsts; ++i) {
+      const Inst* in = &bl->insts[i];
+      if ((IROp)in->op != IR_CALL) continue;
+      const IRCallAux* aux = (const IRCallAux*)in->extra.aux;
+      /* No aux record means there is no replay flag to consult; count the
+       * call as fallback as well. */
+      if (!aux || !aux->use_plan_replay) return 1;
+    }
+  }
+  return 0;
+}
+
void opt_machinize(Func* f, CGTarget* target) {
f->opt_target = target->c->target;
f->opt_has_target = 1;
@@ -378,6 +391,25 @@ void opt_machinize(Func* f, CGTarget* target) {
f->opt_ret_regs[c] = 0;
}
+ for (u32 b = 0; b < f->nblocks; ++b) {
+ Block* bl = &f->blocks[b];
+ for (u32 i = 0; i < bl->ninsts; ++i) {
+ Inst* in = &bl->insts[i];
+ if ((IROp)in->op == IR_ASM_BLOCK) {
+ asm_prepare_constraints(f, target, (IRAsmAux*)in->extra.aux);
+ } else if ((IROp)in->op == IR_CALL && target->plan_call) {
+ IRCallAux* aux = (IRCallAux*)in->extra.aux;
+ if (aux) {
+ target->plan_call(target, &aux->desc, &aux->plan);
+ aux->plan_valid = 1;
+ aux->use_plan_replay = call_plan_replay_supported(aux, target);
+ }
+ }
+ }
+ }
+
+ int suppress_abi_regs = func_has_legacy_call_fallback(f) || f->nparams != 0;
+
for (u32 c = 0; c < OPT_REG_CLASSES; ++c) {
const CGPhysRegInfo* phys = NULL;
u32 nphys = 0;
@@ -396,6 +428,8 @@ void opt_machinize(Func* f, CGTarget* target) {
}
f->opt_phys_regs[c][f->opt_phys_reg_count[c]++] = pi;
if ((pi.flags & CG_REG_ALLOCABLE) &&
+ (!suppress_abi_regs ||
+ (pi.flags & (CG_REG_ARG | CG_REG_RET)) == 0) &&
!(pi.flags & CG_REG_RESERVED)) {
f->opt_hard_regs[c][f->opt_hard_reg_count[c]++] = hr;
}
@@ -441,23 +475,6 @@ void opt_machinize(Func* f, CGTarget* target) {
}
}
}
-
- for (u32 b = 0; b < f->nblocks; ++b) {
- Block* bl = &f->blocks[b];
- for (u32 i = 0; i < bl->ninsts; ++i) {
- Inst* in = &bl->insts[i];
- if ((IROp)in->op == IR_ASM_BLOCK) {
- asm_prepare_constraints(f, target, (IRAsmAux*)in->extra.aux);
- } else if ((IROp)in->op == IR_CALL && target->plan_call) {
- IRCallAux* aux = (IRCallAux*)in->extra.aux;
- if (aux) {
- target->plan_call(target, &aux->desc, &aux->plan);
- aux->plan_valid = 1;
- aux->use_plan_replay = call_plan_replay_supported(aux, target);
- }
- }
- }
- }
}
static int is_caller_saved(Func* f, u8 cls, Reg r) {
diff --git a/test/opt/opt_test.c b/test/opt/opt_test.c
@@ -793,6 +793,93 @@ static void opt_machinize_uses_phys_reg_metadata(void) {
tc_fini(&tc);
}
+/* opt_machinize should withhold ABI argument/return registers from the
+ * allocable hard-register pool in two situations: when the function contains
+ * a call that is not replayed from its plan, and when the function has
+ * incoming parameters. The arg/ret register masks are expected to be filled
+ * in regardless. */
+static void opt_machinize_filters_abi_regs_for_legacy_call_fallback(void) {
+  TestCtx tc;
+  MockCGTarget mock;
+  tc_init(&tc);
+  mock_init(&mock, tc.c);
+
+  /* r2 carries the ARG flag and r3 the RET flag; r12 and r19 have neither. */
+  static const Reg scratch_regs[] = {9, 10};
+  static const CGPhysRegInfo reg_table[] = {
+      {2, RC_INT, 0,
+       CG_REG_ARG | CG_REG_CALLER_SAVED | CG_REG_ALLOCABLE, 0, 0},
+      {3, RC_INT, 0,
+       CG_REG_RET | CG_REG_CALLER_SAVED | CG_REG_ALLOCABLE, 0, 0},
+      {12, RC_INT, 0xff, CG_REG_CALLER_SAVED | CG_REG_ALLOCABLE, 0, 0},
+      {19, RC_INT, 0xff, CG_REG_CALLEE_SAVED | CG_REG_ALLOCABLE, 50, 4},
+  };
+  mock_set_pool(&mock, RC_INT, NULL, 0, scratch_regs, 2, 0);
+  mock_set_phys(&mock, RC_INT, reg_table,
+                sizeof reg_table / sizeof *reg_table);
+  /* Presumably steers the mock's call plan toward a stack-passed argument,
+   * which the assertions below expect to rule out planned replay. */
+  mock.planned_stack_arg = 1;
+
+  /* One void call; the same i64 value serves as callee and sole argument. */
+  Func* fn = new_func(&tc);
+  Val callee_val = add_val(fn, tc.i64);
+  emit_load_imm(fn, fn->entry, callee_val, tc.i64, 7);
+  Inst* call_inst = emit_call_void(fn, fn->entry);
+  IRCallAux* call_aux = (IRCallAux*)call_inst->extra.aux;
+  CGABIValue* abi_args = arena_zarray(fn->arena, CGABIValue, 1);
+  abi_args->type = tc.i64;
+  abi_args->storage = op_reg_(callee_val, tc.i64);
+  call_aux->desc.callee = op_reg_(callee_val, tc.i64);
+  call_aux->desc.args = abi_args;
+  call_aux->desc.nargs = 1;
+
+  opt_machinize(fn, &mock.base);
+
+  EXPECT(mock.plan_call_count == 1, "call should be planned before filtering");
+  EXPECT(call_aux->use_plan_replay == 0,
+         "stack-arg call should stay on legacy fallback");
+  EXPECT(fn->opt_hard_reg_count[RC_INT] == 2,
+         "legacy fallback should filter ABI arg/ret regs from hard pool");
+  EXPECT(fn->opt_hard_regs[RC_INT][0] == 12 &&
+             fn->opt_hard_regs[RC_INT][1] == 19,
+         "only non-ABI regs should remain allocable under fallback");
+  EXPECT((fn->opt_arg_regs[RC_INT] & (1u << 2)) != 0,
+         "arg metadata should still be recorded");
+  EXPECT((fn->opt_ret_regs[RC_INT] & (1u << 3)) != 0,
+         "ret metadata should still be recorded");
+
+  /* Same register table, no calls: a nonzero parameter count on its own
+   * should trigger the same filtering. */
+  Func* with_param = new_func(&tc);
+  with_param->nparams = 1;
+  opt_machinize(with_param, &mock.base);
+  EXPECT(with_param->opt_hard_reg_count[RC_INT] == 2,
+         "incoming params should also filter ABI arg/ret regs");
+  EXPECT(with_param->opt_hard_regs[RC_INT][0] == 12 &&
+             with_param->opt_hard_regs[RC_INT][1] == 19,
+         "param functions should keep only non-ABI regs allocable");
+  tc_fini(&tc);
+}
+
+/* With one caller-saved ARG register (r2) and one callee-saved register (r19)
+ * on offer, a value that is loaded and immediately returned, in a function
+ * with no calls, is expected to be assigned r2. */
+static void opt_regalloc_prefers_caller_saved_for_non_call_value(void) {
+  TestCtx tc;
+  MockCGTarget mock;
+  tc_init(&tc);
+  mock_init(&mock, tc.c);
+
+  static const Reg scratch_regs[] = {9, 10};
+  static const CGPhysRegInfo reg_table[] = {
+      {2, RC_INT, 0,
+       CG_REG_ARG | CG_REG_CALLER_SAVED | CG_REG_ALLOCABLE, 0, 0},
+      {19, RC_INT, 0xff, CG_REG_CALLEE_SAVED | CG_REG_ALLOCABLE, 50, 4},
+  };
+  mock_set_pool(&mock, RC_INT, NULL, 0, scratch_regs, 2, 0);
+  mock_set_phys(&mock, RC_INT, reg_table,
+                sizeof reg_table / sizeof *reg_table);
+
+  /* Body: load 42 into an i32 value and return it. */
+  Func* fn = new_func(&tc);
+  Val imm_val = add_val(fn, tc.i32);
+  emit_load_imm(fn, fn->entry, imm_val, tc.i32, 42);
+  emit_ret_val(fn, fn->entry, imm_val, tc.i32);
+
+  /* Run the passes in the same order as before, ending with allocation. */
+  opt_machinize(fn, &mock.base);
+  opt_build_cfg(fn);
+  opt_build_loop_tree(fn);
+  opt_regalloc(fn, 0);
+
+  EXPECT(fn->val_info[imm_val].alloc_kind == OPT_ALLOC_HARD,
+         "single live value should allocate a hard register");
+  EXPECT(fn->val_info[imm_val].hard_reg == 2,
+         "non-call-crossing value should prefer caller-saved r2, got r%u",
+         (unsigned)fn->val_info[imm_val].hard_reg);
+  tc_fini(&tc);
+}
+
static void opt_call_plan_drives_call_specific_preservation(void) {
TestCtx tc;
tc_init(&tc);
@@ -2686,6 +2773,30 @@ static void opt_planned_call_replay_falls_back_for_stack_args(void) {
tc_fini(&tc);
}
+/* Emits a function that is flagged as already rewritten and whose only
+ * register operand is number 0, then checks that the mock target saw exactly
+ * one load-immediate with destination register 0. */
+static void opt_emit_preserves_physical_reg_zero(void) {
+  TestCtx tc;
+  MockCGTarget mock;
+  tc_init(&tc);
+  mock_init(&mock, tc.c);
+
+  Func* fn = new_func(&tc);
+  fn->opt_rewritten = 1;
+
+  /* Hand-built IR_LOAD_IMM: a single operand naming register 0, immediate 42. */
+  Inst* load = ir_emit(fn, fn->entry, IR_LOAD_IMM);
+  load->opnds = arena_array(fn->arena, Operand, 1);
+  load->opnds[0] = op_reg_(0, tc.i32);
+  load->nopnds = 1;
+  load->extra.imm = 42;
+  emit_ret_val(fn, fn->entry, 0, tc.i32);
+
+  opt_emit(tc.c, fn, &mock.base);
+
+  EXPECT(mock.load_imm_calls == 1, "physical r0 load should be emitted");
+  EXPECT(mock.last_load_imm_dst == 0,
+         "identity replay should preserve physical r0, got r%u",
+         (unsigned)mock.last_load_imm_dst);
+  tc_fini(&tc);
+}
+
/* ============================================================
* End-to-end test — drive the opt-wrapped CGTarget through the
* public CGTarget interface, let func_end run the full pipeline,
@@ -3031,6 +3142,8 @@ static void simple_regalloc_reports_exact_used_regs(void) {
int main(void) {
opt_machinize_uses_phys_reg_metadata();
+ opt_machinize_filters_abi_regs_for_legacy_call_fallback();
+ opt_regalloc_prefers_caller_saved_for_non_call_value();
opt_call_plan_drives_call_specific_preservation();
opt_cfg_prunes_unreachable();
opt_cfg_preserves_scope_edges();
@@ -3072,6 +3185,7 @@ int main(void) {
opt_planned_call_replay_resolves_arg_cycle();
opt_planned_call_replay_preserves_indirect_callee_arg_reg();
opt_planned_call_replay_falls_back_for_stack_args();
+ opt_emit_preserves_physical_reg_zero();
opt_emit_no_virtual_alloc();
opt_records_const_bytes_by_value();
opt_cmp_branch_keeps_fallthrough_after_block_growth();