commit af14511ce25c7bb424bcf5465180c0716ebecb73
parent 64a16dbb17499ff7c984aa6980a5c9cf3a5487a3
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sat, 16 May 2026 00:59:51 -0700
Expand opt call-plan ABI register coverage
Diffstat:
3 files changed, 516 insertions(+), 35 deletions(-)
diff --git a/doc/OPT_REGS_CALL_PLAN.md b/doc/OPT_REGS_CALL_PLAN.md
@@ -47,12 +47,13 @@ What remains open:
- call setup/return extraction are represented by call-plan aux data rather
than separate first-class IR ops;
- tail calls still fall back to the legacy backend `call` hook;
-- target `get_phys_regs` tables expose broader O1 pools, but ABI argument and
- return registers are suppressed for functions with incoming parameters or
- legacy call fallback until incoming parameter setup is also opt-visible;
+- target `get_phys_regs` tables expose broader O1 pools, and functions with
+ incoming parameters can now allocate ABI argument/return registers with
+ opt-side constraints for sequential parameter-copy hazards; legacy call
+ fallback, including tail calls, still suppresses ABI argument/return registers
+ until tail-call setup is opt-visible;
- direct CG still uses legacy allocation/call hooks;
-- broader real-architecture call-plan layout tests and code-shape probes remain
- to be added.
+- code-shape probes remain to be added.
In phase terms: Phase 1 and Phase 2 are done, Phase 3 is implemented through
call-plan aux visibility plus planned replay for supported call shapes, Phase 4
@@ -467,12 +468,15 @@ Expected result: ABI arg and return registers can be made allocable safely.
### Phase 5 - Broaden Register Exposure
-Status: implemented for call setup. O1 has target-informed scoring and
-per-call preservation, and the native target phys-reg tables now expose broader
-O1 pools. Known backend helper scratch registers remain hidden. ABI arg/return
-registers are available when all calls in a function use planned replay; only
-functions with incoming parameters or legacy tail-call fallback suppress those
-ABI registers.
+Status: implemented for call setup and incoming scalar parameter setup. O1 has
+target-informed scoring and per-call preservation, and the native target
+phys-reg tables now expose broader O1 pools. Known backend helper scratch
+registers remain hidden. ABI arg/return registers are available when all calls
+in a function use planned replay. Functions with incoming parameters keep those
+registers allocable: opt forbids an earlier parameter's value from being
+assigned to a later parameter's incoming ABI register, because the backend
+still copies incoming parameters sequentially. Legacy tail-call fallback still
+suppresses ABI registers until tail-call setup is opt-visible.
- done: expand target `get_phys_regs` tables with guarded caller-saved and ABI
registers for x64, AArch64, and RV64;
@@ -481,8 +485,10 @@ ABI registers.
- done: keep known backend helper scratch registers reserved until their
clobbers are expressed;
- done: remove call-driven ABI-reg suppression for stack and sret call plans;
-- still open: remove the ABI-reg suppression after incoming parameter setup and
- tail call setup are opt-visible;
+- done: remove incoming-parameter ABI-reg suppression by modeling parameter
+ incoming-register clobber hazards in opt allocation constraints;
+- still open: remove the legacy tail-call fallback ABI-reg suppression after
+ tail-call setup is opt-visible;
- Add code-shape tests for direct-call tiny functions and unused-param functions
across x64, AArch64, and RV64.
@@ -511,11 +517,12 @@ Focused unit tests:
- done: opt-side target register metadata consumption;
- done: caller-saved live-across-call preservation using per-call masks;
- done: planned-call replay through `emit_call_plan` for register-argument
- cycles, stack arguments, address-valued args, sret-shaped plans, and
+ cycles, stack arguments, address-valued args, sret-shaped plans,
+ return-register collisions, stack-argument source hazards, and
indirect-callee/argument-register hazards;
- still needed: target register metadata tests per real architecture;
-- still needed: broader real-architecture call-plan layout for scalar, FP,
- mixed, sret, variadic, and stack-arg calls;
+- done: broader real-architecture call-plan layout for scalar, FP, mixed,
+ sret, variadic, and stack-arg calls;
- still needed: direct call-clobber mask tests per real architecture;
- still needed: code-shape probes after ABI registers are exposed broadly;
- still needed: callee-save reservation/code-shape tests after broadened
@@ -571,16 +578,18 @@ Completed:
extraction.
6. Remove call-driven ABI-reg suppression for stack-argument and sret-shaped
calls.
+7. Add call-plan layout/dump tests for real x64/AArch64/RV64 scalar, FP, mixed,
+ sret, variadic, and stack-arg cases.
+8. Add red-green hazard tests for return-register collisions and stack-argument
+ sources.
+9. Remove incoming-parameter ABI-reg suppression with opt-side constraints for
+ incoming parameter copy hazards.
Next patch stack:
-1. Add call-plan layout/dump tests for real x64/AArch64/RV64 scalar, FP, mixed,
- sret, variadic, and stack-arg cases.
-2. Add red-green hazard tests for return-register collisions and stack-argument
- sources.
-3. Continue broadening register exposure by removing the remaining ABI-reg
- guards as incoming-parameter and tail-call setup become opt-visible.
-4. Migrate direct CG or wrap it with internal call planning, then remove legacy
+1. Continue broadening register exposure by removing the remaining tail-call
+ ABI-reg guard when tail-call setup becomes opt-visible.
+2. Migrate direct CG or wrap it with internal call planning, then remove legacy
pool semantics.
This order keeps each step testable and avoids mixing API migration, allocation
diff --git a/src/opt/pass_lower.c b/src/opt/pass_lower.c
@@ -293,6 +293,81 @@ static void apply_asm_register_constraints(Func* f, Inst* in, u64* use,
}
}
+static int phys_arg_reg_for_index(Func* f, u8 cls, u32 abi_index, Reg* out) { /* Look up the CG_REG_ARG register of class cls whose abi_index matches; returns 1 and writes it to *out (when out is non-NULL), otherwise returns 0. */
+ if (!f || cls >= OPT_REG_CLASSES) return 0; /* reject a NULL function or an out-of-range register class */
+ for (u32 i = 0; i < f->opt_phys_reg_count[cls]; ++i) {
+ const CGPhysRegInfo* pi = &f->opt_phys_regs[cls][i];
+ if ((pi->flags & CG_REG_ARG) && pi->abi_index == abi_index) { /* first matching table entry wins */
+ if (out) *out = pi->reg;
+ return 1;
+ }
+ }
+ return 0; /* no argument register is recorded for this index */
+}
+
+static void apply_param_incoming_register_hazards(Func* f) { /* Forbid each register-stored parameter value from being assigned the incoming ABI register of any later parameter of the same class. */
+ if (!f || !f->val_info || !f->desc.abi || !f->nparams) return; /* nothing to constrain without value info, ABI info, or parameters */
+ Reg incoming_regs[64]; /* per-parameter incoming register; meaningful only where has_incoming[i] is set */
+ u8 incoming_cls[64]; /* register class of incoming_regs[i] */
+ u8 has_incoming[64]; /* 1 when parameter i was matched to a physical argument register */
+ memset(incoming_regs, 0, sizeof incoming_regs);
+ memset(incoming_cls, 0, sizeof incoming_cls);
+ memset(has_incoming, 0, sizeof has_incoming);
+
+ u32 next_int = 0; /* next integer argument-register index to hand out */
+ u32 next_fp = 0; /* next FP argument-register index to hand out */
+ if (f->desc.abi->has_sret && f->opt_target.arch != CFREE_ARCH_ARM_64)
+ next_int = 1; /* skip integer index 0 when sret is present, except on AArch64 */
+
+ u32 nparams = f->nparams < 64u ? f->nparams : 64u; /* clamp to the local array capacity; parameters past 64 are not modeled */
+ for (u32 i = 0; i < nparams; ++i) { /* pass 1: map each parameter to its incoming register */
+ IRParam* p = &f->params[i];
+ const ABIArgInfo* ai = p->abi;
+ if (!ai || ai->kind == ABI_ARG_IGNORE) continue; /* consumes no register index */
+ if (ai->kind == ABI_ARG_INDIRECT) { /* indirect parameters consume one integer index */
+ Reg r = REG_NONE;
+ if (phys_arg_reg_for_index(f, RC_INT, next_int, &r)) {
+ incoming_regs[i] = r;
+ incoming_cls[i] = RC_INT;
+ has_incoming[i] = 1;
+ }
+ ++next_int; /* advanced even when no register exists for this index */
+ continue;
+ }
+ if (ai->kind != ABI_ARG_DIRECT || ai->nparts != 1) continue; /* NOTE(review): multi-part direct parameters advance neither counter - confirm they never occupy argument registers, otherwise later indexes are off */
+ const ABIArgPart* part = &ai->parts[0];
+ if (part->cls == ABI_CLASS_FP) {
+ Reg r = REG_NONE;
+ if (phys_arg_reg_for_index(f, RC_FP, next_fp, &r)) {
+ incoming_regs[i] = r;
+ incoming_cls[i] = RC_FP;
+ has_incoming[i] = 1;
+ }
+ ++next_fp;
+ } else if (part->cls == ABI_CLASS_INT) {
+ Reg r = REG_NONE;
+ if (phys_arg_reg_for_index(f, RC_INT, next_int, &r)) {
+ incoming_regs[i] = r;
+ incoming_cls[i] = RC_INT;
+ has_incoming[i] = 1;
+ }
+ ++next_int;
+ } /* any other part class consumes no index */
+ }
+
+ for (u32 i = 0; i < nparams; ++i) { /* pass 2: constrain register-stored parameter values */
+ IRParam* p = &f->params[i];
+ if (p->storage.kind != CG_LOCAL_STORAGE_REG) continue; /* only register-stored parameters are constrained */
+ Val v = (Val)p->storage.v.reg;
+ if (v == VAL_NONE || v >= f->nvals) continue; /* skip values that cannot index val_info */
+ u8 cls = f->val_info[v].cls;
+ for (u32 j = i + 1u; j < nparams; ++j) { /* only later parameters are considered */
+ if (!has_incoming[j] || incoming_cls[j] != cls) continue;
+ forbid_val_reg(f, v, cls, incoming_regs[j]);
+ }
+ }
+}
+
static int mem_observable(const MemAccess* m) {
return (m->flags & (MF_VOLATILE | MF_ATOMIC)) != 0;
}
@@ -408,7 +483,7 @@ void opt_machinize(Func* f, CGTarget* target) {
}
}
- int suppress_abi_regs = func_has_legacy_call_fallback(f) || f->nparams != 0;
+ int suppress_abi_regs = func_has_legacy_call_fallback(f);
for (u32 c = 0; c < OPT_REG_CLASSES; ++c) {
const CGPhysRegInfo* phys = NULL;
@@ -1619,6 +1694,7 @@ void opt_regalloc(Func* f, int allow_live_range_split) {
opt_live_ranges_build(f, &live, &ranges);
opt_init_val_info_from_ranges(f, &ranges);
opt_apply_asm_constraints_from_live(f, &live);
+ apply_param_incoming_register_hazards(f);
metrics_count(f->c, "opt.live_words", f->opt_live_words);
metrics_count(f->c, "opt.ranges", ranges.nranges);
metrics_count(f->c, "opt.range_points", ranges.point_count);
diff --git a/test/opt/opt_test.c b/test/opt/opt_test.c
@@ -9,6 +9,8 @@
#include "abi/abi.h"
#include "arch/arch.h"
#include "arch/regalloc.h"
+#include "arch/rv64/isa.h"
+#include "arch/x64/isa.h"
#include "core/core.h"
#include "core/heap.h"
#include "core/pool.h"
@@ -67,7 +69,7 @@ typedef struct TestCtx {
CfreeCgTypeId f64;
} TestCtx;
-static void tc_init(TestCtx* tc) {
+static void tc_init_target(TestCtx* tc, CfreeArchKind arch, CfreeOSKind os) {
CfreeEnv env;
memset(&env, 0, sizeof env);
env.heap = &g_heap;
@@ -76,8 +78,8 @@ static void tc_init(TestCtx* tc) {
CfreeTarget tgt;
memset(&tgt, 0, sizeof tgt);
- tgt.arch = CFREE_ARCH_ARM_64;
- tgt.os = CFREE_OS_LINUX;
+ tgt.arch = arch;
+ tgt.os = os;
tgt.obj = CFREE_OBJ_ELF;
tgt.ptr_size = 8;
tgt.ptr_align = 8;
@@ -94,6 +96,10 @@ static void tc_init(TestCtx* tc) {
}
}
+static void tc_init(TestCtx* tc) { /* default test context: AArch64 Linux, the target this helper hard-coded before tc_init_target was split out */
+ tc_init_target(tc, CFREE_ARCH_ARM_64, CFREE_OS_LINUX);
+}
+
static void tc_fini(TestCtx* tc) { compiler_fini(&tc->cc); }
static Operand op_reg_(Reg r, CfreeCgTypeId ty) {
@@ -438,6 +444,8 @@ typedef struct MockCGTarget {
int cmp_branch_calls;
int call_calls;
int emit_call_plan_calls;
+ char events[32];
+ int event_count;
Operand last_plan_callee;
Reg planned_arg_regs[8];
u32 planned_nargs;
@@ -548,6 +556,8 @@ static void mock_call(CGTarget* t, const CGCallDesc* d) {
static void mock_emit_call_plan(CGTarget* t, const CGCallPlan* p) {
MockCGTarget* m = (MockCGTarget*)t;
++m->emit_call_plan_calls;
+ if (m->event_count < (int)sizeof m->events) /* stop logging once the event buffer is full */
+ m->events[m->event_count++] = 'p'; /* 'p' marks a planned-call emission in the event log */
m->last_plan_callee = p->callee;
}
@@ -604,6 +614,8 @@ static void mock_load_const(CGTarget* t, Operand dst, ConstBytes cb) {
static void mock_copy(CGTarget* t, Operand dst, Operand src) {
MockCGTarget* m = (MockCGTarget*)t;
+ if (m->event_count < (int)sizeof m->events)
+ m->events[m->event_count++] = 'c';
if (m->copy_calls < (int)(sizeof m->copy_dst / sizeof m->copy_dst[0])) {
m->copy_dst[m->copy_calls] = dst;
m->copy_src[m->copy_calls] = src;
@@ -629,6 +641,8 @@ static void mock_store(CGTarget* t, Operand addr, Operand src, MemAccess macc) {
static void mock_store_call_arg(CGTarget* t, const CGCallPlanMove* move) {
MockCGTarget* m = (MockCGTarget*)t;
+ if (m->event_count < (int)sizeof m->events) /* stop logging once the event buffer is full */
+ m->events[m->event_count++] = 's'; /* 's' marks a stack-argument store in the event log */
++m->store_call_arg_calls;
m->last_stack_arg = *move;
}
@@ -783,6 +797,114 @@ static void mock_set_phys(MockCGTarget* m, RegClass cls,
}
}
+typedef struct RealArchExpect { /* expected call-plan register layout for one real target architecture */
+ const char* name; /* label used as the context string in failure messages */
+ CfreeArchKind arch; /* architecture passed to tc_init_target */
+ Reg int_arg[8]; /* expected integer argument registers, in ABI order */
+ Reg fp_arg[8]; /* expected FP argument registers, in ABI order */
+ Reg int_ret[2]; /* expected integer return registers */
+ Reg fp_ret[2]; /* expected FP return registers */
+ Reg sret_reg; /* register expected to carry the hidden sret pointer */
+ u32 n_int_arg_regs; /* index of the first argument expected on the stack in the nine-integer call */
+ int variadic_fp_counted; /* nonzero: the variadic case checks variadic_fp_count == 1 */
+ int variadic_fp_uses_int_reg; /* nonzero: the variadic FP argument is expected in an integer register */
+} RealArchExpect;
+
+static const RealArchExpect g_real_arch[] = { /* one row per architecture exercised by real_arch_call_plan_layouts */
+ {"x64", CFREE_ARCH_X86_64,
+ {X64_RDI, X64_RSI, X64_RDX, X64_RCX, X64_R8, X64_R9, 0, 0}, /* six integer argument registers; trailing zeros pad the array */
+ {X64_XMM0, X64_XMM1, X64_XMM2, X64_XMM3, X64_XMM4, X64_XMM5,
+ X64_XMM6, X64_XMM7},
+ {X64_RAX, X64_RDX}, {X64_XMM0, X64_XMM1}, X64_RDI, 6, 1, 0}, /* sret pointer in RDI; variadic FP count is checked */
+ {"aa64", CFREE_ARCH_ARM_64,
+ {0, 1, 2, 3, 4, 5, 6, 7}, {0, 1, 2, 3, 4, 5, 6, 7},
+ {0, 1}, {0, 1}, 8, 8, 0, 0}, /* sret pointer in register 8, outside the integer argument list */
+ {"rv64", CFREE_ARCH_RV64,
+ {RV_A0, RV_A1, RV_A2, RV_A3, RV_A4, RV_A5, RV_A6, RV_A7},
+ {10, 11, 12, 13, 14, 15, 16, 17}, {RV_A0, RV_A1}, {10, 11},
+ RV_A0, 8, 0, 1}, /* sret pointer in A0; the variadic FP argument is expected in an integer register */
+};
+
+static CfreeCgTypeId record_of_i64s(TestCtx* tc, const char* name, /* Build a record type called name with a single field "a" that is an array of count i64 elements. */
+ u64 count) {
+ CfreeCgTypeId arr = cfree_cg_type_array(tc->c, tc->i64, count);
+ CfreeCgField field;
+ memset(&field, 0, sizeof field); /* leave every field attribute other than name/type zeroed */
+ field.name = cfree_sym_intern(tc->c, "a");
+ field.type = arr;
+ return cfree_cg_type_record(tc->c, cfree_sym_intern(tc->c, name), &field, 1);
+}
+
+static CfreeCgTypeId make_func_type(TestCtx* tc, CfreeCgTypeId ret, /* Build a CFREE_CG_CC_TARGET_C function type with the given return type, parameter types, and variadic flag. */
+ const CfreeCgTypeId* params, u32 nparams,
+ int variadic) {
+ CfreeCgFuncParam ps[12]; /* NOTE(review): fixed capacity with no bounds check - nparams above 12 would overrun; callers visible here pass at most 9 */
+ CfreeCgFuncSig sig;
+ memset(ps, 0, sizeof ps);
+ memset(&sig, 0, sizeof sig);
+ for (u32 i = 0; i < nparams; ++i) ps[i].type = params[i];
+ sig.ret = ret;
+ sig.params = ps; /* points at the stack array; presumably cfree_cg_type_func copies it - verify */
+ sig.nparams = nparams;
+ sig.call_conv = CFREE_CG_CC_TARGET_C;
+ sig.abi_variadic = variadic;
+ return cfree_cg_type_func(tc->c, sig);
+}
+
+static CGABIValue call_arg_value(CfreeCgTypeId ty, const ABIArgInfo* abi, /* Bundle a type, its ABI classification (may be NULL, as the variadic test passes), and a storage operand into a CGABIValue. */
+ Operand storage) {
+ CGABIValue v;
+ memset(&v, 0, sizeof v);
+ v.type = ty;
+ v.abi = abi;
+ v.storage = storage;
+ v.size = 0; /* already zero after the memset; kept explicit */
+ return v;
+}
+
+static CGCallDesc make_call_desc(TestCtx* tc, CfreeCgTypeId fn_ty, /* Assemble a call descriptor for fn_ty with the given arguments and return value. */
+ const CGABIValue* args, u32 nargs,
+ CGABIValue ret) {
+ CGCallDesc d;
+ memset(&d, 0, sizeof d);
+ d.fn_type = fn_ty;
+ d.abi = abi_cg_func_info(tc->c->abi, fn_ty);
+ d.callee = op_reg_(30, tc->i64); /* the callee is always the i64 register operand 30 in these tests */
+ d.args = args; /* borrowed: the caller's array must outlive the descriptor */
+ d.nargs = nargs;
+ d.ret = ret;
+ return d;
+}
+
+static void expect_plan_arg(const CGCallPlan* p, u32 i, u8 dst_kind, u8 cls, /* Check that plan argument i has the expected destination kind, class, and register or stack offset. */
+ Reg reg, u32 stack_offset, const char* ctx) {
+ EXPECT(i < p->nargs, "%s missing arg %u", ctx, (unsigned)i);
+ if (i >= p->nargs) return; /* bail out after reporting so args[i] is never read out of range */
+ EXPECT(p->args[i].dst_kind == dst_kind,
+ "%s arg %u dst kind got %u want %u", ctx, (unsigned)i,
+ (unsigned)p->args[i].dst_kind, (unsigned)dst_kind);
+ EXPECT(p->args[i].cls == cls, "%s arg %u class got %u want %u", ctx,
+ (unsigned)i, (unsigned)p->args[i].cls, (unsigned)cls);
+ if (dst_kind == CG_CALL_PLAN_REG) { /* reg is only compared for register destinations */
+ EXPECT(p->args[i].dst_reg == reg, "%s arg %u reg got %u want %u", ctx,
+ (unsigned)i, (unsigned)p->args[i].dst_reg, (unsigned)reg);
+ } else if (dst_kind == CG_CALL_PLAN_STACK) { /* stack_offset is only compared for stack destinations */
+ EXPECT(p->args[i].stack_offset == stack_offset,
+ "%s arg %u stack offset got %u want %u", ctx, (unsigned)i,
+ (unsigned)p->args[i].stack_offset, (unsigned)stack_offset);
+ }
+}
+
+static void expect_plan_ret(const CGCallPlan* p, u32 i, u8 cls, Reg reg, /* Check that plan return i has the expected class and source register. */
+ const char* ctx) {
+ EXPECT(i < p->nrets, "%s missing ret %u", ctx, (unsigned)i);
+ if (i >= p->nrets) return; /* bail out after reporting so rets[i] is never read out of range */
+ EXPECT(p->rets[i].cls == cls, "%s ret %u class got %u want %u", ctx,
+ (unsigned)i, (unsigned)p->rets[i].cls, (unsigned)cls);
+ EXPECT(p->rets[i].src_reg == reg, "%s ret %u reg got %u want %u", ctx,
+ (unsigned)i, (unsigned)p->rets[i].src_reg, (unsigned)reg);
+}
+
/* ============================================================
* Pass-shape tests — build IR via the public IR API, run one
* pass at a time, assert on IR structure. Backend policy is
@@ -873,17 +995,200 @@ static void opt_machinize_filters_abi_regs_for_legacy_call_fallback(void) {
EXPECT((f->opt_ret_regs[RC_INT] & (1u << 3)) != 0,
"ret metadata should still be recorded");
- Func* g = new_func(&tc);
- g->nparams = 1;
- opt_machinize(g, &mock.base);
- EXPECT(g->opt_hard_reg_count[RC_INT] == 2,
- "incoming params should also filter ABI arg/ret regs");
- EXPECT(g->opt_hard_regs[RC_INT][0] == 12 &&
- g->opt_hard_regs[RC_INT][1] == 19,
- "param functions should keep only non-ABI regs allocable");
tc_fini(&tc);
}
+static void opt_machinize_keeps_abi_regs_for_incoming_params(void) { /* A function with parameters must keep ABI arg/ret registers in its allocable set after opt_machinize. */
+ TestCtx tc;
+ tc_init(&tc);
+ MockCGTarget mock;
+ mock_init(&mock, tc.c);
+ static const Reg scratch[] = {9, 10};
+ static const CGPhysRegInfo phys[] = {
+ {2, RC_INT, 0, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | /* register 2: ABI argument register */
+ CG_REG_ARG, 0, 0},
+ {3, RC_INT, 0, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED | /* register 3: ABI return register */
+ CG_REG_RET, 0, 0},
+ {12, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLER_SAVED, 0, 0}, /* register 12: plain caller-saved */
+ {19, RC_INT, 0xff, CG_REG_ALLOCABLE | CG_REG_CALLEE_SAVED, 50, 4}, /* register 19: callee-saved */
+ };
+ mock_set_pool(&mock, RC_INT, NULL, 0, scratch, 2, 0);
+ mock_set_phys(&mock, RC_INT, phys, sizeof phys / sizeof phys[0]);
+
+ Func* f = new_func(&tc);
+ f->nparams = 1; /* only the count is set; no parameter array is built for this check */
+ opt_machinize(f, &mock.base);
+ EXPECT(f->opt_hard_reg_count[RC_INT] == 4,
+ "incoming params should not suppress ABI arg/ret regs");
+ EXPECT(f->opt_hard_regs[RC_INT][0] == 2 && /* order is expected to follow the phys table */
+ f->opt_hard_regs[RC_INT][1] == 3 &&
+ f->opt_hard_regs[RC_INT][2] == 12 &&
+ f->opt_hard_regs[RC_INT][3] == 19,
+ "param functions should keep all non-reserved regs allocable");
+ tc_fini(&tc);
+}
+
+static void real_arch_call_plan_layout_one(const RealArchExpect* ex) { /* Run the real target's plan_call over scalar, FP, mixed, sret, variadic, and stack-argument signatures and compare the layout against ex. */
+ TestCtx tc;
+ tc_init_target(&tc, ex->arch, CFREE_OS_LINUX);
+ CGTarget* target = cgtarget_new(tc.c, NULL, NULL); /* NOTE(review): never explicitly released here - presumably owned by the compiler context; verify */
+ CfreeCgTypeId large = record_of_i64s(&tc, "Large", 3); /* three-i64 record, used as the sret return type below */
+
+ { /* scalar: two i64 arguments, i64 return */
+ CfreeCgTypeId params[2] = {tc.i64, tc.i64};
+ CfreeCgTypeId fn = make_func_type(&tc, tc.i64, params, 2, 0);
+ const ABIFuncInfo* abi = abi_cg_func_info(tc.c->abi, fn);
+ CGABIValue args[2];
+ args[0] = call_arg_value(tc.i64, &abi->params[0], op_reg_(1, tc.i64));
+ args[1] = call_arg_value(tc.i64, &abi->params[1], op_reg_(2, tc.i64));
+ CGABIValue ret = call_arg_value(tc.i64, &abi->ret, op_reg_(3, tc.i64));
+ CGCallDesc d = make_call_desc(&tc, fn, args, 2, ret);
+ CGCallPlan p;
+ target->plan_call(target, &d, &p);
+ EXPECT(p.nargs == 2, "%s scalar nargs got %u", ex->name,
+ (unsigned)p.nargs);
+ expect_plan_arg(&p, 0, CG_CALL_PLAN_REG, RC_INT, ex->int_arg[0], 0,
+ ex->name);
+ expect_plan_arg(&p, 1, CG_CALL_PLAN_REG, RC_INT, ex->int_arg[1], 0,
+ ex->name);
+ expect_plan_ret(&p, 0, RC_INT, ex->int_ret[0], ex->name);
+ EXPECT((p.return_mask[RC_INT] & (1u << ex->int_ret[0])) != 0,
+ "%s scalar return mask should include first int return reg",
+ ex->name);
+ }
+
+ { /* FP: two f64 arguments, f64 return */
+ CfreeCgTypeId params[2] = {tc.f64, tc.f64};
+ CfreeCgTypeId fn = make_func_type(&tc, tc.f64, params, 2, 0);
+ const ABIFuncInfo* abi = abi_cg_func_info(tc.c->abi, fn);
+ CGABIValue args[2];
+ args[0] =
+ call_arg_value(tc.f64, &abi->params[0],
+ op_reg_cls_(4, tc.f64, RC_FP));
+ args[1] =
+ call_arg_value(tc.f64, &abi->params[1],
+ op_reg_cls_(5, tc.f64, RC_FP));
+ CGABIValue ret =
+ call_arg_value(tc.f64, &abi->ret, op_reg_cls_(6, tc.f64, RC_FP));
+ CGCallDesc d = make_call_desc(&tc, fn, args, 2, ret);
+ CGCallPlan p;
+ target->plan_call(target, &d, &p);
+ EXPECT(p.nargs == 2, "%s fp nargs got %u", ex->name,
+ (unsigned)p.nargs);
+ expect_plan_arg(&p, 0, CG_CALL_PLAN_REG, RC_FP, ex->fp_arg[0], 0,
+ ex->name);
+ expect_plan_arg(&p, 1, CG_CALL_PLAN_REG, RC_FP, ex->fp_arg[1], 0,
+ ex->name);
+ expect_plan_ret(&p, 0, RC_FP, ex->fp_ret[0], ex->name);
+ }
+
+ { /* mixed: interleaved i64/f64 arguments; each class is expected to count its own registers */
+ CfreeCgTypeId params[4] = {tc.i64, tc.f64, tc.i64, tc.f64};
+ CfreeCgTypeId fn = make_func_type(&tc, tc.i64, params, 4, 0);
+ const ABIFuncInfo* abi = abi_cg_func_info(tc.c->abi, fn);
+ CGABIValue args[4];
+ args[0] = call_arg_value(tc.i64, &abi->params[0], op_reg_(7, tc.i64));
+ args[1] =
+ call_arg_value(tc.f64, &abi->params[1],
+ op_reg_cls_(8, tc.f64, RC_FP));
+ args[2] = call_arg_value(tc.i64, &abi->params[2], op_reg_(9, tc.i64));
+ args[3] =
+ call_arg_value(tc.f64, &abi->params[3],
+ op_reg_cls_(10, tc.f64, RC_FP));
+ CGABIValue ret = call_arg_value(tc.i64, &abi->ret, op_reg_(11, tc.i64));
+ CGCallDesc d = make_call_desc(&tc, fn, args, 4, ret);
+ CGCallPlan p;
+ target->plan_call(target, &d, &p);
+ expect_plan_arg(&p, 0, CG_CALL_PLAN_REG, RC_INT, ex->int_arg[0], 0,
+ ex->name);
+ expect_plan_arg(&p, 1, CG_CALL_PLAN_REG, RC_FP, ex->fp_arg[0], 0,
+ ex->name);
+ expect_plan_arg(&p, 2, CG_CALL_PLAN_REG, RC_INT, ex->int_arg[1], 0,
+ ex->name);
+ expect_plan_arg(&p, 3, CG_CALL_PLAN_REG, RC_FP, ex->fp_arg[1], 0,
+ ex->name);
+ }
+
+ { /* sret: the Large return adds a hidden pointer as plan argument 0 */
+ CfreeCgTypeId params[1] = {tc.i64};
+ CfreeCgTypeId fn = make_func_type(&tc, large, params, 1, 0);
+ const ABIFuncInfo* abi = abi_cg_func_info(tc.c->abi, fn);
+ CGABIValue args[1];
+ args[0] = call_arg_value(tc.i64, &abi->params[0], op_reg_(12, tc.i64));
+ CGABIValue ret = call_arg_value(large, &abi->ret, op_local_(1, large));
+ CGCallDesc d = make_call_desc(&tc, fn, args, 1, ret);
+ CGCallPlan p;
+ target->plan_call(target, &d, &p);
+ EXPECT(p.has_sret, "%s sret plan should be marked", ex->name);
+ expect_plan_arg(&p, 0, CG_CALL_PLAN_REG, RC_INT, ex->sret_reg, 0,
+ ex->name);
+ EXPECT(p.nargs > 0 && p.args[0].src_kind == CG_CALL_PLAN_SRC_ADDR, /* guard: an empty plan must fail here, not read args[0] out of range */
+ "%s sret hidden pointer should materialize an address", ex->name);
+ expect_plan_arg(&p, 1, CG_CALL_PLAN_REG, RC_INT, /* the explicit argument keeps int_arg[0] on AArch64 and shifts to int_arg[1] elsewhere */
+ ex->arch == CFREE_ARCH_ARM_64 ? ex->int_arg[0]
+ : ex->int_arg[1],
+ 0, ex->name);
+ }
+
+ { /* variadic: one fixed i64, then an f64 and an i64 passed with no ABI info */
+ CfreeCgTypeId fixed[1] = {tc.i64};
+ CfreeCgTypeId fn = make_func_type(&tc, tc.i64, fixed, 1, 1);
+ const ABIFuncInfo* abi = abi_cg_func_info(tc.c->abi, fn);
+ CGABIValue args[3];
+ args[0] = call_arg_value(tc.i64, &abi->params[0], op_reg_(13, tc.i64));
+ args[1] = call_arg_value(tc.f64, NULL, op_reg_cls_(14, tc.f64, RC_FP));
+ args[2] = call_arg_value(tc.i64, NULL, op_reg_(15, tc.i64));
+ CGABIValue ret = call_arg_value(tc.i64, &abi->ret, op_reg_(16, tc.i64));
+ CGCallDesc d = make_call_desc(&tc, fn, args, 3, ret);
+ CGCallPlan p;
+ target->plan_call(target, &d, &p);
+ EXPECT(p.is_variadic, "%s variadic plan should be marked", ex->name);
+ expect_plan_arg(&p, 0, CG_CALL_PLAN_REG, RC_INT, ex->int_arg[0], 0,
+ ex->name);
+ if (ex->variadic_fp_uses_int_reg) { /* the FP vararg takes the next integer register, pushing the i64 to int_arg[2] */
+ expect_plan_arg(&p, 1, CG_CALL_PLAN_REG, RC_INT, ex->int_arg[1], 0,
+ ex->name);
+ expect_plan_arg(&p, 2, CG_CALL_PLAN_REG, RC_INT, ex->int_arg[2], 0,
+ ex->name);
+ } else {
+ expect_plan_arg(&p, 1, CG_CALL_PLAN_REG, RC_FP, ex->fp_arg[0], 0,
+ ex->name);
+ expect_plan_arg(&p, 2, CG_CALL_PLAN_REG, RC_INT, ex->int_arg[1], 0,
+ ex->name);
+ }
+ if (ex->variadic_fp_counted)
+ EXPECT(p.variadic_fp_count == 1,
+ "%s variadic FP count got %u want 1", ex->name,
+ (unsigned)p.variadic_fp_count);
+ }
+
+ { /* stack args: nine i64 arguments exceed every table row's integer register count */
+ CfreeCgTypeId params[9];
+ CGABIValue args[9];
+ for (u32 i = 0; i < 9; ++i) params[i] = tc.i64;
+ CfreeCgTypeId fn = make_func_type(&tc, tc.i64, params, 9, 0);
+ const ABIFuncInfo* abi = abi_cg_func_info(tc.c->abi, fn);
+ for (u32 i = 0; i < 9; ++i)
+ args[i] = call_arg_value(tc.i64, &abi->params[i],
+ op_reg_((Reg)(20 + i), tc.i64));
+ CGABIValue ret = call_arg_value(tc.i64, &abi->ret, op_reg_(29, tc.i64));
+ CGCallDesc d = make_call_desc(&tc, fn, args, 9, ret);
+ CGCallPlan p;
+ target->plan_call(target, &d, &p);
+ EXPECT(p.stack_arg_size != 0, "%s stack-arg size should be nonzero",
+ ex->name);
+ expect_plan_arg(&p, ex->n_int_arg_regs, CG_CALL_PLAN_STACK, RC_INT, 0, 0, /* first argument past the register set is expected at stack offset 0 */
+ ex->name);
+ }
+
+ tc_fini(&tc);
+}
+
+static void real_arch_call_plan_layouts(void) { /* Run the call-plan layout checks once for every row of g_real_arch. */
+ for (u32 i = 0; i < sizeof g_real_arch / sizeof g_real_arch[0]; ++i)
+ real_arch_call_plan_layout_one(&g_real_arch[i]);
+}
+
static void opt_regalloc_prefers_caller_saved_for_non_call_value(void) {
TestCtx tc;
tc_init(&tc);
@@ -2857,6 +3162,93 @@ static void opt_planned_call_replay_materializes_address_args(void) {
tc_fini(&tc);
}
+static void opt_planned_call_replay_resolves_return_reg_collision(void) { /* Two return values whose source and destination registers are swapped must be extracted through the scratch register after the call. */
+ TestCtx tc;
+ tc_init(&tc);
+ MockCGTarget mock;
+ mock_init(&mock, tc.c);
+
+ Func* f = new_func(&tc);
+ f->opt_scratch_regs[RC_INT][0] = 9; /* register 9 is the only scratch available for breaking the cycle */
+ f->opt_scratch_reg_count[RC_INT] = 1;
+
+ Inst* in = ir_emit(f, f->entry, IR_CALL);
+ IRCallAux* aux = arena_znew(f->arena, IRCallAux);
+ in->extra.aux = aux;
+ aux->plan_valid = 1;
+ aux->use_plan_replay = 1;
+ aux->plan.callee = op_reg_(8, tc.i64);
+ aux->plan.rets = arena_zarray(f->arena, CGCallPlanRet, 2);
+ aux->plan.nrets = 2;
+ aux->plan.rets[0].dst = op_reg_(2, tc.i64); /* return 0: arrives in register 1, wanted in register 2 */
+ aux->plan.rets[0].cls = RC_INT;
+ aux->plan.rets[0].src_reg = 1;
+ aux->plan.rets[0].mem = mem_unknown_(tc.i64, 8);
+ aux->plan.rets[1].dst = op_reg_(1, tc.i64); /* return 1: arrives in register 2, wanted in register 1 - a two-register swap */
+ aux->plan.rets[1].cls = RC_INT;
+ aux->plan.rets[1].src_reg = 2;
+ aux->plan.rets[1].mem = mem_unknown_(tc.i64, 8);
+
+ opt_emit(tc.c, f, &mock.base);
+
+ EXPECT(mock.emit_call_plan_calls == 1,
+ "return collision call should use emit_call_plan");
+ EXPECT(mock.copy_calls == 3,
+ "two-register return collision should need three copies, got %d",
+ mock.copy_calls);
+ EXPECT(mock.event_count >= 4 && mock.events[0] == 'p' && /* event log: the planned call first, then the copies */
+ mock.events[1] == 'c',
+ "return copies should occur after the planned call branch");
+ EXPECT(mock.copy_dst[0].v.reg == 9 && mock.copy_src[0].v.reg == 1, /* copy 0: register 1 -> scratch 9 */
+ "return cycle should save first return register to scratch");
+ EXPECT(mock.copy_dst[1].v.reg == 1 && mock.copy_src[1].v.reg == 2, /* copy 1: register 2 -> register 1 */
+ "return cycle should rotate second return into first destination");
+ EXPECT(mock.copy_dst[2].v.reg == 2 && mock.copy_src[2].v.reg == 9, /* copy 2: scratch 9 -> register 2 */
+ "return cycle should restore scratch into second destination");
+ tc_fini(&tc);
+}
+
+static void opt_planned_call_replay_stores_stack_sources_before_clobber(void) { /* A stack argument sourced from a register that a register argument overwrites must be stored before that register copy. */
+ TestCtx tc;
+ tc_init(&tc);
+ MockCGTarget mock;
+ mock_init(&mock, tc.c);
+
+ Func* f = new_func(&tc);
+ Inst* in = ir_emit(f, f->entry, IR_CALL);
+ IRCallAux* aux = arena_znew(f->arena, IRCallAux);
+ in->extra.aux = aux;
+ aux->plan_valid = 1;
+ aux->use_plan_replay = 1;
+ aux->plan.callee = op_reg_(8, tc.i64);
+ aux->plan.args = arena_zarray(f->arena, CGCallPlanMove, 2);
+ aux->plan.nargs = 2;
+ aux->plan.args[0].src = op_reg_(2, tc.i64); /* argument 0: register 2 -> register 1, which overwrites register 1 */
+ aux->plan.args[0].dst_kind = CG_CALL_PLAN_REG;
+ aux->plan.args[0].cls = RC_INT;
+ aux->plan.args[0].dst_reg = 1;
+ aux->plan.args[0].mem = mem_unknown_(tc.i64, 8);
+ aux->plan.args[1].src = op_reg_(1, tc.i64); /* argument 1: register 1 -> stack offset 0, so it must be read before argument 0 is copied */
+ aux->plan.args[1].dst_kind = CG_CALL_PLAN_STACK;
+ aux->plan.args[1].cls = RC_INT;
+ aux->plan.args[1].stack_offset = 0;
+ aux->plan.args[1].mem = mem_unknown_(tc.i64, 8);
+
+ opt_emit(tc.c, f, &mock.base);
+
+ EXPECT(mock.emit_call_plan_calls == 1,
+ "stack-source hazard call should use emit_call_plan");
+ EXPECT(mock.store_call_arg_calls == 1 && mock.copy_calls == 1,
+ "expected one stack store and one register arg copy");
+ EXPECT(mock.event_count >= 3 && mock.events[0] == 's' && /* required order: stack store, register copy, planned call */
+ mock.events[1] == 'c' && mock.events[2] == 'p',
+ "stack arg source should be stored before its source reg is clobbered");
+ EXPECT(mock.last_stack_arg.src.kind == OPK_REG &&
+ mock.last_stack_arg.src.v.reg == 1, /* the store must still name register 1, not a relocated copy */
+ "stack arg should use the original source register");
+ tc_fini(&tc);
+}
+
static void opt_emit_preserves_physical_reg_zero(void) {
TestCtx tc;
tc_init(&tc);
@@ -3227,6 +3619,8 @@ static void simple_regalloc_reports_exact_used_regs(void) {
int main(void) {
opt_machinize_uses_phys_reg_metadata();
opt_machinize_filters_abi_regs_for_legacy_call_fallback();
+ opt_machinize_keeps_abi_regs_for_incoming_params();
+ real_arch_call_plan_layouts();
opt_regalloc_prefers_caller_saved_for_non_call_value();
opt_call_plan_drives_call_specific_preservation();
opt_cfg_prunes_unreachable();
@@ -3270,6 +3664,8 @@ int main(void) {
opt_planned_call_replay_preserves_indirect_callee_arg_reg();
opt_planned_call_replay_stores_stack_args();
opt_planned_call_replay_materializes_address_args();
+ opt_planned_call_replay_resolves_return_reg_collision();
+ opt_planned_call_replay_stores_stack_sources_before_clobber();
opt_emit_preserves_physical_reg_zero();
opt_emit_no_virtual_alloc();
opt_records_const_bytes_by_value();