kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 5b6f8ec5fd7128a689d19ba5f2d4bbb5dd04b4a8
parent 58b3b6295afbdbdef20c1fa9236cd47eda894c7b
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Fri, 29 May 2026 11:47:10 -0700

opt: make machine-clobber forbids survive the ABI register hints

The ABI hint passes (return-reg hint, arg-reg hints, copy-hint propagation) clear
forbidden_hard_regs bits to steer values toward ABI registers, which wiped the
fixed-register clobber forbids — so e.g. a modulo result or atomic-loop value
landed back in rdx/rcx and was clobbered.

Track clobber forbids separately in OptPRegInfo.clobbered_hard_regs (a subset of
forbidden that the hint passes must not clear), and:
- set it alongside forbidden in the machine-clobber application,
- carry it across opt_init_preg_info_from_ranges,
- have set_preg_pref_to_ret_reg / propagate_hint_through_copies skip any value
  whose hint register is in its clobbered set,
- re-apply clobbered into forbidden after the hint passes so no allocation path
  (normal, preferred-reg, group) can pick a clobbered register.

x64 parse -O1: 882 -> 888 (now also fixes continue-in-do-while/switch via the
modulo clobber, and atomic_cas_loop). Remaining: 8 varargs (return value
phi-connected into rax across va_arg), 2 VLA-param, funcptr, sadd_overflow. No
regressions: toy x64/rv64 O0+O1 156/0.

Diffstat:
M src/opt/ir.h | 5 +++++
M src/opt/pass_lower.c | 28 ++++++++++++++++++++++++++--
2 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/src/opt/ir.h b/src/opt/ir.h @@ -608,6 +608,11 @@ typedef struct OptPRegInfo { i8 preferred_hard_reg;/* soft hint for allocator; -1 = no hint */ u8 pad[1]; u32 forbidden_hard_regs; /* bit r means PReg may not allocate hard reg r. */ + /* Subset of forbidden_hard_regs that comes from a fixed-register machine + * clobber (an instruction live across this value destroys reg r — see + * Func.inst_clobbers). Unlike soft forbids, the return-register hint must not + * clear these: a value cannot live in a register clobbered within its range. */ + u32 clobbered_hard_regs; } OptPRegInfo; typedef enum OptUseKind { diff --git a/src/opt/pass_lower.c b/src/opt/pass_lower.c @@ -224,7 +224,12 @@ static void apply_machine_reg_clobbers(Func* f, Inst* in, u64* def, if ((mask & (1u << r)) == 0) continue; for (PReg v = 1; v < opt_reg_count(f); ++v) { if (!(live_after && bit_has(live_after, v)) || bit_has(def, v)) continue; - forbid_preg_reg(f, v, (u8)cls, r); + if ((u8)opt_reg_cls(f, v) != (u8)cls) continue; + f->preg_info[v].forbidden_hard_regs |= 1u << r; + /* Record this as a hard clobber so the return-register hint won't later + * clear the forbid and place the value in a register the instruction + * destroys (see set_preg_pref_to_ret_reg). */ + f->preg_info[v].clobbered_hard_regs |= 1u << r; } } } @@ -267,10 +272,15 @@ static void set_preg_pref_to_ret_reg(Func* f, const Operand* op) { if (hint == REG_NONE || hint >= 32) return; /* Don't override a real pin. */ if (f->preg_info[v].tied_hard_reg >= 0) return; + /* A value live across an instruction that clobbers the ret reg cannot live + * there; skip the hint so the allocator places it elsewhere and the return + * copy moves it into the ret reg (e.g. an accumulator returned past a va_arg, + * which uses rax). */ + if (f->preg_info[v].clobbered_hard_regs & (1u << hint)) return; /* The hint reg may not be in opt_hard_regs (e.g. 
x0 on aa64 is reserved * as the ABI ret reg, outside aa_int_allocable); the allocator's * preferred-reg branch will still consider it via the unit-overlap - * precision check. Clear any leftover forbid bit so the hint isn't + * precision check. Clear any leftover soft forbid bit so the hint isn't * silently blocked. */ f->preg_info[v].forbidden_hard_regs &= ~(1u << hint); f->preg_info[v].preferred_hard_reg = (i8)hint; @@ -449,6 +459,11 @@ static void propagate_hint_through_copies(Func* f) { if (f->preg_info[src].tied_hard_reg >= 0) continue; if (f->preg_info[src].preferred_hard_reg >= 0) continue; if (f->preg_info[dst].cls != f->preg_info[src].cls) continue; + /* Don't propagate the hint into a value clobbered out of that register by + * a machine instruction live across it (e.g. a loop accumulator returned + * past a va_arg/idiv): it cannot live there. Leave it allocated elsewhere; + * the copy moves it into the hinted register. */ + if (f->preg_info[src].clobbered_hard_regs & (1u << (Reg)dst_pref)) continue; f->preg_info[src].forbidden_hard_regs &= ~(1u << (Reg)dst_pref); f->preg_info[src].preferred_hard_reg = dst_pref; } @@ -730,6 +745,7 @@ static void opt_init_preg_info_from_ranges(Func* f, for (PReg v = 0; v < opt_reg_count(f); ++v) { i32 tied = old ? old[v].tied_hard_reg : -1; u32 forbidden = old ? old[v].forbidden_hard_regs : 0; + u32 clobbered = old ? old[v].clobbered_hard_regs : 0; u32 old_frequency = old ? old[v].frequency : 0; i8 pref = old ? 
old[v].preferred_hard_reg : (i8)-1; OptPRegInfo* vi = &info[v]; @@ -740,6 +756,7 @@ static void opt_init_preg_info_from_ranges(Func* f, vi->alloc_kind = OPT_ALLOC_NONE; vi->cls = opt_reg_cls(f, v); vi->forbidden_hard_regs = forbidden; + vi->clobbered_hard_regs = clobbered; if (!ranges || v == PREG_NONE || v == 0 || ranges->first_range_by_preg[v] == OPT_RANGE_NONE) { continue; @@ -2012,6 +2029,13 @@ static void opt_regalloc_place(Func* f, int allow_live_range_split, opt_init_preg_info_from_ranges(f, &ranges); opt_apply_asm_constraints_from_live(f, &live); apply_abi_aliasing_hints(f); + /* The ABI hint passes clear forbid bits to steer values toward ABI registers + * (ret reg, arg regs). Restore the hard machine-clobber forbids afterward so + * no allocation path (normal, preferred-reg, or two-address group) can place a + * value in a register that an instruction live across it destroys. */ + if (f->preg_info) + for (PReg v = 1; v < opt_reg_count(f); ++v) + f->preg_info[v].forbidden_hard_regs |= f->preg_info[v].clobbered_hard_regs; /* MIR coalesces only at -O2 (mir-gen.c:9431); match that here. At O1 the * point-bitmap allocator emits copies through the natural conflict-free * path. IRF_NO_COALESCE protects SSA edge copies inserted at O2. */