kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 90d63a81f19f98008ca8cd0e5ba111793138f6a7
parent 7e395f14c533274c31be05b48f775225c0d64d37
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Thu, 14 May 2026 12:47:51 -0700

Remove backend allocator pool state

Diffstat:
M src/arch/aarch64/alloc.c | 11 +----------
M src/arch/aarch64/emit.c | 53 ++++++++++++++++++++++++++++++-----------------------
M src/arch/aarch64/internal.h | 15 +++------------
M src/arch/aarch64/ops.c | 16 ++++------------
M src/arch/aarch64/opt_coord.c | 35 ++++++++++++++++++++---------------
M src/arch/rv64/emit.c | 27 ++++++++++++++++++---------
M src/arch/rv64/internal.h | 13 ++-----------
M src/arch/rv64/opt_coord.c | 34 ++++++++++++++++++++--------------
M src/arch/x64/alloc.c | 13 +++----------
M src/arch/x64/emit.c | 16 ++++++++++------
M src/arch/x64/internal.h | 13 +++----------
M src/arch/x64/opt_coord.c | 32 +++++++++++++++++++-------------
M src/opt/opt.c | 6 ++----
13 files changed, 135 insertions(+), 149 deletions(-)

diff --git a/src/arch/aarch64/alloc.c b/src/arch/aarch64/alloc.c @@ -1,5 +1,4 @@ -/* aarch64/alloc.c — register pool, spill/reload, labels, control flow, - * structured scopes. */ +/* aarch64/alloc.c — spill/reload, labels, control flow, structured scopes. */ #include "arch/aarch64/internal.h" @@ -10,14 +9,6 @@ AAImpl* impl_of(CGTarget* t) { return (AAImpl*)t; } /* ============================================================ - * RegPool - * ============================================================ */ - -void regpool_init(RegPool* p, u8 base, u8 nregs) { - cg_simple_regpool_init_range(p, base, nregs); -} - -/* ============================================================ * Slot accessor * ============================================================ */ diff --git a/src/arch/aarch64/emit.c b/src/arch/aarch64/emit.c @@ -52,6 +52,14 @@ u32 size_idx_for_bytes(u32 nbytes) { u32 reg_num(Operand op) { return op.v.reg & 0x1fu; } +static u32 collect_mask_regs(u32 mask, u32 first, u32 last, u32* out) { + u32 n = 0; + for (u32 r = first; r <= last; ++r) { + if (mask & (1u << r)) out[n++] = r; + } + return n; +} + /* ============================================================ * Low-level emission * ============================================================ */ @@ -151,8 +159,8 @@ void aa_func_begin(CGTarget* t, const CGFuncDesc* fd) { a->has_sret = (fd->abi && fd->abi->has_sret) ? 1 : 0; a->cum_off = 0; a->max_outgoing = 0; - regpool_init(&a->int_pool, /*base=*/19u, /*nregs=*/10u); - regpool_init(&a->fp_pool, /*base=*/8u, /*nregs=*/16u); + a->used_cs_int_mask = 0; + a->used_cs_fp_mask = 0; a->nslots = 0; a->nscopes = 0; a->has_alloca = 0; @@ -217,14 +225,15 @@ void aa_func_end(CGTarget* t) { AAImpl* a = impl_of(t); MCEmitter* mc = t->mc; - u32 n_int_pairs = (a->int_pool.hwm + 1) / 2; - u32 used_fp_cs = a->fp_pool.hwm > 8 ? 
8u : a->fp_pool.hwm; - u32 n_fp_pairs = (used_fp_cs + 1) / 2; + u32 int_regs[10]; + u32 fp_regs[8]; + u32 n_int_saves = collect_mask_regs(a->used_cs_int_mask, 19u, 28u, int_regs); + u32 n_fp_saves = collect_mask_regs(a->used_cs_fp_mask, 8u, 15u, fp_regs); u32 outgoing_off = 0; u32 int_save_off = a->max_outgoing; - u32 fp_save_off = int_save_off + n_int_pairs * 16; - u32 locals_off = fp_save_off + n_fp_pairs * 16; + u32 fp_save_off = int_save_off + n_int_saves * 8u; + u32 locals_off = fp_save_off + n_fp_saves * 8u; u32 fp_lr_off = locals_off + a->cum_off; u32 frame_size = fp_lr_off + 16; frame_size = (frame_size + 15u) & ~15u; @@ -244,15 +253,15 @@ void aa_func_end(CGTarget* t) { } } - for (i32 i = (i32)n_fp_pairs - 1; i >= 0; --i) { - u32 r0 = 8u + (u32)i * 2u; - u32 r1 = r0 + 1u; - aa64_emit32(mc, aa64_ldp_d(r0, r1, 31, (i32)(fp_save_off + (u32)i * 16u))); + for (i32 i = (i32)n_fp_saves - 1; i >= 0; --i) { + u32 r0 = fp_regs[i]; + aa64_emit32(mc, aa64_ldr_fp_uimm(3, r0, 31, + fp_save_off + (u32)i * 8u)); } - for (i32 i = (i32)n_int_pairs - 1; i >= 0; --i) { - u32 r0 = 19u + (u32)i * 2u; - u32 r1 = r0 + 1u; - aa64_emit32(mc, aa64_ldp_x(r0, r1, 31, (i32)(int_save_off + (u32)i * 16u))); + for (i32 i = (i32)n_int_saves - 1; i >= 0; --i) { + u32 r0 = int_regs[i]; + aa64_emit32(mc, aa64_ldr_uimm(3, r0, 31, + int_save_off + (u32)i * 8u)); } aa64_emit32(mc, aa64_ldp_x(29, 30, 31, (i32)fp_lr_off)); emit_sp_add(mc, frame_size); @@ -288,17 +297,15 @@ void aa_func_end(CGTarget* t) { words[wi++] = aa64_stur(3, 8, 29, -(i32)s->off); } } - for (u32 i = 0; i < n_int_pairs; ++i) { - u32 r0 = 19u + i * 2u; - u32 r1 = r0 + 1u; + for (u32 i = 0; i < n_int_saves; ++i) { + u32 r0 = int_regs[i]; if (wi >= AA_PROLOGUE_WORDS) goto overflow; - words[wi++] = aa64_stp_x(r0, r1, 31, (i32)(int_save_off + i * 16u)); + words[wi++] = aa64_str_uimm(3, r0, 31, int_save_off + i * 8u); } - for (u32 i = 0; i < n_fp_pairs; ++i) { - u32 r0 = 8u + i * 2u; - u32 r1 = r0 + 1u; + for (u32 i = 0; i < 
n_fp_saves; ++i) { + u32 r0 = fp_regs[i]; if (wi >= AA_PROLOGUE_WORDS) goto overflow; - words[wi++] = aa64_stp_d(r0, r1, 31, (i32)(fp_save_off + i * 16u)); + words[wi++] = aa64_str_fp_uimm(3, r0, 31, fp_save_off + i * 8u); } if (0) { overflow: diff --git a/src/arch/aarch64/internal.h b/src/arch/aarch64/internal.h @@ -8,7 +8,6 @@ #include "arch/aa64_isa.h" #include "arch/aa64_regs.h" #include "arch/arch.h" -#include "arch/regalloc.h" #include "core/arena.h" #include "core/pool.h" #include "obj/obj.h" @@ -199,17 +198,11 @@ static inline u32 aa64_bfm(u32 sf, u32 Rd, u32 Rn, u32 immr, u32 imms) { } /* ============================================================ - * RegPool - * ============================================================ */ - -typedef CGSimpleRegPool RegPool; - -/* ============================================================ * AAImpl types * ============================================================ */ #define AA_PROLOGUE_WORDS \ - 12u /* worst case: sub sp + stp/add fp + 5 int + 4 fp = 11 */ + 22u /* worst case: sub sp + stp/add fp + sret + 5 int + 8 fp saves */ typedef struct AASlot { u32 off; @@ -250,8 +243,8 @@ typedef struct AAImpl { u8 has_sret; FrameSlot sret_ptr_slot; - RegPool int_pool; - RegPool fp_pool; + u32 used_cs_int_mask; /* bit reg set when x19-x28 must be preserved */ + u32 used_cs_fp_mask; /* bit reg set when d8-d15 must be preserved */ AAScope* scopes; u32 nscopes; @@ -274,8 +267,6 @@ typedef struct AAImpl { * Cross-file forward declarations * ============================================================ */ -/* regpool (alloc.c) */ -void regpool_init(RegPool* p, u8 base, u8 nregs); /* emit.c helpers used in alloc.c / ops.c */ void aa64_emit32(MCEmitter* mc, u32 word); void aa64_patch32(ObjBuilder* obj, u32 sec_id, u32 ofs, u32 word); diff --git a/src/arch/aarch64/ops.c b/src/arch/aarch64/ops.c @@ -1833,19 +1833,11 @@ static void aa_asm_block(CGTarget* t, const char* tmpl, RegClass cls; if (t->resolve_reg_name(t, clobs[i], &phys, 
&cls) != 0) continue; if (cls == RC_INT) { - u32 idx = (u32)phys; - RegPool* p = &a_impl->int_pool; - if (idx >= p->base && idx < (u32)(p->base + p->nregs)) { - u32 off = idx - p->base + 1u; - if (off > p->hwm) p->hwm = off; - } + if (phys >= 19u && phys <= 28u) + a_impl->used_cs_int_mask |= 1u << phys; } else if (cls == RC_FP) { - u32 idx = (u32)phys; - RegPool* p = &a_impl->fp_pool; - if (idx >= p->base && idx < (u32)(p->base + p->nregs)) { - u32 off = idx - p->base + 1u; - if (off > p->hwm) p->hwm = off; - } + if (phys >= 8u && phys <= 15u) + a_impl->used_cs_fp_mask |= 1u << phys; } } AA64Asm* a = aa64_asm_open(t->c); diff --git a/src/arch/aarch64/opt_coord.c b/src/arch/aarch64/opt_coord.c @@ -5,11 +5,12 @@ #include "arch/aarch64/internal.h" /* ============================================================ - * Static register tables (match regpool_init in emit.c). */ + * Static register tables reported to caller-owned allocators. */ -static const Reg aa_int_pool[] = {19, 20, 21, 22, 23, 24, 25, 26, 27, 28}; -static const Reg aa_fp_pool[] = {8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23}; +static const Reg aa_int_allocable[] = {19, 20, 21, 22, 23, + 24, 25, 26, 27, 28}; +static const Reg aa_fp_allocable[] = {8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23}; static const Reg aa_int_scratch[] = {16, 17}; static const Reg aa_fp_scratch[] = {24, 25}; @@ -22,12 +23,12 @@ static void aa_get_allocable_regs(CGTarget* t, RegClass cls, (void)t; switch (cls) { case RC_INT: - *out = aa_int_pool; - *nregs = sizeof aa_int_pool / sizeof aa_int_pool[0]; + *out = aa_int_allocable; + *nregs = sizeof aa_int_allocable / sizeof aa_int_allocable[0]; break; case RC_FP: - *out = aa_fp_pool; - *nregs = sizeof aa_fp_pool / sizeof aa_fp_pool[0]; + *out = aa_fp_allocable; + *nregs = sizeof aa_fp_allocable / sizeof aa_fp_allocable[0]; break; default: *out = NULL; @@ -72,14 +73,18 @@ static int aa_is_caller_saved(CGTarget* t, RegClass cls, Reg reg) { static void 
aa_reserve_hard_regs(CGTarget* t, RegClass cls, const Reg* regs, u32 n) { AAImpl* a = impl_of(t); - RegPool* p; - switch (cls) { - case RC_INT: p = &a->int_pool; break; - case RC_FP: p = &a->fp_pool; break; - default: return; - } for (u32 i = 0; i < n; ++i) { - cg_simple_regpool_reserve(p, regs[i]); + Reg r = regs[i]; + switch (cls) { + case RC_INT: + if (r >= 19u && r <= 28u) a->used_cs_int_mask |= 1u << r; + break; + case RC_FP: + if (r >= 8u && r <= 15u) a->used_cs_fp_mask |= 1u << r; + break; + default: + break; + } } } diff --git a/src/arch/rv64/emit.c b/src/arch/rv64/emit.c @@ -2,6 +2,14 @@ #include "arch/rv64/internal.h" +static u32 collect_mask_regs(u32 mask, u32 first, u32 last, u32* out) { + u32 n = 0; + for (u32 r = first; r <= last; ++r) { + if (mask & (1u << r)) out[n++] = r; + } + return n; +} + void rv64_emit32(MCEmitter* mc, u32 word) { u32 ofs = obj_pos(mc->obj, mc->section_id); u8 b[4]; @@ -111,8 +119,8 @@ void rv_func_begin(CGTarget* t, const CGFuncDesc* fd) { a->cum_off = 0; a->max_outgoing = 0; a->fp_pair_off = 0; - regpool_init(&a->int_pool, /*base=*/18u, /*nregs=*/10u); /* s2..s11 */ - regpool_init(&a->fp_pool, /*base=*/18u, /*nregs=*/10u); /* fs2..fs11 */ + a->used_cs_int_mask = 0; + a->used_cs_fp_mask = 0; a->nslots = 0; a->nscopes = 0; a->has_alloca = 0; @@ -161,8 +169,10 @@ void rv_func_end(CGTarget* t) { ObjBuilder* obj = t->obj; u32 sec = a->fd->text_section_id; - u32 n_int_saves = a->int_pool.hwm; /* s2..s2+hwm-1 */ - u32 n_fp_saves = a->fp_pool.hwm; + u32 int_regs[10]; + u32 fp_regs[10]; + u32 n_int_saves = collect_mask_regs(a->used_cs_int_mask, 18u, 27u, int_regs); + u32 n_fp_saves = collect_mask_regs(a->used_cs_fp_mask, 18u, 27u, fp_regs); u32 max_out = (a->max_outgoing + 15u) & ~15u; u32 int_saves_sz = n_int_saves * 8u; u32 fp_saves_sz = n_fp_saves * 8u; @@ -212,12 +222,12 @@ void rv_func_end(CGTarget* t) { /* Reverse order: ints first (lowest address) on restore, but we emit * the restore loop in reverse to keep the 
prologue/epilogue symmetric. */ for (i32 i = (i32)n_int_saves - 1; i >= 0; --i) { - u32 r = 18u + (u32)i; /* s2 + i */ + u32 r = int_regs[i]; i32 off = int_save_base - 8 * (i32)i; rv64_emit32(mc, rv_ld(r, RV_S0, off)); } for (i32 i = (i32)n_fp_saves - 1; i >= 0; --i) { - u32 r = 18u + (u32)i; /* fs2 + i (fp reg number) */ + u32 r = fp_regs[i]; i32 off = fp_save_base - 8 * (i32)i; rv64_emit32(mc, rv_fld(r, RV_S0, off)); } @@ -286,14 +296,14 @@ void rv_func_end(CGTarget* t) { } /* int saves */ for (u32 i = 0; i < n_int_saves; ++i) { - u32 r = 18u + i; + u32 r = int_regs[i]; i32 off = int_save_base - 8 * (i32)i; if (wi >= RV_PROLOGUE_WORDS) goto overflow; words[wi++] = rv_sd(r, RV_S0, off); } /* fp saves */ for (u32 i = 0; i < n_fp_saves; ++i) { - u32 r = 18u + i; + u32 r = fp_regs[i]; i32 off = fp_save_base - 8 * (i32)i; if (wi >= RV_PROLOGUE_WORDS) goto overflow; words[wi++] = rv_fsd(r, RV_S0, off); @@ -329,4 +339,3 @@ void rv_func_end(CGTarget* t) { mc->cfi_endproc(mc); a->fd = NULL; } - diff --git a/src/arch/rv64/internal.h b/src/arch/rv64/internal.h @@ -5,7 +5,6 @@ #include <string.h> #include "arch/arch.h" -#include "arch/regalloc.h" #include "arch/rv64.h" #include "arch/rv64_isa.h" #include "core/arena.h" @@ -14,9 +13,6 @@ #define RV_PROLOGUE_WORDS 32u -/* ---- RegPool ---- */ -typedef CGSimpleRegPool RegPool; - /* ---- RvSlot / RvScope ---- */ typedef struct RvSlot { u32 off; /* bytes below s0 (positive); address = s0 - off */ @@ -59,8 +55,8 @@ typedef struct RImpl { u8 has_sret; FrameSlot sret_ptr_slot; - RegPool int_pool; - RegPool fp_pool; + u32 used_cs_int_mask; /* bit reg set for s2-s11 */ + u32 used_cs_fp_mask; /* bit reg set for fs2-fs11 */ RvScope* scopes; u32 nscopes; @@ -108,11 +104,6 @@ static inline int type_is_signed(CfreeCgTypeId t) { static inline u32 reg_num(Operand op) { return op.v.reg & 0x1fu; } -/* ---- RegPool ops (inlined — identical in each caller) ---- */ -static inline void regpool_init(RegPool* p, u8 base, u8 nregs) { - 
cg_simple_regpool_init_range(p, base, nregs); -} - /* ---- emit.c: function lifecycle (referenced by ops.c vtable) ---- */ void rv_func_begin(CGTarget* t, const CGFuncDesc* fd); void rv_func_end(CGTarget* t); diff --git a/src/arch/rv64/opt_coord.c b/src/arch/rv64/opt_coord.c @@ -3,10 +3,12 @@ #include "arch/rv64/internal.h" /* ============================================================ - * Static register tables (match regpool_init in emit.c). */ + * Static register tables reported to caller-owned allocators. */ -static const Reg rv_int_pool[] = {18, 19, 20, 21, 22, 23, 24, 25, 26, 27}; -static const Reg rv_fp_pool[] = {18, 19, 20, 21, 22, 23, 24, 25, 26, 27}; +static const Reg rv_int_allocable[] = {18, 19, 20, 21, 22, + 23, 24, 25, 26, 27}; +static const Reg rv_fp_allocable[] = {18, 19, 20, 21, 22, + 23, 24, 25, 26, 27}; static const Reg rv_int_scratch[] = {5, 6}; /* t0, t1 */ static const Reg rv_fp_scratch[] = {0}; /* ft0 */ @@ -19,12 +21,12 @@ static void rv_get_allocable_regs(CGTarget* t, RegClass cls, (void)t; switch (cls) { case RC_INT: - *out = rv_int_pool; - *nregs = sizeof rv_int_pool / sizeof rv_int_pool[0]; + *out = rv_int_allocable; + *nregs = sizeof rv_int_allocable / sizeof rv_int_allocable[0]; break; case RC_FP: - *out = rv_fp_pool; - *nregs = sizeof rv_fp_pool / sizeof rv_fp_pool[0]; + *out = rv_fp_allocable; + *nregs = sizeof rv_fp_allocable / sizeof rv_fp_allocable[0]; break; default: *out = NULL; @@ -71,14 +73,18 @@ static int rv_is_caller_saved(CGTarget* t, RegClass cls, Reg reg) { static void rv_reserve_hard_regs(CGTarget* t, RegClass cls, const Reg* regs, u32 n) { RImpl* a = impl_of(t); - RegPool* p; - switch (cls) { - case RC_INT: p = &a->int_pool; break; - case RC_FP: p = &a->fp_pool; break; - default: return; - } for (u32 i = 0; i < n; ++i) { - cg_simple_regpool_reserve(p, regs[i]); + Reg r = regs[i]; + switch (cls) { + case RC_INT: + if (r >= 18u && r <= 27u) a->used_cs_int_mask |= 1u << r; + break; + case RC_FP: + if (r >= 18u && r <= 
27u) a->used_cs_fp_mask |= 1u << r; + break; + default: + break; + } } } diff --git a/src/arch/x64/alloc.c b/src/arch/x64/alloc.c @@ -1,7 +1,7 @@ -/* arch/x64/alloc.c — register pool, spill/reload, labels, control flow. +/* arch/x64/alloc.c — frame slots, spill/reload, labels, control flow. * - * Covers: xpool_init, x_frame_slot, x64_slot_get, x_param, x_spill_reg, - * x_reload_reg, x_label_*, + * Covers: x_frame_slot, x64_slot_get, x_param, x_spill_reg, x_reload_reg, + * x_label_*, * emit_jmp_label, emit_jcc_label, x_jump, x64_force_reg_int, emit_cmp_ab, * x_cmp_branch, x_cmp, x_scope_*, x_break_to, x_continue_to. */ @@ -17,13 +17,6 @@ #include "arch/x64/internal.h" /* ============================================================ - * XRegPool implementation. */ - -void xpool_init(XRegPool* p, const Reg* order, u32 nregs) { - cg_simple_regpool_init_ordered(p, order, nregs); -} - -/* ============================================================ * Registers / frame */ FrameSlot x_frame_slot(CGTarget* t, const FrameSlotDesc* d) { diff --git a/src/arch/x64/emit.c b/src/arch/x64/emit.c @@ -478,8 +478,8 @@ void x_func_begin(CGTarget* t, const CGFuncDesc* fd) { a->is_variadic = (fd->abi && fd->abi->variadic) ? 1 : 0; a->cum_off = 0; a->max_outgoing = 0; - xpool_init(&a->int_pool, g_int_order, 6u); - xpool_init(&a->fp_pool, g_fp_order, 10u); + a->used_cs_int_mask = 0; + a->used_cs_fp_mask = 0; a->nslots = 0; a->nscopes = 0; a->nalloca_patches = 0; @@ -537,8 +537,12 @@ void x_func_end(CGTarget* t) { XImpl* a = impl_of(t); MCEmitter* mc = t->mc; - u32 cs_used = a->int_pool.hwm; - if (cs_used > 5u) cs_used = 5u; + Reg cs_regs[5]; + u32 cs_used = 0; + for (u32 i = 0; i < 5u; ++i) { + Reg r = g_int_order[i]; + if (a->used_cs_int_mask & (1u << r)) cs_regs[cs_used++] = r; + } u32 cs_size = cs_used * 8u; /* Stack alignment: SysV requires rsp ≡ 0 mod 16 just before a call, @@ -554,7 +558,7 @@ void x_func_end(CGTarget* t) { /* Restore callee-saves. Each at rbp - (cum_off + (i+1)*8). 
*/ for (i32 i = (i32)cs_used - 1; i >= 0; --i) { - u32 reg = a->int_pool.order[i]; + u32 reg = cs_regs[i]; i32 off = -(i32)a->cum_off - (i32)(i + 1) * 8; emit_mov_load(mc, /*size=*/8, /*signed=*/0, reg, X64_RBP, off); } @@ -601,7 +605,7 @@ void x_func_end(CGTarget* t) { /* Spill callee-saves. */ for (u32 i = 0; i < cs_used; ++i) { - u32 reg = a->int_pool.order[i]; + u32 reg = cs_regs[i]; i32 off = -(i32)a->cum_off - (i32)(i + 1) * 8; if (wi + 7 > X64_PROLOGUE_BYTES) goto overflow; buf[wi++] = (u8)(X64_REX_BASE | X64_REX_W | ((reg & 8) ? X64_REX_R : 0)); diff --git a/src/arch/x64/internal.h b/src/arch/x64/internal.h @@ -1,7 +1,7 @@ /* arch/x64/internal.h — private header shared by emit.c, alloc.c, ops.c. * * Contains: - * - XRegPool, XSlot, XScope, XAllocaPatch, XImpl struct definitions + * - XSlot, XScope, XAllocaPatch, XImpl struct definitions * - impl_of() accessor * - Small type helpers (static inline) * - Forward declarations of cross-file functions @@ -13,7 +13,6 @@ #include <string.h> #include "arch/arch.h" -#include "arch/regalloc.h" #include "arch/x64.h" #include "arch/x64_isa.h" #include "core/arena.h" @@ -23,11 +22,6 @@ #define X64_PROLOGUE_BYTES 96u /* ============================================================ - * Custom register pool. */ - -typedef CGSimpleRegPool XRegPool; - -/* ============================================================ * XImpl and friends. 
*/ typedef struct XSlot { @@ -80,8 +74,8 @@ typedef struct XImpl { FrameSlot sret_ptr_slot; FrameSlot reg_save_slot; /* variadic: 176-byte __va_list_tag reg save area */ - XRegPool int_pool; - XRegPool fp_pool; + u32 used_cs_int_mask; /* SysV callee-saved GPRs used by this function */ + u32 used_cs_fp_mask; /* reserved for ABIs with callee-saved FP regs */ XScope* scopes; u32 nscopes; @@ -190,7 +184,6 @@ void emit_sse_rr_w(MCEmitter* mc, u8 prefix, u8 opcode, int w, u32 dst, u32 src); /* --- alloc.c exports (used by emit.c and/or ops.c) --- */ -void xpool_init(XRegPool* p, const Reg* order, u32 nregs); XSlot* x64_slot_get(XImpl* a, FrameSlot fs); FrameSlot x_frame_slot(CGTarget* t, const FrameSlotDesc* d); void x_param(CGTarget* t, const CGParamDesc* p); diff --git a/src/arch/x64/opt_coord.c b/src/arch/x64/opt_coord.c @@ -3,7 +3,7 @@ #include "arch/x64/internal.h" /* ============================================================ - * Scratch tables (not in XRegPool.order, used by backend internals). */ + * Scratch tables used by backend internals and opt spill rewriting. 
*/ static const Reg x_int_scratch[] = {X64_R11}; /* RAX is reserved for backend */ static const Reg x_fp_scratch[] = {X64_XMM0 + 15}; /* xmm15 */ @@ -13,15 +13,15 @@ static const Reg x_fp_scratch[] = {X64_XMM0 + 15}; /* xmm15 */ static void x_get_allocable_regs(CGTarget* t, RegClass cls, const Reg** out, u32* nregs) { - XImpl* a = impl_of(t); + (void)t; switch (cls) { case RC_INT: - *out = a->int_pool.order; - *nregs = a->int_pool.nregs; + *out = g_int_order; + *nregs = 6u; break; case RC_FP: - *out = a->fp_pool.order; - *nregs = a->fp_pool.nregs; + *out = g_fp_order; + *nregs = 10u; break; default: *out = NULL; @@ -68,14 +68,20 @@ static int x_is_caller_saved(CGTarget* t, RegClass cls, Reg reg) { static void x_reserve_hard_regs(CGTarget* t, RegClass cls, const Reg* regs, u32 n) { XImpl* a = impl_of(t); - XRegPool* p; - switch (cls) { - case RC_INT: p = &a->int_pool; break; - case RC_FP: p = &a->fp_pool; break; - default: return; - } for (u32 i = 0; i < n; ++i) { - cg_simple_regpool_reserve(p, regs[i]); + Reg r = regs[i]; + switch (cls) { + case RC_INT: + if (!x_is_caller_saved(t, cls, r) && r < 32u) + a->used_cs_int_mask |= 1u << r; + break; + case RC_FP: + if (!x_is_caller_saved(t, cls, r) && r < 32u) + a->used_cs_fp_mask |= 1u << r; + break; + default: + break; + } } } diff --git a/src/opt/opt.c b/src/opt/opt.c @@ -1280,12 +1280,10 @@ static void replay_func_to(Compiler* c, Func* f, CGTarget* w, int identity) { * regs were actually assigned so it can save the right callee-saved * subset in prologue/epilogue. * - * We skip FP for now: AArch64's FP prologue placeholder is - * fixed-size and may not fit the full FP pool. This matches the - * conservative behaviour of the old alloc/free hwm-bump loop. */ + * The backend records only callee-saved members of this set for + * prologue/epilogue preservation. 
*/ if (r.identity_regs && w->reserve_hard_regs) { for (u32 c = 0; c < OPT_REG_CLASSES; ++c) { - if (c == RC_FP) continue; Reg used[OPT_MAX_HARD_REGS]; u32 nused = 0; for (Val v = 1; v < f->nvals; ++v) {