commit 90d63a81f19f98008ca8cd0e5ba111793138f6a7
parent 7e395f14c533274c31be05b48f775225c0d64d37
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Thu, 14 May 2026 12:47:51 -0700
Remove backend allocator pool state
Diffstat:
13 files changed, 135 insertions(+), 149 deletions(-)
diff --git a/src/arch/aarch64/alloc.c b/src/arch/aarch64/alloc.c
@@ -1,5 +1,4 @@
-/* aarch64/alloc.c — register pool, spill/reload, labels, control flow,
- * structured scopes. */
+/* aarch64/alloc.c — spill/reload, labels, control flow, structured scopes. */
#include "arch/aarch64/internal.h"
@@ -10,14 +9,6 @@
AAImpl* impl_of(CGTarget* t) { return (AAImpl*)t; }
/* ============================================================
- * RegPool
- * ============================================================ */
-
-void regpool_init(RegPool* p, u8 base, u8 nregs) {
- cg_simple_regpool_init_range(p, base, nregs);
-}
-
-/* ============================================================
* Slot accessor
* ============================================================ */
diff --git a/src/arch/aarch64/emit.c b/src/arch/aarch64/emit.c
@@ -52,6 +52,14 @@ u32 size_idx_for_bytes(u32 nbytes) {
u32 reg_num(Operand op) { return op.v.reg & 0x1fu; }
+static u32 collect_mask_regs(u32 mask, u32 first, u32 last, u32* out) {
+ u32 n = 0; /* entries written to out so far */
+ for (u32 r = first; r <= last; ++r) { /* inclusive range, ascending */
+ if (mask & (1u << r)) out[n++] = r; /* needs r < 32 and room in out */
+ }
+ return n; /* out[0..n) lists the set bits of mask within [first, last] */
+}
+
/* ============================================================
* Low-level emission
* ============================================================ */
@@ -151,8 +159,8 @@ void aa_func_begin(CGTarget* t, const CGFuncDesc* fd) {
a->has_sret = (fd->abi && fd->abi->has_sret) ? 1 : 0;
a->cum_off = 0;
a->max_outgoing = 0;
- regpool_init(&a->int_pool, /*base=*/19u, /*nregs=*/10u);
- regpool_init(&a->fp_pool, /*base=*/8u, /*nregs=*/16u);
+ a->used_cs_int_mask = 0;
+ a->used_cs_fp_mask = 0;
a->nslots = 0;
a->nscopes = 0;
a->has_alloca = 0;
@@ -217,14 +225,15 @@ void aa_func_end(CGTarget* t) {
AAImpl* a = impl_of(t);
MCEmitter* mc = t->mc;
- u32 n_int_pairs = (a->int_pool.hwm + 1) / 2;
- u32 used_fp_cs = a->fp_pool.hwm > 8 ? 8u : a->fp_pool.hwm;
- u32 n_fp_pairs = (used_fp_cs + 1) / 2;
+ u32 int_regs[10];
+ u32 fp_regs[8];
+ u32 n_int_saves = collect_mask_regs(a->used_cs_int_mask, 19u, 28u, int_regs);
+ u32 n_fp_saves = collect_mask_regs(a->used_cs_fp_mask, 8u, 15u, fp_regs);
u32 outgoing_off = 0;
u32 int_save_off = a->max_outgoing;
- u32 fp_save_off = int_save_off + n_int_pairs * 16;
- u32 locals_off = fp_save_off + n_fp_pairs * 16;
+ u32 fp_save_off = int_save_off + n_int_saves * 8u;
+ u32 locals_off = fp_save_off + n_fp_saves * 8u;
u32 fp_lr_off = locals_off + a->cum_off;
u32 frame_size = fp_lr_off + 16;
frame_size = (frame_size + 15u) & ~15u;
@@ -244,15 +253,15 @@ void aa_func_end(CGTarget* t) {
}
}
- for (i32 i = (i32)n_fp_pairs - 1; i >= 0; --i) {
- u32 r0 = 8u + (u32)i * 2u;
- u32 r1 = r0 + 1u;
- aa64_emit32(mc, aa64_ldp_d(r0, r1, 31, (i32)(fp_save_off + (u32)i * 16u)));
+ for (i32 i = (i32)n_fp_saves - 1; i >= 0; --i) {
+ u32 r0 = fp_regs[i];
+ aa64_emit32(mc, aa64_ldr_fp_uimm(3, r0, 31,
+ fp_save_off + (u32)i * 8u));
}
- for (i32 i = (i32)n_int_pairs - 1; i >= 0; --i) {
- u32 r0 = 19u + (u32)i * 2u;
- u32 r1 = r0 + 1u;
- aa64_emit32(mc, aa64_ldp_x(r0, r1, 31, (i32)(int_save_off + (u32)i * 16u)));
+ for (i32 i = (i32)n_int_saves - 1; i >= 0; --i) {
+ u32 r0 = int_regs[i];
+ aa64_emit32(mc, aa64_ldr_uimm(3, r0, 31,
+ int_save_off + (u32)i * 8u));
}
aa64_emit32(mc, aa64_ldp_x(29, 30, 31, (i32)fp_lr_off));
emit_sp_add(mc, frame_size);
@@ -288,17 +297,15 @@ void aa_func_end(CGTarget* t) {
words[wi++] = aa64_stur(3, 8, 29, -(i32)s->off);
}
}
- for (u32 i = 0; i < n_int_pairs; ++i) {
- u32 r0 = 19u + i * 2u;
- u32 r1 = r0 + 1u;
+ for (u32 i = 0; i < n_int_saves; ++i) {
+ u32 r0 = int_regs[i];
if (wi >= AA_PROLOGUE_WORDS) goto overflow;
- words[wi++] = aa64_stp_x(r0, r1, 31, (i32)(int_save_off + i * 16u));
+ words[wi++] = aa64_str_uimm(3, r0, 31, int_save_off + i * 8u);
}
- for (u32 i = 0; i < n_fp_pairs; ++i) {
- u32 r0 = 8u + i * 2u;
- u32 r1 = r0 + 1u;
+ for (u32 i = 0; i < n_fp_saves; ++i) {
+ u32 r0 = fp_regs[i];
if (wi >= AA_PROLOGUE_WORDS) goto overflow;
- words[wi++] = aa64_stp_d(r0, r1, 31, (i32)(fp_save_off + i * 16u));
+ words[wi++] = aa64_str_fp_uimm(3, r0, 31, fp_save_off + i * 8u);
}
if (0) {
overflow:
diff --git a/src/arch/aarch64/internal.h b/src/arch/aarch64/internal.h
@@ -8,7 +8,6 @@
#include "arch/aa64_isa.h"
#include "arch/aa64_regs.h"
#include "arch/arch.h"
-#include "arch/regalloc.h"
#include "core/arena.h"
#include "core/pool.h"
#include "obj/obj.h"
@@ -199,17 +198,11 @@ static inline u32 aa64_bfm(u32 sf, u32 Rd, u32 Rn, u32 immr, u32 imms) {
}
/* ============================================================
- * RegPool
- * ============================================================ */
-
-typedef CGSimpleRegPool RegPool;
-
-/* ============================================================
* AAImpl types
* ============================================================ */
#define AA_PROLOGUE_WORDS \
- 12u /* worst case: sub sp + stp/add fp + 5 int + 4 fp = 11 */
+ 22u /* worst case: sub sp + stp/add fp + sret + 10 int + 8 fp saves */
typedef struct AASlot {
u32 off;
@@ -250,8 +243,8 @@ typedef struct AAImpl {
u8 has_sret;
FrameSlot sret_ptr_slot;
- RegPool int_pool;
- RegPool fp_pool;
+ u32 used_cs_int_mask; /* bit N set when xN (x19-x28) must be preserved */
+ u32 used_cs_fp_mask; /* bit N set when dN (d8-d15) must be preserved */
AAScope* scopes;
u32 nscopes;
@@ -274,8 +267,6 @@ typedef struct AAImpl {
* Cross-file forward declarations
* ============================================================ */
-/* regpool (alloc.c) */
-void regpool_init(RegPool* p, u8 base, u8 nregs);
/* emit.c helpers used in alloc.c / ops.c */
void aa64_emit32(MCEmitter* mc, u32 word);
void aa64_patch32(ObjBuilder* obj, u32 sec_id, u32 ofs, u32 word);
diff --git a/src/arch/aarch64/ops.c b/src/arch/aarch64/ops.c
@@ -1833,19 +1833,11 @@ static void aa_asm_block(CGTarget* t, const char* tmpl,
RegClass cls;
if (t->resolve_reg_name(t, clobs[i], &phys, &cls) != 0) continue;
if (cls == RC_INT) {
- u32 idx = (u32)phys;
- RegPool* p = &a_impl->int_pool;
- if (idx >= p->base && idx < (u32)(p->base + p->nregs)) {
- u32 off = idx - p->base + 1u;
- if (off > p->hwm) p->hwm = off;
- }
+ if (phys >= 19u && phys <= 28u)
+ a_impl->used_cs_int_mask |= 1u << phys;
} else if (cls == RC_FP) {
- u32 idx = (u32)phys;
- RegPool* p = &a_impl->fp_pool;
- if (idx >= p->base && idx < (u32)(p->base + p->nregs)) {
- u32 off = idx - p->base + 1u;
- if (off > p->hwm) p->hwm = off;
- }
+ if (phys >= 8u && phys <= 15u)
+ a_impl->used_cs_fp_mask |= 1u << phys;
}
}
AA64Asm* a = aa64_asm_open(t->c);
diff --git a/src/arch/aarch64/opt_coord.c b/src/arch/aarch64/opt_coord.c
@@ -5,11 +5,12 @@
#include "arch/aarch64/internal.h"
/* ============================================================
- * Static register tables (match regpool_init in emit.c). */
+ * Static register tables reported to caller-owned allocators. */
-static const Reg aa_int_pool[] = {19, 20, 21, 22, 23, 24, 25, 26, 27, 28};
-static const Reg aa_fp_pool[] = {8, 9, 10, 11, 12, 13, 14, 15,
- 16, 17, 18, 19, 20, 21, 22, 23};
+static const Reg aa_int_allocable[] = {19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28};
+static const Reg aa_fp_allocable[] = {8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23};
static const Reg aa_int_scratch[] = {16, 17};
static const Reg aa_fp_scratch[] = {24, 25};
@@ -22,12 +23,12 @@ static void aa_get_allocable_regs(CGTarget* t, RegClass cls,
(void)t;
switch (cls) {
case RC_INT:
- *out = aa_int_pool;
- *nregs = sizeof aa_int_pool / sizeof aa_int_pool[0];
+ *out = aa_int_allocable;
+ *nregs = sizeof aa_int_allocable / sizeof aa_int_allocable[0];
break;
case RC_FP:
- *out = aa_fp_pool;
- *nregs = sizeof aa_fp_pool / sizeof aa_fp_pool[0];
+ *out = aa_fp_allocable;
+ *nregs = sizeof aa_fp_allocable / sizeof aa_fp_allocable[0];
break;
default:
*out = NULL;
@@ -72,14 +73,18 @@ static int aa_is_caller_saved(CGTarget* t, RegClass cls, Reg reg) {
static void aa_reserve_hard_regs(CGTarget* t, RegClass cls,
const Reg* regs, u32 n) {
AAImpl* a = impl_of(t);
- RegPool* p;
- switch (cls) {
- case RC_INT: p = &a->int_pool; break;
- case RC_FP: p = &a->fp_pool; break;
- default: return;
- }
for (u32 i = 0; i < n; ++i) {
- cg_simple_regpool_reserve(p, regs[i]);
+ Reg r = regs[i]; /* set bit r in the mask for cls when r is tracked */
+ switch (cls) {
+ case RC_INT: /* only x19-x28 are recorded */
+ if (r >= 19u && r <= 28u) a->used_cs_int_mask |= 1u << r;
+ break;
+ case RC_FP: /* only d8-d15 are recorded */
+ if (r >= 8u && r <= 15u) a->used_cs_fp_mask |= 1u << r;
+ break;
+ default: /* other register classes record nothing */
+ break;
+ }
}
}
diff --git a/src/arch/rv64/emit.c b/src/arch/rv64/emit.c
@@ -2,6 +2,14 @@
#include "arch/rv64/internal.h"
+static u32 collect_mask_regs(u32 mask, u32 first, u32 last, u32* out) {
+ u32 n = 0; /* entries written to out so far */
+ for (u32 r = first; r <= last; ++r) { /* inclusive range, ascending */
+ if (mask & (1u << r)) out[n++] = r; /* needs r < 32 and room in out */
+ }
+ return n; /* out[0..n) lists the set bits of mask within [first, last] */
+}
+
void rv64_emit32(MCEmitter* mc, u32 word) {
u32 ofs = obj_pos(mc->obj, mc->section_id);
u8 b[4];
@@ -111,8 +119,8 @@ void rv_func_begin(CGTarget* t, const CGFuncDesc* fd) {
a->cum_off = 0;
a->max_outgoing = 0;
a->fp_pair_off = 0;
- regpool_init(&a->int_pool, /*base=*/18u, /*nregs=*/10u); /* s2..s11 */
- regpool_init(&a->fp_pool, /*base=*/18u, /*nregs=*/10u); /* fs2..fs11 */
+ a->used_cs_int_mask = 0;
+ a->used_cs_fp_mask = 0;
a->nslots = 0;
a->nscopes = 0;
a->has_alloca = 0;
@@ -161,8 +169,10 @@ void rv_func_end(CGTarget* t) {
ObjBuilder* obj = t->obj;
u32 sec = a->fd->text_section_id;
- u32 n_int_saves = a->int_pool.hwm; /* s2..s2+hwm-1 */
- u32 n_fp_saves = a->fp_pool.hwm;
+ u32 int_regs[10];
+ u32 fp_regs[10];
+ u32 n_int_saves = collect_mask_regs(a->used_cs_int_mask, 18u, 27u, int_regs);
+ u32 n_fp_saves = collect_mask_regs(a->used_cs_fp_mask, 18u, 27u, fp_regs);
u32 max_out = (a->max_outgoing + 15u) & ~15u;
u32 int_saves_sz = n_int_saves * 8u;
u32 fp_saves_sz = n_fp_saves * 8u;
@@ -212,12 +222,12 @@ void rv_func_end(CGTarget* t) {
/* Reverse order: ints first (lowest address) on restore, but we emit
* the restore loop in reverse to keep the prologue/epilogue symmetric. */
for (i32 i = (i32)n_int_saves - 1; i >= 0; --i) {
- u32 r = 18u + (u32)i; /* s2 + i */
+ u32 r = int_regs[i];
i32 off = int_save_base - 8 * (i32)i;
rv64_emit32(mc, rv_ld(r, RV_S0, off));
}
for (i32 i = (i32)n_fp_saves - 1; i >= 0; --i) {
- u32 r = 18u + (u32)i; /* fs2 + i (fp reg number) */
+ u32 r = fp_regs[i];
i32 off = fp_save_base - 8 * (i32)i;
rv64_emit32(mc, rv_fld(r, RV_S0, off));
}
@@ -286,14 +296,14 @@ void rv_func_end(CGTarget* t) {
}
/* int saves */
for (u32 i = 0; i < n_int_saves; ++i) {
- u32 r = 18u + i;
+ u32 r = int_regs[i];
i32 off = int_save_base - 8 * (i32)i;
if (wi >= RV_PROLOGUE_WORDS) goto overflow;
words[wi++] = rv_sd(r, RV_S0, off);
}
/* fp saves */
for (u32 i = 0; i < n_fp_saves; ++i) {
- u32 r = 18u + i;
+ u32 r = fp_regs[i];
i32 off = fp_save_base - 8 * (i32)i;
if (wi >= RV_PROLOGUE_WORDS) goto overflow;
words[wi++] = rv_fsd(r, RV_S0, off);
@@ -329,4 +339,3 @@ void rv_func_end(CGTarget* t) {
mc->cfi_endproc(mc);
a->fd = NULL;
}
-
diff --git a/src/arch/rv64/internal.h b/src/arch/rv64/internal.h
@@ -5,7 +5,6 @@
#include <string.h>
#include "arch/arch.h"
-#include "arch/regalloc.h"
#include "arch/rv64.h"
#include "arch/rv64_isa.h"
#include "core/arena.h"
@@ -14,9 +13,6 @@
#define RV_PROLOGUE_WORDS 32u
-/* ---- RegPool ---- */
-typedef CGSimpleRegPool RegPool;
-
/* ---- RvSlot / RvScope ---- */
typedef struct RvSlot {
u32 off; /* bytes below s0 (positive); address = s0 - off */
@@ -59,8 +55,8 @@ typedef struct RImpl {
u8 has_sret;
FrameSlot sret_ptr_slot;
- RegPool int_pool;
- RegPool fp_pool;
+ u32 used_cs_int_mask; /* bit N set when xN is used; N in 18-27 (s2-s11) */
+ u32 used_cs_fp_mask; /* bit N set when fN is used; N in 18-27 (fs2-fs11) */
RvScope* scopes;
u32 nscopes;
@@ -108,11 +104,6 @@ static inline int type_is_signed(CfreeCgTypeId t) {
static inline u32 reg_num(Operand op) { return op.v.reg & 0x1fu; }
-/* ---- RegPool ops (inlined — identical in each caller) ---- */
-static inline void regpool_init(RegPool* p, u8 base, u8 nregs) {
- cg_simple_regpool_init_range(p, base, nregs);
-}
-
/* ---- emit.c: function lifecycle (referenced by ops.c vtable) ---- */
void rv_func_begin(CGTarget* t, const CGFuncDesc* fd);
void rv_func_end(CGTarget* t);
diff --git a/src/arch/rv64/opt_coord.c b/src/arch/rv64/opt_coord.c
@@ -3,10 +3,12 @@
#include "arch/rv64/internal.h"
/* ============================================================
- * Static register tables (match regpool_init in emit.c). */
+ * Static register tables reported to caller-owned allocators. */
-static const Reg rv_int_pool[] = {18, 19, 20, 21, 22, 23, 24, 25, 26, 27};
-static const Reg rv_fp_pool[] = {18, 19, 20, 21, 22, 23, 24, 25, 26, 27};
+static const Reg rv_int_allocable[] = {18, 19, 20, 21, 22,
+ 23, 24, 25, 26, 27};
+static const Reg rv_fp_allocable[] = {18, 19, 20, 21, 22,
+ 23, 24, 25, 26, 27};
static const Reg rv_int_scratch[] = {5, 6}; /* t0, t1 */
static const Reg rv_fp_scratch[] = {0}; /* ft0 */
@@ -19,12 +21,12 @@ static void rv_get_allocable_regs(CGTarget* t, RegClass cls,
(void)t;
switch (cls) {
case RC_INT:
- *out = rv_int_pool;
- *nregs = sizeof rv_int_pool / sizeof rv_int_pool[0];
+ *out = rv_int_allocable;
+ *nregs = sizeof rv_int_allocable / sizeof rv_int_allocable[0];
break;
case RC_FP:
- *out = rv_fp_pool;
- *nregs = sizeof rv_fp_pool / sizeof rv_fp_pool[0];
+ *out = rv_fp_allocable;
+ *nregs = sizeof rv_fp_allocable / sizeof rv_fp_allocable[0];
break;
default:
*out = NULL;
@@ -71,14 +73,18 @@ static int rv_is_caller_saved(CGTarget* t, RegClass cls, Reg reg) {
static void rv_reserve_hard_regs(CGTarget* t, RegClass cls,
const Reg* regs, u32 n) {
RImpl* a = impl_of(t);
- RegPool* p;
- switch (cls) {
- case RC_INT: p = &a->int_pool; break;
- case RC_FP: p = &a->fp_pool; break;
- default: return;
- }
for (u32 i = 0; i < n; ++i) {
- cg_simple_regpool_reserve(p, regs[i]);
+ Reg r = regs[i]; /* set bit r in the mask for cls when r is tracked */
+ switch (cls) {
+ case RC_INT: /* only register numbers 18-27 (s2-s11) are recorded */
+ if (r >= 18u && r <= 27u) a->used_cs_int_mask |= 1u << r;
+ break;
+ case RC_FP: /* only register numbers 18-27 (fs2-fs11) are recorded */
+ if (r >= 18u && r <= 27u) a->used_cs_fp_mask |= 1u << r;
+ break;
+ default: /* other register classes record nothing */
+ break;
+ }
}
}
diff --git a/src/arch/x64/alloc.c b/src/arch/x64/alloc.c
@@ -1,7 +1,7 @@
-/* arch/x64/alloc.c — register pool, spill/reload, labels, control flow.
+/* arch/x64/alloc.c — frame slots, spill/reload, labels, control flow.
*
- * Covers: xpool_init, x_frame_slot, x64_slot_get, x_param, x_spill_reg,
- * x_reload_reg, x_label_*,
+ * Covers: x_frame_slot, x64_slot_get, x_param, x_spill_reg, x_reload_reg,
+ * x_label_*,
* emit_jmp_label, emit_jcc_label, x_jump, x64_force_reg_int, emit_cmp_ab,
* x_cmp_branch, x_cmp, x_scope_*, x_break_to, x_continue_to. */
@@ -17,13 +17,6 @@
#include "arch/x64/internal.h"
/* ============================================================
- * XRegPool implementation. */
-
-void xpool_init(XRegPool* p, const Reg* order, u32 nregs) {
- cg_simple_regpool_init_ordered(p, order, nregs);
-}
-
-/* ============================================================
* Registers / frame */
FrameSlot x_frame_slot(CGTarget* t, const FrameSlotDesc* d) {
diff --git a/src/arch/x64/emit.c b/src/arch/x64/emit.c
@@ -478,8 +478,8 @@ void x_func_begin(CGTarget* t, const CGFuncDesc* fd) {
a->is_variadic = (fd->abi && fd->abi->variadic) ? 1 : 0;
a->cum_off = 0;
a->max_outgoing = 0;
- xpool_init(&a->int_pool, g_int_order, 6u);
- xpool_init(&a->fp_pool, g_fp_order, 10u);
+ a->used_cs_int_mask = 0;
+ a->used_cs_fp_mask = 0;
a->nslots = 0;
a->nscopes = 0;
a->nalloca_patches = 0;
@@ -537,8 +537,12 @@ void x_func_end(CGTarget* t) {
XImpl* a = impl_of(t);
MCEmitter* mc = t->mc;
- u32 cs_used = a->int_pool.hwm;
- if (cs_used > 5u) cs_used = 5u;
+ Reg cs_regs[5];
+ u32 cs_used = 0;
+ for (u32 i = 0; i < 5u; ++i) {
+ Reg r = g_int_order[i];
+ if (a->used_cs_int_mask & (1u << r)) cs_regs[cs_used++] = r;
+ }
u32 cs_size = cs_used * 8u;
/* Stack alignment: SysV requires rsp ≡ 0 mod 16 just before a call,
@@ -554,7 +558,7 @@ void x_func_end(CGTarget* t) {
/* Restore callee-saves. Each at rbp - (cum_off + (i+1)*8). */
for (i32 i = (i32)cs_used - 1; i >= 0; --i) {
- u32 reg = a->int_pool.order[i];
+ u32 reg = cs_regs[i];
i32 off = -(i32)a->cum_off - (i32)(i + 1) * 8;
emit_mov_load(mc, /*size=*/8, /*signed=*/0, reg, X64_RBP, off);
}
@@ -601,7 +605,7 @@ void x_func_end(CGTarget* t) {
/* Spill callee-saves. */
for (u32 i = 0; i < cs_used; ++i) {
- u32 reg = a->int_pool.order[i];
+ u32 reg = cs_regs[i];
i32 off = -(i32)a->cum_off - (i32)(i + 1) * 8;
if (wi + 7 > X64_PROLOGUE_BYTES) goto overflow;
buf[wi++] = (u8)(X64_REX_BASE | X64_REX_W | ((reg & 8) ? X64_REX_R : 0));
diff --git a/src/arch/x64/internal.h b/src/arch/x64/internal.h
@@ -1,7 +1,7 @@
/* arch/x64/internal.h — private header shared by emit.c, alloc.c, ops.c.
*
* Contains:
- * - XRegPool, XSlot, XScope, XAllocaPatch, XImpl struct definitions
+ * - XSlot, XScope, XAllocaPatch, XImpl struct definitions
* - impl_of() accessor
* - Small type helpers (static inline)
* - Forward declarations of cross-file functions
@@ -13,7 +13,6 @@
#include <string.h>
#include "arch/arch.h"
-#include "arch/regalloc.h"
#include "arch/x64.h"
#include "arch/x64_isa.h"
#include "core/arena.h"
@@ -23,11 +22,6 @@
#define X64_PROLOGUE_BYTES 96u
/* ============================================================
- * Custom register pool. */
-
-typedef CGSimpleRegPool XRegPool;
-
-/* ============================================================
* XImpl and friends. */
typedef struct XSlot {
@@ -80,8 +74,8 @@ typedef struct XImpl {
FrameSlot sret_ptr_slot;
FrameSlot reg_save_slot; /* variadic: 176-byte __va_list_tag reg save area */
- XRegPool int_pool;
- XRegPool fp_pool;
+ u32 used_cs_int_mask; /* SysV callee-saved GPRs used by this function */
+ u32 used_cs_fp_mask; /* reserved for ABIs with callee-saved FP regs */
XScope* scopes;
u32 nscopes;
@@ -190,7 +184,6 @@ void emit_sse_rr_w(MCEmitter* mc, u8 prefix, u8 opcode, int w, u32 dst,
u32 src);
/* --- alloc.c exports (used by emit.c and/or ops.c) --- */
-void xpool_init(XRegPool* p, const Reg* order, u32 nregs);
XSlot* x64_slot_get(XImpl* a, FrameSlot fs);
FrameSlot x_frame_slot(CGTarget* t, const FrameSlotDesc* d);
void x_param(CGTarget* t, const CGParamDesc* p);
diff --git a/src/arch/x64/opt_coord.c b/src/arch/x64/opt_coord.c
@@ -3,7 +3,7 @@
#include "arch/x64/internal.h"
/* ============================================================
- * Scratch tables (not in XRegPool.order, used by backend internals). */
+ * Scratch tables used by backend internals and opt spill rewriting. */
static const Reg x_int_scratch[] = {X64_R11}; /* RAX is reserved for backend */
static const Reg x_fp_scratch[] = {X64_XMM0 + 15}; /* xmm15 */
@@ -13,15 +13,15 @@ static const Reg x_fp_scratch[] = {X64_XMM0 + 15}; /* xmm15 */
static void x_get_allocable_regs(CGTarget* t, RegClass cls,
const Reg** out, u32* nregs) {
- XImpl* a = impl_of(t);
+ (void)t;
switch (cls) {
case RC_INT:
- *out = a->int_pool.order;
- *nregs = a->int_pool.nregs;
+ *out = g_int_order;
+ *nregs = 6u;
break;
case RC_FP:
- *out = a->fp_pool.order;
- *nregs = a->fp_pool.nregs;
+ *out = g_fp_order;
+ *nregs = 10u;
break;
default:
*out = NULL;
@@ -68,14 +68,20 @@ static int x_is_caller_saved(CGTarget* t, RegClass cls, Reg reg) {
static void x_reserve_hard_regs(CGTarget* t, RegClass cls,
const Reg* regs, u32 n) {
XImpl* a = impl_of(t);
- XRegPool* p;
- switch (cls) {
- case RC_INT: p = &a->int_pool; break;
- case RC_FP: p = &a->fp_pool; break;
- default: return;
- }
for (u32 i = 0; i < n; ++i) {
- cg_simple_regpool_reserve(p, regs[i]);
+ Reg r = regs[i]; /* set bit r in the mask for cls when r is tracked */
+ switch (cls) {
+ case RC_INT: /* skip caller-saved regs; r < 32u bounds the shift */
+ if (!x_is_caller_saved(t, cls, r) && r < 32u)
+ a->used_cs_int_mask |= 1u << r;
+ break;
+ case RC_FP: /* same filter as RC_INT, applied to the FP mask */
+ if (!x_is_caller_saved(t, cls, r) && r < 32u)
+ a->used_cs_fp_mask |= 1u << r;
+ break;
+ default: /* other register classes record nothing */
+ break;
+ }
}
}
diff --git a/src/opt/opt.c b/src/opt/opt.c
@@ -1280,12 +1280,10 @@ static void replay_func_to(Compiler* c, Func* f, CGTarget* w, int identity) {
* regs were actually assigned so it can save the right callee-saved
* subset in prologue/epilogue.
*
- * We skip FP for now: AArch64's FP prologue placeholder is
- * fixed-size and may not fit the full FP pool. This matches the
- * conservative behaviour of the old alloc/free hwm-bump loop. */
+ * The backend records only callee-saved members of this set for
+ * prologue/epilogue preservation. */
if (r.identity_regs && w->reserve_hard_regs) {
for (u32 c = 0; c < OPT_REG_CLASSES; ++c) {
- if (c == RC_FP) continue;
Reg used[OPT_MAX_HARD_REGS];
u32 nused = 0;
for (Val v = 1; v < f->nvals; ++v) {