commit 2054863eaad4a1cb13b851e90f6e9630a114c398
parent dbf13315d00e0f39eb2202d426bff83f68f8a0b3
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Fri, 15 May 2026 06:45:39 -0700
Size opt prologue placeholders from planned regs
Diffstat:
12 files changed, 270 insertions(+), 57 deletions(-)
diff --git a/src/arch/aa64/emit.c b/src/arch/aa64/emit.c
@@ -64,6 +64,22 @@ static u32 collect_mask_regs(u32 mask, u32 first, u32 last, u32 *out) {
return n;
}
+/* Count the bits of `mask` that are set at positions first..last (inclusive).
+ * Each set bit stands for one register number. The callers visible in this
+ * change pass last <= 28, which keeps the shift inside the width of u32. */
+static u32 count_mask_regs(u32 mask, u32 first, u32 last) {
+  u32 count = 0;
+  u32 reg = first;
+  while (reg <= last) {
+    count += (mask >> reg) & 1u;
+    ++reg;
+  }
+  return count;
+}
+
+/* Number of 32-bit words aa_func_begin reserves for the prologue placeholder
+ * when the optimizer has announced its register plan.
+ *
+ * Budget: AA_PROLOGUE_FRAME_WORDS for frame setup, one word for the sret
+ * spill when the function has an sret return, and one store per planned
+ * callee-saved register (x19-x28 in the int mask, d8-d15 in the fp mask).
+ *
+ * The result is kept within [1, AA_PROLOGUE_WORDS]:
+ *  - never 0, because aa_func_end treats prologue_words == 0 as "unset" and
+ *    falls back to the full AA_PROLOGUE_WORDS placeholder;
+ *  - never above AA_PROLOGUE_WORDS, because aa_func_end stages the prologue
+ *    in a words[AA_PROLOGUE_WORDS] array and fills prologue_words entries.
+ *    If the real prologue needs more than the clamped size, aa_func_end
+ *    reports it through its existing overflow panic. */
+static u32 aa_planned_prologue_words(const AAImpl *a) {
+  u32 words = AA_PROLOGUE_FRAME_WORDS;
+  if (a->has_sret) ++words;
+  words += count_mask_regs(a->planned_cs_int_mask, 19u, 28u);
+  words += count_mask_regs(a->planned_cs_fp_mask, 8u, 15u);
+  if (words == 0) return 1u;
+  if (words > AA_PROLOGUE_WORDS) return AA_PROLOGUE_WORDS;
+  return words;
+}
+
/* ============================================================
* Low-level emission
* ============================================================ */
@@ -165,8 +181,13 @@ void aa_func_begin(CGTarget *t, const CGFuncDesc *fd) {
a->has_sret = (fd->abi && fd->abi->has_sret) ? 1 : 0;
a->cum_off = 0;
a->max_outgoing = 0;
- a->used_cs_int_mask = 0;
- a->used_cs_fp_mask = 0;
+ a->used_cs_int_mask = a->has_planned_regs ? a->planned_cs_int_mask : 0;
+ a->used_cs_fp_mask = a->has_planned_regs ? a->planned_cs_fp_mask : 0;
+ a->prologue_words = a->has_planned_regs ? aa_planned_prologue_words(a)
+ : AA_PROLOGUE_WORDS;
+ a->planned_cs_int_mask = 0;
+ a->planned_cs_fp_mask = 0;
+ a->has_planned_regs = 0;
a->nslots = 0;
a->nscopes = 0;
a->has_alloca = 0;
@@ -180,7 +201,7 @@ void aa_func_begin(CGTarget *t, const CGFuncDesc *fd) {
mc->cfi_startproc(mc);
a->prologue_pos = mc->pos(mc);
- for (u32 i = 0; i < AA_PROLOGUE_WORDS; ++i)
+ for (u32 i = 0; i < a->prologue_words; ++i)
aa64_emit32(mc, AA64_NOP);
if (a->has_sret) {
@@ -277,7 +298,9 @@ void aa_func_end(CGTarget *t) {
u32 sec = a->fd->text_section_id;
u32 words[AA_PROLOGUE_WORDS];
- for (u32 i = 0; i < AA_PROLOGUE_WORDS; ++i)
+ u32 prologue_words = a->prologue_words ? a->prologue_words
+ : AA_PROLOGUE_WORDS;
+ for (u32 i = 0; i < prologue_words; ++i)
words[i] = AA64_NOP;
u32 wi = 0;
@@ -286,7 +309,7 @@ void aa_func_end(CGTarget *t) {
} else if ((frame_size & 0xfff) == 0 && (frame_size >> 12) <= 0xfff) {
words[wi++] = aa64_sub_imm(1, 31, 31, frame_size >> 12, 1);
} else {
- if (wi + 2 > AA_PROLOGUE_WORDS) {
+ if (wi + 2 > prologue_words) {
compiler_panic(t->c, a->loc,
"aarch64: prologue overflow for frame_size %u",
frame_size);
@@ -299,20 +322,20 @@ void aa_func_end(CGTarget *t) {
if (a->has_sret && a->sret_ptr_slot != FRAME_SLOT_NONE) {
AASlot *s = aa64_slot_get(a, a->sret_ptr_slot);
if (s) {
- if (wi >= AA_PROLOGUE_WORDS)
+ if (wi >= prologue_words)
goto overflow;
words[wi++] = aa64_stur(3, 8, 29, -(i32)s->off);
}
}
for (u32 i = 0; i < n_int_saves; ++i) {
u32 r0 = int_regs[i];
- if (wi >= AA_PROLOGUE_WORDS)
+ if (wi >= prologue_words)
goto overflow;
words[wi++] = aa64_str_uimm(3, r0, 31, int_save_off + i * 8u);
}
for (u32 i = 0; i < n_fp_saves; ++i) {
u32 r0 = fp_regs[i];
- if (wi >= AA_PROLOGUE_WORDS)
+ if (wi >= prologue_words)
goto overflow;
words[wi++] = aa64_str_fp_uimm(3, r0, 31, fp_save_off + i * 8u);
}
@@ -321,10 +344,10 @@ void aa_func_end(CGTarget *t) {
compiler_panic(
t->c, a->loc,
"aarch64: prologue placeholder too small (used %u of %u words)", wi,
- AA_PROLOGUE_WORDS);
+ prologue_words);
}
- for (u32 i = 0; i < AA_PROLOGUE_WORDS; ++i) {
+ for (u32 i = 0; i < prologue_words; ++i) {
aa64_patch32(obj, sec, pos + i * 4u, words[i]);
}
diff --git a/src/arch/aa64/internal.h b/src/arch/aa64/internal.h
@@ -202,7 +202,8 @@ static inline u32 aa64_bfm(u32 sf, u32 Rd, u32 Rn, u32 immr, u32 imms) {
* ============================================================ */
#define AA_PROLOGUE_WORDS \
- 22u /* worst case: sub sp + stp/add fp + sret + 5 int + 8 fp saves */
+ 23u /* worst case: sub sp + stp/add fp + sret + 10 int + 8 fp saves */
+#define AA_PROLOGUE_FRAME_WORDS 4u /* worst-case frame adjust + stp/add fp */
typedef struct AASlot {
u32 off;
@@ -229,6 +230,7 @@ typedef struct AAImpl {
u32 func_start;
u32 prologue_pos;
+ u32 prologue_words;
MCLabel epilogue_label;
AASlot* slots;
@@ -245,6 +247,10 @@ typedef struct AAImpl {
u32 used_cs_int_mask; /* bit reg set when x19-x28 must be preserved */
u32 used_cs_fp_mask; /* bit reg set when d8-d15 must be preserved */
+ u32 planned_cs_int_mask;
+ u32 planned_cs_fp_mask;
+ u8 has_planned_regs;
+ u8 pad1[3];
AAScope* scopes;
u32 nscopes;
diff --git a/src/arch/aa64/opt_coord.c b/src/arch/aa64/opt_coord.c
@@ -88,9 +88,29 @@ static void aa_reserve_hard_regs(CGTarget* t, RegClass cls,
}
}
+/* plan_hard_regs hook for aarch64: remember which callee-saved registers the
+ * optimizer intends to use in the next function.
+ *
+ * For RC_INT only x19-x28 are recorded, for RC_FP only d8-d15; every other
+ * register number and every other class is ignored. The masks accumulate
+ * across calls until aa_func_begin consumes and clears them. */
+static void aa_plan_hard_regs(CGTarget* t, RegClass cls, const Reg* regs,
+                              u32 n) {
+  AAImpl* impl = impl_of(t);
+  u32* planned;
+  u32 lo;
+  u32 hi;
+
+  /* Any call, even one with n == 0 or an unhandled class, marks the next
+   * function as planned. */
+  impl->has_planned_regs = 1;
+
+  if (cls == RC_INT) {
+    planned = &impl->planned_cs_int_mask;
+    lo = 19u;
+    hi = 28u;
+  } else if (cls == RC_FP) {
+    planned = &impl->planned_cs_fp_mask;
+    lo = 8u;
+    hi = 15u;
+  } else {
+    return;
+  }
+
+  for (u32 k = 0; k < n; ++k) {
+    Reg reg = regs[k];
+    if (reg >= lo && reg <= hi) *planned |= 1u << reg;
+  }
+}
+
void aa_coord_vtable_init(CGTarget* t) {
t->get_allocable_regs = aa_get_allocable_regs;
t->get_scratch_regs = aa_get_scratch_regs;
t->is_caller_saved = aa_is_caller_saved;
+ t->plan_hard_regs = aa_plan_hard_regs; /* pre-func_begin plan; aa_func_begin sizes the prologue placeholder from it */
t->reserve_hard_regs = aa_reserve_hard_regs;
}
diff --git a/src/arch/arch.h b/src/arch/arch.h
@@ -516,6 +516,12 @@ struct CGTarget {
/* Return non-zero if `reg` in `cls` is caller-saved on this target. */
int (*is_caller_saved)(CGTarget*, RegClass, Reg);
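+ /* Tell the backend which hard registers opt is going to assign in the next
+ * function, before func_begin reserves its prologue placeholder. The in-tree
+ * backends size the placeholder from this set and also seed their
+ * callee-saved save masks with it, so every planned callee-saved register is
+ * saved even if it is never reserved afterwards. The plan is consumed (and
+ * cleared) by the next func_begin. reserve_hard_regs remains the
+ * authoritative per-function preservation hook; registers it reports that
+ * were not planned may not fit the smaller placeholder, in which case
+ * func_end panics, so the plan should cover everything that will be
+ * reserved. */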
+ void (*plan_hard_regs)(CGTarget*, RegClass, const Reg* regs, u32 n);
+
/* Tell the backend which hard registers opt actually assigned in the
* current function. Call after the function body is emitted, before
* func_end. The backend updates prologue/epilogue bookkeeping so it
diff --git a/src/arch/rv64/emit.c b/src/arch/rv64/emit.c
@@ -12,6 +12,23 @@ static u32 collect_mask_regs(u32 mask, u32 first, u32 last, u32 *out) {
return n;
}
+/* Return how many bits of `mask` are set in the inclusive bit range
+ * [first, last]; one bit per register number. The callers visible in this
+ * change pass last == 27, so the shift stays inside the width of u32. */
+static u32 count_mask_regs(u32 mask, u32 first, u32 last) {
+  u32 total = 0;
+  u32 bit = first;
+  while (bit <= last) {
+    total += (mask >> bit) & 1u;
+    ++bit;
+  }
+  return total;
+}
+
+/* Number of 32-bit words rv_func_begin reserves for the prologue placeholder
+ * when the optimizer has announced its register plan.
+ *
+ * Budget: RV_PROLOGUE_FRAME_WORDS for frame setup, one word for the sret
+ * spill, eight words for the variadic argument-register spill (rv_func_end
+ * stores the registers from next_param_int up to 8, so 8 is the worst case),
+ * and one store per planned callee-saved register (s2-s11 in the int mask,
+ * fs2-fs11 in the fp mask; both occupy bits 18..27).
+ *
+ * Reads a->has_sret and a->is_variadic, so rv_func_begin must set both
+ * before calling this.
+ *
+ * The result is kept within [1, RV_PROLOGUE_WORDS]:
+ *  - never 0, because rv_func_end treats prologue_words == 0 as "unset" and
+ *    falls back to the full RV_PROLOGUE_WORDS placeholder;
+ *  - never above RV_PROLOGUE_WORDS, because rv_func_end stages the prologue
+ *    in a words[RV_PROLOGUE_WORDS] array and fills prologue_words entries.
+ *    If the real prologue needs more than the clamped size, rv_func_end
+ *    reports it through its existing overflow panic. */
+static u32 rv_planned_prologue_words(const RImpl *a) {
+  u32 words = RV_PROLOGUE_FRAME_WORDS;
+  if (a->has_sret) ++words;
+  if (a->is_variadic) words += 8u;
+  words += count_mask_regs(a->planned_cs_int_mask, 18u, 27u);
+  words += count_mask_regs(a->planned_cs_fp_mask, 18u, 27u);
+  if (words == 0) return 1u;
+  if (words > RV_PROLOGUE_WORDS) return RV_PROLOGUE_WORDS;
+  return words;
+}
+
void rv64_emit32(MCEmitter *mc, u32 word) {
u32 ofs = obj_pos(mc->obj, mc->section_id);
u8 b[4];
@@ -121,16 +138,21 @@ void rv_func_begin(CGTarget *t, const CGFuncDesc *fd) {
a->next_param_fp = 0;
a->next_param_stack = 0;
a->has_sret = (fd->abi && fd->abi->has_sret) ? 1 : 0;
+ a->is_variadic = (fd->abi && fd->abi->variadic) ? 1 : 0;
a->cum_off = 0;
a->max_outgoing = 0;
a->fp_pair_off = 0;
- a->used_cs_int_mask = 0;
- a->used_cs_fp_mask = 0;
+ a->used_cs_int_mask = a->has_planned_regs ? a->planned_cs_int_mask : 0;
+ a->used_cs_fp_mask = a->has_planned_regs ? a->planned_cs_fp_mask : 0;
+ a->prologue_words = a->has_planned_regs ? rv_planned_prologue_words(a)
+ : RV_PROLOGUE_WORDS;
+ a->planned_cs_int_mask = 0;
+ a->planned_cs_fp_mask = 0;
+ a->has_planned_regs = 0;
a->nslots = 0;
a->nscopes = 0;
a->has_alloca = 0;
a->nadd_patches = 0;
- a->is_variadic = (fd->abi && fd->abi->variadic) ? 1 : 0;
a->gp_save_slot = FRAME_SLOT_NONE;
a->sret_ptr_slot = FRAME_SLOT_NONE;
a->epilogue_label = mc->label_new(mc);
@@ -139,7 +161,7 @@ void rv_func_begin(CGTarget *t, const CGFuncDesc *fd) {
/* Reserve a NOP-filled prologue placeholder; func_end patches it. */
a->prologue_pos = mc->pos(mc);
- for (u32 i = 0; i < RV_PROLOGUE_WORDS; ++i)
+ for (u32 i = 0; i < a->prologue_words; ++i)
rv64_emit32(mc, RV_NOP);
/* For an sret return, the caller passed the destination pointer in
@@ -253,7 +275,9 @@ void rv_func_end(CGTarget *t) {
/* Now patch the prologue placeholder. */
u32 pos = a->prologue_pos;
u32 words[RV_PROLOGUE_WORDS];
- for (u32 i = 0; i < RV_PROLOGUE_WORDS; ++i)
+ u32 prologue_words = a->prologue_words ? a->prologue_words
+ : RV_PROLOGUE_WORDS;
+ for (u32 i = 0; i < prologue_words; ++i)
words[i] = RV_NOP;
u32 wi = 0;
@@ -289,7 +313,7 @@ void rv_func_end(CGTarget *t) {
if (a->has_sret && a->sret_ptr_slot != FRAME_SLOT_NONE) {
RvSlot *s = rv64_slot_get(a, a->sret_ptr_slot);
if (s) {
- if (wi >= RV_PROLOGUE_WORDS)
+ if (wi >= prologue_words)
goto overflow;
words[wi++] = rv_sd(RV_A0, RV_S0, -(i32)s->off);
}
@@ -300,7 +324,7 @@ void rv_func_end(CGTarget *t) {
* == caller's first stack arg. */
if (a->is_variadic) {
for (u32 i = a->next_param_int; i < 8; ++i) {
- if (wi >= RV_PROLOGUE_WORDS)
+ if (wi >= prologue_words)
goto overflow;
words[wi++] = rv_sd(RV_A0 + i, RV_S0, 16 + (i32)i * 8);
}
@@ -309,7 +333,7 @@ void rv_func_end(CGTarget *t) {
for (u32 i = 0; i < n_int_saves; ++i) {
u32 r = int_regs[i];
i32 off = int_save_base - 8 * (i32)i;
- if (wi >= RV_PROLOGUE_WORDS)
+ if (wi >= prologue_words)
goto overflow;
words[wi++] = rv_sd(r, RV_S0, off);
}
@@ -317,7 +341,7 @@ void rv_func_end(CGTarget *t) {
for (u32 i = 0; i < n_fp_saves; ++i) {
u32 r = fp_regs[i];
i32 off = fp_save_base - 8 * (i32)i;
- if (wi >= RV_PROLOGUE_WORDS)
+ if (wi >= prologue_words)
goto overflow;
words[wi++] = rv_fsd(r, RV_S0, off);
}
@@ -325,10 +349,10 @@ void rv_func_end(CGTarget *t) {
overflow:
compiler_panic(t->c, a->loc,
"rv64: prologue placeholder too small (used %u of %u)", wi,
- RV_PROLOGUE_WORDS);
+ prologue_words);
}
- for (u32 i = 0; i < RV_PROLOGUE_WORDS; ++i) {
+ for (u32 i = 0; i < prologue_words; ++i) {
rv64_patch32(obj, sec, pos + i * 4u, words[i]);
}
diff --git a/src/arch/rv64/internal.h b/src/arch/rv64/internal.h
@@ -11,7 +11,8 @@
#include "core/pool.h"
#include "obj/obj.h"
-#define RV_PROLOGUE_WORDS 32u
+#define RV_PROLOGUE_WORDS 35u
+#define RV_PROLOGUE_FRAME_WORDS 6u /* worst-case sp adjust + s0/ra + set s0 */
/* ---- RvSlot / RvScope ---- */
typedef struct RvSlot {
@@ -40,6 +41,7 @@ typedef struct RImpl {
u32 func_start;
u32 prologue_pos;
+ u32 prologue_words;
MCLabel epilogue_label;
RvSlot *slots;
@@ -57,6 +59,10 @@ typedef struct RImpl {
u32 used_cs_int_mask; /* bit reg set for s2-s11 */
u32 used_cs_fp_mask; /* bit reg set for fs2-fs11 */
+ u32 planned_cs_int_mask;
+ u32 planned_cs_fp_mask;
+ u8 has_planned_regs;
+ u8 pad1[3];
RvScope *scopes;
u32 nscopes;
diff --git a/src/arch/rv64/opt_coord.c b/src/arch/rv64/opt_coord.c
@@ -86,9 +86,29 @@ static void rv_reserve_hard_regs(CGTarget* t, RegClass cls,
}
}
+/* plan_hard_regs hook for rv64: remember which callee-saved registers the
+ * optimizer intends to use in the next function.
+ *
+ * Only register numbers 18..27 are recorded (s2-s11 for RC_INT, fs2-fs11 for
+ * RC_FP); every other register number and every other class is ignored. The
+ * masks accumulate across calls until rv_func_begin consumes and clears
+ * them. */
+static void rv_plan_hard_regs(CGTarget* t, RegClass cls, const Reg* regs,
+                              u32 n) {
+  RImpl* impl = impl_of(t);
+  u32* planned;
+
+  /* Any call, even one with n == 0 or an unhandled class, marks the next
+   * function as planned. */
+  impl->has_planned_regs = 1;
+
+  if (cls == RC_INT) {
+    planned = &impl->planned_cs_int_mask;
+  } else if (cls == RC_FP) {
+    planned = &impl->planned_cs_fp_mask;
+  } else {
+    return;
+  }
+
+  for (u32 k = 0; k < n; ++k) {
+    Reg reg = regs[k];
+    if (reg >= 18u && reg <= 27u) *planned |= 1u << reg;
+  }
+}
+
void rv_coord_vtable_init(CGTarget* t) {
t->get_allocable_regs = rv_get_allocable_regs;
t->get_scratch_regs = rv_get_scratch_regs;
t->is_caller_saved = rv_is_caller_saved;
+ t->plan_hard_regs = rv_plan_hard_regs; /* pre-func_begin plan; rv_func_begin sizes the prologue placeholder from it */
t->reserve_hard_regs = rv_reserve_hard_regs;
}
diff --git a/src/arch/x64/emit.c b/src/arch/x64/emit.c
@@ -496,6 +496,22 @@ void emit_sse_rr_w(MCEmitter *mc, u8 prefix, u8 opcode, int w, u32 dst,
/* ============================================================
* Function lifecycle */
+/* Count how many of the first five registers listed in g_int_order have
+ * their bit set in `mask`.
+ * NOTE(review): this presumes those five entries are exactly the
+ * callee-saved GPRs the prologue may store -- confirm against the definition
+ * of g_int_order. */
+static u32 count_x64_cs_int(u32 mask) {
+  u32 count = 0;
+  u32 idx = 0;
+  while (idx < 5u) {
+    Reg reg = g_int_order[idx];
+    count += (mask >> reg) & 1u;
+    ++idx;
+  }
+  return count;
+}
+
+/* Number of bytes x_func_begin reserves for the prologue placeholder when
+ * the optimizer has announced its register plan.
+ *
+ * Budget: X64_PROLOGUE_BASE_BYTES for frame setup, X64_PROLOGUE_SRET_BYTES
+ * for the sret spill, and X64_PROLOGUE_SAVE_BYTES per planned callee-saved
+ * integer register. planned_cs_fp_mask is not budgeted here.
+ * NOTE(review): confirm that no ABI handled by this backend stores FP
+ * registers in the prologue.
+ *
+ * The result is kept within [1, X64_PROLOGUE_BYTES]:
+ *  - never 0, because x_func_end treats prologue_nbytes == 0 as "unset" and
+ *    falls back to the full X64_PROLOGUE_BYTES placeholder;
+ *  - never above X64_PROLOGUE_BYTES, because x_func_end stages the prologue
+ *    in a buf[X64_PROLOGUE_BYTES] array and fills prologue_nbytes entries.
+ *    If the real prologue needs more than the clamped size, x_func_end
+ *    reports it through its existing overflow panic. */
+static u32 x64_planned_prologue_bytes(const XImpl *a) {
+  u32 nbytes = X64_PROLOGUE_BASE_BYTES;
+  if (a->has_sret) nbytes += X64_PROLOGUE_SRET_BYTES;
+  nbytes += count_x64_cs_int(a->planned_cs_int_mask) * X64_PROLOGUE_SAVE_BYTES;
+  if (nbytes == 0) return 1u;
+  if (nbytes > X64_PROLOGUE_BYTES) return X64_PROLOGUE_BYTES;
+  return nbytes;
+}
+
void x_func_begin(CGTarget *t, const CGFuncDesc *fd) {
XImpl *a = impl_of(t);
MCEmitter *mc = t->mc;
@@ -513,8 +529,13 @@ void x_func_begin(CGTarget *t, const CGFuncDesc *fd) {
a->is_variadic = (fd->abi && fd->abi->variadic) ? 1 : 0;
a->cum_off = 0;
a->max_outgoing = 0;
- a->used_cs_int_mask = 0;
- a->used_cs_fp_mask = 0;
+ a->used_cs_int_mask = a->has_planned_regs ? a->planned_cs_int_mask : 0;
+ a->used_cs_fp_mask = a->has_planned_regs ? a->planned_cs_fp_mask : 0;
+ a->prologue_nbytes = a->has_planned_regs ? x64_planned_prologue_bytes(a)
+ : X64_PROLOGUE_BYTES;
+ a->planned_cs_int_mask = 0;
+ a->planned_cs_fp_mask = 0;
+ a->has_planned_regs = 0;
a->nslots = 0;
a->nscopes = 0;
a->nalloca_patches = 0;
@@ -526,7 +547,7 @@ void x_func_begin(CGTarget *t, const CGFuncDesc *fd) {
/* Reserve a fixed-size prologue placeholder filled with NOPs. */
a->prologue_pos = mc->pos(mc);
- for (u32 i = 0; i < X64_PROLOGUE_BYTES; ++i)
+ for (u32 i = 0; i < a->prologue_nbytes; ++i)
emit1(mc, 0x90);
/* sret: rdi at entry holds the destination pointer. Spill it to a
@@ -616,7 +637,9 @@ void x_func_end(CGTarget *t) {
/* Patch prologue placeholder. */
u8 buf[X64_PROLOGUE_BYTES];
- for (u32 i = 0; i < X64_PROLOGUE_BYTES; ++i)
+ u32 prologue_nbytes = a->prologue_nbytes ? a->prologue_nbytes
+ : X64_PROLOGUE_BYTES;
+ for (u32 i = 0; i < prologue_nbytes; ++i)
buf[i] = 0x90;
u32 wi = 0;
@@ -640,7 +663,7 @@ void x_func_end(CGTarget *t) {
XSlot *s = x64_slot_get(a, a->sret_ptr_slot);
if (s) {
i32 off = -(i32)s->off;
- if (wi + 7 > X64_PROLOGUE_BYTES)
+ if (wi + 7 > prologue_nbytes)
goto overflow;
buf[wi++] = X64_REX_BASE | X64_REX_W;
buf[wi++] = 0x89;
@@ -656,7 +679,7 @@ void x_func_end(CGTarget *t) {
for (u32 i = 0; i < cs_used; ++i) {
u32 reg = cs_regs[i];
i32 off = -(i32)a->cum_off - (i32)(i + 1) * 8;
- if (wi + 7 > X64_PROLOGUE_BYTES)
+ if (wi + 7 > prologue_nbytes)
goto overflow;
buf[wi++] = (u8)(X64_REX_BASE | X64_REX_W | ((reg & 8) ? X64_REX_R : 0));
buf[wi++] = 0x89;
@@ -671,10 +694,10 @@ void x_func_end(CGTarget *t) {
overflow:
compiler_panic(t->c, a->loc,
"x64: prologue placeholder overflow (%u of %u bytes)", wi,
- X64_PROLOGUE_BYTES);
+ prologue_nbytes);
}
obj_patch(t->obj, a->fd->text_section_id, a->prologue_pos, buf,
- X64_PROLOGUE_BYTES);
+ prologue_nbytes);
/* Patch each alloca's `lea dst, [rsp + 0]` disp32 with the final
* max_outgoing (already 16-aligned via the `(stack_off+15)&~15` round
diff --git a/src/arch/x64/internal.h b/src/arch/x64/internal.h
@@ -20,6 +20,9 @@
#include "obj/obj.h"
#define X64_PROLOGUE_BYTES 96u
+#define X64_PROLOGUE_BASE_BYTES 11u
+#define X64_PROLOGUE_SRET_BYTES 7u
+#define X64_PROLOGUE_SAVE_BYTES 7u
/* ============================================================
* XImpl and friends. */
@@ -56,6 +59,7 @@ typedef struct XImpl {
u32 func_start;
u32 prologue_pos;
+ u32 prologue_nbytes;
MCLabel epilogue_label;
XSlot *slots;
@@ -76,6 +80,10 @@ typedef struct XImpl {
u32 used_cs_int_mask; /* SysV callee-saved GPRs used by this function */
u32 used_cs_fp_mask; /* reserved for ABIs with callee-saved FP regs */
+ u32 planned_cs_int_mask;
+ u32 planned_cs_fp_mask;
+ u8 has_planned_regs;
+ u8 pad1[3];
XScope *scopes;
u32 nscopes;
diff --git a/src/arch/x64/opt_coord.c b/src/arch/x64/opt_coord.c
@@ -95,9 +95,31 @@ static void x_reserve_hard_regs(CGTarget* t, RegClass cls,
}
}
+/* plan_hard_regs hook for x64: remember which callee-saved registers the
+ * optimizer intends to use in the next function.
+ *
+ * A register is recorded in the mask of its class (RC_INT or RC_FP) when
+ * x_is_caller_saved reports it as not caller-saved; other classes are
+ * ignored. The masks accumulate across calls until x_func_begin consumes and
+ * clears them. */
+static void x_plan_hard_regs(CGTarget* t, RegClass cls, const Reg* regs,
+                             u32 n) {
+  XImpl* a = impl_of(t);
+  /* Any call, even one with n == 0 or an unhandled class, marks the next
+   * function as planned. */
+  a->has_planned_regs = 1;
+  for (u32 i = 0; i < n; ++i) {
+    Reg r = regs[i];
+    /* Validate the register number before anything else: it guards the
+     * shift below and keeps x_is_caller_saved from being queried with a
+     * number that cannot be represented in the 32-bit masks. */
+    if (r >= 32u) continue;
+    switch (cls) {
+      case RC_INT:
+        if (!x_is_caller_saved(t, cls, r)) a->planned_cs_int_mask |= 1u << r;
+        break;
+      case RC_FP:
+        if (!x_is_caller_saved(t, cls, r)) a->planned_cs_fp_mask |= 1u << r;
+        break;
+      default:
+        break;
+    }
+  }
+}
+
void x_coord_vtable_init(CGTarget* t) {
t->get_allocable_regs = x_get_allocable_regs;
t->get_scratch_regs = x_get_scratch_regs;
t->is_caller_saved = x_is_caller_saved;
+ t->plan_hard_regs = x_plan_hard_regs; /* pre-func_begin plan; x_func_begin sizes the prologue placeholder from it */
t->reserve_hard_regs = x_reserve_hard_regs;
}
diff --git a/src/opt/opt.c b/src/opt/opt.c
@@ -188,6 +188,12 @@ static int w_is_caller_saved(CGTarget* t, RegClass cls, Reg r) {
return 0;
}
+/* plan_hard_regs hook of the opt wrapper target: pass the plan through to
+ * the wrapped backend unchanged. Does nothing when the wrapped backend does
+ * not implement the hook. */
+static void w_plan_hard_regs(CGTarget* t, RegClass cls, const Reg* regs,
+                             u32 n) {
+  CGTarget* inner = impl_of(t)->target;
+  if (!inner->plan_hard_regs) return;
+  inner->plan_hard_regs(inner, cls, regs, n);
+}
+
static void w_reserve_hard_regs(CGTarget* t, RegClass cls, const Reg* regs,
u32 n) {
CGTarget* wr = impl_of(t)->target;
@@ -1203,6 +1209,46 @@ static void replay_block(ReplayCtx* r, u32 b) {
}
}
+/* Append `r` to used[0..*nused) unless it is already present.
+ * `cap` is the capacity of `used`; once the list is full a new register is
+ * silently dropped and *nused is left unchanged. */
+static void add_unique_reg(Reg* used, u32* nused, u32 cap, Reg r) {
+  u32 count = *nused;
+  for (u32 k = 0; k < count; ++k) {
+    if (used[k] == r) return; /* already listed */
+  }
+  if (count >= cap) return; /* list full */
+  used[count] = r;
+  *nused = count + 1u;
+}
+
+/* Gather, without duplicates, the hard registers of class `cls` that
+ * function `f` touches, writing them to used[0..cap) and returning how many
+ * were written. Registers that do not fit within `cap` are dropped.
+ *
+ * Three sources are merged, in this order:
+ *   1. values whose allocation kind is OPT_ALLOC_HARD in this class;
+ *   2. the optimizer's scratch registers for the class;
+ *   3. clobbers named by IR_ASM_BLOCK instructions, resolved through
+ *      w->resolve_reg_name (skipped when the target has no such hook). */
+static u32 collect_opt_hard_regs(Func* f, CGTarget* w, RegClass cls,
+                                 Reg* used, u32 cap) {
+  u32 count = 0;
+
+  /* 1. Values pinned to a hard register. Value ids start at 1. */
+  for (Val v = 1; v < f->nvals; ++v) {
+    if (f->val_info[v].alloc_kind == OPT_ALLOC_HARD &&
+        f->val_info[v].cls == cls) {
+      add_unique_reg(used, &count, cap, f->val_info[v].hard_reg);
+    }
+  }
+
+  /* 2. Scratch registers; the per-class tables only have OPT_REG_CLASSES
+   * entries, so the class is range-checked before indexing. */
+  if ((u32)cls < OPT_REG_CLASSES) {
+    u32 nscratch = f->opt_scratch_reg_count[cls];
+    for (u32 s = 0; s < nscratch; ++s) {
+      add_unique_reg(used, &count, cap, f->opt_scratch_regs[cls][s]);
+    }
+  }
+
+  /* 3. Inline-asm clobbers need the target to translate register names. */
+  if (!w->resolve_reg_name) return count;
+
+  for (u32 b = 0; b < f->nblocks; ++b) {
+    Block* blk = &f->blocks[b];
+    for (u32 k = 0; k < blk->ninsts; ++k) {
+      Inst* inst = &blk->insts[k];
+      IRAsmAux* aux;
+      if ((IROp)inst->op != IR_ASM_BLOCK) continue;
+      aux = (IRAsmAux*)inst->extra.aux;
+      if (!aux) continue;
+      for (u32 j = 0; j < aux->nclob; ++j) {
+        Reg reg;
+        RegClass reg_cls;
+        /* A non-zero result means the name did not resolve; skip it. */
+        if (w->resolve_reg_name(w, aux->clobbers[j], &reg, &reg_cls) != 0)
+          continue;
+        if (reg_cls == cls) add_unique_reg(used, &count, cap, reg);
+      }
+    }
+  }
+  return count;
+}
+
static void replay_func_to(Compiler* c, Func* f, CGTarget* w, int identity) {
ReplayCtx r;
r.c = c;
@@ -1223,6 +1269,15 @@ static void replay_func_to(Compiler* c, Func* f, CGTarget* w, int identity) {
for (u32 i = 0; i <= f->nscopes; ++i) r.scope_map[i] = CG_SCOPE_NONE;
r.block_label_placed = arena_zarray(f->arena, u8, nb);
+ if (identity && w->plan_hard_regs) {
+ for (u32 cidx = 0; cidx < OPT_REG_CLASSES; ++cidx) {
+ Reg used[OPT_MAX_HARD_REGS];
+ u32 nused = collect_opt_hard_regs(f, w, (RegClass)cidx, used,
+ OPT_MAX_HARD_REGS);
+ w->plan_hard_regs(w, (RegClass)cidx, used, nused);
+ }
+ }
+
/* func_begin with the recorded descriptor. The desc.params[].slot
* fields are wrapper IR slot ids; aarch64's func_begin doesn't
* dereference them so we don't translate. */
@@ -1282,31 +1337,8 @@ static void replay_func_to(Compiler* c, Func* f, CGTarget* w, int identity) {
if (r.identity_regs && w->reserve_hard_regs) {
for (u32 c = 0; c < OPT_REG_CLASSES; ++c) {
Reg used[OPT_MAX_HARD_REGS];
- u32 nused = 0;
- for (Val v = 1; v < f->nvals; ++v) {
- if (f->val_info[v].alloc_kind != OPT_ALLOC_HARD) continue;
- if (f->val_info[v].cls != c) continue;
- Reg hr = f->val_info[v].hard_reg;
- int already = 0;
- for (u32 i = 0; i < nused; ++i) {
- if (used[i] == hr) {
- already = 1;
- break;
- }
- }
- if (!already) used[nused++] = hr;
- }
- for (u32 i = 0; i < f->opt_scratch_reg_count[c]; ++i) {
- Reg hr = f->opt_scratch_regs[c][i];
- int already = 0;
- for (u32 j = 0; j < nused; ++j) {
- if (used[j] == hr) {
- already = 1;
- break;
- }
- }
- if (!already && nused < OPT_MAX_HARD_REGS) used[nused++] = hr;
- }
+ u32 nused = collect_opt_hard_regs(f, w, (RegClass)c, used,
+ OPT_MAX_HARD_REGS);
if (nused) w->reserve_hard_regs(w, (RegClass)c, used, nused);
}
} else if (!r.identity_regs && w->reserve_hard_regs) {
@@ -1528,6 +1560,7 @@ CGTarget* opt_cgtarget_new(Compiler* c, CGTarget* target, int level) {
t->get_allocable_regs = w_get_allocable_regs;
t->get_scratch_regs = w_get_scratch_regs;
t->is_caller_saved = w_is_caller_saved;
+ t->plan_hard_regs = w_plan_hard_regs;
t->reserve_hard_regs = w_reserve_hard_regs;
t->label_new = w_label_new;
diff --git a/test/opt/opt_test.c b/test/opt/opt_test.c
@@ -366,6 +366,9 @@ typedef struct MockCGTarget {
const Reg* scratch[OPT_REG_CLASSES];
u32 scratch_n[OPT_REG_CLASSES];
u32 caller_saved_mask[OPT_REG_CLASSES];
+ int plan_calls[OPT_REG_CLASSES];
+ int plan_regs[OPT_REG_CLASSES];
+ int func_begin_plan_calls;
int reserve_calls[OPT_REG_CLASSES];
int load_imm_calls;
Reg last_load_imm_dst;
@@ -376,7 +379,10 @@ typedef struct MockCGTarget {
} MockCGTarget;
static void mock_func_begin(CGTarget* t, const CGFuncDesc* d) {
- (void)t;
+ MockCGTarget* m = (MockCGTarget*)t;
+ int n = 0; /* plan_hard_regs calls seen so far, summed over all classes */
+ for (u32 i = 0; i < OPT_REG_CLASSES; ++i) n += m->plan_calls[i];
+ m->func_begin_plan_calls = n; /* snapshot: lets a test check that planning ran before func_begin */
(void)d;
}
static void mock_func_end(CGTarget* t) { (void)t; }
@@ -408,6 +414,16 @@ static void mock_reserve_hard_regs(CGTarget* t, RegClass cls, const Reg* regs,
(void)regs;
}
+/* Mock plan_hard_regs hook: for an in-range class, count the call and add
+ * `n` to the running total of planned registers. The register list itself
+ * is not inspected. */
+static void mock_plan_hard_regs(CGTarget* t, RegClass cls, const Reg* regs,
+                                u32 n) {
+  MockCGTarget* mock = (MockCGTarget*)t;
+  (void)regs;
+  if (!(cls < OPT_REG_CLASSES)) return;
+  mock->plan_calls[cls] += 1;
+  mock->plan_regs[cls] += (int)n;
+}
+
static int mock_resolve_reg_name(CGTarget* t, Sym name, Reg* out,
RegClass* cls_out) {
size_t len = 0;
@@ -500,6 +516,7 @@ static void mock_init(MockCGTarget* m, Compiler* c) {
m->base.get_allocable_regs = mock_get_allocable_regs;
m->base.get_scratch_regs = mock_get_scratch_regs;
m->base.is_caller_saved = mock_is_caller_saved;
+ m->base.plan_hard_regs = mock_plan_hard_regs;
m->base.reserve_hard_regs = mock_reserve_hard_regs;
m->base.resolve_reg_name = mock_resolve_reg_name;
}
@@ -2012,6 +2029,11 @@ static void opt_emit_no_virtual_alloc(void) {
opt->ret(opt, &retv);
opt->func_end(opt);
+ EXPECT(mock.func_begin_plan_calls == (int)OPT_REG_CLASSES,
+ "opt_emit should plan hard regs before backend func_begin");
+ EXPECT(mock.plan_regs[RC_INT] == 3,
+ "opt_emit should plan the hard pool reg and 2 scratch regs, got %d",
+ mock.plan_regs[RC_INT]);
EXPECT(mock.reserve_calls[RC_INT] == 3,
"opt_emit should reserve the hard pool reg and 2 scratch regs, got %d",
mock.reserve_calls[RC_INT]);