kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 2054863eaad4a1cb13b851e90f6e9630a114c398
parent dbf13315d00e0f39eb2202d426bff83f68f8a0b3
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Fri, 15 May 2026 06:45:39 -0700

Size opt prologue placeholders from planned regs

Diffstat:
M src/arch/aa64/emit.c | 43 +++++++++++++++++++++++++++++++++----------
M src/arch/aa64/internal.h | 8 +++++++-
M src/arch/aa64/opt_coord.c | 20 ++++++++++++++++++++
M src/arch/arch.h | 6 ++++++
M src/arch/rv64/emit.c | 46 +++++++++++++++++++++++++++++++++++-----------
M src/arch/rv64/internal.h | 8 +++++++-
M src/arch/rv64/opt_coord.c | 20 ++++++++++++++++++++
M src/arch/x64/emit.c | 39 +++++++++++++++++++++++++++++++--------
M src/arch/x64/internal.h | 8 ++++++++
M src/arch/x64/opt_coord.c | 22 ++++++++++++++++++++++
M src/opt/opt.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------
M test/opt/opt_test.c | 24 +++++++++++++++++++++++-
12 files changed, 270 insertions(+), 57 deletions(-)

diff --git a/src/arch/aa64/emit.c b/src/arch/aa64/emit.c @@ -64,6 +64,22 @@ static u32 collect_mask_regs(u32 mask, u32 first, u32 last, u32 *out) { return n; } +static u32 count_mask_regs(u32 mask, u32 first, u32 last) { + u32 n = 0; + for (u32 r = first; r <= last; ++r) { + if (mask & (1u << r)) ++n; + } + return n; +} + +static u32 aa_planned_prologue_words(const AAImpl *a) { + u32 n = AA_PROLOGUE_FRAME_WORDS; + if (a->has_sret) ++n; + n += count_mask_regs(a->planned_cs_int_mask, 19u, 28u); + n += count_mask_regs(a->planned_cs_fp_mask, 8u, 15u); + return n ? n : 1u; +} + /* ============================================================ * Low-level emission * ============================================================ */ @@ -165,8 +181,13 @@ void aa_func_begin(CGTarget *t, const CGFuncDesc *fd) { a->has_sret = (fd->abi && fd->abi->has_sret) ? 1 : 0; a->cum_off = 0; a->max_outgoing = 0; - a->used_cs_int_mask = 0; - a->used_cs_fp_mask = 0; + a->used_cs_int_mask = a->has_planned_regs ? a->planned_cs_int_mask : 0; + a->used_cs_fp_mask = a->has_planned_regs ? a->planned_cs_fp_mask : 0; + a->prologue_words = a->has_planned_regs ? aa_planned_prologue_words(a) + : AA_PROLOGUE_WORDS; + a->planned_cs_int_mask = 0; + a->planned_cs_fp_mask = 0; + a->has_planned_regs = 0; a->nslots = 0; a->nscopes = 0; a->has_alloca = 0; @@ -180,7 +201,7 @@ void aa_func_begin(CGTarget *t, const CGFuncDesc *fd) { mc->cfi_startproc(mc); a->prologue_pos = mc->pos(mc); - for (u32 i = 0; i < AA_PROLOGUE_WORDS; ++i) + for (u32 i = 0; i < a->prologue_words; ++i) aa64_emit32(mc, AA64_NOP); if (a->has_sret) { @@ -277,7 +298,9 @@ void aa_func_end(CGTarget *t) { u32 sec = a->fd->text_section_id; u32 words[AA_PROLOGUE_WORDS]; - for (u32 i = 0; i < AA_PROLOGUE_WORDS; ++i) + u32 prologue_words = a->prologue_words ? 
a->prologue_words + : AA_PROLOGUE_WORDS; + for (u32 i = 0; i < prologue_words; ++i) words[i] = AA64_NOP; u32 wi = 0; @@ -286,7 +309,7 @@ void aa_func_end(CGTarget *t) { } else if ((frame_size & 0xfff) == 0 && (frame_size >> 12) <= 0xfff) { words[wi++] = aa64_sub_imm(1, 31, 31, frame_size >> 12, 1); } else { - if (wi + 2 > AA_PROLOGUE_WORDS) { + if (wi + 2 > prologue_words) { compiler_panic(t->c, a->loc, "aarch64: prologue overflow for frame_size %u", frame_size); @@ -299,20 +322,20 @@ void aa_func_end(CGTarget *t) { if (a->has_sret && a->sret_ptr_slot != FRAME_SLOT_NONE) { AASlot *s = aa64_slot_get(a, a->sret_ptr_slot); if (s) { - if (wi >= AA_PROLOGUE_WORDS) + if (wi >= prologue_words) goto overflow; words[wi++] = aa64_stur(3, 8, 29, -(i32)s->off); } } for (u32 i = 0; i < n_int_saves; ++i) { u32 r0 = int_regs[i]; - if (wi >= AA_PROLOGUE_WORDS) + if (wi >= prologue_words) goto overflow; words[wi++] = aa64_str_uimm(3, r0, 31, int_save_off + i * 8u); } for (u32 i = 0; i < n_fp_saves; ++i) { u32 r0 = fp_regs[i]; - if (wi >= AA_PROLOGUE_WORDS) + if (wi >= prologue_words) goto overflow; words[wi++] = aa64_str_fp_uimm(3, r0, 31, fp_save_off + i * 8u); } @@ -321,10 +344,10 @@ void aa_func_end(CGTarget *t) { compiler_panic( t->c, a->loc, "aarch64: prologue placeholder too small (used %u of %u words)", wi, - AA_PROLOGUE_WORDS); + prologue_words); } - for (u32 i = 0; i < AA_PROLOGUE_WORDS; ++i) { + for (u32 i = 0; i < prologue_words; ++i) { aa64_patch32(obj, sec, pos + i * 4u, words[i]); } diff --git a/src/arch/aa64/internal.h b/src/arch/aa64/internal.h @@ -202,7 +202,8 @@ static inline u32 aa64_bfm(u32 sf, u32 Rd, u32 Rn, u32 immr, u32 imms) { * ============================================================ */ #define AA_PROLOGUE_WORDS \ - 22u /* worst case: sub sp + stp/add fp + sret + 5 int + 8 fp saves */ + 23u /* worst case: sub sp + stp/add fp + sret + 10 int + 8 fp saves */ +#define AA_PROLOGUE_FRAME_WORDS 4u /* worst-case frame adjust + stp/add fp */ typedef struct 
AASlot { u32 off; @@ -229,6 +230,7 @@ typedef struct AAImpl { u32 func_start; u32 prologue_pos; + u32 prologue_words; MCLabel epilogue_label; AASlot* slots; @@ -245,6 +247,10 @@ typedef struct AAImpl { u32 used_cs_int_mask; /* bit reg set when x19-x28 must be preserved */ u32 used_cs_fp_mask; /* bit reg set when d8-d15 must be preserved */ + u32 planned_cs_int_mask; + u32 planned_cs_fp_mask; + u8 has_planned_regs; + u8 pad1[3]; AAScope* scopes; u32 nscopes; diff --git a/src/arch/aa64/opt_coord.c b/src/arch/aa64/opt_coord.c @@ -88,9 +88,29 @@ static void aa_reserve_hard_regs(CGTarget* t, RegClass cls, } } +static void aa_plan_hard_regs(CGTarget* t, RegClass cls, const Reg* regs, + u32 n) { + AAImpl* a = impl_of(t); + a->has_planned_regs = 1; + for (u32 i = 0; i < n; ++i) { + Reg r = regs[i]; + switch (cls) { + case RC_INT: + if (r >= 19u && r <= 28u) a->planned_cs_int_mask |= 1u << r; + break; + case RC_FP: + if (r >= 8u && r <= 15u) a->planned_cs_fp_mask |= 1u << r; + break; + default: + break; + } + } +} + void aa_coord_vtable_init(CGTarget* t) { t->get_allocable_regs = aa_get_allocable_regs; t->get_scratch_regs = aa_get_scratch_regs; t->is_caller_saved = aa_is_caller_saved; + t->plan_hard_regs = aa_plan_hard_regs; t->reserve_hard_regs = aa_reserve_hard_regs; } diff --git a/src/arch/arch.h b/src/arch/arch.h @@ -516,6 +516,12 @@ struct CGTarget { /* Return non-zero if `reg` in `cls` is caller-saved on this target. */ int (*is_caller_saved)(CGTarget*, RegClass, Reg); + /* Tell the backend which hard registers opt is going to assign in the next + * function before func_begin reserves its prologue placeholder. Backends use + * this only as a sizing hint; reserve_hard_regs remains the authoritative + * per-function preservation hook. */ + void (*plan_hard_regs)(CGTarget*, RegClass, const Reg* regs, u32 n); + /* Tell the backend which hard registers opt actually assigned in the * current function. Call after the function body is emitted, before * func_end. 
The backend updates prologue/epilogue bookkeeping so it diff --git a/src/arch/rv64/emit.c b/src/arch/rv64/emit.c @@ -12,6 +12,23 @@ static u32 collect_mask_regs(u32 mask, u32 first, u32 last, u32 *out) { return n; } +static u32 count_mask_regs(u32 mask, u32 first, u32 last) { + u32 n = 0; + for (u32 r = first; r <= last; ++r) { + if (mask & (1u << r)) ++n; + } + return n; +} + +static u32 rv_planned_prologue_words(const RImpl *a) { + u32 n = RV_PROLOGUE_FRAME_WORDS; + if (a->has_sret) ++n; + if (a->is_variadic) n += 8u; + n += count_mask_regs(a->planned_cs_int_mask, 18u, 27u); + n += count_mask_regs(a->planned_cs_fp_mask, 18u, 27u); + return n ? n : 1u; +} + void rv64_emit32(MCEmitter *mc, u32 word) { u32 ofs = obj_pos(mc->obj, mc->section_id); u8 b[4]; @@ -121,16 +138,21 @@ void rv_func_begin(CGTarget *t, const CGFuncDesc *fd) { a->next_param_fp = 0; a->next_param_stack = 0; a->has_sret = (fd->abi && fd->abi->has_sret) ? 1 : 0; + a->is_variadic = (fd->abi && fd->abi->variadic) ? 1 : 0; a->cum_off = 0; a->max_outgoing = 0; a->fp_pair_off = 0; - a->used_cs_int_mask = 0; - a->used_cs_fp_mask = 0; + a->used_cs_int_mask = a->has_planned_regs ? a->planned_cs_int_mask : 0; + a->used_cs_fp_mask = a->has_planned_regs ? a->planned_cs_fp_mask : 0; + a->prologue_words = a->has_planned_regs ? rv_planned_prologue_words(a) + : RV_PROLOGUE_WORDS; + a->planned_cs_int_mask = 0; + a->planned_cs_fp_mask = 0; + a->has_planned_regs = 0; a->nslots = 0; a->nscopes = 0; a->has_alloca = 0; a->nadd_patches = 0; - a->is_variadic = (fd->abi && fd->abi->variadic) ? 1 : 0; a->gp_save_slot = FRAME_SLOT_NONE; a->sret_ptr_slot = FRAME_SLOT_NONE; a->epilogue_label = mc->label_new(mc); @@ -139,7 +161,7 @@ void rv_func_begin(CGTarget *t, const CGFuncDesc *fd) { /* Reserve a NOP-filled prologue placeholder; func_end patches it. 
*/ a->prologue_pos = mc->pos(mc); - for (u32 i = 0; i < RV_PROLOGUE_WORDS; ++i) + for (u32 i = 0; i < a->prologue_words; ++i) rv64_emit32(mc, RV_NOP); /* For an sret return, the caller passed the destination pointer in @@ -253,7 +275,9 @@ void rv_func_end(CGTarget *t) { /* Now patch the prologue placeholder. */ u32 pos = a->prologue_pos; u32 words[RV_PROLOGUE_WORDS]; - for (u32 i = 0; i < RV_PROLOGUE_WORDS; ++i) + u32 prologue_words = a->prologue_words ? a->prologue_words + : RV_PROLOGUE_WORDS; + for (u32 i = 0; i < prologue_words; ++i) words[i] = RV_NOP; u32 wi = 0; @@ -289,7 +313,7 @@ void rv_func_end(CGTarget *t) { if (a->has_sret && a->sret_ptr_slot != FRAME_SLOT_NONE) { RvSlot *s = rv64_slot_get(a, a->sret_ptr_slot); if (s) { - if (wi >= RV_PROLOGUE_WORDS) + if (wi >= prologue_words) goto overflow; words[wi++] = rv_sd(RV_A0, RV_S0, -(i32)s->off); } @@ -300,7 +324,7 @@ void rv_func_end(CGTarget *t) { * == caller's first stack arg. */ if (a->is_variadic) { for (u32 i = a->next_param_int; i < 8; ++i) { - if (wi >= RV_PROLOGUE_WORDS) + if (wi >= prologue_words) goto overflow; words[wi++] = rv_sd(RV_A0 + i, RV_S0, 16 + (i32)i * 8); } @@ -309,7 +333,7 @@ void rv_func_end(CGTarget *t) { for (u32 i = 0; i < n_int_saves; ++i) { u32 r = int_regs[i]; i32 off = int_save_base - 8 * (i32)i; - if (wi >= RV_PROLOGUE_WORDS) + if (wi >= prologue_words) goto overflow; words[wi++] = rv_sd(r, RV_S0, off); } @@ -317,7 +341,7 @@ void rv_func_end(CGTarget *t) { for (u32 i = 0; i < n_fp_saves; ++i) { u32 r = fp_regs[i]; i32 off = fp_save_base - 8 * (i32)i; - if (wi >= RV_PROLOGUE_WORDS) + if (wi >= prologue_words) goto overflow; words[wi++] = rv_fsd(r, RV_S0, off); } @@ -325,10 +349,10 @@ void rv_func_end(CGTarget *t) { overflow: compiler_panic(t->c, a->loc, "rv64: prologue placeholder too small (used %u of %u)", wi, - RV_PROLOGUE_WORDS); + prologue_words); } - for (u32 i = 0; i < RV_PROLOGUE_WORDS; ++i) { + for (u32 i = 0; i < prologue_words; ++i) { rv64_patch32(obj, sec, pos + i * 
4u, words[i]); } diff --git a/src/arch/rv64/internal.h b/src/arch/rv64/internal.h @@ -11,7 +11,8 @@ #include "core/pool.h" #include "obj/obj.h" -#define RV_PROLOGUE_WORDS 32u +#define RV_PROLOGUE_WORDS 35u +#define RV_PROLOGUE_FRAME_WORDS 6u /* worst-case sp adjust + s0/ra + set s0 */ /* ---- RvSlot / RvScope ---- */ typedef struct RvSlot { @@ -40,6 +41,7 @@ typedef struct RImpl { u32 func_start; u32 prologue_pos; + u32 prologue_words; MCLabel epilogue_label; RvSlot *slots; @@ -57,6 +59,10 @@ typedef struct RImpl { u32 used_cs_int_mask; /* bit reg set for s2-s11 */ u32 used_cs_fp_mask; /* bit reg set for fs2-fs11 */ + u32 planned_cs_int_mask; + u32 planned_cs_fp_mask; + u8 has_planned_regs; + u8 pad1[3]; RvScope *scopes; u32 nscopes; diff --git a/src/arch/rv64/opt_coord.c b/src/arch/rv64/opt_coord.c @@ -86,9 +86,29 @@ static void rv_reserve_hard_regs(CGTarget* t, RegClass cls, } } +static void rv_plan_hard_regs(CGTarget* t, RegClass cls, const Reg* regs, + u32 n) { + RImpl* a = impl_of(t); + a->has_planned_regs = 1; + for (u32 i = 0; i < n; ++i) { + Reg r = regs[i]; + switch (cls) { + case RC_INT: + if (r >= 18u && r <= 27u) a->planned_cs_int_mask |= 1u << r; + break; + case RC_FP: + if (r >= 18u && r <= 27u) a->planned_cs_fp_mask |= 1u << r; + break; + default: + break; + } + } +} + void rv_coord_vtable_init(CGTarget* t) { t->get_allocable_regs = rv_get_allocable_regs; t->get_scratch_regs = rv_get_scratch_regs; t->is_caller_saved = rv_is_caller_saved; + t->plan_hard_regs = rv_plan_hard_regs; t->reserve_hard_regs = rv_reserve_hard_regs; } diff --git a/src/arch/x64/emit.c b/src/arch/x64/emit.c @@ -496,6 +496,22 @@ void emit_sse_rr_w(MCEmitter *mc, u8 prefix, u8 opcode, int w, u32 dst, /* ============================================================ * Function lifecycle */ +static u32 count_x64_cs_int(u32 mask) { + u32 n = 0; + for (u32 i = 0; i < 5u; ++i) { + Reg r = g_int_order[i]; + if (mask & (1u << r)) ++n; + } + return n; +} + +static u32 
x64_planned_prologue_bytes(const XImpl *a) { + u32 n = X64_PROLOGUE_BASE_BYTES; + if (a->has_sret) n += X64_PROLOGUE_SRET_BYTES; + n += count_x64_cs_int(a->planned_cs_int_mask) * X64_PROLOGUE_SAVE_BYTES; + return n ? n : 1u; +} + void x_func_begin(CGTarget *t, const CGFuncDesc *fd) { XImpl *a = impl_of(t); MCEmitter *mc = t->mc; @@ -513,8 +529,13 @@ void x_func_begin(CGTarget *t, const CGFuncDesc *fd) { a->is_variadic = (fd->abi && fd->abi->variadic) ? 1 : 0; a->cum_off = 0; a->max_outgoing = 0; - a->used_cs_int_mask = 0; - a->used_cs_fp_mask = 0; + a->used_cs_int_mask = a->has_planned_regs ? a->planned_cs_int_mask : 0; + a->used_cs_fp_mask = a->has_planned_regs ? a->planned_cs_fp_mask : 0; + a->prologue_nbytes = a->has_planned_regs ? x64_planned_prologue_bytes(a) + : X64_PROLOGUE_BYTES; + a->planned_cs_int_mask = 0; + a->planned_cs_fp_mask = 0; + a->has_planned_regs = 0; a->nslots = 0; a->nscopes = 0; a->nalloca_patches = 0; @@ -526,7 +547,7 @@ void x_func_begin(CGTarget *t, const CGFuncDesc *fd) { /* Reserve a fixed-size prologue placeholder filled with NOPs. */ a->prologue_pos = mc->pos(mc); - for (u32 i = 0; i < X64_PROLOGUE_BYTES; ++i) + for (u32 i = 0; i < a->prologue_nbytes; ++i) emit1(mc, 0x90); /* sret: rdi at entry holds the destination pointer. Spill it to a @@ -616,7 +637,9 @@ void x_func_end(CGTarget *t) { /* Patch prologue placeholder. */ u8 buf[X64_PROLOGUE_BYTES]; - for (u32 i = 0; i < X64_PROLOGUE_BYTES; ++i) + u32 prologue_nbytes = a->prologue_nbytes ? 
a->prologue_nbytes + : X64_PROLOGUE_BYTES; + for (u32 i = 0; i < prologue_nbytes; ++i) buf[i] = 0x90; u32 wi = 0; @@ -640,7 +663,7 @@ void x_func_end(CGTarget *t) { XSlot *s = x64_slot_get(a, a->sret_ptr_slot); if (s) { i32 off = -(i32)s->off; - if (wi + 7 > X64_PROLOGUE_BYTES) + if (wi + 7 > prologue_nbytes) goto overflow; buf[wi++] = X64_REX_BASE | X64_REX_W; buf[wi++] = 0x89; @@ -656,7 +679,7 @@ void x_func_end(CGTarget *t) { for (u32 i = 0; i < cs_used; ++i) { u32 reg = cs_regs[i]; i32 off = -(i32)a->cum_off - (i32)(i + 1) * 8; - if (wi + 7 > X64_PROLOGUE_BYTES) + if (wi + 7 > prologue_nbytes) goto overflow; buf[wi++] = (u8)(X64_REX_BASE | X64_REX_W | ((reg & 8) ? X64_REX_R : 0)); buf[wi++] = 0x89; @@ -671,10 +694,10 @@ void x_func_end(CGTarget *t) { overflow: compiler_panic(t->c, a->loc, "x64: prologue placeholder overflow (%u of %u bytes)", wi, - X64_PROLOGUE_BYTES); + prologue_nbytes); } obj_patch(t->obj, a->fd->text_section_id, a->prologue_pos, buf, - X64_PROLOGUE_BYTES); + prologue_nbytes); /* Patch each alloca's `lea dst, [rsp + 0]` disp32 with the final * max_outgoing (already 16-aligned via the `(stack_off+15)&~15` round diff --git a/src/arch/x64/internal.h b/src/arch/x64/internal.h @@ -20,6 +20,9 @@ #include "obj/obj.h" #define X64_PROLOGUE_BYTES 96u +#define X64_PROLOGUE_BASE_BYTES 11u +#define X64_PROLOGUE_SRET_BYTES 7u +#define X64_PROLOGUE_SAVE_BYTES 7u /* ============================================================ * XImpl and friends. 
*/ @@ -56,6 +59,7 @@ typedef struct XImpl { u32 func_start; u32 prologue_pos; + u32 prologue_nbytes; MCLabel epilogue_label; XSlot *slots; @@ -76,6 +80,10 @@ typedef struct XImpl { u32 used_cs_int_mask; /* SysV callee-saved GPRs used by this function */ u32 used_cs_fp_mask; /* reserved for ABIs with callee-saved FP regs */ + u32 planned_cs_int_mask; + u32 planned_cs_fp_mask; + u8 has_planned_regs; + u8 pad1[3]; XScope *scopes; u32 nscopes; diff --git a/src/arch/x64/opt_coord.c b/src/arch/x64/opt_coord.c @@ -95,9 +95,31 @@ static void x_reserve_hard_regs(CGTarget* t, RegClass cls, } } +static void x_plan_hard_regs(CGTarget* t, RegClass cls, const Reg* regs, + u32 n) { + XImpl* a = impl_of(t); + a->has_planned_regs = 1; + for (u32 i = 0; i < n; ++i) { + Reg r = regs[i]; + switch (cls) { + case RC_INT: + if (!x_is_caller_saved(t, cls, r) && r < 32u) + a->planned_cs_int_mask |= 1u << r; + break; + case RC_FP: + if (!x_is_caller_saved(t, cls, r) && r < 32u) + a->planned_cs_fp_mask |= 1u << r; + break; + default: + break; + } + } +} + void x_coord_vtable_init(CGTarget* t) { t->get_allocable_regs = x_get_allocable_regs; t->get_scratch_regs = x_get_scratch_regs; t->is_caller_saved = x_is_caller_saved; + t->plan_hard_regs = x_plan_hard_regs; t->reserve_hard_regs = x_reserve_hard_regs; } diff --git a/src/opt/opt.c b/src/opt/opt.c @@ -188,6 +188,12 @@ static int w_is_caller_saved(CGTarget* t, RegClass cls, Reg r) { return 0; } +static void w_plan_hard_regs(CGTarget* t, RegClass cls, const Reg* regs, + u32 n) { + CGTarget* wr = impl_of(t)->target; + if (wr->plan_hard_regs) wr->plan_hard_regs(wr, cls, regs, n); +} + static void w_reserve_hard_regs(CGTarget* t, RegClass cls, const Reg* regs, u32 n) { CGTarget* wr = impl_of(t)->target; @@ -1203,6 +1209,46 @@ static void replay_block(ReplayCtx* r, u32 b) { } } +static void add_unique_reg(Reg* used, u32* nused, u32 cap, Reg r) { + for (u32 i = 0; i < *nused; ++i) { + if (used[i] == r) return; + } + if (*nused < cap) 
used[(*nused)++] = r; +} + +static u32 collect_opt_hard_regs(Func* f, CGTarget* w, RegClass cls, + Reg* used, u32 cap) { + u32 nused = 0; + for (Val v = 1; v < f->nvals; ++v) { + if (f->val_info[v].alloc_kind != OPT_ALLOC_HARD) continue; + if (f->val_info[v].cls != cls) continue; + add_unique_reg(used, &nused, cap, f->val_info[v].hard_reg); + } + if ((u32)cls < OPT_REG_CLASSES) { + for (u32 i = 0; i < f->opt_scratch_reg_count[cls]; ++i) + add_unique_reg(used, &nused, cap, f->opt_scratch_regs[cls][i]); + } + if (w->resolve_reg_name) { + for (u32 b = 0; b < f->nblocks; ++b) { + Block* bl = &f->blocks[b]; + for (u32 i = 0; i < bl->ninsts; ++i) { + Inst* in = &bl->insts[i]; + if ((IROp)in->op != IR_ASM_BLOCK) continue; + IRAsmAux* aux = (IRAsmAux*)in->extra.aux; + if (!aux) continue; + for (u32 j = 0; j < aux->nclob; ++j) { + Reg r; + RegClass rcls; + if (w->resolve_reg_name(w, aux->clobbers[j], &r, &rcls) != 0) + continue; + if (rcls == cls) add_unique_reg(used, &nused, cap, r); + } + } + } + } + return nused; +} + static void replay_func_to(Compiler* c, Func* f, CGTarget* w, int identity) { ReplayCtx r; r.c = c; @@ -1223,6 +1269,15 @@ static void replay_func_to(Compiler* c, Func* f, CGTarget* w, int identity) { for (u32 i = 0; i <= f->nscopes; ++i) r.scope_map[i] = CG_SCOPE_NONE; r.block_label_placed = arena_zarray(f->arena, u8, nb); + if (identity && w->plan_hard_regs) { + for (u32 cidx = 0; cidx < OPT_REG_CLASSES; ++cidx) { + Reg used[OPT_MAX_HARD_REGS]; + u32 nused = collect_opt_hard_regs(f, w, (RegClass)cidx, used, + OPT_MAX_HARD_REGS); + w->plan_hard_regs(w, (RegClass)cidx, used, nused); + } + } + /* func_begin with the recorded descriptor. The desc.params[].slot * fields are wrapper IR slot ids; aarch64's func_begin doesn't * dereference them so we don't translate. 
*/ @@ -1282,31 +1337,8 @@ static void replay_func_to(Compiler* c, Func* f, CGTarget* w, int identity) { if (r.identity_regs && w->reserve_hard_regs) { for (u32 c = 0; c < OPT_REG_CLASSES; ++c) { Reg used[OPT_MAX_HARD_REGS]; - u32 nused = 0; - for (Val v = 1; v < f->nvals; ++v) { - if (f->val_info[v].alloc_kind != OPT_ALLOC_HARD) continue; - if (f->val_info[v].cls != c) continue; - Reg hr = f->val_info[v].hard_reg; - int already = 0; - for (u32 i = 0; i < nused; ++i) { - if (used[i] == hr) { - already = 1; - break; - } - } - if (!already) used[nused++] = hr; - } - for (u32 i = 0; i < f->opt_scratch_reg_count[c]; ++i) { - Reg hr = f->opt_scratch_regs[c][i]; - int already = 0; - for (u32 j = 0; j < nused; ++j) { - if (used[j] == hr) { - already = 1; - break; - } - } - if (!already && nused < OPT_MAX_HARD_REGS) used[nused++] = hr; - } + u32 nused = collect_opt_hard_regs(f, w, (RegClass)c, used, + OPT_MAX_HARD_REGS); if (nused) w->reserve_hard_regs(w, (RegClass)c, used, nused); } } else if (!r.identity_regs && w->reserve_hard_regs) { @@ -1528,6 +1560,7 @@ CGTarget* opt_cgtarget_new(Compiler* c, CGTarget* target, int level) { t->get_allocable_regs = w_get_allocable_regs; t->get_scratch_regs = w_get_scratch_regs; t->is_caller_saved = w_is_caller_saved; + t->plan_hard_regs = w_plan_hard_regs; t->reserve_hard_regs = w_reserve_hard_regs; t->label_new = w_label_new; diff --git a/test/opt/opt_test.c b/test/opt/opt_test.c @@ -366,6 +366,9 @@ typedef struct MockCGTarget { const Reg* scratch[OPT_REG_CLASSES]; u32 scratch_n[OPT_REG_CLASSES]; u32 caller_saved_mask[OPT_REG_CLASSES]; + int plan_calls[OPT_REG_CLASSES]; + int plan_regs[OPT_REG_CLASSES]; + int func_begin_plan_calls; int reserve_calls[OPT_REG_CLASSES]; int load_imm_calls; Reg last_load_imm_dst; @@ -376,7 +379,10 @@ typedef struct MockCGTarget { } MockCGTarget; static void mock_func_begin(CGTarget* t, const CGFuncDesc* d) { - (void)t; + MockCGTarget* m = (MockCGTarget*)t; + int n = 0; + for (u32 i = 0; i < 
OPT_REG_CLASSES; ++i) n += m->plan_calls[i]; + m->func_begin_plan_calls = n; (void)d; } static void mock_func_end(CGTarget* t) { (void)t; } @@ -408,6 +414,16 @@ static void mock_reserve_hard_regs(CGTarget* t, RegClass cls, const Reg* regs, (void)regs; } +static void mock_plan_hard_regs(CGTarget* t, RegClass cls, const Reg* regs, + u32 n) { + MockCGTarget* m = (MockCGTarget*)t; + if (cls < OPT_REG_CLASSES) { + m->plan_calls[cls]++; + m->plan_regs[cls] += (int)n; + } + (void)regs; +} + static int mock_resolve_reg_name(CGTarget* t, Sym name, Reg* out, RegClass* cls_out) { size_t len = 0; @@ -500,6 +516,7 @@ static void mock_init(MockCGTarget* m, Compiler* c) { m->base.get_allocable_regs = mock_get_allocable_regs; m->base.get_scratch_regs = mock_get_scratch_regs; m->base.is_caller_saved = mock_is_caller_saved; + m->base.plan_hard_regs = mock_plan_hard_regs; m->base.reserve_hard_regs = mock_reserve_hard_regs; m->base.resolve_reg_name = mock_resolve_reg_name; } @@ -2012,6 +2029,11 @@ static void opt_emit_no_virtual_alloc(void) { opt->ret(opt, &retv); opt->func_end(opt); + EXPECT(mock.func_begin_plan_calls == (int)OPT_REG_CLASSES, + "opt_emit should plan hard regs before backend func_begin"); + EXPECT(mock.plan_regs[RC_INT] == 3, + "opt_emit should plan the hard pool reg and 2 scratch regs, got %d", + mock.plan_regs[RC_INT]); EXPECT(mock.reserve_calls[RC_INT] == 3, "opt_emit should reserve the hard pool reg and 2 scratch regs, got %d", mock.reserve_calls[RC_INT]);