commit 7fce439d102fa3b36bdd60442dfb084b28261403
parent 622485a9c40384eb68d2a018fb5aa7279a169343
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Fri, 29 May 2026 08:56:07 -0700
Add shared NativeFrame module; migrate aa64 onto it
New src/cg/native_frame.{h,c} centralizes the frame-layout bookkeeping that was
duplicated byte-for-byte across the native backends: the slot arena (table +
cumulative offset + alloc/lookup + frame-final gate), max-outgoing tracking, and
the callee-save-set derivation from the optimizer's per-class used-masks (with an
alloc_slots flag so aa64 can home each save in a frame slot while rv64/x64 keep
their compute-below-locals offsets). native_frame_va_save_bytes derives the
vararg register-save-area size from the target ABI's va_list layout, so the
per-arch magic numbers all come from one ABI-driven query.
aa64 (the -O0+-O1 reference) now embeds a NativeFrame and delegates frame_slot,
slot lookup, reserve_callee_saves, outgoing tracking, and per-function reset to
it. Pure refactor: aa64 toy is green at O0 and O1 (1034 pass, 0 fail); the 8
aa64 parse O1 fails (func-addr static init / static struct-ptr cast) reproduce
identically on the pre-migration backend — pre-existing, not introduced here.
rv64/x64 adopt NativeFrame next (and gain their -O1 known-frame path).
Diffstat:
4 files changed, 339 insertions(+), 126 deletions(-)
diff --git a/doc/INTERFACES.md b/doc/INTERFACES.md
@@ -89,6 +89,7 @@ fills in. This is the most actively-changing area (x64/rv64 are being ported ont
| Semantic CG | `src/cg/cgtarget.h` | `CgTarget` | `native_direct_target` (-O0) or `opt_cgtarget` (-O≥1) | frontend-facing lowering, pre-regalloc |
| -O0 adapter | `src/cg/native_direct_target.h` | `NativeDirectTarget` + `NativeOps` | shared, parameterized by arch `NativeOps` | adapts `NativeTarget` to `CgTarget` for -O0 |
| Physical emit | `src/arch/native_target.h` | `NativeTarget` | `aa64`/`x64`/`rv64` `*_native_target_new()` | hard-register, machine-code emission + frame/CFI |
+| Frame model (shared) | `src/cg/native_frame.h` | `NativeFrame` | shared impl (`native_frame.c`); each backend embeds one | arch-neutral frame-slot bookkeeping the `NativeTarget` impls delegate to |
| Machine code | `src/arch/mc.h` | `MCEmitter` | one generic impl, `mc_new(Compiler, ObjBuilder)` | section/label/reloc/CFI byte emission for all MC archs |
**Per-arch entry points** (the surface each backend exposes to the rest of the
@@ -114,6 +115,38 @@ compiler):
`ArchImpl.apply_label_fixup` + CFI constants. Don't leak arch knowledge into
the generic emitter.
+### Native frame model (`src/cg/native_frame.h`) — NEW, adoption in progress
+
+A shared frame-bookkeeping module extracted because aa64/rv64/x64 all lay out a
+stack frame the same way at the bookkeeping level. `NativeFrame` owns the
+arch-neutral parts; each backend embeds one and keeps the ISA/ABI-specific parts.
+
+**The split — what `NativeFrame` owns vs. what stays in the backend:**
+
+| Owned by `NativeFrame` (arch-neutral) | Stays in the backend (ISA/ABI-specific) |
+|----------------------------------------|------------------------------------------|
+| Slot table + cumulative-offset arithmetic (`native_frame_slot_alloc`/`_at`) | Coordinate transform from `off` to anchor-relative disp (fp/s0/rbp; aa64 top- vs bottom-record) |
+| `frame_final` gate (no slots after prologue) | Prologue/epilogue + slim-variant instruction encoding |
+| Used-callee-save set from optimizer per-class masks (`native_frame_set_callee_saves`/`_collect_saves`) | Callee-save *placement* (aa64 reserves slots here; rv64/x64 compute offsets) |
+| `max_outgoing` tracking (`native_frame_note_outgoing`) | Deferred-patch application, variadic register-save stores |
+| Vararg save-area size from ABI va_list layout (`native_frame_va_save_bytes`) | — |
+
+**Review notes:**
+- ⚠️ **Status: partially adopted.** aa64 (`src/arch/aa64/native.c`) embeds a
+ `NativeFrame` and delegates its frame bookkeeping to it; rv64/x64 do not
+ include `native_frame.h` yet. Treat the *migration of rv64/x64 onto it* as
+ the open work, mirroring the `NativeTarget` port itself.
+- It consolidates the per-arch vararg-save magic numbers (rv64 64, x64 176,
+ aa64 64+128) into the single ABI-driven `native_frame_va_save_bytes` query —
+ aligned with the no-magic-numbers rule. When adopting per arch, verify the old
+ literal is *deleted*, not duplicated.
+- `NativeFrameSlotEntry` layout is documented as matching the per-arch slot
+ structs it replaces — when porting, confirm the backend's local slot struct is
+ actually retired rather than kept in parallel.
+- Handles are **1-indexed**; `NATIVE_FRAME_SLOT_NONE` is the sentinel. Both the
+ shared `native_frame_slot_alloc` and any backend that still mints raw
+ `NativeFrameSlot` values must agree on this.
+
---
## Tier 3 — Internal subsystem boundaries
@@ -295,6 +328,7 @@ Track interface-review passes here. Status: ⬜ not reviewed · 🔶 in progress
| `compile.h` / `frontend.h` / `source.h` | 1 | ⬜ | frontend-facing |
| other Tier-1 (`archive`, `asm_emit`, `emu`, `preprocess`, `wasm`, `config`, support) | 1 | ⬜ | smaller surfaces |
| `NativeTarget` (`native_target.h`) | 2 | 🔶 | aa64 ✅ reference; x64/rv64 porting |
+| `NativeFrame` (`native_frame.h`) | 2 | 🔶 | ⚠️ NEW; aa64 migrated onto it, rv64/x64 adoption pending |
| `CgTarget` (`cgtarget.h`) | 2 | ⬜ | — |
| `NativeDirectTarget`/`NativeOps` | 2 | ⬜ | -O0 adapter; semantic/physical split |
| `MCEmitter` (`mc.h`) | 2 | ⬜ | arch-neutral; keep it that way |
diff --git a/src/arch/aa64/native.c b/src/arch/aa64/native.c
@@ -37,6 +37,7 @@
#include "asm/asm.h"
#include "asm/asm_lex.h"
#include "cg/native_direct_target.h"
+#include "cg/native_frame.h"
#include "cg/type.h"
#include "core/arena.h"
#include "core/bytes.h"
@@ -141,13 +142,9 @@ static inline u32 aa_sp_off_saved_pair(const AAFrameLayout* L) {
return L->frame_size - AA_FRAME_SAVE_SIZE;
}
-typedef struct AANativeSlot {
- u32 off;
- u32 size;
- u32 align;
- u8 kind;
- u8 pad[3];
-} AANativeSlot;
+/* Frame slots and callee-save records are owned by the shared NativeFrame
+ * bookkeeping (cg/native_frame.h); these aliases keep the aa64-local spellings. */
+typedef NativeFrameSlotEntry AANativeSlot;
/* Deferred in-function patches, all resolved in aa_func_end once the frame
* layout (max_outgoing, callee-saves) is final. One growable list carries both
@@ -168,26 +165,16 @@ typedef struct AAPatch {
} u;
} AAPatch;
-/* x19..x28 (10) + v8..v15 (8) is the maximum the allocator can assign. */
-#define AA_MAX_CALLEE_SAVES 18u
-
-typedef struct AACalleeSave {
- NativeFrameSlot slot;
- CfreeCgTypeId type;
- u8 cls; /* NativeAllocClass */
- Reg reg;
-} AACalleeSave;
+typedef NativeFrameCalleeSave AACalleeSave;
typedef struct AANativeTarget {
NativeTarget base;
SrcLoc loc;
const CGFuncDesc* func;
- AANativeSlot* slots;
- u32 nslots;
- u32 slots_cap;
- u32 cum_off;
- u32 max_outgoing;
+ /* Shared frame bookkeeping: slot table, cumulative offset, max-outgoing,
+ * callee-save set, and the known_frame / has_alloca / frame_final flags. */
+ NativeFrame frame;
/* Final frame size, set once in aa_func_begin_known_frame when fp_at_bottom is
* decided. Read by the fp-relative offset helpers in the bottom-record layout
* (where slot/incoming-arg offsets depend on frame_size); meaningless and
@@ -212,9 +199,6 @@ typedef struct AANativeTarget {
u32 minimal_prologue_words; /* opt path: exact prologue length, else 0 */
MCLabel epilogue_label;
- AACalleeSave callee_saves[AA_MAX_CALLEE_SAVES];
- u32 ncallee_saves;
-
/* Set at func_end when this function qualifies for the slim prologue/epilogue
* (Tier A: no body locals/spills, no callee-saves, no alloca, no outgoing
* stack args, no sret/variadic). When set, the prologue patch and epilogue
@@ -241,23 +225,6 @@ typedef struct AANativeTarget {
* exclusive with slim_prologue (Tier A) and slim_small_frame; gated on
* out_stack==0 && !has_alloca && frame_size <= 504. */
u8 fp_at_bottom;
- /* Set by aa_func_begin_known_frame (optimizer path: the full frame is known
- * up front, so the prologue, allocas, and tail epilogues are emitted final
- * with no back-patching). Cleared by aa_func_begin (NativeDirectTarget
- * single-pass path: worst-case prologue region reserved and patched, alloca /
- * tail sites recorded and patched at func_end). This flag is the single
- * discriminator between the two strategies throughout this file. */
- u8 known_frame;
- /* Set when the function body contains a dynamic alloca. On the known-frame
- * path it comes from NativeKnownFrameDesc.has_alloca (needed before the body
- * to settle slim-epilogue eligibility); on the single-pass path it tracks
- * nalloca_patches. Disqualifies the slim small-frame epilogue. */
- u8 has_alloca;
- /* Set on the known-frame path once the frame is fixed and the prologue
- * emitted. Any frame_slot request after this point would grow the frame the
- * prologue already encoded — a silent miscompile — so aa_frame_slot panics.
- * The optimizer is expected to plan every slot before the body. */
- u8 frame_final;
} AANativeTarget;
static AANativeTarget* aa_of(NativeTarget* t) { return (AANativeTarget*)t; }
@@ -688,9 +655,7 @@ static u32 cmp_cond(CmpOp op) {
}
static AANativeSlot* aa_slot(AANativeTarget* a, NativeFrameSlot slot) {
- if (slot == NATIVE_FRAME_SLOT_NONE || slot > a->nslots)
- aa_panic(a, "bad frame slot");
- return &a->slots[slot - 1u];
+ /* Handle validation (NONE / out-of-range) is done by the shared lookup, which
+ * raises the panic through the NativeFrame instead of aa_panic. */
+ return native_frame_slot_at(&a->frame, slot);
}
static void aa_addr_base(AANativeTarget* a, NativeAddr addr, u32* base_out,
@@ -1045,12 +1010,11 @@ static void aa_func_begin_common(NativeTarget* t, const CGFuncDesc* fd) {
AANativeTarget* a = aa_of(t);
MCEmitter* mc = t->mc;
a->func = fd;
- a->nslots = 0;
- /* cum_off counts frame-slot bytes below fp (see AAFrameLayout above).
- * The saved fp/lr pair (16 bytes at [fp, fp+8]) is *not* part of cum_off;
- * the frame-size computation in aa_func_end adds it via aa_build_layout. */
- a->cum_off = 0;
- a->max_outgoing = 0;
+ /* Shared frame bookkeeping: clears the slot table, cum_off, max_outgoing,
+ * callee-save set, and known_frame/has_alloca/frame_final. cum_off counts
+ * frame-slot bytes below fp; the saved fp/lr pair (16 bytes at [fp, fp+8]) is
+ * *not* part of it — aa_build_layout adds it in aa_func_end. */
+ native_frame_reset(&a->frame);
a->incoming_stack_size = 0;
a->next_param_int = 0;
a->next_param_fp = 0;
@@ -1065,14 +1029,10 @@ static void aa_func_begin_common(NativeTarget* t, const CGFuncDesc* fd) {
a->va_vr_slot = NATIVE_FRAME_SLOT_NONE;
a->npatches = 0;
a->nalloca = 0;
- a->ncallee_saves = 0;
a->slim_prologue = 0;
a->slim_small_frame = 0;
a->fp_at_bottom = 0;
a->frame_size_final = 0;
- a->known_frame = 0;
- a->has_alloca = 0;
- a->frame_final = 0;
mc->set_section(mc, fd->text_section_id);
mc->emit_align(mc, 4, 0);
a->func_start = mc->pos(mc);
@@ -1152,8 +1112,8 @@ static void aa_emit_entry_saves(AANativeTarget* a) {
static void aa_note_frame_state(NativeTarget* t,
const NativeFramePatchState* state) {
AANativeTarget* a = aa_of(t);
- if (state && state->max_outgoing > a->max_outgoing)
- a->max_outgoing = state->max_outgoing;
+ /* Fold the reported outgoing-arg size into the frame's running maximum. A
+ * NULL state is ignored. Uses the shared helper so every max_outgoing update
+ * in this backend goes through the same grow-only rule. */
+ if (state) native_frame_note_outgoing(&a->frame, state->max_outgoing);
}
/* Reserve a save slot for each callee-saved register the allocator used. Runs
@@ -1163,30 +1123,19 @@ static void aa_note_frame_state(NativeTarget* t,
static void aa_reserve_callee_saves(NativeTarget* t, const u32* used,
u32 nclasses) {
AANativeTarget* a = aa_of(t);
- CfreeCgTypeId i64 = builtin_id(CFREE_CG_BUILTIN_I64);
- CfreeCgTypeId f64 = builtin_id(CFREE_CG_BUILTIN_F64);
- a->ncallee_saves = 0;
- for (u32 cls = 0; cls < nclasses; ++cls) {
- u32 mask = used[cls];
- for (Reg r = 0; r < 32u && mask; ++r) {
- NativeFrameSlotDesc sd;
- AACalleeSave* cs;
- if ((mask & (1u << r)) == 0) continue;
- mask &= ~(1u << r);
- if (a->ncallee_saves >= AA_MAX_CALLEE_SAVES)
- aa_panic(a, "too many callee-saved registers");
- memset(&sd, 0, sizeof sd);
- sd.type = (cls == (u32)NATIVE_REG_FP) ? f64 : i64;
- sd.size = 8;
- sd.align = 8;
- sd.kind = NATIVE_FRAME_SLOT_SAVE;
- cs = &a->callee_saves[a->ncallee_saves++];
- cs->cls = (u8)cls;
- cs->reg = r;
- cs->type = sd.type;
- cs->slot = t->frame_slot(t, &sd);
- }
- }
+ /* aa64 homes each callee-save in its own 8-byte frame slot (reserved before
+ * the body slots so they sit nearest fp, in stur range), so alloc_slots=1.
+ * Adjacent integer slots are later paired into stp/ldp.
+ *
+ * Every class starts as an 8-byte, 8-aligned i64 slot and only FP overrides
+ * the type. This keeps the pre-migration rule (FP -> f64, anything else ->
+ * i64); a zero-filled entry would record type 0 for such a class instead. */
+ NativeFrameSaveSpec spec[NATIVE_REG_VEC + 1];
+ const u32 nspec = (u32)(sizeof spec / sizeof spec[0]);
+ const CfreeCgTypeId i64 = builtin_id(CFREE_CG_BUILTIN_I64);
+ for (u32 cls = 0; cls < nspec; ++cls) {
+ spec[cls].size = 8;
+ spec[cls].align = 8;
+ spec[cls].type = i64;
+ }
+ spec[NATIVE_REG_FP].type = builtin_id(CFREE_CG_BUILTIN_F64);
+ native_frame_set_callee_saves(&a->frame, used, nclasses, spec, nspec, 1);
}
static MemAccess aa_mem_for_type(NativeTarget* t, CfreeCgTypeId type, u32 size);
@@ -1308,12 +1257,12 @@ static void aa_words_restore_frame(AANativeTarget* a, u32* words, u32 cap,
* the single-register stur/ldur form. */
static void aa_words_callee_saves(AANativeTarget* a, int save, u32* words,
u32 cap, u32* n) {
- for (u32 i = 0; i < a->ncallee_saves;) {
- const AACalleeSave* cs = &a->callee_saves[i];
+ for (u32 i = 0; i < a->frame.ncallee_saves;) {
+ const AACalleeSave* cs = &a->frame.callee_saves[i];
i32 off = aa_fp_off_slot(a, aa_slot(a, cs->slot)->off);
- if (i + 1u < a->ncallee_saves && cs->cls == (u8)NATIVE_REG_INT &&
- a->callee_saves[i + 1u].cls == (u8)NATIVE_REG_INT) {
- const AACalleeSave* cs2 = &a->callee_saves[i + 1u];
+ if (i + 1u < a->frame.ncallee_saves && cs->cls == (u8)NATIVE_REG_INT &&
+ a->frame.callee_saves[i + 1u].cls == (u8)NATIVE_REG_INT) {
+ const AACalleeSave* cs2 = &a->frame.callee_saves[i + 1u];
i32 off2 = aa_fp_off_slot(a, aa_slot(a, cs2->slot)->off);
/* cs2 is reserved after cs (larger slot.off), so it is the lower address
* in both layouts (off2 = off - 8): stp's Rt = cs2, Rt2 = cs, base off2.
@@ -1463,7 +1412,7 @@ static void aa_apply_patches(AANativeTarget* a, const AAFrameLayout* L) {
AAPatch* p = &a->patches[i];
if (p->kind == AA_PATCH_ALLOCA) {
u32 imm12, sh;
- if (!aa64_addsub_imm_fits(a->max_outgoing, &imm12, &sh))
+ if (!aa64_addsub_imm_fits(a->frame.max_outgoing, &imm12, &sh))
aa_panic(a, "outgoing area too large for alloca result");
aa_patch32(a->base.obj, sec, p->pos,
aa64_add_imm(1, p->u.dst_reg, AA_SP, imm12, sh));
@@ -1493,7 +1442,7 @@ static void aa_apply_patches(AANativeTarget* a, const AAFrameLayout* L) {
static void aa_func_end(NativeTarget* t) {
AANativeTarget* a = aa_of(t);
MCEmitter* mc = t->mc;
- AAFrameLayout L = aa_build_layout(a->cum_off, a->max_outgoing);
+ AAFrameLayout L = aa_build_layout(a->frame.cum_off, a->frame.max_outgoing);
/* known_frame (optimizer): prologue, allocas, and tail epilogues were emitted
* final and slim eligibility was settled in aa_func_begin_known_frame — there
* is nothing to patch. Single-pass (NDT): a worst-case prologue region was
@@ -1501,12 +1450,12 @@ static void aa_func_end(NativeTarget* t) {
* is final. The NDT path always uses the fat prologue/epilogue (slim_* left 0
* by aa_func_begin_common, since its reserved region is much larger). */
u32 prologue_region =
- a->known_frame ? a->minimal_prologue_words : AA_PROLOGUE_WORDS;
+ a->frame.known_frame ? a->minimal_prologue_words : AA_PROLOGUE_WORDS;
mc->label_place(mc, a->epilogue_label);
aa_emit_callee_restores(a);
aa_emit_restore_frame(a, &L);
aa_emit32(mc, aa64_ret(AA_LR));
- if (a->known_frame) {
+ if (a->frame.known_frame) {
/* The frame-planning pre-pass plus final prologue/alloca/tail emission must
* leave nothing deferred; a stray patch would mean a body-time frame change
* the final prologue never saw. */
@@ -1544,26 +1493,7 @@ static void aa_func_end(NativeTarget* t) {
static NativeFrameSlot aa_frame_slot(NativeTarget* t,
const NativeFrameSlotDesc* d) {
- AANativeTarget* a = aa_of(t);
- AANativeSlot* s;
- u32 size = d->size ? d->size : 8u;
- u32 align = d->align ? d->align : 1u;
- if (a->frame_final)
- aa_panic(a, "frame slot requested after known-frame prologue");
- if (a->nslots == a->slots_cap) {
- u32 cap = a->slots_cap ? a->slots_cap * 2u : 16u;
- AANativeSlot* nb = arena_zarray(t->c->tu, AANativeSlot, cap);
- if (a->slots) memcpy(nb, a->slots, sizeof(*nb) * a->nslots);
- a->slots = nb;
- a->slots_cap = cap;
- }
- a->cum_off = align_up_u32(a->cum_off + size, align);
- s = &a->slots[a->nslots++];
- s->off = a->cum_off;
- s->size = size;
- s->align = align;
- s->kind = d->kind;
- return a->nslots;
+ /* Size/align defaulting, the frame-final panic, and slot-table growth are all
+ * performed by the shared allocator on this target's embedded NativeFrame. */
+ return native_frame_slot_alloc(&aa_of(t)->frame, d);
}
static int aa_frame_slot_debug_loc(NativeTarget* t, NativeFrameSlot slot,
@@ -1573,7 +1503,7 @@ static int aa_frame_slot_debug_loc(NativeTarget* t, NativeFrameSlot slot,
i32 fp_off;
if (!out) return 0;
memset(out, 0, sizeof *out);
- if (slot == NATIVE_FRAME_SLOT_NONE || slot > a->nslots) return 0;
+ if (slot == NATIVE_FRAME_SLOT_NONE || slot > a->frame.nslots) return 0;
s = aa_slot(a, slot);
fp_off = aa_fp_off_slot(a, s->off);
out->kind = CG_DEBUG_LOC_FRAME;
@@ -1587,7 +1517,7 @@ static int aa_frame_slot_debug_loc(NativeTarget* t, NativeFrameSlot slot,
/* Optimizer entry point: the full frame is supplied up front, so the prologue,
* entry saves, slim-form eligibility, allocas, and tail epilogues are all final
* the moment they are emitted — no back-patching (aa_func_end skips the patch
- * passes when a->known_frame). Slot creation order matches the single-pass path
+ * passes when a->frame.known_frame). Slot creation order matches the single-pass path
* (callee-saves first for stur range, then the static slots, then sret/variadic
* entry saves) so offsets are identical to what the patch path would produce. */
static void aa_func_begin_known_frame(NativeTarget* t, const CGFuncDesc* fd,
@@ -1598,9 +1528,9 @@ static void aa_func_begin_known_frame(NativeTarget* t, const CGFuncDesc* fd,
u32 words[AA_PROLOGUE_WORDS];
u32 n;
aa_func_begin_common(t, fd);
- a->known_frame = 1;
+ a->frame.known_frame = 1;
if (frame) {
- a->has_alloca = frame->has_alloca;
+ a->frame.has_alloca = frame->has_alloca;
if (frame->callee_saved_used && frame->ncallee_classes)
aa_reserve_callee_saves(t, frame->callee_saved_used,
frame->ncallee_classes);
@@ -1621,14 +1551,14 @@ static void aa_func_begin_known_frame(NativeTarget* t, const CGFuncDesc* fd,
sd.kind = NATIVE_FRAME_SLOT_SPILL;
a->saved_tmp_slot = a->base.frame_slot(&a->base, &sd);
}
- if (frame->max_outgoing > a->max_outgoing)
- a->max_outgoing = frame->max_outgoing;
+ if (frame->max_outgoing > a->frame.max_outgoing)
+ a->frame.max_outgoing = frame->max_outgoing;
}
/* Frame is final: slot_bytes (cum_off) and out_stack (max_outgoing) are both
* known, so the prologue immediates and slim-form choice are settled here.
* frame_size_final must be set before aa_build_prologue_words / entry saves,
* since the bottom-record offset helpers read it. */
- L = aa_build_layout(a->cum_off, a->max_outgoing);
+ L = aa_build_layout(a->frame.cum_off, a->frame.max_outgoing);
a->frame_size_final = L.frame_size;
/* Slim Tier A: no callee-saves, no alloca, no body slots, no outgoing stack
* args — the whole frame is the 16-byte record. fp_at_bottom: a small frame
@@ -1638,16 +1568,16 @@ static void aa_func_begin_known_frame(NativeTarget* t, const CGFuncDesc* fd,
* slim_small_frame keeps the top-record layout but skips the x17/x10 scratch
* (out_stack>0 small frames land here). (See aa_func_end for the single-pass
* path, which never takes any slim form.) */
- a->slim_prologue = a->ncallee_saves == 0 && !a->has_alloca &&
+ a->slim_prologue = a->frame.ncallee_saves == 0 && !a->frame.has_alloca &&
L.slot_bytes == 0 && L.out_stack == 0;
- a->fp_at_bottom = !a->slim_prologue && !a->has_alloca && L.out_stack == 0 &&
+ a->fp_at_bottom = !a->slim_prologue && !a->frame.has_alloca && L.out_stack == 0 &&
L.frame_size <= 504u;
a->slim_small_frame = !a->slim_prologue && !a->fp_at_bottom &&
- !a->has_alloca && aa_sp_off_saved_pair(&L) <= 504u;
+ !a->frame.has_alloca && aa_sp_off_saved_pair(&L) <= 504u;
n = aa_build_prologue_words(a, &L, words, AA_PROLOGUE_WORDS);
for (u32 i = 0; i < n; ++i) aa_emit32(t->mc, words[i]);
a->minimal_prologue_words = n;
- a->frame_final = 1;
+ a->frame.frame_final = 1;
aa_emit_entry_save_stores(a);
}
@@ -2245,9 +2175,9 @@ static void aa_alloca(NativeTarget* t, NativeLoc dst, NativeLoc size,
/* The alloca result is sp + outgoing-area bytes. On the known-frame path
* max_outgoing is already final, so emit the final `add dst, sp, #N` here; on
* the single-pass path it is not known yet, so record a patch. */
- if (a->known_frame) {
+ if (a->frame.known_frame) {
u32 imm12, sh;
- if (!aa64_addsub_imm_fits(a->max_outgoing, &imm12, &sh))
+ if (!aa64_addsub_imm_fits(a->frame.max_outgoing, &imm12, &sh))
aa_panic(a, "outgoing area too large for alloca result");
aa_emit32(t->mc, aa64_add_imm(1, loc_reg(dst), AA_SP, imm12, sh));
} else {
@@ -2623,8 +2553,7 @@ static void aa_plan_call(NativeTarget* t, const NativeCallDesc* desc,
plan->has_sret = abi && abi->has_sret;
plan->is_variadic = abi && abi->variadic;
plan->stack_arg_size = aa_call_stack_size(t, desc);
- if (plan->stack_arg_size > aa_of(t)->max_outgoing)
- aa_of(t)->max_outgoing = plan->stack_arg_size;
+ native_frame_note_outgoing(&aa_of(t)->frame, plan->stack_arg_size);
/* Indirect call whose callee lives in x0..x7: the upcoming arg-load loop
* writes those same registers and would clobber the function pointer
* before blr reads it. Stash callee into AA_TMP0 (x16) up front and
@@ -2765,11 +2694,11 @@ static void aa_ret(NativeTarget* t);
static void aa_emit_tail_site(NativeTarget* t, NativeLoc callee) {
AANativeTarget* a = aa_of(t);
- if (a->known_frame) {
+ if (a->frame.known_frame) {
/* Frame is final: emit the tail epilogue (callee restores + frame restore +
* branch) directly, exactly the words aa_apply_patches would patch in but
* without the reserved NOP padding. */
- AAFrameLayout L = aa_build_layout(a->cum_off, a->max_outgoing);
+ AAFrameLayout L = aa_build_layout(a->frame.cum_off, a->frame.max_outgoing);
u32 words[AA_TAIL_WORDS];
u32 n = 0;
aa_words_callee_restores(a, words, AA_TAIL_WORDS, &n);
@@ -3546,6 +3475,7 @@ NativeTarget* aa64_native_target_new(Compiler* c, ObjBuilder* obj,
t->c = c;
t->obj = obj;
t->mc = mc;
+ native_frame_init(&a->frame, c);
t->regs = &aa_reg_info;
t->class_for_type = aa_class_for_type;
t->imm_legal = aa_imm_legal;
diff --git a/src/cg/native_frame.c b/src/cg/native_frame.c
@@ -0,0 +1,117 @@
+#include "cg/native_frame.h"
+
+#include <string.h>
+
+#include "core/arena.h"
+
+/* Round `v` up to a multiple of `align` using mask arithmetic, which is only a
+ * true round-up when `align` is a power of two. align == 0 returns `v` as is. */
+static u32 nf_align_up(u32 v, u32 align) {
+  if (align == 0u) return v;
+  return (v + (align - 1u)) & ~(align - 1u);
+}
+
+/* Report a frame-bookkeeping failure through the compiler's panic path,
+ * prefixing the message with "native frame: ". A NativeFrame carries no source
+ * position, so a zeroed SrcLoc is passed.
+ * NOTE(review): callers index or write immediately after calling this, so
+ * compiler_panic is presumably non-returning — confirm. */
+static void nf_panic(NativeFrame* f, const char* msg) {
+ compiler_panic(f->c, (SrcLoc){0, 0, 0}, "native frame: %s", msg);
+}
+
+/* One-time setup: start from an all-zero frame (no slot buffer, empty save
+ * set, all flags clear) and record the owning compiler, which is later used
+ * for slot-table growth and for panics. */
+void native_frame_init(NativeFrame* f, Compiler* c) {
+  *f = (NativeFrame){0};
+  f->c = c;
+}
+
+/* Per-function reset. The slot buffer itself (slots / slots_cap) is left in
+ * place so later functions can reuse it; zeroing nslots is what empties the
+ * table. `c` is not touched. */
+void native_frame_reset(NativeFrame* f) {
+  /* Path/eligibility flags. */
+  f->known_frame = 0;
+  f->has_alloca = 0;
+  f->frame_final = 0;
+  /* Slot arena and outgoing-arg high-water mark. */
+  f->nslots = 0;
+  f->cum_off = 0;
+  f->max_outgoing = 0;
+  /* Callee-save set. */
+  f->ncallee_saves = 0;
+}
+
+/* Make room for one more slot entry. When the table is full, capacity doubles
+ * (starting at 16) in a new arena array and the live entries are copied over. */
+static void nf_slots_reserve_one(NativeFrame* f) {
+  u32 grown;
+  NativeFrameSlotEntry* fresh;
+  if (f->nslots != f->slots_cap) return;
+  grown = f->slots_cap ? f->slots_cap * 2u : 16u;
+  fresh = arena_zarray(f->c->tu, NativeFrameSlotEntry, grown);
+  if (f->slots) memcpy(fresh, f->slots, sizeof(*fresh) * f->nslots);
+  f->slots = fresh;
+  f->slots_cap = grown;
+}
+
+/* Reserve a frame slot described by `d` and return its 1-indexed handle.
+ * A zero size defaults to 8 bytes and a zero align to 1. The cumulative offset
+ * advances by the size and is then rounded up to the alignment; the rounded
+ * value is stored as the slot's `off`. Panics once the frame is final. */
+NativeFrameSlot native_frame_slot_alloc(NativeFrame* f,
+                                        const NativeFrameSlotDesc* d) {
+  const u32 bytes = d->size ? d->size : 8u;
+  const u32 boundary = d->align ? d->align : 1u;
+  NativeFrameSlotEntry* entry;
+  if (f->frame_final) nf_panic(f, "frame slot requested after prologue");
+  nf_slots_reserve_one(f);
+  f->cum_off = nf_align_up(f->cum_off + bytes, boundary);
+  entry = &f->slots[f->nslots];
+  entry->off = f->cum_off;
+  entry->size = bytes;
+  entry->align = boundary;
+  entry->kind = d->kind;
+  f->nslots += 1u;
+  /* Handles are 1-indexed, so the new count is also the new slot's handle. */
+  return (NativeFrameSlot)f->nslots;
+}
+
+/* Map a 1-indexed slot handle to its table entry. NATIVE_FRAME_SLOT_NONE and
+ * handles above the current slot count panic with "bad frame slot". */
+NativeFrameSlotEntry* native_frame_slot_at(NativeFrame* f,
+                                           NativeFrameSlot slot) {
+  const int valid = slot != NATIVE_FRAME_SLOT_NONE && slot <= f->nslots;
+  if (!valid) nf_panic(f, "bad frame slot");
+  return f->slots + (slot - 1u);
+}
+
+/* Raise the outgoing-argument high-water mark to `bytes` if it is larger than
+ * the current value; the mark never shrinks. */
+void native_frame_note_outgoing(NativeFrame* f, u32 bytes) {
+  if (f->max_outgoing >= bytes) return;
+  f->max_outgoing = bytes;
+}
+
+/* Close the frame: after this, native_frame_slot_alloc panics instead of
+ * reserving a slot. Cleared again by native_frame_reset. */
+void native_frame_set_final(NativeFrame* f) {
+  f->frame_final = 1;
+}
+
+/* Rebuild the callee-save set from per-class used-masks.
+ *
+ * The set is emptied first; a NULL `used_by_class` leaves it empty. For each
+ * class index below `nclasses`, every set bit r (0..31) of its mask appends a
+ * record {cls, reg = r}. The record's type comes from `spec_by_class[cls]` when
+ * that entry exists (non-NULL table and cls < nspec), else 0. With a non-zero
+ * `alloc_slots` each record also gets a NATIVE_FRAME_SLOT_SAVE slot whose size
+ * and align come from the same spec entry, falling back to 8 for a zero field
+ * or a missing entry; otherwise the record's slot stays NATIVE_FRAME_SLOT_NONE.
+ * Panics if more than NATIVE_FRAME_MAX_CALLEE_SAVES records would be needed. */
+void native_frame_set_callee_saves(NativeFrame* f, const u32* used_by_class,
+                                   u32 nclasses,
+                                   const NativeFrameSaveSpec* spec_by_class,
+                                   u32 nspec, int alloc_slots) {
+  f->ncallee_saves = 0;
+  if (used_by_class == NULL) return;
+  for (u32 cls = 0; cls < nclasses; ++cls) {
+    /* The save shape depends only on the class, so resolve it once here. */
+    const NativeFrameSaveSpec* shape =
+        (spec_by_class != NULL && cls < nspec) ? &spec_by_class[cls] : NULL;
+    const CfreeCgTypeId save_type = shape ? shape->type : 0;
+    const u32 save_size = (shape && shape->size) ? shape->size : 8u;
+    const u32 save_align = (shape && shape->align) ? shape->align : 8u;
+    u32 pending = used_by_class[cls];
+    /* Ascending register order; stop early once no set bits remain. */
+    for (Reg r = 0; r < 32u && pending; ++r) {
+      const u32 bit = 1u << r;
+      NativeFrameCalleeSave* rec;
+      if (!(pending & bit)) continue;
+      pending &= ~bit;
+      if (f->ncallee_saves >= NATIVE_FRAME_MAX_CALLEE_SAVES)
+        nf_panic(f, "too many callee-saved registers");
+      rec = &f->callee_saves[f->ncallee_saves];
+      f->ncallee_saves += 1u;
+      rec->cls = (u8)cls;
+      rec->reg = r;
+      rec->type = save_type;
+      rec->slot = NATIVE_FRAME_SLOT_NONE;
+      if (alloc_slots) {
+        NativeFrameSlotDesc sd;
+        memset(&sd, 0, sizeof sd);
+        sd.type = save_type;
+        sd.size = save_size;
+        sd.align = save_align;
+        sd.kind = NATIVE_FRAME_SLOT_SAVE;
+        rec->slot = native_frame_slot_alloc(f, &sd);
+      }
+    }
+  }
+}
+
+/* Copy the registers of class `cls` from the callee-save set into `out`, in
+ * set order, writing at most `cap` of them. Returns the number written; any
+ * further matches beyond `cap` are silently left out. */
+u32 native_frame_collect_saves(const NativeFrame* f, NativeAllocClass cls,
+                               Reg* out, u32 cap) {
+  const u8 want = (u8)cls;
+  u32 written = 0;
+  for (u32 i = 0; i < f->ncallee_saves && written < cap; ++i) {
+    const NativeFrameCalleeSave* rec = &f->callee_saves[i];
+    if (rec->cls == want) out[written++] = rec->reg;
+  }
+  return written;
+}
+
+/* Size in bytes of the vararg register-save area for `abi`: the GP part
+ * (gp_reg_count * gp_slot_size) plus the FP part (fp_reg_count *
+ * fp_slot_size), both taken from the ABI's va_list layout. */
+u32 native_frame_va_save_bytes(TargetABI* abi) {
+  const ABIVaListInfo layout = abi_va_list_layout(abi);
+  const u32 gp_bytes = (u32)(layout.gp_reg_count * layout.gp_slot_size);
+  const u32 fp_bytes = (u32)(layout.fp_reg_count * layout.fp_slot_size);
+  return gp_bytes + fp_bytes;
+}
diff --git a/src/cg/native_frame.h b/src/cg/native_frame.h
@@ -0,0 +1,132 @@
+#ifndef CFREE_CG_NATIVE_FRAME_H
+#define CFREE_CG_NATIVE_FRAME_H
+
+/* Shared native-frame bookkeeping for the aa64/rv64/x64 NativeTarget backends.
+ *
+ * Every native backend lays out a stack frame the same way at the bookkeeping
+ * level: a table of frame slots (locals, spills, sret/variadic homes, and — on
+ * aa64 — callee-save homes) accumulated below the frame anchor, a running
+ * max-outgoing-arg size, and a set of callee-saved registers the allocator
+ * touched. The *arithmetic* of assigning each slot a cumulative offset, the
+ * frame-final gate that forbids growing the frame after the prologue is emitted,
+ * and the derivation of the used-callee-save set from the optimizer's per-class
+ * masks are identical across all three. They live here.
+ *
+ * What stays in each backend is everything ISA/ABI-specific: the coordinate
+ * transform from a slot's cumulative `off` to an anchor-relative displacement
+ * (fp/s0/rbp-relative, and aa64's top- vs bottom-record choice), the
+ * prologue/epilogue instruction encoding, callee-save placement (aa64 reserves
+ * slots here; rv64/x64 compute offsets below the locals), the slim-prologue
+ * variants, deferred-patch application, and the variadic register-save stores.
+ *
+ * The frame-relevant ABI facts are consulted through this module too:
+ * native_frame_va_save_bytes derives the vararg register-save-area size from the
+ * target ABI's va_list layout, so the per-arch magic numbers (rv64 64, x64 176,
+ * aa64 64+128) all come from one ABI-driven query. */
+
+#include "abi/abi.h"
+#include "arch/native_target.h"
+#include "core/core.h"
+
+/* One allocated frame slot. `off` is the cumulative byte offset below the frame
+ * anchor (positive); the backend converts it to an anchor-relative displacement.
+ * Layout matches the per-arch slot structs it replaces. */
+typedef struct NativeFrameSlotEntry {
+ u32 off; /* cumulative offset recorded when the slot was reserved */
+ u32 size; /* slot size in bytes, after the zero -> 8 default */
+ u32 align; /* slot alignment, after the zero -> 1 default */
+ u8 kind; /* NativeFrameSlotKind */
+ u8 pad[3]; /* explicit padding; not written by the allocator */
+} NativeFrameSlotEntry;
+
+/* A callee-saved register the function body used and must preserve. `slot` is
+ * the reserved save slot when the backend asked native_frame to allocate one
+ * (aa64), else NATIVE_FRAME_SLOT_NONE (rv64/x64 compute the save offset). */
+typedef struct NativeFrameCalleeSave {
+ NativeFrameSlot slot; /* save-slot handle, or NATIVE_FRAME_SLOT_NONE */
+ CfreeCgTypeId type; /* from the class's NativeFrameSaveSpec; 0 if none */
+ u8 cls; /* NativeAllocClass */
+ Reg reg; /* bit index of the register within its class's used-mask */
+} NativeFrameCalleeSave;
+
+/* Per-class save-slot shape, used only when native_frame_set_callee_saves is
+ * asked to allocate save slots (alloc_slots != 0). Indexed by NativeAllocClass. */
+typedef struct NativeFrameSaveSpec {
+ u32 size; /* save-slot size in bytes; 0 selects the 8-byte default */
+ u32 align; /* save-slot alignment; 0 selects the 8-byte default */
+ CfreeCgTypeId type; /* type recorded on the callee-save entry and its slot */
+} NativeFrameSaveSpec;
+
+/* x19..x28 (10) + v8..v15 (8) on aa64; s0..s11 + fs0..fs11 on rv64; the Win64
+ * GPR + XMM callee-saved set on x64. 48 covers every target with headroom. */
+#define NATIVE_FRAME_MAX_CALLEE_SAVES 48u
+
+typedef struct NativeFrame {
+ Compiler* c; /* owner; used for slot-table arena growth and for panics */
+
+ NativeFrameSlotEntry* slots; /* arena-grown; 1-indexed handles */
+ u32 nslots;
+ u32 slots_cap;
+ u32 cum_off; /* running below-anchor slot bytes, alignment padding included */
+
+ u32 max_outgoing; /* max outgoing-arg bytes across all calls */
+
+ NativeFrameCalleeSave callee_saves[NATIVE_FRAME_MAX_CALLEE_SAVES];
+ u32 ncallee_saves;
+
+ u8 frame_final; /* set once the prologue is emitted; bars further slots */
+ u8 known_frame; /* optimizer (known-frame) path vs single-pass path */
+ u8 has_alloca; /* body contains a dynamic alloca */
+ u8 pad;
+} NativeFrame;
+
+/* One-time setup (call from <arch>_native_target_new). */
+void native_frame_init(NativeFrame* f, Compiler* c);
+
+/* Per-function reset: clears the slot table, outgoing size, callee-save set and
+ * the frame flags. Keeps the slots buffer for reuse across functions. */
+void native_frame_reset(NativeFrame* f);
+
+/* Allocate a frame slot: the cumulative offset advances by `size` and is then
+ * rounded up to `align` (which must be a power of two); that rounded value
+ * becomes the slot's `off`. A zero size defaults to 8 and a zero align to 1.
+ * Returns a 1-indexed handle. Panics if the frame is already final. Identical
+ * arithmetic to the per-arch allocators. */
+NativeFrameSlot native_frame_slot_alloc(NativeFrame* f,
+ const NativeFrameSlotDesc* d);
+
+/* Resolve a 1-indexed handle to its entry. Panics on an out-of-range handle. */
+NativeFrameSlotEntry* native_frame_slot_at(NativeFrame* f, NativeFrameSlot slot);
+
+/* Grow max_outgoing to at least `bytes`. */
+void native_frame_note_outgoing(NativeFrame* f, u32 bytes);
+
+/* Mark the frame final (no further native_frame_slot_alloc allowed). */
+void native_frame_set_final(NativeFrame* f);
+
+/* Total below-anchor bytes reserved for slots so far (the current cum_off). */
+static inline u32 native_frame_slot_bytes(const NativeFrame* f) {
+ return f->cum_off;
+}
+
+/* Derive the used-callee-save set from the optimizer's per-class masks (one
+ * bitmask of hard registers per NativeAllocClass, already restricted to the
+ * callee-saved set by the caller). Appends one NativeFrameCalleeSave per set
+ * bit. When alloc_slots is non-zero, also reserves a save slot per register
+ * (aa64), sized and aligned per `spec_by_class` with 8 bytes as the fallback
+ * for a zero or missing entry; otherwise slot is left NONE and the backend
+ * computes the save offset itself (rv64/x64). `spec_by_class` (indexed by
+ * NativeAllocClass, `nspec` entries) gives the save-slot shape per class and
+ * may be NULL when alloc_slots == 0. Resets the set first; panics if it would
+ * exceed NATIVE_FRAME_MAX_CALLEE_SAVES. */
+void native_frame_set_callee_saves(NativeFrame* f, const u32* used_by_class,
+ u32 nclasses,
+ const NativeFrameSaveSpec* spec_by_class,
+ u32 nspec, int alloc_slots);
+
+/* Fill `out` with the registers in the callee-save set belonging to `cls`, in
+ * the order they were derived. Returns the count written (capped at `cap`). */
+u32 native_frame_collect_saves(const NativeFrame* f, NativeAllocClass cls,
+ Reg* out, u32 cap);
+
+/* Bytes of the vararg register-save area for `abi`, derived from its va_list
+ * layout: gp_reg_count*gp_slot_size + fp_reg_count*fp_slot_size. 0 for ABIs with
+ * no register-save area (e.g. Win64). The one ABI-consulting frame query. */
+u32 native_frame_va_save_bytes(TargetABI* abi);
+
+#endif