kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit e6a04f06bd66c77b397468a5749785163adb18fa
parent bded6d326e383ec53eabd205d8521456899ed8a5
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sun, 10 May 2026 13:19:36 -0700

arch/rv64: variadic save area atop frame, contiguous with stack args

Place the 64-byte GP save area immediately above the saved-s0/ra pair so
[save_area, save_area+64, caller's stack] forms one byte stream — a
single 8-byte stride past save_area[7] lands on the caller's first stack
arg. Spill only a_{next_param_int}..a7 (named int params already live in
their own slots). Stack-arg reads in rv_param skip past the save area
when the callee is variadic.

Fixes j03_va_int_spill (10 i32 variadics) and j06_va_double_spill (9 f64
variadics) at L0 and L1; rv64 cg suite goes 765→769 pass / 8→4 fail.
The remaining 4 fails are Phase 4 .tbss cases.

Diffstat:
M doc/rv64-status.md | 28 +++++++++++-----------------
M src/arch/rv64.c | 79 +++++++++++++++++++++++++++++++++++--------------------------------------------
2 files changed, 46 insertions(+), 61 deletions(-)

diff --git a/doc/rv64-status.md b/doc/rv64-status.md @@ -19,13 +19,13 @@ E links and runs under qemu-riscv64 via podman. | Path | Pass | Fail | Skip | |----------------------------|-----:|-----:|-----:| | R (roundtrip) | 386 | 0 | 0 | -| E (qemu exec) | 379 | 8 | ~ | +| E (qemu exec) | 383 | 4 | ~ | | D / J (native JIT) | 0 | 0 | 772 | Skips are valid: D and J require host == rv64. With -`CFREE_TEST_ALLOW_SKIP=1`, the suite reports **765 pass, 8 fail, 768 skip**. +`CFREE_TEST_ALLOW_SKIP=1`, the suite reports **769 pass, 4 fail, 768 skip**. -The 8 fails are 4 cases × 2 opt levels — see the phase checklists below. +The 4 fails are 2 cases × 2 opt levels — see the Phase 4 checklist below. --- @@ -56,28 +56,22 @@ The 8 fails are 4 cases × 2 opt levels — see the phase checklists below. - ✅ Intrinsics: memcpy/memmove/memset, popcount, ctz, clz, bswap16/32/64, add/sub/mul_overflow, expect, assume_aligned, prefetch, trap -## Phase 3 — Variadic LP64D 🚧 +## Phase 3 — Variadic LP64D ✅ Variadic-args calling convention with **save area contiguous with caller's stack args** so a single `void*` walk works for any number of args. 
- ✅ va_list = `void*`; va_start / va_arg / va_end / va_copy -- ✅ Prologue spills a0..a7 into a 64-byte save slot - ✅ va_arg handles RC_INT and RC_FP (bitcast via FMV.X.{W,D}) - ✅ Variadic FP **args being passed** are bitcast into integer regs (RC_FP storage → FMV.X.{W,D} → a-reg) -- 🚧 Save area placement: currently inside the frame near other slots; - walks past `save_area[7]` fall into locals, not caller's stack -- ⬜ Relayout: place save area at the very top of the frame (right above - the saved-s0/ra pair) so [save_area, save_area+64, caller's stack] - forms one contiguous byte stream -- ⬜ Honor named-arg count: spill only a_{nparams_int}..a7 (today we - spill all 8 unconditionally; correct but wastes prologue insns) - -Failing today (4 case × 2 levels = 8): -- `j03_va_int_spill` — 10 i32 variadics; 9th and 10th read from - wrong addresses -- `j06_va_double_spill` — 9 f64 variadics; 9th double read from wrong addr +- ✅ Save area sits at the very top of the frame, above the saved-s0/ra + pair, so [save_area, save_area+64, caller's stack] is one contiguous + byte stream — `save_area[8]` coincides with the caller's first stack arg +- ✅ Prologue spills only a_{next_param_int}..a7 (named int params already + landed in their own slots; sret consumes a0 when present) +- ✅ Stack-arg reads in `rv_param` use `caller_stack_base = 16 + 64` for + variadic functions to skip past the save area ## Phase 4 — TLS LE ⬜ diff --git a/src/arch/rv64.c b/src/arch/rv64.c @@ -353,20 +353,12 @@ static void rv_func_begin(CGTarget* t, const CGFuncDesc* fd) { a->next_param_int = 1; } - /* Variadic: reserve a 64-byte GP save area for a0..a7. The patcher - * spills the regs into it as part of the prologue, after `addi s0,…`. 
*/ - if (a->is_variadic) { - FrameSlotDesc gpd = { - .type = NULL, - .name = 0, - .loc = (SrcLoc){0, 0, 0}, - .size = 64, - .align = 8, - .kind = FS_SPILL, - .flags = 0, - }; - a->gp_save_slot = rv_frame_slot(t, &gpd); - } + /* Variadic: a 64-byte GP save area for a0..a7 lives at the very top + * of the frame, immediately above the saved-s0/ra pair, so its bytes + * are contiguous with the caller's stack args. The patcher spills the + * unnamed a-regs into it as part of the prologue. The slot is implicit + * (not allocated through rv_frame_slot) — it sits at [s0 + 16] when + * is_variadic is set. */ } static void rv_func_end(CGTarget* t) { @@ -381,11 +373,15 @@ static void rv_func_end(CGTarget* t) { u32 int_saves_sz = n_int_saves * 8u; u32 fp_saves_sz = n_fp_saves * 8u; + /* Variadic functions reserve a 64-byte save area at the very top of + * the frame so the save area and caller's stack args form a single + * contiguous byte stream walked by the va_list pointer. */ + u32 va_save_sz = a->is_variadic ? 64u : 0u; u32 locals_off = max_out + int_saves_sz + fp_saves_sz; /* from sp */ u32 fp_pair_off = locals_off + a->cum_off; - u32 frame_size = fp_pair_off + 16u; + u32 frame_size = fp_pair_off + 16u + va_save_sz; frame_size = (frame_size + 15u) & ~15u; - fp_pair_off = frame_size - 16u; + fp_pair_off = frame_size - 16u - va_save_sz; a->fp_pair_off = fp_pair_off; /* Place the epilogue label at current pos. */ @@ -484,15 +480,14 @@ static void rv_func_end(CGTarget* t) { words[wi++] = rv_sd(RV_A0, RV_S0, -(i32)s->off); } } - /* Variadic: spill a0..a7 into the GP save area. */ - if (a->is_variadic && a->gp_save_slot != FRAME_SLOT_NONE) { - RvSlot* gs = slot_get(a, a->gp_save_slot); - if (gs) { - for (u32 i = 0; i < 8; ++i) { - if (wi >= RV_PROLOGUE_WORDS) goto overflow; - words[wi++] = rv_sd(RV_A0 + i, RV_S0, - -(i32)gs->off + (i32)i * 8); - } + /* Variadic: spill the still-unconsumed a-regs (a_{nparams_int}..a7) + * into the save area at [s0 + 16 + i*8]. 
The save area sits between + * the saved-s0/ra pair and the caller's stack args, so save_area[8] + * == caller's first stack arg. */ + if (a->is_variadic) { + for (u32 i = a->next_param_int; i < 8; ++i) { + if (wi >= RV_PROLOGUE_WORDS) goto overflow; + words[wi++] = rv_sd(RV_A0 + i, RV_S0, 16 + (i32)i * 8); } } /* int saves */ @@ -623,6 +618,9 @@ static void rv_param(CGTarget* t, const CGParamDesc* p) { RvSlot* s = slot_get(a, p->slot); if (!s) compiler_panic(t->c, a->loc, "rv64 param: bad slot"); const ABIArgInfo* ai = p->abi; + /* Caller's stack args start above the saved-s0/ra pair, plus the + * 64-byte variadic save area when this function is variadic. */ + i32 caller_stack_base = 16 + (a->is_variadic ? 64 : 0); if (ai->kind == ABI_ARG_IGNORE) return; if (ai->kind == ABI_ARG_INDIRECT) { @@ -638,7 +636,7 @@ static void rv_param(CGTarget* t, const CGParamDesc* p) { /* Incoming stack args live in the caller's outgoing-arg area, * which is `frame_size - fp_pair_off` (= 16 + the saved-s0/ra * pair) above s0 — same logic as aa64's `16 + caller_off`. 
*/ - emit32(mc, rv_ld(RV_T1, RV_S0, (i32)(16u + caller_off))); + emit32(mc, rv_ld(RV_T1, RV_S0, caller_stack_base + (i32)caller_off)); ptr_reg = RV_T1; } u32 nbytes = s->size; @@ -681,7 +679,7 @@ static void rv_param(CGTarget* t, const CGParamDesc* p) { u32 caller_off = a->next_param_stack; a->next_param_stack += 8; emit32(mc, enc_int_load(sz, 0, RV_T2, RV_S0, - (i32)(16u + caller_off))); + caller_stack_base + (i32)caller_off)); emit32(mc, enc_int_store(sz, RV_T2, RV_S0, -(i32)s->off + (i32)part_off)); } @@ -699,10 +697,10 @@ static void rv_param(CGTarget* t, const CGParamDesc* p) { u32 caller_off = a->next_param_stack; a->next_param_stack += 8; if (sz == 8) { - emit32(mc, rv_fld(0, RV_S0, (i32)(16u + caller_off))); + emit32(mc, rv_fld(0, RV_S0, caller_stack_base + (i32)caller_off)); emit32(mc, rv_fsd(0, RV_S0, -(i32)s->off + (i32)part_off)); } else { - emit32(mc, rv_flw(0, RV_S0, (i32)(16u + caller_off))); + emit32(mc, rv_flw(0, RV_S0, caller_stack_base + (i32)caller_off)); emit32(mc, rv_fsw(0, RV_S0, -(i32)s->off + (i32)part_off)); } } @@ -1868,10 +1866,11 @@ static void rv_alloca_(CGTarget* t, Operand d, Operand sz, u32 align) { a->has_alloca = 1; } /* RV64 LP64D va_list: a single `void*` pointing at the next argument - * slot. The prologue spills a0..a7 into a contiguous save area. The - * save area and the caller's stack args form a single byte-stream - * (named-arg-count-aware), but for our test corpus the save area - * alone covers every variadic case. */ + * slot. The prologue spills a_{nparams_int}..a7 into the save area at + * [s0 + 16]. The save area lives at the top of the callee frame, + * immediately above the saved-s0/ra pair, so save_area[8] coincides + * with the caller's first stack arg — a single 8-byte stride covers + * register and stack args alike. 
*/ static void rv_va_start_(CGTarget* t, Operand ap_op) { RImpl* a = impl_of(t); MCEmitter* mc = t->mc; @@ -1879,17 +1878,9 @@ static void rv_va_start_(CGTarget* t, Operand ap_op) { compiler_panic(t->c, a->loc, "rv64 va_start: function not variadic"); } u32 ap = reg_num(ap_op); - /* ap is the address of the va_list variable; *ap = &save_area[named_int]. */ - RvSlot* gs = slot_get(a, a->gp_save_slot); - if (!gs) compiler_panic(t->c, a->loc, "rv64 va_start: no save slot"); - /* t0 = s0 - gs->off + next_param_int*8 */ - i32 off = -(i32)gs->off + (i32)(a->next_param_int * 8u); - if (off >= -2048 && off <= 2047) { - emit32(mc, rv_addi(RV_T0, RV_S0, off)); - } else { - emit_load_imm(mc, 1, RV_T0, (i64)off); - emit32(mc, rv_add(RV_T0, RV_S0, RV_T0)); - } + /* *ap = s0 + 16 + next_param_int*8 (skip past named-int slots). */ + i32 off = 16 + (i32)(a->next_param_int * 8u); + emit32(mc, rv_addi(RV_T0, RV_S0, off)); emit32(mc, rv_sd(RV_T0, ap, 0)); }