kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit c77e51615803de12a2838a0c3b72b9f850e4af86
parent 12e73593a9c7eb07271ec79cb696b4c57edccb2e
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sun, 24 May 2026 09:02:09 -0700

aa64: don't clobber aggregate-return base with first field load

aa_ret loads a small aggregate returned in registers part-by-part: part i
goes to xN=i, read from [base_reg + src_offset]. When the struct address
(base_reg) is itself one of the destination registers — e.g. base in x0,
part 0 -> x0 — loading part 0 overwrites the base before later parts are
read, so the next load `ldur w1, [x0, #8]` indexes off the loaded data
instead of the struct. Pre-fix this produced `add x0,x1,#8; ldur x0,[x0];
ldur w1,[x0,#8]` and crashed (EXC_BAD_ACCESS) at -O1/-O2; -O0 was fine
because the base lived in a callee-saved reg.

Park the base in a scratch (x10, never a return register) when an earlier
INT part would overwrite it, and load all parts from there. FP parts
target v-regs and never alias the int base, so they don't trigger the
copy. Surfaced building cfree at -O1 with cfree (abi_va_list_info,
returning the 12-byte ABITypeInfo, got a garbage second word).

Regression: test/opt/ret_aggregate_base_clobber.c driven via `cfree run`
at -O0/-O1/-O2 (entry returns 343 & 0xff == 87); pre-fix it segfaulted at
-O1/-O2. test-opt, test-smoke-{x64,rv64}, test-parse, test-toy,
test-driver, test-isa, test-cg-api, test-aa64-inline pass.

Diffstat:
Msrc/arch/aa64/ops.c | 20+++++++++++++++++---
Atest/opt/ret_aggregate_base_clobber.c | 38++++++++++++++++++++++++++++++++++++++
Mtest/opt/run.sh | 19+++++++++++++++++++
3 files changed, 74 insertions(+), 3 deletions(-)

diff --git a/src/arch/aa64/ops.c b/src/arch/aa64/ops.c @@ -1821,14 +1821,28 @@ static void aa_ret(CGTarget* t, const CGABIValue* val) { base_off = val->storage.v.ind.ofs; } const ABIArgInfo* ri2 = val->abi; - for (u16 i = 0; i < (ri2 ? ri2->nparts : 0); ++i) { + u16 nparts = ri2 ? ri2->nparts : 0; + /* INT parts load into x0..x{n-1}. If the base address sits in one of + * those registers, loading that part clobbers the base before later + * parts are read (e.g. `ldur x0,[x0]; ldur w1,[x0,#8]`). Park the base + * in a scratch (x10, never a return reg) when an earlier INT part would + * overwrite it. FP parts target v-regs and never alias the int base. */ + u32 load_base = base_reg; + for (u16 i = 0; i + 1u < nparts; ++i) { + if (ri2->parts[i].cls == ABI_CLASS_INT && (u32)i == base_reg) { + aa64_emit32(mc, aa64_mov_reg(/*sf=*/1, AA_TMP1, base_reg)); + load_base = AA_TMP1; + break; + } + } + for (u16 i = 0; i < nparts; ++i) { const ABIArgPart* pt = &ri2->parts[i]; u32 sidx = size_idx_for_bytes(pt->size); i32 off = base_off + (i32)pt->src_offset; if (pt->cls == ABI_CLASS_INT) { - aa64_emit_ldur_off(mc, sidx, /*Rt=*/i, base_reg, off, AA_TMP0); + aa64_emit_ldur_off(mc, sidx, /*Rt=*/i, load_base, off, AA_TMP0); } else if (pt->cls == ABI_CLASS_FP) { - aa_emit_ldr_fp_any(mc, sidx, /*Rt=*/i, base_reg, off); + aa_emit_ldr_fp_any(mc, sidx, /*Rt=*/i, load_base, off); } else { compiler_panic(t->c, a->loc, "aarch64 ret: ret part cls %d unimpl", (int)pt->cls); diff --git a/test/opt/ret_aggregate_base_clobber.c b/test/opt/ret_aggregate_base_clobber.c @@ -0,0 +1,38 @@ +/* Regression: returning a small aggregate loaded through a pointer chain must + * not clobber the base address with the first field load before later fields + * are read. The aa64 ret path loaded part i into xN=i from [base+off]; when the + * struct address sat in x0 (a return reg), `ldur x0,[x0]; ldur w1,[x0,#8]` + * read the second field off the just-loaded data. Pre-fix this segfaulted at + * -O1/-O2; -O0 was fine. Driven via `cfree run` (entry returns 343 & 0xff). */ +typedef struct { + unsigned size; + unsigned align; + unsigned char a, b, c, d; +} Info; /* 12 bytes -> returned in x0 + w1 */ + +struct VT { + long x; + Info info; +}; +struct A { + long y; + struct VT* vt; +}; + +__attribute__((noinline)) Info ret_info(struct A* a) { return a->vt->info; } + +int entry(void) { + struct VT vt; + vt.x = 0; + vt.info.size = 111; + vt.info.align = 222; + vt.info.a = 1; + vt.info.b = 2; + vt.info.c = 3; + vt.info.d = 4; + struct A a; + a.y = 0; + a.vt = &vt; + Info r = ret_info(&a); + return (int)((r.size + r.align + r.a + r.b + r.c + r.d) & 0xff); /* 343&255=87 */ +} diff --git a/test/opt/run.sh b/test/opt/run.sh @@ -34,3 +34,22 @@ if ! grep -Eq 'movz[[:space:]]+w0, 0x2a|mov[[:space:]]+w0, #42|li[[:space:]]+a0, sed 's/^/ | /' "$INLINE_WORK/main.dis" >&2 exit 1 fi + +# Regression: small-aggregate return must not clobber its base address with the +# first field load (aa64 ret part-load base aliasing). Pre-fix this segfaulted +# at -O1/-O2. Driven through the in-process JIT so it runs on the host arch; +# entry() returns 343 & 0xff == 87. +RET_CLOBBER_SRC="$ROOT/test/opt/ret_aggregate_base_clobber.c" +for opt in -O0 -O1 -O2; do + if "$ROOT/build/cfree" run "$opt" -e entry "$RET_CLOBBER_SRC" \ + > /dev/null 2>&1; then + rc=0 + else + rc=$? + fi + if [ "$rc" -ne 87 ]; then + printf 'ret aggregate base-clobber check failed at %s: exit %d (want 87)\n' \ + "$opt" "$rc" >&2 + exit 1 + fi +done