commit c77e51615803de12a2838a0c3b72b9f850e4af86
parent 12e73593a9c7eb07271ec79cb696b4c57edccb2e
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sun, 24 May 2026 09:02:09 -0700
aa64: don't clobber aggregate-return base with first field load
aa_ret loads a small aggregate returned in registers part-by-part: part i
goes to xN=i, read from [base_reg + src_offset]. When the struct address
(base_reg) is itself one of the destination registers — e.g. base in x0,
part 0 -> x0 — loading part 0 overwrites the base before later parts are
read, so the next load `ldur w1, [x0, #8]` indexes off the loaded data
instead of the struct. Pre-fix this produced `add x0,x1,#8; ldur x0,[x0];
ldur w1,[x0,#8]` and crashed (EXC_BAD_ACCESS) at -O1/-O2; -O0 was fine
because the base lived in a callee-saved reg.
Park the base in a scratch (x10, never a return register) when an earlier
INT part would overwrite it, and load all parts from there. FP parts
target v-regs and never alias the int base, so they don't trigger the
copy. Surfaced building cfree at -O1 with cfree (abi_va_list_info,
returning the 12-byte ABITypeInfo, got a garbage second word).
Regression: test/opt/ret_aggregate_base_clobber.c driven via `cfree run`
at -O0/-O1/-O2 (entry returns 343 & 0xff == 87); pre-fix it segfaulted at
-O1/-O2. test-opt, test-smoke-{x64,rv64}, test-parse, test-toy,
test-driver, test-isa, test-cg-api, test-aa64-inline pass.
Diffstat:
3 files changed, 74 insertions(+), 3 deletions(-)
diff --git a/src/arch/aa64/ops.c b/src/arch/aa64/ops.c
@@ -1821,14 +1821,28 @@ static void aa_ret(CGTarget* t, const CGABIValue* val) {
base_off = val->storage.v.ind.ofs;
}
const ABIArgInfo* ri2 = val->abi;
- for (u16 i = 0; i < (ri2 ? ri2->nparts : 0); ++i) {
+ u16 nparts = ri2 ? ri2->nparts : 0;
+ /* INT parts load into x0..x{n-1}. If the base address sits in one of
+ * those registers, loading that part clobbers the base before later
+ * parts are read (e.g. `ldur x0,[x0]; ldur w1,[x0,#8]`). Park the base
+ * in a scratch (x10, never a return reg) when an earlier INT part would
+ * overwrite it. FP parts target v-regs and never alias the int base. */
+ u32 load_base = base_reg;
+ for (u16 i = 0; i + 1u < nparts; ++i) {
+ if (ri2->parts[i].cls == ABI_CLASS_INT && (u32)i == base_reg) {
+ aa64_emit32(mc, aa64_mov_reg(/*sf=*/1, AA_TMP1, base_reg));
+ load_base = AA_TMP1;
+ break;
+ }
+ }
+ for (u16 i = 0; i < nparts; ++i) {
const ABIArgPart* pt = &ri2->parts[i];
u32 sidx = size_idx_for_bytes(pt->size);
i32 off = base_off + (i32)pt->src_offset;
if (pt->cls == ABI_CLASS_INT) {
- aa64_emit_ldur_off(mc, sidx, /*Rt=*/i, base_reg, off, AA_TMP0);
+ aa64_emit_ldur_off(mc, sidx, /*Rt=*/i, load_base, off, AA_TMP0);
} else if (pt->cls == ABI_CLASS_FP) {
- aa_emit_ldr_fp_any(mc, sidx, /*Rt=*/i, base_reg, off);
+ aa_emit_ldr_fp_any(mc, sidx, /*Rt=*/i, load_base, off);
} else {
compiler_panic(t->c, a->loc, "aarch64 ret: ret part cls %d unimpl",
(int)pt->cls);
diff --git a/test/opt/ret_aggregate_base_clobber.c b/test/opt/ret_aggregate_base_clobber.c
@@ -0,0 +1,38 @@
+/* Regression: returning a small aggregate loaded through a pointer chain must
+ * not clobber the base address with the first field load before later fields
+ * are read. The aa64 ret path loaded part i into xN=i from [base+off]; when the
+ * struct address sat in x0 (a return reg), `ldur x0,[x0]; ldur w1,[x0,#8]`
+ * read the second field off the just-loaded data. Pre-fix this segfaulted at
+ * -O1/-O2; -O0 was fine. Driven via `cfree run` (entry returns 343 & 0xff). */
+typedef struct {
+ unsigned size;
+ unsigned align;
+ unsigned char a, b, c, d;
+} Info; /* 12 bytes -> returned in x0 + w1 */
+
+struct VT {
+ long x;
+ Info info;
+};
+struct A {
+ long y;
+ struct VT* vt;
+};
+
+__attribute__((noinline)) Info ret_info(struct A* a) { return a->vt->info; }
+
+int entry(void) {
+ struct VT vt;
+ vt.x = 0;
+ vt.info.size = 111;
+ vt.info.align = 222;
+ vt.info.a = 1;
+ vt.info.b = 2;
+ vt.info.c = 3;
+ vt.info.d = 4;
+ struct A a;
+ a.y = 0;
+ a.vt = &vt;
+ Info r = ret_info(&a);
+ return (int)((r.size + r.align + r.a + r.b + r.c + r.d) & 0xff); /* 343&255=87 */
+}
diff --git a/test/opt/run.sh b/test/opt/run.sh
@@ -34,3 +34,22 @@ if ! grep -Eq 'movz[[:space:]]+w0, 0x2a|mov[[:space:]]+w0, #42|li[[:space:]]+a0,
sed 's/^/ | /' "$INLINE_WORK/main.dis" >&2
exit 1
fi
+
+# Regression: small-aggregate return must not clobber its base address with the
+# first field load (aa64 ret part-load base aliasing). Pre-fix this segfaulted
+# at -O1/-O2. Driven through the in-process JIT so it runs on the host arch;
+# entry() returns 343 & 0xff == 87.
+RET_CLOBBER_SRC="$ROOT/test/opt/ret_aggregate_base_clobber.c"
+for opt in -O0 -O1 -O2; do
+ if "$ROOT/build/cfree" run "$opt" -e entry "$RET_CLOBBER_SRC" \
+ > /dev/null 2>&1; then
+ rc=0
+ else
+ rc=$?
+ fi
+ if [ "$rc" -ne 87 ]; then
+ printf 'ret aggregate base-clobber check failed at %s: exit %d (want 87)\n' \
+ "$opt" "$rc" >&2
+ exit 1
+ fi
+done