commit 6ea2c9a48aa1a8670eb9a9005cd63d8b7c002a28
parent 6ccc62a9ce1e2feed3f6d085b011571cccfc8447
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Wed, 3 Jun 2026 20:38:12 -0700
Fix Apple ARM64 variadic stack slots
Diffstat:
5 files changed, 106 insertions(+), 24 deletions(-)
diff --git a/src/abi/abi.h b/src/abi/abi.h
@@ -128,6 +128,10 @@ typedef struct ABIFuncInfo {
* Zero means the backend default. Apple ARM64 uses 4-byte compact slots for
* stack arguments such as int32; AAPCS64 uses 8-byte slots. */
u8 stack_arg_min_align;
+ /* Minimum stack slot size/alignment for variadic arguments forced to the
+ * stack. Zero falls back to stack_arg_min_align, then the backend default.
+ * Apple ARM64 keeps fixed stack args compact but uses 8-byte vararg slots. */
+ u8 vararg_stack_arg_min_align;
u32 vararg_gp_offset;
u32 vararg_fp_offset;
u32 vararg_overflow_offset;
diff --git a/src/abi/abi_apple_arm64.c b/src/abi/abi_apple_arm64.c
@@ -15,14 +15,10 @@
* ABIFuncInfo.vararg_on_stack trait that the cg backend reads
* when synthesizing variadic-arg ABIArgInfos.
*
- * 3. Stack-arg promotion — small integer arguments passed on the
+ * 3. Stack-arg promotion — fixed small integer arguments passed on the
* stack are promoted to 4 bytes minimum (`char`/`short` occupy 4
- * stack bytes). Like (2), the divergence is in stack-slot
- * assignment, not in classification, and lives in cg. The
- * current kit cg path uses 8-byte stack stride for every arg,
- * which is wider than either ABI requires but ABI-safe — the
- * narrower Apple-specific layout becomes a concern only when
- * cross-checking against clang-emitted callers/callees. */
+ * stack bytes). Variadic arguments still occupy 8-byte slots so a
+ * plain `char*` va_list can advance uniformly. */
#include "abi/abi_internal.h"
#include "core/core.h"
@@ -40,6 +36,7 @@ static ABIFuncInfo* apple_arm64_compute_func_info(TargetABI* a,
ABIFuncInfo* info = aapcs64_compute_func_info(a, fn);
info->vararg_on_stack = 1;
info->stack_arg_min_align = 4;
+ info->vararg_stack_arg_min_align = 8;
return info;
}
diff --git a/src/arch/aa64/native.c b/src/arch/aa64/native.c
@@ -2525,28 +2525,53 @@ static const ABIArgInfo* aa_param_abi(NativeTarget* t, const ABIFuncInfo* abi,
return scratch;
}
-/* Stack footprint of a single argument part. AAPCS64 uses 8-byte slots;
- * Apple ARM64 uses compact 4-byte slots for stack-passed int32-sized values. */
+/* Minimum stack slot size/alignment of an argument part. AAPCS64 uses 8-byte
+ * slots. Apple ARM64 uses compact 4-byte slots for fixed stack-passed
+ * int32-sized values, but its forced stack variadics still use 8-byte slots. */
static u32 aa_stack_arg_min_align(const ABIFuncInfo* abi) {
return (abi && abi->stack_arg_min_align) ? abi->stack_arg_min_align : 8u;
}
+static u32 aa_vararg_stack_arg_min_align(const ABIFuncInfo* abi) {
+ if (abi && abi->vararg_stack_arg_min_align)
+ return abi->vararg_stack_arg_min_align;
+ return aa_stack_arg_min_align(abi);
+}
+
+static u32 aa_vararg_stack_start(const ABIFuncInfo* abi, u32 cursor) {
+ return align_up_u32(cursor, aa_vararg_stack_arg_min_align(abi));
+}
+
/* Natural stack alignment of a part, capped at 16 (binary128). */
-static u32 aa_part_stack_align(const ABIFuncInfo* abi,
- const ABIArgPart* part) {
- u32 min_align = aa_stack_arg_min_align(abi);
+static u32 aa_part_stack_align_min(u32 min_align, const ABIArgPart* part) {
u32 al = part->align ? part->align : 8u;
if (al < min_align) al = min_align;
if (al > 16u) al = 16u;
return al;
}
+static u32 aa_part_stack_align(const ABIFuncInfo* abi,
+ const ABIArgPart* part) {
+ return aa_part_stack_align_min(aa_stack_arg_min_align(abi), part);
+}
+
+static u32 aa_part_vararg_stack_align(const ABIFuncInfo* abi,
+ const ABIArgPart* part) {
+ return aa_part_stack_align_min(aa_vararg_stack_arg_min_align(abi), part);
+}
+
static u32 aa_part_stack_size(const ABIFuncInfo* abi,
const ABIArgPart* part) {
return align_up_u32(part->size ? part->size : 8u,
aa_part_stack_align(abi, part));
}
+static u32 aa_part_vararg_stack_size(const ABIFuncInfo* abi,
+ const ABIArgPart* part) {
+ return align_up_u32(part->size ? part->size : 8u,
+ aa_part_vararg_stack_align(abi, part));
+}
+
/* The scalar type used to move one ABI part through a register. Aggregate
* args/results are split into parts; each part must move at its own width, not
* the (possibly >8-byte) aggregate width. */
@@ -2568,16 +2593,17 @@ static KitCgTypeId aa_part_scalar_type(const ABIArgPart* part) {
}
}
-static u32 aa_class_stack_size(const ABIFuncInfo* abi, const ABIArgInfo* ai) {
+static u32 aa_class_vararg_stack_size(const ABIFuncInfo* abi,
+ const ABIArgInfo* ai) {
u32 total = 0;
+ u32 min_align = aa_vararg_stack_arg_min_align(abi);
if (!ai || ai->kind == ABI_ARG_IGNORE) return 0;
if (ai->kind == ABI_ARG_INDIRECT) return 8u;
for (u32 p = 0; p < ai->nparts; ++p) {
- total = align_up_u32(total, aa_part_stack_align(abi, &ai->parts[p]));
- total += aa_part_stack_size(abi, &ai->parts[p]);
+ total = align_up_u32(total, aa_part_vararg_stack_align(abi, &ai->parts[p]));
+ total += aa_part_vararg_stack_size(abi, &ai->parts[p]);
}
- return align_up_u32(total ? total : aa_stack_arg_min_align(abi),
- aa_stack_arg_min_align(abi));
+ return align_up_u32(total ? total : min_align, min_align);
}
static u32 aa_call_stack_size(NativeTarget* t, const NativeCallDesc* desc) {
@@ -2590,7 +2616,8 @@ static u32 aa_call_stack_size(NativeTarget* t, const NativeCallDesc* desc) {
abi && abi->variadic && abi->vararg_on_stack && i >= abi->nparams;
if (ai->kind == ABI_ARG_IGNORE) continue;
if (force_stack) {
- stack += aa_class_stack_size(abi, ai);
+ stack = aa_vararg_stack_start(abi, stack);
+ stack += aa_class_vararg_stack_size(abi, ai);
continue;
}
if (ai->kind == ABI_ARG_INDIRECT) {
@@ -2732,8 +2759,9 @@ static void aa_plan_call(NativeTarget* t, const NativeCallDesc* desc,
if (force_stack) {
NativeLoc tmpreg =
native_loc_reg(desc->args[i].type, NATIVE_REG_INT, AA_TMP0);
- u32 n = aa_class_stack_size(abi, ai);
+ u32 n = aa_class_vararg_stack_size(abi, ai);
u32 off = 0;
+ stack = aa_vararg_stack_start(abi, stack);
while (off < n) {
u32 chunk = (n - off > 8u) ? 8u : (n - off);
aa_load_part(t, tmpreg, desc->args[i], off, chunk);
@@ -4040,15 +4068,18 @@ static u32 aa_va_base_reg(AANativeTarget* a, NativeAddr ap) {
* va_list pointer opaquely. `ap` addresses the va_list object itself. */
static void aa_va_start_core(AANativeTarget* a, NativeAddr ap) {
NativeTarget* t = &a->base;
+ const ABIFuncInfo* abi =
+ a->func ? abi_cg_func_info(t->c->abi, a->func->fn_type) : NULL;
ABIVaListInfo vai = abi_va_list_layout(t->c->abi);
NativeLoc ptr =
native_loc_reg(builtin_id(KIT_CG_BUILTIN_I64), NATIVE_REG_INT, AA_TMP0);
if (vai.kind == ABI_VA_LIST_POINTER) {
/* `va_list = &<first vararg>`. Variadic stack args follow the fixed
- * incoming params in the same caller window, so the offset is the
- * current next_param_stack cursor. */
- aa_emit_add_imm(a, AA_TMP0, AA_FP,
- aa_fp_off_in_arg(a, a->next_param_stack));
+ * incoming params in the same caller window. Apple ARM64 compact fixed
+ * stack args may leave this cursor at +4, while the first variadic slot
+ * starts at the next 8-byte boundary. */
+ u32 stack = aa_vararg_stack_start(abi, a->next_param_stack);
+ aa_emit_add_imm(a, AA_TMP0, AA_FP, aa_fp_off_in_arg(a, stack));
aa_emit_mem(a, 0, ptr, ap, aa_mem_for_type(t, ptr.type, 8));
return;
}
@@ -4115,7 +4146,17 @@ static void aa_va_arg_core(AANativeTarget* a, NativeLoc dst, NativeAddr ap,
if (vai.kind == ABI_VA_LIST_POINTER) {
aa_emit_mem(a, 1, cur, ap, ptr_mem);
src = aa_reg_addr(type, AA_TMP0, 0);
- aa_emit_add_imm(a, AA_TMP1, AA_TMP0, 8);
+ {
+ const ABIFuncInfo* abi =
+ a->func ? abi_cg_func_info(t->c->abi, a->func->fn_type) : NULL;
+ ABIArgPart part;
+ memset(&part, 0, sizeof part);
+ part.cls = cg_type_is_float(t->c, type) ? ABI_CLASS_FP : ABI_CLASS_INT;
+ part.size = type_size32(t, type);
+ part.align = type_align32(t, type);
+ aa_emit_add_imm(a, AA_TMP1, AA_TMP0,
+ (i32)aa_part_vararg_stack_size(abi, &part));
+ }
aa_emit_mem(a, 0, native_loc_reg(cur.type, NATIVE_REG_INT, AA_TMP1), ap,
ptr_mem);
aa_emit_mem(a, 1, val, src, val_mem);
diff --git a/test/api/abi_classify_test.c b/test/api/abi_classify_test.c
@@ -538,6 +538,34 @@ static void test_aarch64_windows_variadic(void) {
kit_compiler_free(c);
}
+static void test_apple_arm64_stack_traits(void) {
+ KitCompiler* c = new_compiler(KIT_ARCH_ARM_64, KIT_OS_MACOS, KIT_OBJ_MACHO);
+ KitCgBuiltinTypes bi = kit_cg_builtin_types(c);
+ KitCgTypeId i32 = bi.id[KIT_CG_BUILTIN_I32];
+ KitCgTypeId args[1] = {i32};
+ const ABIFuncInfo* fi =
+ classify_fn_n(c, bi.id[KIT_CG_BUILTIN_VOID], args, 1, 1);
+
+ EXPECT(fi->vararg_on_stack == 1,
+ "apple arm64 variadic: vararg_on_stack=%u want 1",
+ (unsigned)fi->vararg_on_stack);
+ EXPECT(fi->stack_arg_min_align == 4,
+ "apple arm64 fixed stack min=%u want 4",
+ (unsigned)fi->stack_arg_min_align);
+ EXPECT(fi->vararg_stack_arg_min_align == 8,
+ "apple arm64 vararg stack min=%u want 8",
+ (unsigned)fi->vararg_stack_arg_min_align);
+ {
+ ABITypeInfo vi = abi_va_list_info(((Compiler*)c)->abi);
+ EXPECT(vi.size == 8, "apple arm64 va_list size=%u want 8",
+ (unsigned)vi.size);
+ EXPECT(vi.scalar_kind == ABI_SC_PTR,
+ "apple arm64 va_list scalar_kind=%u want ABI_SC_PTR (%u)",
+ (unsigned)vi.scalar_kind, (unsigned)ABI_SC_PTR);
+ }
+ kit_compiler_free(c);
+}
+
int main(void) {
kit_unit_init(&g_u);
check_target(KIT_ARCH_X86_64, KIT_OS_LINUX, KIT_OBJ_ELF);
@@ -548,6 +576,7 @@ int main(void) {
check_target(KIT_ARCH_ARM_64, KIT_OS_WINDOWS, KIT_OBJ_COFF);
test_win64_specifics();
test_aarch64_windows_variadic();
+ test_apple_arm64_stack_traits();
kit_unit_summary(&g_u, "abi_classify_test");
return kit_unit_status(&g_u);
}
diff --git a/test/toy/cases/133_varargs_mixed_types.toy b/test/toy/cases/133_varargs_mixed_types.toy
@@ -35,6 +35,15 @@ fn no_varargs(n: i64, ...): i64 {
return n * (2 as i64);
}
+fn adjacent_i32(n: i64, ...): i64 {
+ var ap: va_list;
+ @va_start(ap);
+ let a: i32 = @va_arg<i32>(ap);
+ let b: i32 = @va_arg<i32>(ap);
+ @va_end(ap);
+ return n + ((a as i64) * (10 as i64)) + (b as i64);
+}
+
fn __user_main(): i64 {
let s: i64 = sum3(10, 1, 2, 3 as i32);
if s != (16 as i64) { return 1; }
@@ -44,6 +53,8 @@ fn __user_main(): i64 {
if c != (7 + 11 + 11) { return 3; }
let z: i64 = no_varargs(21);
if z != (42 as i64) { return 4; }
+ let p: i64 = adjacent_i32(0, 11 as i32, 22 as i32);
+ if p != (132 as i64) { return 5; }
return 42;
}