commit 532cc5f1ebd1b7bed82472244cd4cb8e0a378a90
parent 82ec9ebef078a59b33a21a88ab8ddbd476f7e038
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sun, 10 May 2026 19:41:07 -0700
arch/aa64: GOT addressing for extern data on Mach-O; build hosted shim with cfree
Teach the AArch64 backend to materialize undefined externs via the GOT
indirection sequence (ADR_GOT_PAGE + LD64_GOT_LO12_NC) on object formats
that bind dylib imports through __DATA,__got — i.e. Mach-O. With the
direct ADRP + ADD/LDR sequence we emit elsewhere, dyld has nowhere to
land the runtime fixup for libSystem-imported globals (__stdinp,
__stdoutp, __stderrp, etc.).
Policy stays out of the backend: obj_format_extern_via_got(Compiler*) in
src/obj/obj_secnames.c is the one place that names CFREE_OBJ_MACHO. The
backend reads it through use_got_for_sym, keyed on section_id ==
OBJ_SEC_NONE (the canonical "undefined external" marker per obj.h).
ELF and other formats keep the direct path.
In the same spirit, lift the lone other Apple-OS check out of the
backend: ABIFuncInfo gains a vararg_on_stack trait that
apple_arm64_compute_func_info sets, so emit_arg_value reads
fi->vararg_on_stack instead of branching on target.os. The AArch64
backend now contains zero CFREE_OS_* references.
Makefile: hosted-macos shim is now built by cfree-cc itself. The
previous clang dependency existed solely because we couldn't emit GOT
relocs; that constraint is gone.
Verified:
- otool -rv on the cfree-built shim shows GOTLDP / GOTLDPOF against
___stdinp / ___stdoutp / ___stderrp (was PAGE21 / PAGOF12).
- test/libc/run.sh: 7/7 pass, 2 skip (unsupported surface).
- test-cg + test-link: 119/119 pass.
Diffstat:
6 files changed, 141 insertions(+), 29 deletions(-)
diff --git a/Makefile b/Makefile
@@ -57,22 +57,21 @@ rt: rt-aarch64-linux
# rt/include/libc/ to whatever the platform libc actually exports. macOS
# variant is the only one wired today.
#
-# Built with clang for now, not cfree-cc: the shim reads libSystem-imported
-# global variables (__stdinp, __stdoutp, __stderrp), and cfree's AArch64
-# codegen always emits direct ADRP+LDR (R_AARCH64_ADR_PREL_PG_HI21 +
-# LDST64_ABS_LO12_NC) for extern globals. Mach-O dylib imports require
-# GOT_LOAD_PAGE21 / GOT_LO12_NC so the load can route through a chained
-# fixup. clang emits GOT_LOAD unconditionally on this target; until cfree
-# matches, the shim has to be built by clang.
+# Built by cfree itself. The shim reads libSystem-imported global
+# variables (__stdinp, __stdoutp, __stderrp), which require Mach-O
+# GOT_LOAD_PAGE21 / GOT_LOAD_PAGEOFF12 relocations so dyld can route the load
+# through a non-lazy pointer in __DATA,__got; the AArch64 backend picks
+# that sequence automatically for undefined externs on Mach-O targets
+# (see use_got_for_sym in src/arch/aarch64.c).
HOSTED_MACOS_AR = build/libcfree_hosted_macos.a
HOSTED_MACOS_SRC = rt/lib/cfree_hosted/macos.c
HOSTED_MACOS_OBJ = build/cfree_hosted/macos.o
hosted-macos: $(HOSTED_MACOS_AR)
-$(HOSTED_MACOS_OBJ): $(HOSTED_MACOS_SRC)
+$(HOSTED_MACOS_OBJ): $(HOSTED_MACOS_SRC) $(BIN)
@mkdir -p $(dir $@)
- $(CC) $(HOST_SYSROOT_CFLAGS) -arch arm64 -ffreestanding -c $< -o $@
+ $(BIN) cc -target aarch64-darwin -c $< -o $@
$(HOSTED_MACOS_AR): $(HOSTED_MACOS_OBJ) $(BIN)
@rm -f $@
diff --git a/src/abi/abi.h b/src/abi/abi.h
@@ -97,6 +97,11 @@ typedef struct ABIFuncInfo {
u16 nparams;
u8 variadic;
u8 has_sret;
+ /* True when the trailing `...` portion of a variadic call must be
+ * routed to the stack exclusively, bypassing the GPR/FPR arg pools.
+ * Apple ARM64 sets this; AAPCS64 / SysV-x64 leave it 0 (variadics
+ * use the same register routing as fixed args). */
+ u8 vararg_on_stack;
u32 vararg_gp_offset;
u32 vararg_fp_offset;
u32 vararg_overflow_offset;
diff --git a/src/abi/abi_apple_arm64.c b/src/abi/abi_apple_arm64.c
@@ -11,9 +11,9 @@
* on the stack (no v0-v7 / x0-x7 routing for the `...` portion of
* the arglist). This is a *call-site* divergence — the fixed
* params classify identically to AAPCS64, so compute_func_info
- * remains an AAPCS64 alias. The stack routing is enforced inside
- * the cg backend (src/arch/aarch64.c::emit_arg_value) by keying on
- * target.os when synthesizing the variadic-arg ABIArgInfo.
+ * delegates to the AAPCS64 classifier and then sets the
+ * ABIFuncInfo.vararg_on_stack trait that the cg backend reads
+ * when synthesizing variadic-arg ABIArgInfos.
*
* 3. Stack-arg promotion — small integer arguments passed on the
* stack are promoted to 4 bytes minimum (`char`/`short` occupy 4
@@ -35,8 +35,13 @@ static ABIFuncInfo* apple_arm64_compute_func_info(TargetABI* a,
const Type* fn) {
/* Phase 2: spell out the Darwin variadic / stack-arg-promotion
* deltas. For now the AAPCS64 classifier produces ABI-correct
- * output for the fixed-args-only programs in the v1 cg suite. */
- return aapcs64_compute_func_info(a, fn);
+ * output for the fixed-args-only programs in the v1 cg suite,
+ * and we layer on the vararg-on-stack trait so the cg backend
+ * routes the `...` portion to the stack without keying on the
+ * target OS itself. */
+ ABIFuncInfo* info = aapcs64_compute_func_info(a, fn);
+ info->vararg_on_stack = 1;
+ return info;
}
static const Type* apple_arm64_va_list_type(TargetABI* a, Pool* p) {
diff --git a/src/arch/aarch64.c b/src/arch/aarch64.c
@@ -1264,10 +1264,54 @@ static RelocKind ldst_lo12_reloc_for(u32 nbytes) {
}
}
-/* Materialize &sym+addend into `dst_reg` via ADRP + ADD (LO12_NC). */
+/* Forward decl: after a GOT load, any nonzero addend is applied through
+ * this helper (including ones too big for a single imm12). Defined below. */
+static void emit_addr_adjust(MCEmitter* mc, u32 Rd, u32 base, i32 off);
+
+/* True when the symbol must be reached via a GOT indirection slot at
+ * this site: an undefined external on a target format that binds extern
+ * data through __got / non-lazy pointers (Mach-O today). The policy
+ * lives behind obj_format_extern_via_got so the backend never names a
+ * specific OS/format.
+ *
+ * The "is undefined" test keys on section_id == OBJ_SEC_NONE — the
+ * canonical marker per obj.h. SK_UNDEF as a kind is reserved for
+ * symbols whose kind isn't known yet; the decl pass mints externs
+ * with their intended SK_OBJ / SK_FUNC kind plus OBJ_SEC_NONE. */
+static int use_got_for_sym(CGTarget* t, ObjSymId sym) {
+ const ObjSym* s;
+ if (!obj_format_extern_via_got(t->c)) return 0;
+ s = obj_symbol_get(t->obj, sym);
+ return s && s->section_id == OBJ_SEC_NONE;
+}
+
+/* Emit `ADRP dst, sym@GOTPAGE ; LDR Xdst, [dst, #sym@GOTPAGEOFF]`,
+ * leaving the runtime address of `sym` in `dst_reg`. Addends are
+ * deliberately omitted from the GOT relocs — most loaders disallow
+ * nonzero addends on GOT-load fixups — so callers add any displacement
+ * with a follow-on ADD/LDUR/STUR. */
+static void emit_got_load_addr(CGTarget* t, u32 dst_reg, ObjSymId sym) {
+ MCEmitter* mc = t->mc;
+ u32 sec = mc->section_id;
+ u32 adrp_pos = mc->pos(mc);
+ emit32(mc, aa64_adrp_base(dst_reg));
+ mc->emit_reloc_at(mc, sec, adrp_pos, R_AARCH64_ADR_GOT_PAGE, sym, 0, 0, 0);
+ u32 ldr_pos = mc->pos(mc);
+ emit32(mc, aa64_ldr_uimm(/*size=*/3, dst_reg, dst_reg, 0));
+ mc->emit_reloc_at(mc, sec, ldr_pos, R_AARCH64_LD64_GOT_LO12_NC, sym, 0, 0, 0);
+}
+
+/* Materialize &sym+addend into `dst_reg` via ADRP + ADD (LO12_NC), or
+ * ADRP + LDR-from-GOT + (optional) ADD when the symbol must route
+ * through an indirection slot. */
static void emit_global_addr(CGTarget* t, u32 dst_reg, ObjSymId sym,
i64 addend) {
MCEmitter* mc = t->mc;
+ if (use_got_for_sym(t, sym)) {
+ emit_got_load_addr(t, dst_reg, sym);
+ if (addend) emit_addr_adjust(mc, dst_reg, dst_reg, (i32)addend);
+ return;
+ }
u32 sec = mc->section_id;
u32 adrp_pos = mc->pos(mc);
emit32(mc, aa64_adrp_base(dst_reg));
@@ -1358,12 +1402,26 @@ static void aa_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma) {
u32 sidx = size_idx_for_bytes(sz);
/* OPK_GLOBAL: ADRP scratch, sym ; LDR Wd, [scratch, #:lo12:sym].
- * The LO12_NC reloc requires the scaled-offset LDR encoding, not LDUR. */
+ * The LO12_NC reloc requires the scaled-offset LDR encoding, not LDUR.
+ *
+ * Extern-via-GOT path: ADRP scratch, sym@GOTPAGE ;
+ * LDR Xscratch, [scratch, #sym@GOTPAGEOFF] ; LDUR Wd, [scratch, #addend]
+ * The GOT load yields the symbol's runtime address; a plain LDUR (no reloc)
+ * reads at +addend. NOTE(review): addend isn't range-checked vs. LDUR's imm9. */
if (addr.kind == OPK_GLOBAL) {
MCEmitter* mc = t->mc;
u32 sec = mc->section_id;
ObjSymId sym = addr.v.global.sym;
i64 add = addr.v.global.addend;
+ if (use_got_for_sym(t, sym)) {
+ emit_got_load_addr(t, /*dst=*/9, sym);
+ if (dst.cls == RC_FP) {
+ emit32(mc, aa64_ldur_fp(sidx, reg_num(dst), 9, (i32)add));
+ } else {
+ emit32(mc, aa64_ldur(sidx, reg_num(dst), 9, (i32)add));
+ }
+ return;
+ }
u32 adrp_pos = mc->pos(mc);
emit32(mc, aa64_adrp_base(/*Rd=*/9));
mc->emit_reloc_at(mc, sec, adrp_pos, R_AARCH64_ADR_PREL_PG_HI21, sym, add,
@@ -1393,7 +1451,11 @@ static void aa_store(CGTarget* t, Operand addr, Operand src, MemAccess ma) {
/* OPK_GLOBAL: ADRP scratch, sym ; STR Wt, [scratch, #:lo12:sym].
* For OPK_IMM source, materialize the value first into x9, then use
- * x10 for the global base so the two scratches don't collide. */
+ * x10 for the global base so the two scratches don't collide.
+ *
+ * Extern-via-GOT path: load the symbol's runtime address into the
+ * base scratch via emit_got_load_addr, then STUR with addend baked
+ * into the imm9 (no reloc on the store). */
if (addr.kind == OPK_GLOBAL) {
MCEmitter* mc = t->mc;
u32 sec = mc->section_id;
@@ -1413,6 +1475,15 @@ static void aa_store(CGTarget* t, Operand addr, Operand src, MemAccess ma) {
src_reg = reg_num(src);
}
u32 base = (src.kind == OPK_IMM) ? 10u : 9u;
+ if (use_got_for_sym(t, sym)) {
+ emit_got_load_addr(t, base, sym);
+ if (src_is_fp) {
+ emit32(mc, aa64_stur_fp(sidx, src_reg, base, (i32)add));
+ } else {
+ emit32(mc, aa64_stur(sidx, src_reg, base, (i32)add));
+ }
+ return;
+ }
u32 adrp_pos = mc->pos(mc);
emit32(mc, aa64_adrp_base(base));
mc->emit_reloc_at(mc, sec, adrp_pos, R_AARCH64_ADR_PREL_PG_HI21, sym, add,
@@ -1474,17 +1545,27 @@ static void aa_addr_of(CGTarget* t, Operand dst, Operand lv) {
}
if (lv.kind == OPK_GLOBAL) {
/* ADRP Xd, sym ; ADD Xd, Xd, #:lo12:sym (with addend baked into both
- * relocations). Used to materialize a function or data pointer. */
+ * relocations). Used to materialize a function or data pointer.
+ *
+ * Extern-via-GOT path: load the address from the GOT slot and then
+ * apply the addend with a plain ADD/SUB (GOT relocs disallow addends). */
u32 rd = reg_num(dst);
+ ObjSymId sym = lv.v.global.sym;
+ i64 addend = lv.v.global.addend;
+ if (use_got_for_sym(t, sym)) {
+ emit_got_load_addr(t, rd, sym);
+ if (addend) emit_addr_adjust(t->mc, rd, rd, (i32)addend);
+ return;
+ }
u32 sec = t->mc->section_id;
u32 adrp_pos = t->mc->pos(t->mc);
emit32(t->mc, aa64_adrp_base(rd));
- t->mc->emit_reloc_at(t->mc, sec, adrp_pos, R_AARCH64_ADR_PREL_PG_HI21,
- lv.v.global.sym, lv.v.global.addend, 0, 0);
+ t->mc->emit_reloc_at(t->mc, sec, adrp_pos, R_AARCH64_ADR_PREL_PG_HI21, sym,
+ addend, 0, 0);
u32 add_pos = t->mc->pos(t->mc);
emit32(t->mc, aa64_add_imm(1, rd, rd, 0, 0));
- t->mc->emit_reloc_at(t->mc, sec, add_pos, R_AARCH64_ADD_ABS_LO12_NC,
- lv.v.global.sym, lv.v.global.addend, 0, 0);
+ t->mc->emit_reloc_at(t->mc, sec, add_pos, R_AARCH64_ADD_ABS_LO12_NC, sym,
+ addend, 0, 0);
return;
}
aa_panic(t, "addr_of");
@@ -1929,17 +2010,19 @@ static void aa_convert(CGTarget* t, ConvKind k, Operand dst, Operand src) {
* For BYVAL/INDIRECT the caller's `storage` is the address of the source
* data; we either load chunks into the next register pair (DIRECT
* aggregate) or pass the address itself (INDIRECT). */
-static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int,
- u32* next_fp, u32* stack_off) {
+static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi,
+ const CGABIValue* av, u32* next_int, u32* next_fp,
+ u32* stack_off) {
AAImpl* a = impl_of(t);
/* Synthesize a one-part DIRECT ABIArgInfo for var args (av->abi is NULL
* past the fixed-param count). AAPCS64 routes var args through the same
* register/stack rules as fixed scalars, so this matches what
* abi_func_info would have produced.
*
- * Apple ARM64 (Darwin) diverges: variadic args go on the stack only.
- * Detect the synthesized-vararg case and bump the next-int / next-fp
- * cursors past the register pool so the part below routes to stack. */
+ * Apple ARM64 diverges: variadic args go on the stack only. The
+ * ABIFuncInfo.vararg_on_stack trait carries that policy out of the
+ * backend — we bump the next-int / next-fp cursors past the register
+ * pool so the part below falls through to stack placement. */
ABIArgInfo va_ai;
ABIArgPart va_pt;
const ABIArgInfo* ai = av->abi;
@@ -1955,7 +2038,7 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int,
va_pt.align = sz;
va_pt.src_offset = 0;
ai = &va_ai;
- if (t->c->target.os == CFREE_OS_MACOS) {
+ if (fi && fi->vararg_on_stack) {
*next_int = 8;
*next_fp = 8;
}
@@ -2084,7 +2167,7 @@ static void aa_call(CGTarget* t, const CGCallDesc* d) {
}
for (u32 i = 0; i < d->nargs; ++i) {
- emit_arg_value(t, &d->args[i], &next_int, &next_fp, &stack_off);
+ emit_arg_value(t, d->abi, &d->args[i], &next_int, &next_fp, &stack_off);
}
/* Track outgoing-arg high-water mark, 16-aligned. */
diff --git a/src/obj/obj.h b/src/obj/obj.h
@@ -407,6 +407,22 @@ Sym obj_secname_preinit_array(Compiler*);
Sym obj_secname_tdata(Compiler*);
Sym obj_secname_tbss(Compiler*);
+/* ---- format-aware codegen policy ----
+ *
+ * Backends consult these predicates instead of branching on
+ * target.os / target.obj directly, so the OS/format knowledge stays
+ * concentrated in src/obj/ and a future format lands as one case here
+ * rather than fan-out in every CGTarget. */
+
+/* True when references to undefined external symbols must be
+ * materialized via an indirection slot (GOT / non-lazy pointer)
+ * rather than direct page+offset addressing. Mach-O: yes — dyld
+ * binds dylib imports through __DATA,__got at runtime, and the
+ * direct PAGE21/PAGEOFF12 fixups can't carry that binding. ELF
+ * static link: no — the linker resolves undefined externs at link time
+ * and patches the direct ADRP/ADD bytes in place. */
+int obj_format_extern_via_got(const Compiler*);
+
/* ---- file format emitters ---- */
void emit_elf(Compiler*, ObjBuilder*, Writer*);
void emit_coff(Compiler*, ObjBuilder*, Writer*);
diff --git a/src/obj/obj_secnames.c b/src/obj/obj_secnames.c
@@ -89,3 +89,7 @@ Sym obj_secname_tbss(Compiler* c) {
return secname_panic_unimpl(c, ".tbss");
}
}
+
+int obj_format_extern_via_got(const Compiler* c) {
+ return c->target.obj == CFREE_OBJ_MACHO;
+}