commit bdef0648ac7f4deec69fa6bb69f7df6a866102c9
parent af1dcc5f631b291e74a78f74af15d39bba6c87b5
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sun, 10 May 2026 12:07:16 -0700
obj/abi: MULTIOBJ Phase 2 — Mach-O writer/reader (MH_OBJECT, arm64)
Lands the cfree-emits-Mach-O slice for arm64-apple-macos. cfree -c
produces an MH_OBJECT that links via the host clang and runs natively
(returns 42 in the smoke test); the cfree-emitted .o round-trips
byte-identically through read_macho → emit_macho.
obj/: macho_emit.c writes a single-segment MH_OBJECT with LC_SEGMENT_64
+ LC_BUILD_VERSION + LC_SYMTAB + LC_DYSYMTAB, prepending '_' to C
symbols (Apple convention) and emitting an ARM64_RELOC_ADDEND pair
ahead of any non-zero-addend relocation. macho_read.c inverts:
strips the leading '_' and collapses ADDEND pairs. macho_reloc_aarch64.c
fills out the RelocKind ↔ ARM64_RELOC_* table and exposes pcrel/length
companions so the writer can pack relocation_info bitfields without
duplicating per-kind logic. obj_secnames.c switches the Mach-O
init/fini/tdata/tbss arms from panics to '__DATA,__mod_*' /
'__DATA,__thread_*'. api/stubs.c drops the emit_macho/read_macho
panic stubs.
abi/arch/: abi_apple_arm64.c keeps the AAPCS64 alias for fixed-arg
classification (the divergence is at the call site, not the
classifier) and documents the cg-side hooks. aarch64.c::emit_arg_value
forces the synthesized variadic-arg ABIArgInfo straight to the stack
when target.os == MACOS, matching the Darwin variadic ABI.
test/: cfree_test_target.h and the {cg,elf}/run.sh harnesses learn a
CFREE_TEST_OBJ env var (elf|macho); exec_target.sh moves to
<arch>-<os> tag form with a Darwin-native exec branch. Per-case
applicability is now a *.targets file keyed on <arch>-<obj> tuples
(rename of *.arches). test/macho/cfree-roundtrip-macho.c is the
read→emit oracle; cfree-emitted .o round-trips byte-identically.
Diffstat:
16 files changed, 1711 insertions(+), 187 deletions(-)
diff --git a/doc/MULTIOBJ.md b/doc/MULTIOBJ.md
@@ -26,8 +26,34 @@ matrix.
- [x] `obj_secname_*` helpers for init/fini/preinit/tdata/tbss (§3.5)
- [x] `src/obj/macho.h` + `macho_reloc_aarch64.c` stubs (§3.1, §3.2)
- [x] ELF suite green (test-elf 37/37, test-link 119/119, test-cg 1549/1549)
-- [ ] **Phase 2** — Mach-O object writer + reader (MH_OBJECT)
+- [x] **Phase 2** — Mach-O object writer + reader (MH_OBJECT, arm64)
+ - [x] `obj/macho_emit.c` — MH_OBJECT writer, leading-`_` C-symbol
+ prefix, ARM64_RELOC_ADDEND pair on non-zero addends (§3.1)
+ - [x] `obj/macho_reloc_aarch64.c` — full RelocKind ↔ ARM64_RELOC_*
+ table with pcrel/length companions (§3.2)
+ - [x] `obj/macho_read.c` — MH_OBJECT reader with strip-leading-`_`
+ inverse and ADDEND-pair collapsing (§3.1)
+ - [x] `obj/obj_secnames.c` — Mach-O section names for init/fini/tls
+ (§3.5)
+ - [x] `abi/abi_apple_arm64.c` — `va_list` is `char*`; variadic-arg
+ on-stack routing wired in `arch/aarch64.c::emit_arg_value`
+ when `target.os == MACOS` (§3.4)
+ - [x] `test/lib/exec_target.sh` — `<arch>-<os>` tag form, Darwin
+ native branch for `aarch64-macos` (§3.6)
+ - [x] Smoke test: `cfree cc -target arm64-apple-macos -c …` produces
+ Mach-O `.o` that links via host clang and runs natively (exit 42)
+ - [x] Self-roundtrip oracle: `test/macho/cfree-roundtrip-macho.c` —
+ cfree-emitted `.o` → `read_macho` → `emit_macho` is
+ byte-identical
+ - [x] ELF suite still green (test-elf 37/37)
+ - [x] Testing harness extensions (§7): `CFREE_TEST_OBJ` env in
+ `cfree_test_target.h` and `test/{cg,link,elf}/run.sh`,
+ per-case `*.targets` applicability (`test/elf/cases/18_bti_note.targets`)
+ - [ ] Clang-emitted Mach-O round-trip — needs section-relative reloc
+ and `__compact_unwind` handling (deferred)
- [ ] **Phase 3** — Mach-O linker (`link_emit_macho`)
+ - [ ] Ad-hoc codesigning in `link_macho.c` (LC_CODE_SIGNATURE) so
+ kernel will exec the binary on macOS 11+
---
@@ -504,7 +530,99 @@ For the new files and exposed symbols:
---
-## 7. Validation gates
+## 7. Testing harness
+
+The existing `test/cg/` and `test/link/` matrices already do everything
+the Mach-O work needs to validate against — round-trip (Path R), exec
+(Path E), JIT (Path J), DWARF check (Path W). We extend that
+infrastructure rather than standing up a parallel `test/macho/` peer
+of `test/elf/`: `test-link`'s Path R covers `clang -c` → cfree-roundtrip
+→ structural diff, and Path E covers run-the-binary. The same machinery
+serves Mach-O once the harness can pick a Mach-O target and exec the
+result.
+
+### 7.1 Target selection
+
+A new `CFREE_TEST_OBJ` env var sits parallel to `CFREE_TEST_ARCH`,
+values `elf` (default) | `macho` (later `coff`). `cfree_test_target.h`
+reads it and sets `t->obj` and `t->os` together (`macho` ⇒ MACOS) so
+both the C runners and the shell drivers stay in lockstep.
+
+`test/cg/run.sh`'s clang-cross detection grows a Mach-O branch:
+
+- `elf` ⇒ `--target=<arch>-linux-gnu` as today.
+- `macho` ⇒ `--target=arm64-apple-macos` (or `x86_64-apple-macos`).
+
+### 7.2 Per-case applicability
+
+`test/cg/cases/*.arches` becomes `*.targets`, listing one
+`<arch>-<obj>` tuple per line (`aarch64-elf`, `aarch64-macho`,
+`x86_64-elf`, …). Cases with no file default to "all supported
+tuples"; cases that exercise format-specific features (GNU IFUNC,
+SHT_GNU_RETAIN, ELF linker scripts) name only the tuples they
+support. The Phase-2 Mach-O allowlist starts with a small set —
+hello-world, integer arithmetic, locals, calls, varargs — and grows
+as ABI deltas and reloc-translator coverage land.
+
+### 7.3 Exec dispatch (`test/lib/exec_target.sh`)
+
+The queue tag widens from `<arch>` to `<arch>-<os>`:
+
+- `aarch64-linux` → existing podman/qemu path on a Linux container.
+- `arm64-macos` (new) → on a Darwin/arm64 host, `chmod +x && ./exe`
+ natively (no podman, no qemu). On non-Darwin hosts, SKIP cleanly
+ with "macOS exec requires Darwin host". Mach-O cannot be loaded
+ by the Linux kernel.
+- macOS-on-Linux is unsupported and stays SKIP. Linux-on-macOS
+ continues to flow through the podman path (already works on
+ Darwin/arm64 via `podman machine`).
+
+### 7.4 Phase-2 Path E (linker delegation)
+
+`link_emit_macho` doesn't exist until Phase 3, so Phase-2 Path E
+delegates to host `clang`: `cfree -c case.c -o case.o` then
+`clang -o case case.o`. A new `test/lib/link_macho_via_clang.sh`
+peer of `link_exe_runner` packages this so `test/cg/run.sh` and
+`test/link/run.sh` route Mach-O cases through it. Phase 3 swaps the
+helper for cfree's own linker; cases don't change.
+
+Clang's invocation of `ld` automatically inserts an ad-hoc code
+signature, so Phase-2 binaries exec on macOS 11+ without extra steps.
+Phase 3 inherits that responsibility — see the ad-hoc codesigning task
+under Phase 3 in the status checklist and the Phase 3 gate in §8.
+
+### 7.5 Round-trip diff
+
+Path R already runs `cfree-roundtrip` (read → write) and structural-
+diffs the input vs. the rewritten output. For ELF the diff is
+`readelf -aW | normalize.py`. For Mach-O the equivalent is
+`llvm-objdump --macho --syms --reloc --section-headers | normalize_macho.py`,
+a small new normalizer alongside `test/elf/normalize.py`. This is
+the only new Mach-O-specific test artifact Phase 2 ships — and
+because the diff is structural, it doesn't have to be byte-perfect
+against clang's output (just round-trip-stable through cfree's
+reader/writer).
+
+### 7.6 Sysroot
+
+Layer-B / Path R round-trip needs no sysroot — clang produces the
+`.o` without linking. Path E (Phase 2 via clang) needs the host SDK
+on Darwin: `xcrun --show-sdk-path` is the only sanctioned source.
+Cross-from-Linux is out of scope (Apple SDK isn't redistributable).
+A new `test/sdk/macos/` peer of `test/libc/{musl,glibc}/` handles the
+extraction, only invoked when libc-dependent cases are added (mostly
+a Phase-3 concern; Phase-2 smoke can stay freestanding).
+
+### 7.7 `make` targets
+
+No new top-level harness target. Existing `make test-link` /
+`make test-cg` honor `CFREE_TEST_OBJ` (and `CFREE_TEST_ARCH`); CI
+runs them once per supported `(arch, obj)` tuple. The default
+invocation stays `aarch64-elf` so `make test` behavior is unchanged.
+
+---
+
+## 8. Validation gates
A change in this plan is "done" when:
@@ -512,8 +630,10 @@ A change in this plan is "done" when:
output objects on a representative set of `test/cg/` cases.
- **Phase 2**: Mach-O `.o` produced by cfree links via host
`clang` into a runnable arm64-darwin executable; clang-built
- `.o` round-trips through cfree's reader/writer; ELF suite still
- green.
+ `.o` round-trips through cfree's reader/writer (`test-link`
+ Path R with `CFREE_TEST_OBJ=macho`); ELF suite still green.
- **Phase 3**: `cfree -c` + `cfree` linker produces an
- arm64-darwin Mach-O exe that runs natively on the Darwin host;
- per-milestone `test/cg/` cases green; ELF suite still green.
+ arm64-darwin Mach-O exe that runs natively on the Darwin host
+ (ad-hoc codesigned by `link_macho.c` so the kernel will exec
+ it); per-milestone `test/cg/` cases green; ELF suite still
+ green.
diff --git a/src/abi/abi_apple_arm64.c b/src/abi/abi_apple_arm64.c
@@ -1,29 +1,28 @@
/* Apple ARM64 (Darwin) ABI dispatch.
*
- * Phase 1 of doc/MULTIOBJ.md: vtable selection now keys on
- * (target.arch, target.os), and (ARM_64, MACOS) lands here instead of
+ * Phase 2 of doc/MULTIOBJ.md. Vtable selection keys on
+ * (target.arch, target.os); (ARM_64, MACOS) lands here instead of
* AAPCS64. The two ABIs diverge in:
*
- * 1. Variadics — Apple ARM64 passes ALL variadic arguments on the
- * stack (no v0-v7 / x0-x7 routing for the `...` portion of the
- * arglist). AAPCS64 passes them in registers like fixed args.
- * Consequence: `va_list` is just `char*`, not the AAPCS64
- * five-field struct.
+ * 1. va_list shape — Apple ARM64 `__builtin_va_list` is plain
+ * `char*`; AAPCS64 is a five-field struct. Overridden here.
*
- * 2. Stack-arg promotion — small integer arguments passed on the
- * stack are promoted to 4 bytes minimum (so `char` and `short`
- * occupy 4 stack bytes each, not 1 / 2).
+ * 2. Variadics on stack — Apple ARM64 passes ALL variadic arguments
+ * on the stack (no v0-v7 / x0-x7 routing for the `...` portion of
+ * the arglist). This is a *call-site* divergence — the fixed
+ * params classify identically to AAPCS64, so compute_func_info
+ * remains an AAPCS64 alias. The stack routing is enforced inside
+ * the cg backend (src/arch/aarch64.c::emit_arg_value) by keying on
+ * target.os when synthesizing the variadic-arg ABIArgInfo.
*
- * Phase 1 ships a thin shim that aliases AAPCS64's compute_func_info
- * — variadics and the stack-arg promotion override land in Phase 2
- * alongside the macho writer, so the build-it-and-see-what-breaks
- * loop on a real macOS toolchain catches divergences from the spec.
- * The va_list type is overridden now because it is cheap and the
- * divergence is unambiguous.
- *
- * Until macho_emit lands, this vtable is reachable only on the
- * (ARM_64, MACOS) target slice — itself unreachable end-to-end —
- * so the alias is safe. See abi_aapcs64.c for the AAPCS64 source. */
+ * 3. Stack-arg promotion — small integer arguments passed on the
+ * stack are promoted to 4 bytes minimum (`char`/`short` occupy 4
+ * stack bytes). Like (2), the divergence is in stack-slot
+ * assignment, not in classification, and lives in cg. The
+ * current cfree cg path uses 8-byte stack stride for every arg,
+ * which is wider than either ABI requires but ABI-safe — the
+ * narrower Apple-specific layout becomes a concern only when
+ * cross-checking against clang-emitted callers/callees. */
#include "abi/abi_internal.h"
#include "core/core.h"
diff --git a/src/api/stubs.c b/src/api/stubs.c
@@ -61,20 +61,16 @@ void parse_asm(Compiler* c, Lexer* l, MCEmitter* m) {
/* ============================================================
* Object emit/read for non-ELF formats
* ============================================================
- * ELF emit/read are real (src/obj/elf_emit.c, elf_read.c). The other
- * formats are stubs; callers receive a diagnostic if they ask for a
- * COFF/Mach-O/WASM target. */
+ * ELF emit/read are real (src/obj/elf_emit.c, elf_read.c). Mach-O
+ * emit/read are real (src/obj/macho_emit.c, macho_read.c). COFF and
+ * WASM remain stubs; callers receive a diagnostic if they ask for a
+ * COFF/WASM target. */
void emit_coff(Compiler* c, ObjBuilder* o, Writer* w) {
(void)o;
(void)w;
unimplemented(c, "emit_coff");
}
-void emit_macho(Compiler* c, ObjBuilder* o, Writer* w) {
- (void)o;
- (void)w;
- unimplemented(c, "emit_macho");
-}
void emit_wasm(Compiler* c, ObjBuilder* o, Writer* w) {
(void)o;
(void)w;
@@ -87,12 +83,6 @@ ObjBuilder* read_coff(Compiler* c, const char* n, const u8* d, size_t l) {
(void)l;
unimplemented(c, "read_coff");
}
-ObjBuilder* read_macho(Compiler* c, const char* n, const u8* d, size_t l) {
- (void)n;
- (void)d;
- (void)l;
- unimplemented(c, "read_macho");
-}
ObjBuilder* read_wasm(Compiler* c, const char* n, const u8* d, size_t l) {
(void)n;
(void)d;
diff --git a/src/arch/aarch64.c b/src/arch/aarch64.c
@@ -1884,7 +1884,12 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int,
/* Synthesize a one-part DIRECT ABIArgInfo for var args (av->abi is NULL
* past the fixed-param count). AAPCS64 routes var args through the same
* register/stack rules as fixed scalars, so this matches what
- * abi_func_info would have produced. */
+ * abi_func_info would have produced.
+ *
+ * Apple ARM64 (Darwin) diverges: variadic args go on the stack only.
+ * Detect the synthesized-vararg case and bump the next-int / next-fp
+ * cursors past the register pool so the part below routes to stack.
+ * See doc/MULTIOBJ.md §3.4. */
ABIArgInfo va_ai;
ABIArgPart va_pt;
const ABIArgInfo* ai = av->abi;
@@ -1900,6 +1905,10 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int,
va_pt.align = sz;
va_pt.src_offset = 0;
ai = &va_ai;
+ if (t->c->target.os == CFREE_OS_MACOS) {
+ *next_int = 8;
+ *next_fp = 8;
+ }
}
if (ai->kind == ABI_ARG_IGNORE) return;
diff --git a/src/obj/macho.h b/src/obj/macho.h
@@ -254,6 +254,8 @@ typedef struct MachRelocInfo {
* panics with a diagnostic. Stubs in macho_reloc_aarch64.c until the
* Phase 2 writer lands (see doc/MULTIOBJ.md). */
u32 macho_aarch64_reloc_to(u32 kind /* RelocKind */);
+u32 macho_aarch64_reloc_pcrel(u32 kind /* RelocKind */);
+u32 macho_aarch64_reloc_length(u32 kind /* RelocKind */);
u32 macho_aarch64_reloc_from(u32 macho_type);
#endif
diff --git a/src/obj/macho_emit.c b/src/obj/macho_emit.c
@@ -0,0 +1,663 @@
+/* Mach-O MH_OBJECT writer. Walks a finalized ObjBuilder and emits a
+ * 64-bit little-endian relocatable object via the supplied Writer.
+ *
+ * Layout strategy (MH_OBJECT — everything in one anonymous segment):
+ * 1. plan Mach-O sections (one per non-symtab/strtab/rela ObjSection),
+ * mapping cfree section names to (segname, sectname) pairs;
+ * 2. partition ObjSyms into local / extdef / undef and assign final
+ * indices for LC_DYSYMTAB;
+ * 3. build per-section relocation tables via the per-arch translator
+ * (only aarch64 is wired today);
+ * 4. assign file offsets sequentially: header, load commands, section
+ * bytes, relocation tables, symbol table, string table;
+ * 5. write header → load commands → section bytes → relocs → symtab
+ * → strtab.
+ *
+ * 64-bit little-endian only. Big-endian / 32-bit panics at entry.
+ *
+ * See doc/MULTIOBJ.md §3.1 for the round-trip invariant: read_macho of
+ * this output must produce an ObjBuilder shape-equivalent to the input,
+ * modulo (a) Mach-O's mandatory (segname, sectname) pairing and (b)
+ * any synthesized N_SECT symbols. The (segname,sectname) form chosen
+ * here is the canonical post-roundtrip shape — read_macho stores the
+ * comma-joined "__SEG,__sect" form in Section.name so a re-emit
+ * produces the same bytes. */
+
+#include <string.h>
+
+#include "core/arena.h"
+#include "core/buf.h"
+#include "core/bytes.h"
+#include "core/heap.h"
+#include "core/pool.h"
+#include "core/util.h"
+#include "obj/macho.h"
+
+/* Returns an all-zero SrcLoc, used as the location argument of the
+ * compiler_panic() calls in this file. */
+static SrcLoc no_loc(void) {
+  return (SrcLoc){0, 0, 0};
+}
+
+/* ---- LE writer helpers (Writer-based) ---- */
+
+/* Encodes `v` with wr_u32_le() and writes the 4 resulting bytes to `w`. */
+static void wr_u32(Writer* w, u32 v) {
+  u8 le[4];
+  wr_u32_le(le, v);
+  cfree_writer_write(w, le, sizeof le);
+}
+
+/* Encodes `v` with wr_u64_le() and writes the 8 resulting bytes to `w`. */
+static void wr_u64(Writer* w, u64 v) {
+  u8 le[8];
+  wr_u64_le(le, v);
+  cfree_writer_write(w, le, sizeof le);
+}
+
+/* Writes `s` as a 16-byte, zero-padded Mach-O name field.
+ *
+ * Only the first `len` bytes of `s` are read. Anything past 16 bytes is
+ * dropped because the on-disk field has no room for a longer encoding; a
+ * name of exactly 16 bytes is written without a terminating NUL. */
+static void wr_name16(Writer* w, const char* s, u32 len) {
+  u8 field[16] = {0};
+  u32 keep = len;
+  if (keep > 16) keep = 16;
+  memcpy(field, s, keep);
+  cfree_writer_write(w, field, sizeof field);
+}
+
+/* ---- (segname,sectname) derivation ---- */
+
+/* Split a cfree section name into Mach-O (segname, sectname) pair.
+ * If `name` contains a comma, it is treated as already in
+ * "__SEG,__sect" form and split at the first comma. Otherwise we
+ * derive the pair from SecKind, ignoring `name` (the input was an
+ * ELF-shaped name like ".text" or ".rodata"). */
+/* A Mach-O (segname, sectname) pair as filled in by name_to_seg_sect().
+ * Both name fields are written through copy_fixed16(): at most 16 bytes,
+ * zero padded, and NOT NUL-terminated when the name is exactly 16 bytes
+ * long — use the companion length field rather than strlen(). */
+typedef struct MSegSect {
+ char segname[16];  /* segment name bytes, e.g. "__TEXT" */
+ char sectname[16]; /* section name bytes, e.g. "__text" */
+ u32 seg_len;       /* bytes of segname in use (0..16) */
+ u32 sect_len;      /* bytes of sectname in use (0..16) */
+} MSegSect;
+
+/* Fills the 16-byte field `dst` from `src`: keeps at most 16 of the
+ * `src_len` bytes, zero-fills whatever is left, and stores the number of
+ * bytes kept in `*len_out`. `dst` and `src` must not overlap. */
+static void copy_fixed16(char* dst, u32* len_out, const char* src, u32 src_len) {
+  u32 kept = src_len;
+  if (kept > 16) kept = 16;
+  memset(dst, 0, 16);
+  memcpy(dst, src, kept);
+  *len_out = kept;
+}
+
+/* Derives the Mach-O (segname, sectname) pair for a cfree section.
+ *
+ *   name, nlen  section name bytes (not required to be NUL-terminated)
+ *   sec_kind    SecKind of the section; consulted only when `name` has no
+ *               comma
+ *   out         receives both names, each truncated to 16 bytes
+ *
+ * A name containing a comma is taken to be in "__SEG,__sect" form already
+ * and is split at the first comma. Otherwise the pair comes from
+ * `sec_kind` and `name` is ignored, except for SEC_DEBUG: there the ELF
+ * style name is kept and its leading '.' is replaced by "__"
+ * (".debug_info" -> "__DWARF,__debug_info"). Unknown kinds fall back to
+ * "__DATA,__data". */
+static void name_to_seg_sect(const char* name, u32 nlen, u16 sec_kind,
+                             MSegSect* out) {
+  /* Comma-separated form: prefix is the segname, suffix the sectname. */
+  for (u32 i = 0; i < nlen; ++i) {
+    if (name[i] != ',') continue;
+    copy_fixed16(out->segname, &out->seg_len, name, i);
+    copy_fixed16(out->sectname, &out->sect_len, name + i + 1, nlen - i - 1);
+    return;
+  }
+
+  if (sec_kind == SEC_DEBUG) {
+    /* Build "__" + name-without-dot in a local field. The previous code
+     * only dropped the '.', producing "debug_info" instead of the
+     * "__debug_info" spelling its own comment promised. Names without a
+     * leading '.' pass through unchanged, as before. */
+    char sect[16];
+    u32 n = 0;
+    u32 src = 0;
+    if (nlen && name[0] == '.') {
+      sect[n++] = '_';
+      sect[n++] = '_';
+      src = 1;
+    }
+    while (src < nlen && n < (u32)sizeof sect) sect[n++] = name[src++];
+    copy_fixed16(out->segname, &out->seg_len, "__DWARF", 7);
+    copy_fixed16(out->sectname, &out->sect_len, sect, n);
+    return;
+  }
+
+  /* Not comma-separated and not debug: derive from SecKind only. */
+  const char* seg = "__DATA";
+  const char* sect = "__data";
+  switch (sec_kind) {
+    case SEC_TEXT:
+      seg = "__TEXT";
+      sect = "__text";
+      break;
+    case SEC_RODATA:
+      seg = "__TEXT";
+      sect = "__const";
+      break;
+    case SEC_BSS:
+      sect = "__bss";
+      break;
+    case SEC_DATA:
+    default:
+      break;
+  }
+  copy_fixed16(out->segname, &out->seg_len, seg, (u32)strlen(seg));
+  copy_fixed16(out->sectname, &out->sect_len, sect, (u32)strlen(sect));
+}
+
+/* ---- per-section plan ---- */
+
+/* Per-output-section plan built by emit_macho(): one entry for every
+ * ObjSection that survives the pass-1 filter. The layout passes fill the
+ * address/offset fields; the write pass consumes them when emitting the
+ * section_64 headers, the section bytes and the relocation tables. */
+typedef struct MSec {
+ MSegSect ns; /* (segname, sectname) written into the section header */
+ u64 addr; /* assigned vmaddr within the segment */
+ u64 size; /* bytes (or bss size) */
+ u32 fileoff; /* 0 for zerofill */
+ u32 align; /* power-of-two; stored as log2 in section_64.align */
+ u32 reloff; /* 0 if no relocs */
+ u32 nreloc; /* entries in `relocs`, counting ADDEND pair entries too */
+ u32 flags; /* S_TYPE | S_ATTR_* */
+ u32 obj_sec; /* originating ObjSecId */
+ int is_zerofill; /* nonzero: no file bytes are written for this section */
+ const Buf* obj_bytes; /* NULL when zerofill */
+ u8* relocs; /* arena-allocated; nreloc * 8 bytes */
+} MSec;
+
+/* Returns the smallest r such that (1u << r) >= a, i.e. ceil(log2(a)).
+ * Both a == 0 and a == 1 yield 0; a non-power-of-two rounds up.
+ *
+ * The result saturates at 31. Without the bound, any a > 2^31 drove the
+ * loop to evaluate `1u << 32`, which is undefined behaviour and in
+ * practice may never terminate. */
+static u32 log2_align(u32 a) {
+  u32 r = 0;
+  while (r < 31 && (1u << r) < a) ++r;
+  return r;
+}
+
+/* Computes the initial section_64.flags word for a section.
+ *
+ *   sec_kind            SecKind of the source section
+ *   sec_flags           SF_* flags of the source section
+ *   sectname, sect_len  Mach-O section name bytes and their length
+ *
+ * Executable sections get both instruction attributes. A section is typed
+ * S_ZEROFILL when it is SEC_BSS or is named exactly "__bss"; everything
+ * else stays S_REGULAR (0). The name test is an exact match so that it
+ * agrees with sec_kind_from_seg_sect() in the reader — a prefix match
+ * would also tag e.g. "__bss_foo", which the reader maps back to SEC_DATA. */
+static u32 section_flags_for(u16 sec_kind, u16 sec_flags, const char* sectname,
+                             u32 sect_len) {
+  u32 f = 0; /* S_REGULAR */
+  if (sec_kind == SEC_TEXT || (sec_flags & SF_EXEC)) {
+    f |= S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS;
+  }
+  int named_bss = sect_len == 5 && memcmp(sectname, "__bss", 5) == 0;
+  if (sec_kind == SEC_BSS || named_bss) {
+    f |= S_ZEROFILL;
+  }
+  return f;
+}
+
+/* ---- symbol partition ---- */
+
+/* One planned symbol-table entry. emit_macho() fills these in final
+ * on-disk order and later serializes strx / n_type / n_sect / n_desc /
+ * n_value verbatim into each symbol record. */
+typedef struct MSym {
+ ObjSymId obj_id; /* originating ObjBuilder symbol */
+ u32 strx; /* offset in string table */
+ u8 n_type; /* N_EXT / N_PEXT bits combined with N_UNDF, N_ABS or N_SECT */
+ u8 n_sect; /* 1-based Mach-O section index, or NO_SECT */
+ u16 n_desc; /* N_WEAK_DEF, or log2(align) << 8 for common symbols */
+ u64 n_value; /* section addr + symbol value; size for common symbols */
+} MSym;
+
+static int sym_is_undef(const ObjSym* s) {
+ return s->section_id == OBJ_SEC_NONE && s->kind != SK_ABS &&
+ s->kind != SK_COMMON;
+}
+
+/* Returns 1 when `s` is a defined symbol with global or weak binding,
+ * 0 for undefined symbols and for every other binding. */
+static int sym_is_extdef(const ObjSym* s) {
+  int exported = (s->bind == SB_GLOBAL) || (s->bind == SB_WEAK);
+  return exported && !sym_is_undef(s);
+}
+
+/* ---- string table ----
+ *
+ * Mach-O strtab: leading zero byte at offset 0 represents the empty
+ * string. Entries are NUL-terminated; we don't dedupe (small symbol
+ * counts in v1; matches the simplest llvm output). The "_" prefix on
+ * C symbols is added inline in the writer below. */
+
+/* Serializes the finalized ObjBuilder `ob` as a 64-bit little-endian
+ * Mach-O MH_OBJECT through `w`.
+ *
+ *   c   compiler context: supplies the target description, the scratch
+ *       arena, the global string pool and the heap
+ *   ob  object to emit; only read
+ *   w   destination; written with seek + write, starting at offset 0
+ *
+ * The file is laid out as: mach_header_64, load commands (LC_SEGMENT_64,
+ * LC_BUILD_VERSION, LC_SYMTAB, LC_DYSYMTAB), section bytes, relocation
+ * tables, symbol table, string table. Symbols are ordered locals,
+ * external definitions, undefined, as LC_DYSYMTAB requires, and every
+ * named symbol gets a leading '_' in the string table.
+ *
+ * Panics (compiler_panic) on: a non-arm64 target, big-endian or non-8-byte
+ * pointers, more than 255 output sections, a relocation kind the per-arch
+ * translator rejects, a relocation without a symbol or whose symbol is not
+ * in the symbol table, and an ARM64_RELOC_ADDEND addend that does not fit
+ * the signed 24-bit field. */
+void emit_macho(Compiler* c, ObjBuilder* ob, Writer* w) {
+  Heap* h = (Heap*)c->env->heap;
+
+  /* ---- target validation ---------------------------------------- */
+  u32 cputype, cpusubtype;
+  u32 (*reloc_to)(u32);
+  u32 (*reloc_pcrel)(u32);
+  u32 (*reloc_length)(u32);
+  switch (c->target.arch) {
+    case CFREE_ARCH_ARM_64:
+      cputype = CPU_TYPE_ARM64;
+      cpusubtype = CPU_SUBTYPE_ARM64_ALL;
+      reloc_to = macho_aarch64_reloc_to;
+      reloc_pcrel = macho_aarch64_reloc_pcrel;
+      reloc_length = macho_aarch64_reloc_length;
+      break;
+    default:
+      compiler_panic(c, no_loc(),
+                     "emit_macho: unsupported target arch %u (only arm64 today)",
+                     (u32)c->target.arch);
+  }
+  if (c->target.big_endian) {
+    compiler_panic(c, no_loc(), "emit_macho: big-endian not supported");
+  }
+  if (c->target.ptr_size != 8) {
+    compiler_panic(c, no_loc(), "emit_macho: ptr_size %u (expected 8)",
+                   (u32)c->target.ptr_size);
+  }
+
+  /* ---- pass 1: plan Mach-O sections ----------------------------- */
+  u32 nobjsec = obj_section_count(ob);
+  MSec* secs = arena_zarray(c->scratch, MSec, nobjsec ? nobjsec : 1);
+  u32* obj_to_msec = arena_zarray(c->scratch, u32, nobjsec ? nobjsec : 1);
+  u32 nsecs = 0;
+
+  for (u32 i = 1; i < nobjsec; ++i) {
+    const Section* s = obj_section_get(ob, i);
+    /* Skip ELF-style synthetic sections that read_elf would have
+     * filtered: SYMTAB / STRTAB / RELA / GROUP have no Mach-O
+     * representation as data sections. */
+    if (s->sem == SSEM_SYMTAB || s->sem == SSEM_STRTAB ||
+        s->sem == SSEM_RELA || s->sem == SSEM_REL ||
+        s->sem == SSEM_GROUP) {
+      continue;
+    }
+    size_t nlen;
+    const char* nm = pool_str(c->global, s->name, &nlen);
+    MSec* m = &secs[nsecs];
+    name_to_seg_sect(nm ? nm : "", (u32)nlen, s->kind, &m->ns);
+    m->obj_sec = i;
+    m->align = s->align ? s->align : 1;
+    m->flags = section_flags_for(s->kind, s->flags, m->ns.sectname,
+                                 m->ns.sect_len);
+    if (s->sem == SSEM_NOBITS || s->kind == SEC_BSS) {
+      m->is_zerofill = 1;
+      m->size = s->bss_size;
+      m->obj_bytes = NULL;
+      m->flags = (m->flags & ~SECTION_TYPE) | S_ZEROFILL;
+    } else {
+      m->is_zerofill = 0;
+      m->size = s->bytes.total;
+      m->obj_bytes = &s->bytes;
+    }
+    obj_to_msec[i] = nsecs + 1; /* 1-based: matches Mach-O n_sect. */
+    nsecs++;
+  }
+
+  /* n_sect in a symbol record is a single byte, so at most 255 sections
+   * are addressable. Refuse instead of letting the (u8) cast below wrap
+   * and silently attach symbols to the wrong section. */
+  if (nsecs > 255) {
+    compiler_panic(c, no_loc(),
+                   "emit_macho: %u sections exceed the Mach-O limit of 255",
+                   nsecs);
+  }
+
+  /* ---- pass 2: assign vmaddrs (segment-relative) and per-section
+   *      flat-layout addresses. MH_OBJECT keeps everything in
+   *      one segment with vmaddr=0; section addr fields are
+   *      relative offsets within the segment. */
+  u64 cur_addr = 0;
+  for (u32 i = 0; i < nsecs; ++i) {
+    MSec* m = &secs[i];
+    cur_addr = ALIGN_UP(cur_addr, (u64)m->align);
+    m->addr = cur_addr;
+    cur_addr += m->size;
+  }
+  u64 segment_vmsize = cur_addr;
+
+  /* ---- pass 3: partition symbols (locals, extdefs, undefs) ------ */
+  u32 nobjsym = 0;
+  u32 max_sym_id = 0;
+  {
+    ObjSymIter* it = obj_symiter_new(ob);
+    ObjSymEntry e;
+    while (obj_symiter_next(it, &e)) {
+      ++nobjsym;
+      if ((u32)e.id > max_sym_id) max_sym_id = (u32)e.id;
+    }
+    obj_symiter_free(it);
+  }
+
+  MSym* msyms = arena_zarray(c->scratch, MSym, nobjsym + 1);
+  u32 nmsyms = 0;
+  /* obj_id -> 1-based Mach-O symbol index (0 = none). The table is indexed
+   * by ObjSymId, so size it from the largest id actually seen rather than
+   * assuming ids are dense; never smaller than the previous nobjsym + 2. */
+  u32 sym_map_len = nobjsym + 2;
+  if (max_sym_id + 1 > sym_map_len) sym_map_len = max_sym_id + 1;
+  u32* sym_obj_to_macho = arena_zarray(c->scratch, u32, sym_map_len);
+
+  Buf strtab;
+  buf_init(&strtab, h);
+  /* Offset 0 of the string table is a single NUL so that strx == 0 names
+   * the empty string. */
+  {
+    u8 z = 0;
+    buf_write(&strtab, &z, 1);
+  }
+
+  /* Emit in three passes so the symbol order matches LC_DYSYMTAB
+   * (locals, then extdefs, then undefs). The per-class counts needed by
+   * LC_DYSYMTAB are taken here, from the same classification that decides
+   * the ordering. */
+  u32 nlocals = 0, nextdefs = 0, nundefs = 0;
+  for (int pass = 0; pass < 3; ++pass) {
+    ObjSymIter* it = obj_symiter_new(ob);
+    ObjSymEntry e;
+    while (obj_symiter_next(it, &e)) {
+      const ObjSym* s = e.sym;
+      int undef = sym_is_undef(s);
+      int extdef = sym_is_extdef(s);
+      int cls = undef ? 2 : (extdef ? 1 : 0);
+      if (cls != pass) continue;
+
+      if (cls == 0)
+        ++nlocals;
+      else if (cls == 1)
+        ++nextdefs;
+      else
+        ++nundefs;
+
+      MSym* ms = &msyms[nmsyms];
+      ms->obj_id = e.id;
+
+      size_t nlen;
+      const char* nm = pool_str(c->global, s->name, &nlen);
+      /* Mach-O convention: C symbols carry a leading underscore on
+       * disk. Apple toolchains rely on this for the linker
+       * "_main"-vs-"main" entry point and for every libSystem call.
+       * Round-tripped via read_macho, which strips one leading `_`. */
+      if (nlen && nm) {
+        u8 us = '_';
+        u32 off = buf_pos(&strtab);
+        buf_write(&strtab, &us, 1);
+        buf_write(&strtab, nm, nlen);
+        u8 z = 0;
+        buf_write(&strtab, &z, 1);
+        ms->strx = off;
+      } else {
+        ms->strx = 0;
+      }
+
+      u8 type = 0;
+      if (extdef) type |= N_EXT;
+      if (s->vis == SV_HIDDEN || s->vis == SV_INTERNAL) {
+        /* Mach-O encodes hidden externals as N_PEXT|N_EXT. */
+        type |= N_PEXT;
+      }
+      u8 n_sect = NO_SECT;
+      u16 n_desc = 0;
+      u64 value = s->value;
+
+      if (undef) {
+        type |= N_UNDF;
+        value = 0;
+      } else if (s->kind == SK_ABS) {
+        type |= N_ABS;
+      } else if (s->kind == SK_COMMON) {
+        /* Mach-O common symbols are N_UNDF|N_EXT with n_value=size and
+         * n_desc carrying log2(align) in the GET_COMM_ALIGN bits. */
+        type = N_UNDF | N_EXT;
+        value = s->size;
+        u32 a = s->common_align ? (u32)s->common_align : 1;
+        n_desc = (u16)(log2_align(a) << 8); /* GET_COMM_ALIGN field */
+      } else {
+        type |= N_SECT;
+        u32 ms_idx = (s->section_id < nobjsec) ? obj_to_msec[s->section_id] : 0;
+        n_sect = (u8)ms_idx; /* cannot wrap: nsecs <= 255 was checked */
+        if (n_sect && n_sect <= nsecs) {
+          value = secs[n_sect - 1].addr + s->value;
+        }
+        if (s->bind == SB_WEAK) n_desc |= N_WEAK_DEF;
+      }
+
+      ms->n_type = type;
+      ms->n_sect = n_sect;
+      ms->n_desc = n_desc;
+      ms->n_value = value;
+
+      sym_obj_to_macho[e.id] = nmsyms + 1; /* 1-based index, 0 = none. */
+      nmsyms++;
+    }
+    obj_symiter_free(it);
+  }
+
+  /* ---- pass 4: build per-section relocation tables -------------- */
+  u32 total_relocs = obj_reloc_total(ob);
+  for (u32 i = 0; i < nsecs; ++i) {
+    MSec* m = &secs[i];
+    u32 nr = obj_reloc_count(ob, m->obj_sec);
+    if (!nr) continue;
+    /* Worst case: each reloc may be preceded by an ARM64_RELOC_ADDEND
+     * pair entry. We size the buffer for that upper bound. */
+    u8* buf = (u8*)arena_alloc(c->scratch, (size_t)MACHO_RELOC_SIZE * nr * 2,
+                               _Alignof(u32));
+    u32 j = 0;
+    for (u32 ri = 0; ri < total_relocs; ++ri) {
+      const Reloc* r = obj_reloc_at(ob, ri);
+      if (r->section_id != m->obj_sec) continue;
+      u32 mtype = reloc_to(r->kind);
+      if (mtype == (u32)-1) {
+        compiler_panic(c, no_loc(),
+                       "emit_macho: unsupported reloc kind %u for arch %u",
+                       (u32)r->kind, (u32)c->target.arch);
+      }
+      u32 pcrel = reloc_pcrel(r->kind);
+      u32 length = reloc_length(r->kind);
+
+      /* Resolve target — extern always 1 in our model (every Reloc has
+       * an ObjSymId). Relocs without a symbol would map to a
+       * section-relative reloc, which the v1 cgtarget never emits. */
+      if (r->sym == OBJ_SYM_NONE) {
+        compiler_panic(c, no_loc(),
+                       "emit_macho: reloc without symbol not supported "
+                       "(sec=%u offset=%u kind=%u)",
+                       (u32)r->section_id, (u32)r->offset, (u32)r->kind);
+      }
+      /* Ids outside the table were never seen by the symbol iterator;
+       * treat them like any other symbol missing from the symtab. */
+      u32 mach_sym_idx =
+          ((u32)r->sym < sym_map_len) ? sym_obj_to_macho[r->sym] : 0;
+      if (mach_sym_idx == 0) {
+        compiler_panic(c, no_loc(),
+                       "emit_macho: reloc target sym %u not in symtab",
+                       (u32)r->sym);
+      }
+      u32 r_symbolnum = mach_sym_idx - 1; /* Mach-O uses 0-based. */
+
+      /* Non-zero addend: emit a leading ARM64_RELOC_ADDEND pair (only
+       * meaningful for non-UNSIGNED types — UNSIGNED carries the addend
+       * inline in the patched bytes). */
+      if (r->addend != 0 && mtype != ARM64_RELOC_UNSIGNED) {
+        i64 addend = (i64)r->addend;
+        /* The addend travels in the 24-bit r_symbolnum field as a signed
+         * value; anything wider used to be truncated silently. */
+        if (addend < -(i64)0x800000 || addend > (i64)0x7fffff) {
+          compiler_panic(c, no_loc(),
+                         "emit_macho: addend out of range for "
+                         "ARM64_RELOC_ADDEND (sec=%u offset=%u kind=%u)",
+                         (u32)r->section_id, (u32)r->offset, (u32)r->kind);
+        }
+        u8* slot = buf + (size_t)j * MACHO_RELOC_SIZE;
+        wr_u32_le(slot + 0, (u32)r->offset);
+        /* Shift in u32 so the type nibble cannot overflow a signed int. */
+        u32 packed = ((u32)addend & 0x00ffffffu) | (0u << 24) |
+                     (length << 25) | (1u << 27) /*extern*/ |
+                     ((u32)ARM64_RELOC_ADDEND << 28);
+        wr_u32_le(slot + 4, packed);
+        ++j;
+      }
+
+      u8* slot = buf + (size_t)j * MACHO_RELOC_SIZE;
+      wr_u32_le(slot + 0, (u32)r->offset);
+      u32 packed = (r_symbolnum & 0x00ffffffu) |
+                   ((pcrel & 1u) << 24) |
+                   ((length & 3u) << 25) |
+                   (1u << 27) /*extern*/ |
+                   ((mtype & 0xfu) << 28);
+      wr_u32_le(slot + 4, packed);
+      ++j;
+    }
+    m->relocs = buf;
+    m->nreloc = j;
+  }
+
+  /* ---- pass 5: assign file offsets ------------------------------ */
+  /* Layout after the load-command block:
+   *   section bytes (in order, respecting align)
+   *   relocation tables (per section, 4-aligned)
+   *   symbol table (8-aligned)
+   *   string table */
+  u32 nload_cmds = 4; /* LC_SEGMENT_64 + LC_BUILD_VERSION + LC_SYMTAB + LC_DYSYMTAB */
+  u32 segcmd_size = MACHO_SEGCMD64_SIZE + nsecs * MACHO_SECT64_SIZE;
+  u32 build_version_size = 24; /* fixed: cmd+cmdsize+platform+minos+sdk+ntools(0) */
+  u32 sizeofcmds = segcmd_size + build_version_size + MACHO_SYMTAB_CMD_SIZE +
+                   MACHO_DYSYMTAB_CMD_SIZE;
+
+  u64 cur = MACHO_HDR64_SIZE + sizeofcmds;
+  u32 fileoff_first = (u32)cur;
+  for (u32 i = 0; i < nsecs; ++i) {
+    MSec* m = &secs[i];
+    if (m->is_zerofill) {
+      m->fileoff = 0;
+      continue;
+    }
+    cur = ALIGN_UP(cur, (u64)m->align);
+    m->fileoff = (u32)cur;
+    cur += m->size;
+  }
+
+  /* Reloc tables. */
+  for (u32 i = 0; i < nsecs; ++i) {
+    MSec* m = &secs[i];
+    if (!m->nreloc) {
+      m->reloff = 0;
+      continue;
+    }
+    cur = ALIGN_UP(cur, (u64)4);
+    m->reloff = (u32)cur;
+    cur += (u64)m->nreloc * MACHO_RELOC_SIZE;
+  }
+
+  cur = ALIGN_UP(cur, (u64)8);
+  u64 symoff = cur;
+  cur += (u64)nmsyms * MACHO_NLIST64_SIZE;
+  u64 stroff = cur;
+  u32 strtab_size = buf_pos(&strtab);
+  cur += strtab_size;
+
+  /* ---- pass 6: write the file ------------------------------------ */
+  cfree_writer_seek(w, 0);
+
+  /* mach_header_64 */
+  wr_u32(w, MH_MAGIC_64);
+  wr_u32(w, cputype);
+  wr_u32(w, cpusubtype);
+  wr_u32(w, MH_OBJECT);
+  wr_u32(w, nload_cmds);
+  wr_u32(w, sizeofcmds);
+  wr_u32(w, 0); /* flags — MH_OBJECT carries none in v1 */
+  wr_u32(w, 0); /* reserved */
+
+  /* LC_SEGMENT_64 (anonymous, contains everything) */
+  wr_u32(w, LC_SEGMENT_64);
+  wr_u32(w, segcmd_size);
+  wr_name16(w, "", 0);          /* segname: empty for MH_OBJECT */
+  wr_u64(w, 0);                 /* vmaddr */
+  wr_u64(w, segment_vmsize);    /* vmsize */
+  wr_u64(w, fileoff_first);     /* fileoff */
+  /* filesize = distance from the first section byte to the end of the
+   * last non-zerofill section. */
+  u64 filesize = 0;
+  for (u32 i = 0; i < nsecs; ++i) {
+    const MSec* m = &secs[i];
+    if (m->is_zerofill) continue;
+    u64 end = (u64)m->fileoff + m->size;
+    if (end > filesize + fileoff_first) filesize = end - fileoff_first;
+  }
+  wr_u64(w, filesize);
+  /* maxprot/initprot — VM_PROT_READ|WRITE|EXECUTE = 7 for object segs. */
+  wr_u32(w, 7);
+  wr_u32(w, 7);
+  wr_u32(w, nsecs);
+  wr_u32(w, 0); /* flags */
+
+  /* sections inline within the segment command */
+  for (u32 i = 0; i < nsecs; ++i) {
+    MSec* m = &secs[i];
+    wr_name16(w, m->ns.sectname, m->ns.sect_len);
+    wr_name16(w, m->ns.segname, m->ns.seg_len);
+    wr_u64(w, m->addr);
+    wr_u64(w, m->size);
+    wr_u32(w, m->fileoff);
+    wr_u32(w, log2_align(m->align));
+    wr_u32(w, m->reloff);
+    wr_u32(w, m->nreloc);
+    wr_u32(w, m->flags);
+    wr_u32(w, 0); /* reserved1 */
+    wr_u32(w, 0); /* reserved2 */
+    wr_u32(w, 0); /* reserved3 */
+  }
+
+  /* LC_BUILD_VERSION — platform=PLATFORM_MACOS(1), minos/sdk=14.0.0,
+   * ntools=0. The exact min-version isn't load-bearing for MH_OBJECT,
+   * but Apple's `ld` warns when it's missing. */
+  wr_u32(w, LC_BUILD_VERSION);
+  wr_u32(w, build_version_size);
+  wr_u32(w, 1);                /* platform: PLATFORM_MACOS */
+  wr_u32(w, (14u << 16) | 0);  /* minos: 14.0.0 */
+  wr_u32(w, (14u << 16) | 0);  /* sdk:   14.0.0 */
+  wr_u32(w, 0);                /* ntools */
+
+  /* LC_SYMTAB */
+  wr_u32(w, LC_SYMTAB);
+  wr_u32(w, MACHO_SYMTAB_CMD_SIZE);
+  wr_u32(w, (u32)symoff);
+  wr_u32(w, nmsyms);
+  wr_u32(w, (u32)stroff);
+  wr_u32(w, strtab_size);
+
+  /* LC_DYSYMTAB */
+  wr_u32(w, LC_DYSYMTAB);
+  wr_u32(w, MACHO_DYSYMTAB_CMD_SIZE);
+  wr_u32(w, 0);                  /* ilocalsym */
+  wr_u32(w, nlocals);            /* nlocalsym */
+  wr_u32(w, nlocals);            /* iextdefsym */
+  wr_u32(w, nextdefs);           /* nextdefsym */
+  wr_u32(w, nlocals + nextdefs); /* iundefsym */
+  wr_u32(w, nundefs);            /* nundefsym */
+  wr_u32(w, 0); wr_u32(w, 0);    /* tocoff, ntoc */
+  wr_u32(w, 0); wr_u32(w, 0);    /* modtaboff, nmodtab */
+  wr_u32(w, 0); wr_u32(w, 0);    /* extrefsymoff, nextrefsyms */
+  wr_u32(w, 0); wr_u32(w, 0);    /* indirectsymoff, nindirectsyms */
+  wr_u32(w, 0); wr_u32(w, 0);    /* extreloff, nextrel */
+  wr_u32(w, 0); wr_u32(w, 0);    /* locreloff, nlocrel */
+
+  /* section bytes */
+  for (u32 i = 0; i < nsecs; ++i) {
+    MSec* m = &secs[i];
+    if (m->is_zerofill || !m->size) continue;
+    cfree_writer_seek(w, m->fileoff);
+    if (m->obj_bytes) {
+      /* Flatten the section's Buf into one temporary block for the write. */
+      u32 sz = m->obj_bytes->total;
+      u8* tmp = (u8*)h->alloc(h, sz ? sz : 1, 1);
+      if (sz) buf_flatten(m->obj_bytes, tmp);
+      cfree_writer_write(w, tmp, sz);
+      h->free(h, tmp, sz ? sz : 1);
+    }
+  }
+
+  /* reloc tables */
+  for (u32 i = 0; i < nsecs; ++i) {
+    MSec* m = &secs[i];
+    if (!m->nreloc) continue;
+    cfree_writer_seek(w, m->reloff);
+    cfree_writer_write(w, m->relocs, (size_t)m->nreloc * MACHO_RELOC_SIZE);
+  }
+
+  /* symtab */
+  cfree_writer_seek(w, symoff);
+  for (u32 i = 0; i < nmsyms; ++i) {
+    const MSym* ms = &msyms[i];
+    u8 entry[MACHO_NLIST64_SIZE];
+    wr_u32_le(entry + 0, ms->strx);
+    entry[4] = ms->n_type;
+    entry[5] = ms->n_sect;
+    wr_u16_le(entry + 6, ms->n_desc);
+    wr_u64_le(entry + 8, ms->n_value);
+    cfree_writer_write(w, entry, MACHO_NLIST64_SIZE);
+  }
+
+  /* strtab */
+  {
+    u8* flat = (u8*)arena_alloc(c->scratch, strtab_size ? strtab_size : 1, 1);
+    if (strtab_size) buf_flatten(&strtab, flat);
+    cfree_writer_seek(w, stroff);
+    cfree_writer_write(w, flat, strtab_size);
+  }
+  buf_fini(&strtab);
+}
diff --git a/src/obj/macho_read.c b/src/obj/macho_read.c
@@ -0,0 +1,369 @@
+/* Mach-O MH_OBJECT reader. Parses a 64-bit little-endian relocatable
+ * object back into a fresh ObjBuilder. The post-finalize ObjBuilder
+ * shape is the canonical superset doc/MULTIOBJ.md §3.1 promises:
+ * read_macho of an emit_macho output produces an ObjBuilder
+ * shape-equivalent to the writer's input, modulo the synthesized
+ * "__SEG,__sect"-form section names.
+ *
+ * Scope: AArch64 little-endian, MH_OBJECT only (MH_DYLIB is a stub —
+ * the linker has no consumer for it yet). Other archs / endianness
+ * produce a compiler_panic with a diagnostic. */
+
+#include <string.h>
+
+#include "core/arena.h"
+#include "core/bytes.h"
+#include "core/heap.h"
+#include "core/pool.h"
+#include "core/util.h"
+#include "obj/macho.h"
+
+/* Placeholder source location for diagnostics that have no source
+ * position to point at: every field is zero. */
+static SrcLoc no_loc(void) {
+  return (SrcLoc){0, 0, 0};
+}
+
+/* ---- mach-section scratch struct ---- */
+
+/* One record per Mach-O section header seen while walking
+ * LC_SEGMENT_64 commands; lives in scratch memory for the duration of
+ * read_macho. */
+typedef struct MSecRec {
+ /* Raw 16-byte name fields copied from the section header; not
+  * necessarily NUL-terminated, so use seg_len / sect_len. */
+ char segname[16];
+ char sectname[16];
+ /* fixed16_len() of the two name fields above. */
+ u32 seg_len;
+ u32 sect_len;
+ /* addr / size / file offset as read from the section header. */
+ u64 addr;
+ u64 size;
+ u32 fileoff;
+ /* Alignment as a power-of-two exponent (used as a shift count). */
+ u32 align_log2;
+ /* File offset and entry count of this section's relocation table. */
+ u32 reloff;
+ u32 nreloc;
+ /* Raw section flags word (section type + attribute bits). */
+ u32 flags;
+ ObjSecId obj_sec; /* assigned in pass 2, when the ObjSec is created */
+} MSecRec;
+
+/* Length of a fixed 16-byte Mach-O name field: the offset of the first
+ * NUL byte, or 16 when the field is completely filled. */
+static u32 fixed16_len(const char* s) {
+  const char* nul = (const char*)memchr(s, 0, 16);
+  return nul ? (u32)(nul - s) : 16u;
+}
+
+/* True when the `len`-byte name `nm` is exactly the NUL-terminated
+ * literal `lit`. */
+static int fixed16_eq(const char* nm, u32 len, const char* lit) {
+  size_t want = strlen(lit);
+  return len == want && memcmp(nm, lit, want) == 0;
+}
+
+/* Classify a Mach-O section into a SEC_* kind. The section-type and
+ * attribute bits in `flags` win over the names: zerofill types are
+ * SEC_BSS and S_ATTR_PURE_INSTRUCTIONS is SEC_TEXT. Otherwise the
+ * segment name decides, with the section name refining __TEXT and
+ * __DATA. Anything unrecognized is SEC_OTHER. */
+static u16 sec_kind_from_seg_sect(const char* segname, u32 seg_len,
+                                  const char* sectname, u32 sect_len,
+                                  u32 flags) {
+  const u32 stype = flags & SECTION_TYPE;
+  const int zerofill =
+      stype == S_ZEROFILL || stype == S_THREAD_LOCAL_ZEROFILL;
+
+  if (zerofill) return SEC_BSS;
+  if (flags & S_ATTR_PURE_INSTRUCTIONS) return SEC_TEXT;
+
+  if (fixed16_eq(segname, seg_len, "__DWARF")) return SEC_DEBUG;
+  if (fixed16_eq(segname, seg_len, "__TEXT")) {
+    /* Everything in __TEXT other than __text (__const, __cstring, ...)
+     * is read-only data. */
+    return fixed16_eq(sectname, sect_len, "__text") ? SEC_TEXT : SEC_RODATA;
+  }
+  if (fixed16_eq(segname, seg_len, "__DATA")) {
+    return fixed16_eq(sectname, sect_len, "__bss") ? SEC_BSS : SEC_DATA;
+  }
+  return SEC_OTHER;
+}
+
+/* Derive the SF_* flag set for a section from its raw Mach-O flags
+ * word and its already-computed SEC_* kind. Code sections get
+ * ALLOC|EXEC, read-only data gets ALLOC, data/bss get ALLOC|WRITE and
+ * every other kind gets none of these. SF_TLS is added on top for the
+ * thread-local section types. */
+static u16 sec_flags_from(u32 mflags, u16 sec_kind) {
+  u16 out = 0;
+  const int is_code =
+      sec_kind == SEC_TEXT || (mflags & S_ATTR_PURE_INSTRUCTIONS) != 0;
+
+  if (is_code) {
+    out |= SF_ALLOC | SF_EXEC;
+  } else {
+    switch (sec_kind) {
+      case SEC_RODATA:
+        out |= SF_ALLOC;
+        break;
+      case SEC_DATA:
+      case SEC_BSS:
+        out |= SF_ALLOC | SF_WRITE;
+        break;
+      default:
+        break;
+    }
+  }
+
+  switch (mflags & SECTION_TYPE) {
+    case S_THREAD_LOCAL_REGULAR:
+    case S_THREAD_LOCAL_ZEROFILL:
+    case S_THREAD_LOCAL_VARIABLES:
+      out |= SF_TLS;
+      break;
+    default:
+      break;
+  }
+  return out;
+}
+
+/* Pick the SSEM_* section semantics. Zerofill section types and any
+ * section classified SEC_BSS occupy no file bytes (NOBITS); the
+ * mod-init / mod-term pointer section types map to the init / fini
+ * array semantics; everything else is PROGBITS. */
+static u16 sec_sem_from(u32 mflags, u16 sec_kind) {
+  const u32 stype = mflags & SECTION_TYPE;
+  const int zerofill =
+      stype == S_ZEROFILL || stype == S_THREAD_LOCAL_ZEROFILL;
+
+  if (zerofill || sec_kind == SEC_BSS) return SSEM_NOBITS;
+
+  switch (stype) {
+    case S_MOD_INIT_FUNC_POINTERS:
+      return SSEM_INIT_ARRAY;
+    case S_MOD_TERM_FUNC_POINTERS:
+      return SSEM_FINI_ARRAY;
+    default:
+      return SSEM_PROGBITS;
+  }
+}
+
+/* Parse a Mach-O MH_OBJECT image into a fresh, finalized ObjBuilder.
+ *
+ *   c     compiler context: supplies the scratch arena, the global
+ *         intern pool and the panic sink.
+ *   name  input name; currently unused.
+ *   data  start of the object image (64-bit header, little-endian).
+ *   len   number of readable bytes at `data`.
+ *
+ * Returns the ObjBuilder after obj_finalize(). Structural problems
+ * (short header, bad magic, non-arm64 cputype, non-MH_OBJECT filetype,
+ * truncated load commands, tables or section bytes that run past
+ * `len`, unsupported or non-extern relocations, out-of-range symbol
+ * indices) are reported through compiler_panic.
+ * NOTE(review): the bounds checks assume compiler_panic does not
+ * return — confirm against its declaration. */
+ObjBuilder* read_macho(Compiler* c, const char* name, const u8* data,
+                       size_t len) {
+  (void)name;
+  if (len < MACHO_HDR64_SIZE)
+    compiler_panic(c, no_loc(), "read_macho: input shorter than header");
+
+  u32 magic = rd_u32_le(data + 0);
+  if (magic != MH_MAGIC_64)
+    compiler_panic(c, no_loc(), "read_macho: bad magic 0x%x", magic);
+
+  u32 cputype = rd_u32_le(data + 4);
+  u32 filetype = rd_u32_le(data + 12);
+  u32 ncmds = rd_u32_le(data + 16);
+  u32 sizeofcmds = rd_u32_le(data + 20);
+
+  if (cputype != CPU_TYPE_ARM64)
+    compiler_panic(c, no_loc(),
+                   "read_macho: unsupported cputype 0x%x (only arm64 today)",
+                   cputype);
+  if (filetype != MH_OBJECT)
+    compiler_panic(c, no_loc(),
+                   "read_macho: only MH_OBJECT supported, got filetype %u",
+                   filetype);
+
+  if ((u64)MACHO_HDR64_SIZE + sizeofcmds > len)
+    compiler_panic(c, no_loc(), "read_macho: load commands exceed file");
+
+  /* ---- pass 1: walk load commands, collect sections, symtab cmd. */
+  MSecRec* msecs = NULL;
+  u32 nmsecs = 0;
+  u32 symoff = 0, nsyms = 0, stroff = 0, strsize = 0;
+
+  u64 pos = MACHO_HDR64_SIZE;
+  u64 end = pos + sizeofcmds;
+  for (u32 ci = 0; ci < ncmds && pos + 8 <= end; ++ci) {
+    u32 cmd = rd_u32_le(data + pos);
+    u32 cmdsize = rd_u32_le(data + pos + 4);
+    if (cmdsize < 8 || pos + cmdsize > end)
+      compiler_panic(c, no_loc(), "read_macho: malformed load command");
+
+    if (cmd == LC_SEGMENT_64) {
+      /* nsects lives at byte 64 of the command, so the fixed segment
+       * header must fit inside cmdsize before it is read; otherwise a
+       * short command would be read past its own end. */
+      if (cmdsize < MACHO_SEGCMD64_SIZE)
+        compiler_panic(c, no_loc(), "read_macho: segment cmd truncated");
+      u32 nsects = rd_u32_le(data + pos + 64);
+      if (MACHO_SEGCMD64_SIZE + (u64)nsects * MACHO_SECT64_SIZE > cmdsize)
+        compiler_panic(c, no_loc(), "read_macho: segment cmd truncated");
+      /* Grow the section array by reallocating in scratch and copying
+       * the records collected from earlier segment commands. */
+      MSecRec* extra = arena_array(c->scratch, MSecRec, nmsecs + nsects);
+      if (msecs && nmsecs)
+        memcpy(extra, msecs, sizeof(MSecRec) * nmsecs);
+      msecs = extra;
+      const u8* sp = data + pos + MACHO_SEGCMD64_SIZE;
+      for (u32 si = 0; si < nsects; ++si, sp += MACHO_SECT64_SIZE) {
+        MSecRec* m = &msecs[nmsecs++];
+        memset(m, 0, sizeof *m);
+        memcpy(m->sectname, sp + 0, 16);
+        memcpy(m->segname, sp + 16, 16);
+        m->seg_len = fixed16_len(m->segname);
+        m->sect_len = fixed16_len(m->sectname);
+        m->addr = rd_u64_le(sp + 32);
+        m->size = rd_u64_le(sp + 40);
+        m->fileoff = rd_u32_le(sp + 48);
+        m->align_log2 = rd_u32_le(sp + 52);
+        m->reloff = rd_u32_le(sp + 56);
+        m->nreloc = rd_u32_le(sp + 60);
+        m->flags = rd_u32_le(sp + 64);
+      }
+    } else if (cmd == LC_SYMTAB) {
+      /* The four fields read below occupy bytes 8..23 of the command. */
+      if (cmdsize < 24u)
+        compiler_panic(c, no_loc(), "read_macho: symtab cmd truncated");
+      symoff = rd_u32_le(data + pos + 8);
+      nsyms = rd_u32_le(data + pos + 12);
+      stroff = rd_u32_le(data + pos + 16);
+      strsize = rd_u32_le(data + pos + 20);
+    }
+    pos += cmdsize;
+  }
+
+  if (stroff + (u64)strsize > len)
+    compiler_panic(c, no_loc(), "read_macho: string table out of range");
+  if (symoff + (u64)nsyms * MACHO_NLIST64_SIZE > len)
+    compiler_panic(c, no_loc(), "read_macho: symbol table out of range");
+  const u8* strtab = data + stroff;
+
+  ObjBuilder* ob = obj_new(c);
+  if (!ob) compiler_panic(c, no_loc(), "read_macho: obj_new failed");
+
+  /* ---- pass 2: create ObjSecs and copy bytes. */
+  for (u32 i = 0; i < nmsecs; ++i) {
+    MSecRec* m = &msecs[i];
+    /* Build "__SEG,__sect"-form name; matches what emit_macho would
+     * round-trip back out. 16 + 1 + 16 bytes at most. */
+    char nmbuf[34];
+    u32 nlen = 0;
+    memcpy(nmbuf + nlen, m->segname, m->seg_len);
+    nlen += m->seg_len;
+    nmbuf[nlen++] = ',';
+    memcpy(nmbuf + nlen, m->sectname, m->sect_len);
+    nlen += m->sect_len;
+    Sym sn = pool_intern(c->global, nmbuf, nlen);
+
+    u16 kind = sec_kind_from_seg_sect(m->segname, m->seg_len, m->sectname,
+                                      m->sect_len, m->flags);
+    u16 flags = sec_flags_from(m->flags, kind);
+    u16 sem = sec_sem_from(m->flags, kind);
+    /* Mask the exponent so a corrupt header cannot shift by >= 32. */
+    u32 align = 1u << (m->align_log2 & 31);
+
+    ObjSecId id = obj_section_ex(ob, sn, (SecKind)kind, (SecSem)sem, flags,
+                                 align, 0, 0, 0);
+    if (id == OBJ_SEC_NONE)
+      compiler_panic(c, no_loc(), "read_macho: obj_section_ex failed");
+
+    /* Preserve the raw mach section.flags so emit_macho can write back
+     * the same S_TYPE / S_ATTR_* bits. */
+    obj_section_set_ext(ob, id, OBJ_EXT_MACHO, m->flags, 0);
+
+    if (sem == SSEM_NOBITS) {
+      /* The reservation is made with a 32-bit size; refuse rather than
+       * silently truncate a larger zerofill section. */
+      if (m->size > 0xffffffffu)
+        compiler_panic(c, no_loc(), "read_macho: zerofill section too large");
+      obj_reserve_bss(ob, id, (u32)m->size, align);
+    } else if (m->size) {
+      /* Overflow-safe range check: fileoff + size could wrap in 64 bits
+       * for a hostile size, so compare against the remaining room. */
+      if (m->size > len || m->fileoff > len - m->size)
+        compiler_panic(c, no_loc(), "read_macho: section bytes out of range");
+      obj_write(ob, id, data + m->fileoff, (size_t)m->size);
+    }
+    m->obj_sec = id;
+  }
+
+  /* ---- pass 3: parse symbol table. One walk in symbol-table order;
+   *      each entry (undef, common, abs or section-defined) becomes an
+   *      ObjBuilder symbol and is recorded in mach_idx -> ObjSymId so
+   *      reloc resolution in pass 4 works. */
+  ObjSymId* sym_macho_to_obj =
+      arena_zarray(c->scratch, ObjSymId, nsyms ? nsyms : 1);
+
+  const u8* sbase = data + symoff;
+  for (u32 i = 0; i < nsyms; ++i) {
+    const u8* p = sbase + (u64)i * MACHO_NLIST64_SIZE;
+    u32 strx = rd_u32_le(p + 0);
+    u8 n_type = p[4];
+    u8 n_sect = p[5];
+    u16 n_desc = rd_u16_le(p + 6);
+    u64 n_value = rd_u64_le(p + 8);
+
+    /* Name lookup is clamped to the string table: an out-of-range strx
+     * yields the empty name and an unterminated tail stops at strsize. */
+    const char* nm = "";
+    u32 nlen = 0;
+    if (strx < strsize) {
+      nm = (const char*)(strtab + strx);
+      while (strx + nlen < strsize && nm[nlen]) ++nlen;
+    }
+    /* Inverse of the leading-underscore prefix emit_macho applies to C
+     * symbols. If the Mach-O name starts with `_`, strip it so the
+     * builder holds the source-level name. */
+    if (nlen && nm[0] == '_') {
+      ++nm;
+      --nlen;
+    }
+    Sym sn = nlen ? pool_intern(c->global, nm, nlen) : 0;
+
+    u8 type_field = (u8)(n_type & N_TYPE);
+    u8 ext = (u8)(n_type & N_EXT);
+    u8 pext = (u8)(n_type & N_PEXT);
+
+    u16 bind = ext ? SB_GLOBAL : SB_LOCAL;
+    if (ext && (n_desc & N_WEAK_DEF)) bind = SB_WEAK;
+    u8 vis = pext ? SV_HIDDEN : SV_DEFAULT;
+
+    u16 kind;
+    ObjSecId sec_id = OBJ_SEC_NONE;
+    u64 value = 0;
+    u64 cmnalign = 0;
+
+    if (type_field == N_UNDF) {
+      if (ext && n_value != 0) {
+        /* Common: n_value is size, n_desc encodes log2(align) in
+         * GET_COMM_ALIGN bits. */
+        kind = SK_COMMON;
+        value = 0;
+        u32 la = (u32)((n_desc >> 8) & 0xf);
+        cmnalign = 1u << la;
+      } else {
+        kind = SK_UNDEF;
+      }
+    } else if (type_field == N_ABS) {
+      kind = SK_ABS;
+      value = n_value;
+    } else if (type_field == N_SECT) {
+      if (n_sect == 0 || n_sect > nmsecs) {
+        kind = SK_NOTYPE;
+      } else {
+        sec_id = msecs[n_sect - 1].obj_sec;
+        /* Mach-O n_value for defined symbols is segment-relative addr;
+         * convert back to a section-local offset. */
+        u64 base = msecs[n_sect - 1].addr;
+        value = (n_value >= base) ? (n_value - base) : 0;
+        kind = (msecs[n_sect - 1].flags & S_ATTR_PURE_INSTRUCTIONS) ? SK_FUNC
+                                                                    : SK_OBJ;
+      }
+    } else {
+      kind = SK_NOTYPE;
+    }
+
+    ObjSymId id = obj_symbol_ex(ob, sn, (SymBind)bind, (SymVis)vis,
+                                (SymKind)kind, sec_id, value, 0, cmnalign);
+    sym_macho_to_obj[i] = id;
+  }
+
+  /* ---- pass 4: parse per-section relocations into ObjBuilder relocs.
+   *      Mach-O encodes addends out-of-band as a leading
+   *      ARM64_RELOC_ADDEND followed by the real reloc; the
+   *      reader collapses the pair on the way in. */
+  for (u32 i = 0; i < nmsecs; ++i) {
+    MSecRec* m = &msecs[i];
+    if (!m->nreloc) continue;
+    if (m->reloff + (u64)m->nreloc * MACHO_RELOC_SIZE > len)
+      compiler_panic(c, no_loc(),
+                     "read_macho: relocation table out of range");
+    const u8* rp = data + m->reloff;
+    i64 pending_addend = 0;
+    int have_pending = 0;
+    for (u32 j = 0; j < m->nreloc; ++j) {
+      /* 64-bit offset: j * entry size must not wrap in 32 bits. */
+      const u8* ent = rp + (u64)j * MACHO_RELOC_SIZE;
+      u32 r_address = rd_u32_le(ent);
+      u32 packed = rd_u32_le(ent + 4);
+      u32 r_symbolnum = packed & 0x00ffffffu;
+      u32 r_pcrel = (packed >> 24) & 1u;
+      u32 r_length = (packed >> 25) & 3u;
+      u32 r_extern = (packed >> 27) & 1u;
+      u32 r_type = (packed >> 28) & 0xfu;
+
+      if (r_type == ARM64_RELOC_ADDEND) {
+        /* Sign-extend 24-bit addend. */
+        i32 ad = (i32)(r_symbolnum & 0x00ffffffu);
+        if (ad & 0x00800000) ad |= ~0x00ffffff;
+        pending_addend = (i64)ad;
+        have_pending = 1;
+        continue;
+      }
+
+      u32 kind = macho_aarch64_reloc_from(r_type);
+      if (kind == (u32)-1)
+        compiler_panic(c, no_loc(),
+                       "read_macho: unsupported reloc type %u", r_type);
+
+      /* Refine kind by (r_pcrel, r_length) when the type field alone
+       * is ambiguous. ARM64_RELOC_UNSIGNED collapses R_ABS64/R_ABS32
+       * and PC-relative variants. */
+      if (r_type == ARM64_RELOC_UNSIGNED) {
+        if (r_pcrel) {
+          kind = (r_length == 3) ? R_PC64 : R_PC32;
+        } else {
+          kind = (r_length == 3) ? R_ABS64 : R_ABS32;
+        }
+      } else if (r_type == ARM64_RELOC_BRANCH26) {
+        kind = R_AARCH64_CALL26;
+      }
+
+      ObjSymId target = OBJ_SYM_NONE;
+      if (r_extern) {
+        /* An extern reloc names a symbol-table index; one that is out
+         * of range is malformed input, not a target-less reloc. */
+        if (r_symbolnum >= nsyms) {
+          compiler_panic(c, no_loc(),
+                         "read_macho: reloc symbol index %u out of range",
+                         r_symbolnum);
+        } else {
+          target = sym_macho_to_obj[r_symbolnum];
+        }
+      } else {
+        /* Section-relative reloc — cfree's IR doesn't model these
+         * cleanly. Drop a panic so we notice if a real input drives
+         * us here. */
+        compiler_panic(c, no_loc(),
+                       "read_macho: non-extern reloc not supported "
+                       "(sec=%u offset=%u)",
+                       (u32)m->obj_sec, r_address);
+      }
+
+      /* Consume the addend left by a preceding ARM64_RELOC_ADDEND. */
+      i64 addend = have_pending ? pending_addend : 0;
+      have_pending = 0;
+      pending_addend = 0;
+
+      obj_reloc_ex(ob, m->obj_sec, r_address, (RelocKind)kind, target,
+                   addend, addend ? 1 : 0, 0);
+    }
+  }
+
+  obj_finalize(ob);
+  return ob;
+}
diff --git a/src/obj/macho_reloc_aarch64.c b/src/obj/macho_reloc_aarch64.c
@@ -1,37 +1,104 @@
/* RelocKind <-> arm64 Mach-O reloc-type mapping. Mirror of
- * elf_reloc_aarch64.c for Mach-O. Stubbed in Phase 1 of the
- * MULTIOBJ plan (doc/MULTIOBJ.md): the translator declarations
- * exist so macho.h compiles, but neither path is reachable yet —
- * the writer / reader / linker peers (macho_emit.c, macho_read.c,
- * link_macho.c) are Phase 2/3 work.
+ * elf_reloc_aarch64.c for Mach-O.
*
- * Filling these in is part of Phase 2. Until then, callers panic
- * via the (u32)-1 sentinel (mirrors the elf_aarch64_reloc_from
- * convention) — but no caller exists. The compile-time check that
- * obj/macho.h's declarations match a real definition is the value
- * this TU provides today. */
+ * Mach-O relocations carry three independent fields that the cfree
+ * RelocKind enum collapses into a single value: r_type (the 4-bit
+ * ARM64_RELOC_* code), r_pcrel, and r_length. The translator therefore
+ * exposes three accessors — the writer (macho_emit.c) consults all of
+ * them per Reloc, and the reader (macho_read.c) inverts via
+ * macho_aarch64_reloc_from, which keys on r_type alone; the reader
+ * refines the result itself from r_pcrel and r_length. */
#include "core/util.h"
#include "obj/macho.h"
+/* Map a cfree RelocKind to its ARM64_RELOC_* r_type code.
+ * Returns (u32)-1 for R_NONE and for any kind without a case below.
+ * Several kinds share one r_type; the r_pcrel and r_length values that
+ * tell them apart come from macho_aarch64_reloc_pcrel and
+ * macho_aarch64_reloc_length. */
u32 macho_aarch64_reloc_to(u32 kind /* RelocKind */) {
- (void)kind;
- /* Phase 2: full RelocKind <-> ARM64_RELOC_* table, with
- * R_AARCH64_CALL26 / R_AARCH64_JUMP26 -> ARM64_RELOC_BRANCH26,
- * R_AARCH64_ADR_PREL_PG_HI21 -> ARM64_RELOC_PAGE21,
- * R_AARCH64_ADD_ABS_LO12_NC / R_AARCH64_LDST*_ABS_LO12_NC ->
- * ARM64_RELOC_PAGEOFF12,
- * R_AARCH64_ADR_GOT_PAGE -> ARM64_RELOC_GOT_LOAD_PAGE21,
- * R_AARCH64_LD64_GOT_LO12_NC -> ARM64_RELOC_GOT_LOAD_PAGEOFF12,
- * R_ABS64 -> ARM64_RELOC_UNSIGNED, etc. Non-zero addends emit a
- * leading ARM64_RELOC_ADDEND pair (see doc/MULTIOBJ.md §3.2). */
- return (u32)-1;
+ switch (kind) {
+ case R_NONE:
+ return (u32)-1;
+ case R_ABS64:
+ case R_ABS32:
+ return ARM64_RELOC_UNSIGNED;
+ case R_REL64:
+ case R_REL32:
+ case R_PC64:
+ case R_PC32:
+ /* PC-relative absolute pointer-difference; encoded as
+ * UNSIGNED with r_pcrel=1, length=3/2. */
+ return ARM64_RELOC_UNSIGNED;
+ /* Both branch flavours share the single 26-bit branch type. */
+ case R_AARCH64_JUMP26:
+ case R_AARCH64_CALL26:
+ return ARM64_RELOC_BRANCH26;
+ case R_AARCH64_ADR_PREL_PG_HI21:
+ case R_AARCH64_ADR_PREL_PG_HI21_NC:
+ return ARM64_RELOC_PAGE21;
+ /* Every low-12-bit ADD / load-store variant maps to PAGEOFF12. */
+ case R_AARCH64_ADD_ABS_LO12_NC:
+ case R_AARCH64_LDST8_ABS_LO12_NC:
+ case R_AARCH64_LDST16_ABS_LO12_NC:
+ case R_AARCH64_LDST32_ABS_LO12_NC:
+ case R_AARCH64_LDST64_ABS_LO12_NC:
+ case R_AARCH64_LDST128_ABS_LO12_NC:
+ return ARM64_RELOC_PAGEOFF12;
+ case R_AARCH64_ADR_GOT_PAGE:
+ return ARM64_RELOC_GOT_LOAD_PAGE21;
+ case R_AARCH64_LD64_GOT_LO12_NC:
+ return ARM64_RELOC_GOT_LOAD_PAGEOFF12;
+ default:
+ return (u32)-1;
+ }
+}
+
+/* r_pcrel bit for a RelocKind: 1 for the REL/PC data kinds, the 26-bit
+ * branches and the page-relative ADRP kinds (including the GOT page
+ * form); 0 for everything else. */
+u32 macho_aarch64_reloc_pcrel(u32 kind /* RelocKind */) {
+  const int data_pcrel = kind == R_REL64 || kind == R_REL32 ||
+                         kind == R_PC64 || kind == R_PC32;
+  const int branch = kind == R_AARCH64_JUMP26 || kind == R_AARCH64_CALL26;
+  const int page = kind == R_AARCH64_ADR_PREL_PG_HI21 ||
+                   kind == R_AARCH64_ADR_PREL_PG_HI21_NC ||
+                   kind == R_AARCH64_ADR_GOT_PAGE;
+  return (data_pcrel || branch || page) ? 1u : 0u;
+}
+
+/* r_length for a RelocKind, i.e. log2 of the patch width in bytes
+ * (0=byte, 1=hword, 2=word, 3=quad). Only the 64-bit data kinds are
+ * quads; AArch64 instructions are 4 bytes, so every instruction fixup
+ * (BRANCH26, PAGE21, PAGEOFF12, ...) and every other kind is a word. */
+u32 macho_aarch64_reloc_length(u32 kind /* RelocKind */) {
+  if (kind == R_ABS64 || kind == R_REL64 || kind == R_PC64) {
+    return 3;
+  }
+  return 2;
}
+/* Map an ARM64_RELOC_* r_type code back to a representative RelocKind.
+ * Returns (u32)-1 for any type without a case below. */
u32 macho_aarch64_reloc_from(u32 macho_type) {
- (void)macho_type;
- /* Phase 2: inverse of macho_aarch64_reloc_to, plus
- * ARM64_RELOC_SUBTRACTOR pair recognition (read-only;
- * cgtarget does not emit difference relocs in v1). */
- return (u32)-1;
+ /* The (r_type, r_pcrel, r_length) tuple disambiguates several kinds
+ * collapsed by macho_aarch64_reloc_to. The reader inspects pcrel and
+ * length itself when it matters; this function only maps the type
+ * field, returning the most common AArch64 instance for each. Reader
+ * callers refine via the pcrel/length companion if they need to
+ * distinguish R_ABS64 vs R_PC64 (both UNSIGNED). */
+ switch (macho_type) {
+ case ARM64_RELOC_UNSIGNED:
+ return R_ABS64;
+ case ARM64_RELOC_BRANCH26:
+ return R_AARCH64_CALL26;
+ case ARM64_RELOC_PAGE21:
+ return R_AARCH64_ADR_PREL_PG_HI21;
+ case ARM64_RELOC_PAGEOFF12:
+ return R_AARCH64_ADD_ABS_LO12_NC;
+ case ARM64_RELOC_GOT_LOAD_PAGE21:
+ return R_AARCH64_ADR_GOT_PAGE;
+ case ARM64_RELOC_GOT_LOAD_PAGEOFF12:
+ return R_AARCH64_LD64_GOT_LO12_NC;
+ default:
+ return (u32)-1;
+ }
}
diff --git a/src/obj/obj_secnames.c b/src/obj/obj_secnames.c
@@ -35,9 +35,7 @@ Sym obj_secname_init_array(Compiler* c) {
case CFREE_OBJ_ELF:
return pool_intern_cstr(c->global, ".init_array");
case CFREE_OBJ_MACHO:
- /* TODO Phase 2: "__DATA,__mod_init_func" with
- * S_MOD_INIT_FUNC_POINTERS section type. */
- return secname_panic_unimpl(c, ".init_array");
+ return pool_intern_cstr(c->global, "__DATA,__mod_init_func");
default:
return secname_panic_unimpl(c, ".init_array");
}
@@ -48,9 +46,7 @@ Sym obj_secname_fini_array(Compiler* c) {
case CFREE_OBJ_ELF:
return pool_intern_cstr(c->global, ".fini_array");
case CFREE_OBJ_MACHO:
- /* TODO Phase 2: "__DATA,__mod_term_func" with
- * S_MOD_TERM_FUNC_POINTERS section type. */
- return secname_panic_unimpl(c, ".fini_array");
+ return pool_intern_cstr(c->global, "__DATA,__mod_term_func");
default:
return secname_panic_unimpl(c, ".fini_array");
}
@@ -63,8 +59,9 @@ Sym obj_secname_preinit_array(Compiler* c) {
case CFREE_OBJ_MACHO:
/* Mach-O has no direct `.preinit_array` analogue — dyld runs
* S_MOD_INIT_FUNC_POINTERS only. Phase 3 of the linker will
- * route the IFUNC ctor through __mod_init_func instead and
- * this entry point will become a target.obj-specific shim. */
+ * route preinit ctors through __mod_init_func; until then any
+ * caller hitting this on a MACHO target is doing something the
+ * platform can't represent. */
return secname_panic_unimpl(c, ".preinit_array");
default:
return secname_panic_unimpl(c, ".preinit_array");
@@ -76,9 +73,7 @@ Sym obj_secname_tdata(Compiler* c) {
case CFREE_OBJ_ELF:
return pool_intern_cstr(c->global, ".tdata");
case CFREE_OBJ_MACHO:
- /* TODO Phase 2: Mach-O TLS uses __DATA,__thread_data with
- * S_THREAD_LOCAL_REGULAR plus a tlv_descriptor record. */
- return secname_panic_unimpl(c, ".tdata");
+ return pool_intern_cstr(c->global, "__DATA,__thread_data");
default:
return secname_panic_unimpl(c, ".tdata");
}
@@ -89,8 +84,7 @@ Sym obj_secname_tbss(Compiler* c) {
case CFREE_OBJ_ELF:
return pool_intern_cstr(c->global, ".tbss");
case CFREE_OBJ_MACHO:
- /* TODO Phase 2: __DATA,__thread_bss with S_THREAD_LOCAL_ZEROFILL. */
- return secname_panic_unimpl(c, ".tbss");
+ return pool_intern_cstr(c->global, "__DATA,__thread_bss");
default:
return secname_panic_unimpl(c, ".tbss");
}
diff --git a/test/cg/run.sh b/test/cg/run.sh
@@ -43,19 +43,40 @@ JIT_RUNNER="$BUILD_DIR/jit-runner"
DWARF_CHECK="$BUILD_DIR/cg-check-dwarf"
NORMALIZE="$ROOT/test/elf/normalize.py"
-# CFREE_TEST_ARCH selects the cross-target the harness drives the
-# compiler at. Default aa64 preserves historical behavior. The runners
-# (cg-runner / link-exe-runner / jit-runner) read the same env var via
-# test/lib/cfree_test_target.h, so the C side and the shell side stay
-# in lockstep.
+# CFREE_TEST_ARCH and CFREE_TEST_OBJ together select the cross-target
+# the harness drives the compiler at. Defaults aa64+elf preserve
+# historical behavior. The runners (cg-runner / link-exe-runner /
+# jit-runner) read the same env vars via test/lib/cfree_test_target.h,
+# so the C side and the shell side stay in lockstep.
CFREE_TEST_ARCH="${CFREE_TEST_ARCH:-aa64}"
+CFREE_TEST_OBJ="${CFREE_TEST_OBJ:-elf}"
case "$CFREE_TEST_ARCH" in
- aa64|aarch64|arm64) TEST_ARCH=aa64; CLANG_TRIPLE=aarch64-linux-gnu; EXEC_ARCH=aarch64 ;;
- x64|x86_64|amd64) TEST_ARCH=x64; CLANG_TRIPLE=x86_64-linux-gnu; EXEC_ARCH=x64 ;;
- rv64|riscv64) TEST_ARCH=rv64; CLANG_TRIPLE=riscv64-linux-gnu; EXEC_ARCH=rv64 ;;
+ aa64|aarch64|arm64) TEST_ARCH=aa64; EXEC_ARCH=aarch64 ;;
+ x64|x86_64|amd64) TEST_ARCH=x64; EXEC_ARCH=x64 ;;
+ rv64|riscv64) TEST_ARCH=rv64; EXEC_ARCH=rv64 ;;
*) printf 'unknown CFREE_TEST_ARCH=%s\n' "$CFREE_TEST_ARCH" >&2; exit 2 ;;
esac
-export CFREE_TEST_ARCH
+case "$CFREE_TEST_OBJ" in
+ elf)
+ EXEC_OS=linux
+ case "$TEST_ARCH" in
+ aa64) CLANG_TRIPLE=aarch64-linux-gnu ;;
+ x64) CLANG_TRIPLE=x86_64-linux-gnu ;;
+ rv64) CLANG_TRIPLE=riscv64-linux-gnu ;;
+ esac
+ ;;
+ macho)
+ EXEC_OS=macos
+ case "$TEST_ARCH" in
+ aa64) CLANG_TRIPLE=arm64-apple-macos ;;
+ x64) CLANG_TRIPLE=x86_64-apple-macos ;;
+ rv64) printf 'CFREE_TEST_OBJ=macho has no rv64 target\n' >&2; exit 2 ;;
+ esac
+ ;;
+ *) printf 'unknown CFREE_TEST_OBJ=%s\n' "$CFREE_TEST_OBJ" >&2; exit 2 ;;
+esac
+EXEC_TAG="${EXEC_ARCH}-${EXEC_OS}"
+export CFREE_TEST_ARCH CFREE_TEST_OBJ
CLANG_TARGET="--target=$CLANG_TRIPLE"
CC="${CC:-cc}"
@@ -290,12 +311,13 @@ for OPT_LEVEL in $OPT_LEVELS; do
# negative-return cases compare correctly.
expected_byte=$(( expected & 0xff ))
- # Path E target arch. The shell drives every case at the
- # CFREE_TEST_ARCH-selected target — emit panics on stub backends
- # surface as case failures rather than harness skips, which is
- # the multi-arch contract through Phase 2. cg-runner's --arches
- # output is informational at this stage.
- case_arch="$EXEC_ARCH"
+ # Path E target tag. The shell drives every case at the
+ # (CFREE_TEST_ARCH, CFREE_TEST_OBJ)-selected target — emit panics
+ # on stub backends surface as case failures rather than harness
+ # skips, which is the multi-arch/multi-obj contract through
+ # Phase 2. cg-runner's --arches output is informational at this
+ # stage.
+ case_tag="$EXEC_TAG"
# ---- Path D: in-process JIT (only when host arch == cross-target) ----
if [ $RUN_D -eq 1 ]; then
@@ -356,7 +378,7 @@ for OPT_LEVEL in $OPT_LEVELS; do
>"$work/exec_link.out" 2>"$work/exec_link.err"; then
dt=$(( $(now_ms) - t0 )); T_E=$(( T_E + dt ))
note_fail "$name/E${TAG} (link failed, ${dt}ms)"
- elif exec_target_supported "$case_arch"; then
+ elif exec_target_supported "$case_tag"; then
link_dt=$(( $(now_ms) - t0 )); T_E=$(( T_E + link_dt ))
E_NAMES+=("$name")
E_WORK+=("$work")
@@ -364,11 +386,11 @@ for OPT_LEVEL in $OPT_LEVELS; do
E_EXPECTED+=("$expected_byte")
# Queue with a level-tagged key so cases at different
# opt levels don't collide in the batched runner.
- exec_target_queue "$case_arch" "L${OPT_LEVEL}_${name}" \
+ exec_target_queue "$case_tag" "L${OPT_LEVEL}_${name}" \
"$exe" "$work/exec.out" "$work/exec.err" \
"$work/exec.rc"
else
- note_skip "$name/E${TAG}" "no runner for $case_arch"
+ note_skip "$name/E${TAG}" "no runner for $case_tag"
fi
else
note_skip "$name/E${TAG}" "no link-exe-runner, aarch64 clang, or start.o"
diff --git a/test/elf/cases/18_bti_note.arches b/test/elf/cases/18_bti_note.arches
@@ -1 +0,0 @@
-aa64
diff --git a/test/elf/cases/18_bti_note.targets b/test/elf/cases/18_bti_note.targets
@@ -0,0 +1 @@
+aa64-elf
diff --git a/test/elf/run.sh b/test/elf/run.sh
@@ -112,15 +112,28 @@ printf '\n'
# ----- Layer B: cases/*.c ------------------------------------------------
-# Map CFREE_TEST_ARCH (default aa64) to the clang `--target=` triple the
-# Layer B golden objects are compiled against. cfree-roundtrip then
-# detects the input's e_machine and constructs a matching CfreeTarget,
-# so the readelf diff stays apples-to-apples per arch.
-case "${CFREE_TEST_ARCH:-aa64}" in
- aa64|aarch64|arm64) CLANG_TARGET="aarch64-linux-gnu" ;;
- x64|x86_64|amd64) CLANG_TARGET="x86_64-linux-gnu" ;;
- rv64|riscv64) CLANG_TARGET="riscv64-linux-gnu" ;;
- *) printf 'unknown CFREE_TEST_ARCH=%s\n' "${CFREE_TEST_ARCH}" >&2; exit 2 ;;
+# Map (CFREE_TEST_ARCH, CFREE_TEST_OBJ) (defaults aa64+elf) to the clang
+# `--target=` triple the Layer B golden objects are compiled against.
+# cfree-roundtrip then detects the input's e_machine / Mach-O cputype and
+# constructs a matching CfreeTarget, so the readelf/objdump diff stays
+# apples-to-apples per target.
+case "${CFREE_TEST_OBJ:-elf}" in
+ elf)
+ case "${CFREE_TEST_ARCH:-aa64}" in
+ aa64|aarch64|arm64) CLANG_TARGET="aarch64-linux-gnu" ;;
+ x64|x86_64|amd64) CLANG_TARGET="x86_64-linux-gnu" ;;
+ rv64|riscv64) CLANG_TARGET="riscv64-linux-gnu" ;;
+ *) printf 'unknown CFREE_TEST_ARCH=%s\n' "${CFREE_TEST_ARCH}" >&2; exit 2 ;;
+ esac
+ ;;
+ macho)
+ case "${CFREE_TEST_ARCH:-aa64}" in
+ aa64|aarch64|arm64) CLANG_TARGET="arm64-apple-macos" ;;
+ x64|x86_64|amd64) CLANG_TARGET="x86_64-apple-macos" ;;
+ *) printf 'CFREE_TEST_OBJ=macho: unsupported arch %s\n' "${CFREE_TEST_ARCH}" >&2; exit 2 ;;
+ esac
+ ;;
+ *) printf 'unknown CFREE_TEST_OBJ=%s\n' "${CFREE_TEST_OBJ}" >&2; exit 2 ;;
esac
printf 'Layer B — clang-oracle cases\n'
@@ -130,20 +143,22 @@ if [ ${#case_srcs[@]} -eq 0 ]; then
else
for src in "${case_srcs[@]}"; do
name="cases/$(basename "$src" .c)"
- # Per-case arch applicability: a NN_name.arches file lists the
- # CFREE_TEST_ARCH values the case applies to (one per line, or
- # whitespace-separated). When the current arch isn't listed the
- # case is silently filtered out — not a skip — because it
- # exercises arch-specific features with no equivalent elsewhere
- # (e.g. AArch64 BTI/PAC notes have no RISC-V analogue).
- arches_file="${src%.c}.arches"
- if [ -f "$arches_file" ]; then
+ # Per-case applicability: a NN_name.targets file lists the
+ # <arch>-<obj> tuples the case applies to (one per line, or
+ # whitespace-separated). When the current tuple isn't listed
+ # the case is silently filtered out — not a skip — because it
+ # exercises target-specific features with no equivalent
+ # elsewhere (e.g. AArch64 BTI/PAC notes have no RISC-V analogue;
+ # ELF features have no Mach-O peer).
+ targets_file="${src%.c}.targets"
+ cur_tuple="${CFREE_TEST_ARCH:-aa64}-${CFREE_TEST_OBJ:-elf}"
+ if [ -f "$targets_file" ]; then
applicable=0
- for a in $(cat "$arches_file"); do
- [ "$a" = "${CFREE_TEST_ARCH:-aa64}" ] && applicable=1
+ for tuple in $(cat "$targets_file"); do
+ [ "$tuple" = "$cur_tuple" ] && applicable=1
done
if [ $applicable -eq 0 ]; then
- printf ' %s %s — N/A on %s\n' "$(color_yel SKIP-NA)" "$name" "${CFREE_TEST_ARCH:-aa64}"
+ printf ' %s %s — N/A on %s\n' "$(color_yel SKIP-NA)" "$name" "$cur_tuple"
continue
fi
fi
diff --git a/test/lib/cfree_test_target.h b/test/lib/cfree_test_target.h
@@ -1,10 +1,12 @@
/* Shared CfreeTarget setup for C test runners.
*
- * Reads CFREE_TEST_ARCH (one of "aa64"/"aarch64", "x64"/"x86_64",
- * "rv64"/"riscv64") and fills *t with a Linux/ELF/LP64 target for
- * that arch. Defaults to aarch64 when the env var is unset or empty,
- * preserving the historical behavior of every harness that called
- * the previous local target_aarch64_linux() helper.
+ * Reads two env vars to pick a (arch, os, obj) triple:
+ *
+ * CFREE_TEST_ARCH one of "aa64"/"aarch64"/"arm64", "x64"/"x86_64",
+ * "rv64"/"riscv64". Default: "aa64".
+ * CFREE_TEST_OBJ one of "elf" (linux) or "macho" (macos).
+ * Default: "elf". Sets t->obj and t->os together so
+ * the C runners and shell drivers stay in lockstep.
*
* Header-only and self-contained: include from any runner TU. The
* caller still chooses whether the build's host arch matches the
@@ -24,13 +26,33 @@ static inline const char* cfree_test_arch_name(void) {
return a;
}
+/* Object-format selector for the test runners: the value of the
+ * CFREE_TEST_OBJ environment variable, or "elf" when it is unset or
+ * empty. The returned string is not validated here. */
+static inline const char* cfree_test_obj_name(void) {
+  const char* env = getenv("CFREE_TEST_OBJ");
+  return (env != NULL && env[0] != '\0') ? env : "elf";
+}
+
static inline int cfree_test_target_init(CfreeTarget* t) {
memset(t, 0, sizeof *t);
- t->os = CFREE_OS_LINUX;
- t->obj = CFREE_OBJ_ELF;
t->ptr_size = 8;
t->ptr_align = 8;
t->big_endian = 0;
+
+ const char* o = cfree_test_obj_name();
+ if (!strcmp(o, "elf")) {
+ t->os = CFREE_OS_LINUX;
+ t->obj = CFREE_OBJ_ELF;
+ } else if (!strcmp(o, "macho")) {
+ t->os = CFREE_OS_MACOS;
+ t->obj = CFREE_OBJ_MACHO;
+ } else {
+ fprintf(stderr,
+ "cfree_test_target: unrecognized CFREE_TEST_OBJ=\"%s\" "
+ "(expected elf/macho)\n",
+ o);
+ return -1;
+ }
+
const char* a = cfree_test_arch_name();
if (!strcmp(a, "aa64") || !strcmp(a, "aarch64") || !strcmp(a, "arm64")) {
t->arch = CFREE_ARCH_ARM_64;
@@ -41,6 +63,11 @@ static inline int cfree_test_target_init(CfreeTarget* t) {
return 0;
}
if (!strcmp(a, "rv64") || !strcmp(a, "riscv64")) {
+ if (t->os == CFREE_OS_MACOS) {
+ fprintf(stderr,
+ "cfree_test_target: rv64 has no macOS target\n");
+ return -1;
+ }
t->arch = CFREE_ARCH_RV64;
return 0;
}
diff --git a/test/lib/exec_target.sh b/test/lib/exec_target.sh
@@ -1,31 +1,35 @@
-# test/lib/exec_target.sh — shared per-arch exec helper for test harnesses.
+# test/lib/exec_target.sh — shared per-target exec helper for test harnesses.
#
# Sourced by test/{link,cg,parse}/run.sh. Provides three execution modes,
-# each parameterized by target arch:
+# each parameterized by a `<arch>-<os>` target tag:
#
-# exec_target_run <arch> EXE OUT ERR
+# exec_target_run TAG EXE OUT ERR
# Synchronous one-shot. Sets RUN_RC. Used for kernel images and
# negative-test cases that need an immediate rc.
#
-# exec_target_queue <arch> NAME EXE OUT ERR RC
-# Append a case to the internal queue. The arch tag is stored
-# alongside so flush can group cases by arch and run one batched
-# podman invocation per arch.
+# exec_target_queue TAG NAME EXE OUT ERR RC
+# Append a case to the internal queue. The tag is stored alongside
+# so flush can group cases by target and run one batched runner
+# invocation per group.
#
# exec_target_queue_size
-# Total queue size across all arches.
+# Total queue size across all targets.
#
# exec_target_flush
-# Drain the queue. Cases are grouped by arch and each group runs
-# through one `podman run` (or qemu serial loop). On podman hosts
-# this amortizes the ~150 ms per-launch client round-trip across
-# the whole suite.
+# Drain the queue. Cases are grouped by tag and each group runs
+# through one `podman run` (linux targets) or a native loop (macos
+# targets). On podman hosts this amortizes the ~150 ms per-launch
+# client round-trip across the whole suite.
#
-# exec_target_supported <arch>
-# Returns 0 if some runner is available for arch on this host.
+# exec_target_supported TAG
+# Returns 0 if some runner is available for tag on this host.
#
-# Recognized arches: aarch64, x64, rv64. Each maps to a podman --platform
-# string and an optional user-mode qemu binary detected on the host.
+# Recognized tags: <arch>-<os> where arch is aarch64/x64/rv64 and os
+# is linux/macos. linux tags map to podman --platform strings and an
+# optional user-mode qemu binary. macos tags require a Darwin host whose
+# native arch matches; Mach-O cannot be loaded by the Linux kernel and
+# Linux ELF cannot be loaded by Darwin, so cross-OS exec is unsupported
+# (callers see exec_target_supported return 1 and SKIP).
#
# Caller contract:
# - Sets the following before sourcing or calling: have_qemu (host
@@ -39,26 +43,42 @@
# container image (default alpine:latest, matching the prior
# inline implementation).
-# Internal queue arrays. Each entry's arch is recorded alongside the
-# rest so flush can split into per-arch batched runs.
-EXEC_TARGET_ARCHES=()
+# Internal queue arrays. Each entry's tag is recorded alongside the
+# rest so flush can split into per-target batched runs.
+EXEC_TARGET_TAGS=()
EXEC_TARGET_NAMES=()
EXEC_TARGET_EXES=()
EXEC_TARGET_OUTS=()
EXEC_TARGET_ERRS=()
EXEC_TARGET_RCS=()
-# ---- per-arch capability/dispatch knobs ------------------------------------
+# ---- tag parsing -----------------------------------------------------------
#
-# _exec_target_platform <arch> → echoes podman `--platform` value.
-# _exec_target_image <arch> → echoes container image to use.
-# _exec_target_native <arch> → returns 0 if host can run arch natively
-# without any container/qemu indirection.
-# _exec_target_qemu <arch> → echoes a host qemu-user binary path if
-# one is available, else empty.
+# _exec_target_arch TAG → echoes arch portion ("aarch64", "x64", "rv64").
+# _exec_target_os TAG → echoes os portion ("linux", "macos").
+#
+# Bare-arch tags ("aarch64", "x64", "rv64") are accepted and mean
+# "<arch>-linux" — preserves call-site compatibility while the harness
+# transition to <arch>-<os> tags is in progress.
-_exec_target_platform() {
+# Echo the arch half of TAG ($1). For a tag containing '-', strip the
+# shortest trailing "-<os>" suffix; a bare-arch tag is echoed unchanged.
+_exec_target_arch() {
case "$1" in
+ *-*) printf '%s' "${1%-*}" ;;
+ *) printf '%s' "$1" ;;
esac
}
+
+_exec_target_os() {
+ case "$1" in
+ *-*) printf '%s' "${1#*-}" ;;
+ *) printf 'linux' ;;
+ esac
+}
+
+# ---- per-target capability/dispatch knobs ----------------------------------
+
+_exec_target_platform() {
+ case "$(_exec_target_arch "$1")" in
aarch64) echo "linux/arm64" ;;
x64) echo "linux/amd64" ;;
rv64) echo "linux/riscv64" ;;
@@ -67,7 +87,7 @@ _exec_target_platform() {
}
_exec_target_image() {
- case "$1" in
+ case "$(_exec_target_arch "$1")" in
aarch64) echo "${RUN_AARCH64_IMAGE:-alpine:latest}" ;;
x64) echo "${RUN_X64_IMAGE:-alpine:latest}" ;;
rv64) echo "${RUN_RV64_IMAGE:-alpine:latest}" ;;
@@ -75,19 +95,49 @@ _exec_target_image() {
esac
}
+# True when the host can exec this target without container/qemu help.
+#
+# linux targets: matching arch on a Linux host (e.g. aarch64-linux on
+# a Linux/aarch64 host). On Darwin, Linux ELF cannot be loaded by the
+# kernel even when the arch matches, so this is Linux-host-only.
+#
+# macos targets: matching arch on a Darwin host. The Linux kernel
+# cannot load Mach-O, so this is Darwin-host-only.
_exec_target_native() {
- # Targets are linux/<arch> ELFs; matching arch on a non-Linux host
- # (e.g. Darwin/arm64) cannot load them, so require Linux too.
- [ "$(uname -s 2>/dev/null)" = "Linux" ] || return 1
- _exec_target_podman_native "$1"
+    # Each supported target os is paired with the one host kernel name
+    # (as printed by `uname -s`) that this check accepts; any other os
+    # is never native.
+    local required_kernel
+    case "$(_exec_target_os "$1")" in
+        linux) required_kernel="Linux" ;;
+        macos) required_kernel="Darwin" ;;
+        *)     return 1 ;;
+    esac
+    [ "$(uname -s 2>/dev/null)" = "$required_kernel" ] || return 1
+    # Kernel matches; the verdict is now purely the arch comparison.
+    _exec_target_arch_matches_host \
+        "$(_exec_target_arch "$1")" "$(uname -m 2>/dev/null)"
}
-# True when podman can run this target arch without emulation. The podman
+# _exec_target_arch_matches_host ARCH HOST_ARCH
+# Succeeds when HOST_ARCH (a `uname -m` string) is one of the spellings
+# accepted for the harness arch name ARCH; fails for every other pair,
+# including unknown arch names.
+_exec_target_arch_matches_host() {
+    case "$1:$2" in
+        aarch64:aarch64|aarch64:arm64) return 0 ;;
+        x64:x86_64|x64:amd64)          return 0 ;;
+        rv64:riscv64)                  return 0 ;;
+    esac
+    return 1
+}
+
+# True when podman can run this linux target without emulation. The podman
# machine on Darwin/arm64 already runs linux/arm64, so passing `--platform
# linux/arm64` there is redundant — and worse, triggers a registry manifest
# lookup (~30 s) on every `podman run` even when the local image matches.
_exec_target_podman_native() {
- case "$1" in
+ case "$(_exec_target_arch "$1")" in
aarch64) [ "${is_aarch64:-0}" -eq 1 ] ;;
x64) [ "$(uname -m 2>/dev/null)" = "x86_64" ] || \
[ "$(uname -m 2>/dev/null)" = "amd64" ] ;;
@@ -97,7 +147,7 @@ _exec_target_podman_native() {
}
_exec_target_qemu() {
- case "$1" in
+ case "$(_exec_target_arch "$1")" in
aarch64) [ "${have_qemu:-0}" -eq 1 ] && echo "${QEMU_BIN:-}" ;;
x64) # No qemu-user fallback for x64 in current harnesses.
echo "" ;;
@@ -116,36 +166,49 @@ _exec_target_qemu() {
}
exec_target_supported() {
- local arch="$1"
- _exec_target_native "$arch" && return 0
- [ -n "$(_exec_target_qemu "$arch")" ] && return 0
+    local tag="$1"
+    # macOS targets have no podman/qemu fallback: the native check alone
+    # decides, and its status is returned as-is.
+    case "$(_exec_target_os "$tag")" in
+        macos) _exec_target_native "$tag"; return ;;
+    esac
+    # Every other target: native exec first, then a qemu-user binary,
+    # then podman.
+    if _exec_target_native "$tag"; then return 0; fi
+    if [ -n "$(_exec_target_qemu "$tag")" ]; then return 0; fi
[ "${have_podman:-0}" -eq 1 ] && return 0
return 1
}
# Synchronous run; sets RUN_RC.
exec_target_run() {
- local arch="$1" exe="$2" out="$3" err="$4"
- local qemu
- if _exec_target_native "$arch"; then
+ local tag="$1" exe="$2" out="$3" err="$4"
+ local os qemu
+ os="$(_exec_target_os "$tag")"
+ if _exec_target_native "$tag"; then
"$exe" >"$out" 2>"$err"; RUN_RC=$?; return
fi
- qemu="$(_exec_target_qemu "$arch")"
+ if [ "$os" = "macos" ]; then
+ # Mach-O cannot run via podman/qemu — only Darwin-native.
+ RUN_RC=127; return
+ fi
+ qemu="$(_exec_target_qemu "$tag")"
if [ -n "$qemu" ]; then
"$qemu" "$exe" >"$out" 2>"$err"; RUN_RC=$?; return
fi
if [ "${have_podman:-0}" -eq 1 ]; then
local dir base platform image platform_flag=()
dir="$(cd "$(dirname "$exe")" && pwd)"; base="$(basename "$exe")"
- platform="$(_exec_target_platform "$arch")"
- image="$(_exec_target_image "$arch")"
+ platform="$(_exec_target_platform "$tag")"
+ image="$(_exec_target_image "$tag")"
# `--platform` triggers a registry manifest lookup (~30 s) even
# when the local image already matches. Only pass it when podman
# would otherwise have to emulate — i.e. the podman machine's
# native arch differs from the target. (On Darwin/arm64 the
# podman VM is already linux/arm64, so aarch64 targets skip the
# flag even though the host can't load the ELF directly.)
- if ! _exec_target_podman_native "$arch"; then
+ if ! _exec_target_podman_native "$tag"; then
platform_flag=(--platform "$platform")
fi
podman run --rm "${platform_flag[@]}" --net=none \
@@ -160,7 +223,7 @@ exec_target_run() {
# Queue an exe to run later. Stored verbatim; flush writes <rc_file> with
# the integer exit code, and routes stdout/stderr to <out_file>/<err_file>.
exec_target_queue() {
- EXEC_TARGET_ARCHES+=("$1")
+ EXEC_TARGET_TAGS+=("$1")
EXEC_TARGET_NAMES+=("$2")
EXEC_TARGET_EXES+=("$3")
EXEC_TARGET_OUTS+=("$4")
@@ -170,21 +233,22 @@ exec_target_queue() {
exec_target_queue_size() { echo "${#EXEC_TARGET_EXES[@]}"; }
-# Internal: drain every entry whose arch matches $1, using qemu (if
+# Internal: drain every entry whose tag matches $1, using qemu (if
# available for that arch), podman batched run, or the no-runner stub.
-_exec_target_flush_arch() {
- local arch="$1"
+_exec_target_flush_tag() {
+ local tag="$1" os
+ os="$(_exec_target_os "$tag")"
local idx=()
local i=0 n="${#EXEC_TARGET_EXES[@]}"
while [ $i -lt "$n" ]; do
- [ "${EXEC_TARGET_ARCHES[$i]}" = "$arch" ] && idx+=("$i")
+ [ "${EXEC_TARGET_TAGS[$i]}" = "$tag" ] && idx+=("$i")
i=$((i+1))
done
[ "${#idx[@]}" -eq 0 ] && return 0
- local qemu; qemu="$(_exec_target_qemu "$arch")"
local k
- if _exec_target_native "$arch"; then
+ # Native exec (Linux-on-Linux, Darwin-on-Darwin) — same loop.
+ if _exec_target_native "$tag"; then
for k in "${idx[@]}"; do
"${EXEC_TARGET_EXES[$k]}" \
>"${EXEC_TARGET_OUTS[$k]}" 2>"${EXEC_TARGET_ERRS[$k]}"
@@ -192,6 +256,17 @@ _exec_target_flush_arch() {
done
return 0
fi
+ # macOS: no fallback — mark as 127 so callers can SKIP cleanly.
+ if [ "$os" = "macos" ]; then
+ for k in "${idx[@]}"; do
+ : >"${EXEC_TARGET_OUTS[$k]}"
+ : >"${EXEC_TARGET_ERRS[$k]}"
+ echo 127 >"${EXEC_TARGET_RCS[$k]}"
+ done
+ return 0
+ fi
+
+ local qemu; qemu="$(_exec_target_qemu "$tag")"
if [ -n "$qemu" ]; then
for k in "${idx[@]}"; do
"$qemu" "${EXEC_TARGET_EXES[$k]}" \
@@ -206,9 +281,9 @@ _exec_target_flush_arch() {
return 2
fi
local platform image platform_flag=()
- platform="$(_exec_target_platform "$arch")"
- image="$(_exec_target_image "$arch")"
- if ! _exec_target_podman_native "$arch"; then
+ platform="$(_exec_target_platform "$tag")"
+ image="$(_exec_target_image "$tag")"
+ if ! _exec_target_podman_native "$tag"; then
platform_flag=(--platform "$platform")
fi
# Manifest is fed via stdin; one tab-separated line per case.
@@ -246,16 +321,16 @@ done
# Drain the queue. Reads back via the .rc files written into the
# bind-mounted tree; callers iterate their own bookkeeping arrays after
-# this returns. Each arch present in the queue runs in its own batch.
+# this returns. Each tag present in the queue runs in its own batch.
exec_target_flush() {
[ "${#EXEC_TARGET_EXES[@]}" -eq 0 ] && return 0
- # Distinct arches in queue order. Bash 3.2 has no associative arrays;
+ # Distinct tags in queue order. Bash 3.2 has no associative arrays;
# use a small linear scan.
local seen=() a present k
- local i=0 n="${#EXEC_TARGET_ARCHES[@]}"
+ local i=0 n="${#EXEC_TARGET_TAGS[@]}"
while [ $i -lt "$n" ]; do
- a="${EXEC_TARGET_ARCHES[$i]}"
+ a="${EXEC_TARGET_TAGS[$i]}"
present=0
for k in "${seen[@]:-}"; do [ "$k" = "$a" ] && present=1 && break; done
[ "$present" -eq 0 ] && seen+=("$a")
@@ -264,10 +339,10 @@ exec_target_flush() {
local rc=0
for a in "${seen[@]}"; do
- _exec_target_flush_arch "$a" || rc=$?
+ _exec_target_flush_tag "$a" || rc=$?
done
- EXEC_TARGET_ARCHES=()
+ EXEC_TARGET_TAGS=()
EXEC_TARGET_NAMES=()
EXEC_TARGET_EXES=()
EXEC_TARGET_OUTS=()
diff --git a/test/macho/cfree-roundtrip-macho.c b/test/macho/cfree-roundtrip-macho.c
@@ -0,0 +1,172 @@
+/* cfree-roundtrip-macho: read a Mach-O object via libcfree's read_macho,
+ * then re-emit via emit_macho. Phase 2 oracle for doc/MULTIOBJ.md §3.1.
+ *
+ * Usage: cfree-roundtrip-macho <in.o> <out.o>
+ *
+ * Behavior: cfree_detect_target on the input bytes selects the Compiler
+ * target; read_macho parses into an ObjBuilder; emit_macho writes the
+ * canonical re-emit to out.o. Diagnostics go to stderr via the libc
+ * heap + stderr diag sink. compiler_panic exits the process with
+ * status 2 and the diagnostic text on stderr.
+ *
+ * Mixes public (<cfree.h>) and internal (src/obj/obj.h, src/core/core.h)
+ * headers — this is a test binary, not a libcfree consumer, so seeing
+ * the internal surface is fine. */
+
+#include <cfree.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "core/core.h"
+#include "obj/obj.h"
+
+/* Allocation hook installed in g_heap, backed by malloc. The heap handle
+ * and the requested alignment are ignored; a zero-byte request returns NULL
+ * without calling malloc. */
+static void* heap_alloc(CfreeHeap* h, size_t n, size_t a) {
+  (void)h;
+  (void)a;
+  if (n == 0) return NULL;
+  return malloc(n);
+}
+/* Resize hook installed in g_heap: forwards p and the new size n straight
+ * to realloc. The heap handle, old size o and alignment a are unused. */
+static void* heap_realloc(CfreeHeap* h, void* p, size_t o, size_t n, size_t a) {
+  void* resized = realloc(p, n);
+  (void)h;
+  (void)o;
+  (void)a;
+  return resized;
+}
+/* Release hook installed in g_heap: hands p to free. The heap handle and
+ * the size n are unused. */
+static void heap_free(CfreeHeap* h, void* p, size_t n) {
+  free(p);
+  (void)h;
+  (void)n;
+}
+/* Heap handed to libcfree (via CfreeEnv.heap and cfree_writer_mem in main):
+ * the three libc-backed hooks defined above. NOTE(review): the trailing NULL
+ * presumably fills a user-data slot -- confirm against CfreeHeap in cfree.h. */
+static CfreeHeap g_heap = {heap_alloc, heap_realloc, heap_free, NULL};
+
+/* Diagnostic sink callback: prints "<severity>: <formatted message>\n" to
+ * stderr. The sink handle and the source location are ignored. A kind that
+ * falls outside the severity table is labelled "diag" rather than indexing
+ * past the end of the table.
+ * NOTE(review): the table order assumes CfreeDiagKind enumerates note,
+ * warning, error, fatal as 0..3 -- confirm against cfree.h. */
+static void diag_emit(CfreeDiagSink* s, CfreeDiagKind k, CfreeSrcLoc loc,
+                      const char* fmt, va_list ap) {
+  static const char* const names[] = {"note", "warning", "error", "fatal"};
+  const size_t n_names = sizeof names / sizeof names[0];
+  /* Convert to size_t first so a negative enum value also fails the check. */
+  const char* label = (size_t)k < n_names ? names[(size_t)k] : "diag";
+  (void)s;
+  (void)loc;
+  fprintf(stderr, "%s: ", label);
+  vfprintf(stderr, fmt, ap);
+  fputc('\n', stderr);
+}
+/* Diag sink wired into CfreeEnv.diag in main; every diagnostic is routed
+ * through diag_emit. NOTE(review): the remaining members are set to NULL/0;
+ * their meaning is declared in cfree.h (presumably a user pointer and
+ * counters -- confirm). */
+static CfreeDiagSink g_diag = {diag_emit, NULL, 0, 0};
+
+/* Read the whole of `path` into a freshly malloc'd buffer.
+ *
+ * On success returns 0, stores the buffer in *data_out (the caller frees it)
+ * and the fstat-reported size in *len_out. On any failure -- open, fstat,
+ * allocation, a read error, or EOF before the reported size was reached --
+ * returns -1 and leaves both outputs untouched. An empty file still yields
+ * a non-NULL one-byte allocation. */
+static int read_file(const char* path, uint8_t** data_out, size_t* len_out) {
+  int rc = -1;
+  uint8_t* bytes = NULL;
+  struct stat st;
+
+  int fd = open(path, O_RDONLY);
+  if (fd < 0) return -1;
+  if (fstat(fd, &st) < 0) goto done;
+
+  size_t total = (size_t)st.st_size;
+  bytes = (uint8_t*)malloc(total ? total : 1);
+  if (!bytes) goto done;
+
+  /* read() may return short counts; keep going until the buffer is full. */
+  for (size_t filled = 0; filled < total;) {
+    ssize_t chunk = read(fd, bytes + filled, total - filled);
+    if (chunk <= 0) goto done;
+    filled += (size_t)chunk;
+  }
+
+  *data_out = bytes;
+  *len_out = total;
+  bytes = NULL; /* ownership moved to the caller */
+  rc = 0;
+
+done:
+  free(bytes);
+  close(fd);
+  return rc;
+}
+
+/* Create or truncate `path` (mode 0644) and write all `len` bytes of `data`.
+ *
+ * Returns 0 once every byte has been written and the descriptor has closed
+ * cleanly, -1 otherwise. A failed call may leave a partially written file
+ * behind. */
+static int write_file(const char* path, const uint8_t* data, size_t len) {
+  int fd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
+  if (fd < 0) return -1;
+
+  size_t off = 0;
+  while (off < len) {
+    ssize_t k = write(fd, data + off, len - off);
+    /* k == 0 makes no progress; treat it as failure instead of spinning. */
+    if (k <= 0) {
+      close(fd);
+      return -1;
+    }
+    off += (size_t)k;
+  }
+
+  /* close() can be the first place a deferred write error is reported, so
+   * its status is part of the result. */
+  return close(fd) == 0 ? 0 : -1;
+}
+
+/* Entry point: read argv[1], parse it with read_macho, re-emit it with
+ * emit_macho into a memory writer, and write the bytes to argv[2].
+ *
+ * Exit status: 0 on success; 1 when the input cannot be read or recognized,
+ * compiler/writer setup fails, or the output cannot be written; 2 on a usage
+ * error or when the compiler's panic jmp_buf is taken. */
+int main(int argc, char** argv) {
+ if (argc != 3) {
+ fprintf(stderr, "usage: cfree-roundtrip-macho <in.o> <out.o>\n");
+ return 2;
+ }
+ const char* in_path = argv[1];
+ const char* out_path = argv[2];
+
+ uint8_t* in_data = NULL;
+ size_t in_len = 0;
+ if (read_file(in_path, &in_data, &in_len) != 0) {
+ fprintf(stderr, "error: cannot read %s\n", in_path);
+ return 1;
+ }
+
+ /* The target is taken from the input bytes, not from the command line. */
+ CfreeTarget target;
+ if (cfree_detect_target(in_data, in_len, &target) != 0) {
+ fprintf(stderr, "error: %s: not a recognized object file\n", in_path);
+ free(in_data);
+ return 1;
+ }
+
+ /* NOTE(review): only heap, file_io and diag are assigned; any other
+ * CfreeEnv member would be left indeterminate -- confirm against cfree.h. */
+ CfreeEnv env;
+ env.heap = &g_heap;
+ env.file_io = NULL;
+ env.diag = &g_diag;
+
+ CfreeCompiler* c = cfree_compiler_new(target, &env);
+ if (!c) {
+ fprintf(stderr, "error: cfree_compiler_new failed\n");
+ free(in_data);
+ return 1;
+ }
+
+ /* Panic landing pad: a non-zero setjmp return means control came back
+ * through the compiler's panic jmp_buf (presumably longjmp'd by
+ * compiler_panic during read_macho/emit_macho). This path releases only
+ * the compiler and the input buffer; ob and w are not released here. */
+ if (setjmp(((Compiler*)c)->panic)) {
+ compiler_run_cleanups((Compiler*)c);
+ cfree_compiler_free(c);
+ free(in_data);
+ return 2;
+ }
+
+ /* The result is not NULL-checked; presumably read_macho reports failure
+ * through the panic path above -- TODO confirm. */
+ ObjBuilder* ob = read_macho((Compiler*)c, in_path, in_data, in_len);
+
+ CfreeWriter* w = cfree_writer_mem(&g_heap);
+ if (!w) {
+ fprintf(stderr, "error: cfree_writer_mem failed\n");
+ obj_free(ob);
+ cfree_compiler_free(c);
+ free(in_data);
+ return 1;
+ }
+
+ emit_macho((Compiler*)c, ob, w);
+
+ /* The bytes are fetched and written out before the writer is closed. */
+ size_t out_len = 0;
+ const uint8_t* out_data = cfree_writer_mem_bytes(w, &out_len);
+
+ int rc = write_file(out_path, out_data, out_len);
+ if (rc != 0) fprintf(stderr, "error: cannot write %s\n", out_path);
+
+ /* Teardown: writer, object builder, compiler, then the input buffer. */
+ cfree_writer_close(w);
+ obj_free(ob);
+ cfree_compiler_free(c);
+ free(in_data);
+ return rc == 0 ? 0 : 1;
+}