commit a06d4ad1cc6afeb50842a6a561718a8e4f6ea5dc
parent 5381da7830175a7bdc007561160c0828f32095a9
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sun, 10 May 2026 14:32:21 -0700
link/macho: dyld-loadable MH_EXECUTE writer + .tbd / dylib DSO inputs
Phase 3 of doc/MULTIOBJ.md: cfree's own arm64-darwin linker, lighting
up `cfree cc -target arm64-apple-macos src.c -o exe -lSystem` end to
end on a Darwin host.
src/link/link_macho.c (new): MH_EXECUTE + MH_PIE writer. __PAGEZERO /
__TEXT (header + load commands + text + rodata + __stubs) /
__DATA_CONST (__got) / __DATA / __LINKEDIT segments.
LC_DYLD_CHAINED_FIXUPS for both bind (imports through __got slots)
and rebase (internal abs64 fixups). LC_DYLD_EXPORTS_TRIE, LC_SYMTAB,
LC_DYSYMTAB + indirect-symbol table, LC_LOAD_DYLINKER, LC_UUID,
LC_BUILD_VERSION, LC_LOAD_DYLIB per dylib, LC_MAIN, empty
LC_FUNCTION_STARTS / LC_DATA_IN_CODE. Ad-hoc LC_CODE_SIGNATURE with
SuperBlob + CodeDirectory v=0x20400 (sha256 4 KiB-page hashes,
execSeg fields) so the kernel will exec on macOS 11+.
src/obj/macho_read.c: read_macho_dso for MH_DYLIB inputs — extracts
LC_ID_DYLIB install-name and exported nlist entries.
src/obj/tbd_read.c (new): permissive token-scanner reader for Apple's
text-based .tbd stubs. Pulls install-name from the first document and
emits every `_id` token as an exported ObjSymbol — correct for the
static-link decision since LC_LOAD_DYLIB names the umbrella and dyld
walks re-exports at runtime.
driver/lib_resolve.{c,h}: -lname tries .tbd / .dylib / .so / .a in
order under LIB_RESOLVE_DYNAMIC_PREFER, with LIB_RESOLVE_KIND_TBD as
a third dispatch kind. driver/cc.c (covered by 5381da7) routes the
matched kind to dso_bytes[] vs archives[].
src/link/link.c: link_add_dso_bytes detects format (leading `---` for
.tbd, otherwise cfree_detect_fmt) and dispatches to the right reader.
link_set_entry default is `_main` on Mach-O (vs `_start` for ELF).
link_emit_image_writer routes CFREE_OBJ_MACHO to link_emit_macho.
src/link/link_internal.h: Linker* back-pointer on LinkImage so
format-specific emit can walk LinkInputs (e.g. resolving an imported
symbol's dso_input_id to its DSO install_name).
src/link/link_layout.c: boundary_name() helper Mach-O-prefixes
linker-synthesized boundaries (__init_array_start, etc.) so they
match consumer code's leading-`_` references; resolve_symbols's
is_def widened to require backing storage (section / abs / common)
— extern decls from decl.c land as kind=SK_FUNC, section_id=0 and
would otherwise be misclassified as definitions, masking the import
on the in-memory pipeline. ELF was unaffected (read_elf normalizes).
src/decl/decl.c: Mach-O target prepends `_` unconditionally to every
C identifier with linkage at obj_symbol creation time, matching
Apple's mangling.
src/api/pipeline.c: cfree_link_exe skips ELF layout_dyn for Mach-O
targets — their LC_LOAD_DYLIB / chained-fixups machinery is
synthesized in link_emit_macho instead.
Verified manually on Darwin/arm64: hello.c with printf, multi.c with
multiple imports + globals + bss link and run with the right exit
code. ELF test suite (test-elf) still 37/37; test-link path R + J
still green on aa64-macho. test-link path E (the new lane this
opens up) is 36 cases failing on a single boundary-symbol resolution
issue in start.o linkage — diagnosed and tracked in MULTIOBJ.md §9.1.
Diffstat:
12 files changed, 2784 insertions(+), 35 deletions(-)
diff --git a/doc/MULTIOBJ.md b/doc/MULTIOBJ.md
@@ -79,23 +79,64 @@ matrix.
granularity, not `-ffunction-sections`). `--gc-sections` can
drop whole sections but not individual functions, so the
`gc_absent unreachable_fn` check fails.
-- [ ] **Phase 3** — Mach-O linker (`link_emit_macho`)
- - [ ] Paused — design is option #1 from `doc/MULTIOBJ.md §3.3` (full
- dyld-loadable MH_EXECUTE with `LC_DYLD_CHAINED_FIXUPS` or
- `LC_DYLD_INFO_ONLY`, `__stubs` / `__got` synthesis,
- `LC_LOAD_DYLIB` against `libSystem.B.dylib`, `LC_MAIN`, and an
- ad-hoc `LC_CODE_SIGNATURE`). Until it lands, path E of
- `test/link/run.sh` panics on every case via
- `link_emit_image_writer`'s per-format "not yet implemented"
- diagnostic — the failures are visible (not skipped) so the
- outstanding work stays on the dashboard.
- - [ ] `test/link/harness/start.c` on macOS already calls `extern void
- exit(int)` via libc rather than emitting `svc #0x80` directly
- (Apple does not expose a stable syscall ABI); `link_emit_macho`
- is responsible for resolving the `_exit` import against
- libSystem and emitting the corresponding bind / fixup records.
- - [ ] Ad-hoc codesigning in `link_macho.c` (LC_CODE_SIGNATURE) so
- kernel will exec the binary on macOS 11+
+- [~] **Phase 3** — Mach-O linker (`link_emit_macho`) — driver path
+ working; test-link/E coverage pending (§9 Phase 3.1).
+ - [x] `src/link/link_macho.c` — MH_EXECUTE + MH_PIE writer with
+ `__PAGEZERO` / `__TEXT` / `__DATA_CONST` / `__DATA` /
+ `__LINKEDIT` segments, `__TEXT,__stubs` (12-byte arm64 stubs
+ through `__DATA_CONST,__got` slots), `LC_DYLD_CHAINED_FIXUPS`
+ for both bind (imports) and rebase (internal abs64) fixups,
+ `LC_DYLD_EXPORTS_TRIE` (single-entry minimal trie),
+ `LC_SYMTAB` + `LC_DYSYMTAB` + indirect-symbol table,
+ `LC_LOAD_DYLINKER` (`/usr/lib/dyld`), `LC_BUILD_VERSION`,
+ `LC_UUID`, `LC_MAIN`, empty `LC_FUNCTION_STARTS` /
+ `LC_DATA_IN_CODE`.
+ - [x] Ad-hoc `LC_CODE_SIGNATURE` (SuperBlob + CodeDirectory v=0x20400
+ with sha256 4 KiB-page hashes + execSeg fields) so the kernel
+ execs the binary on macOS 11+.
+ - [x] `read_macho_dso` for MH_DYLIB inputs and `read_tbd` for
+ Apple's text-based-stub `.tbd` files (sniffed via leading
+ `---`). `link_add_dso_bytes` dispatches on format. TBD parser
+ is a token scanner — emits every `_id` token as an exported
+ ObjSym; conservative but correct for the static-link decision
+ (the install-name on `LC_LOAD_DYLIB` is the umbrella, and dyld
+ walks re-exports at runtime).
+ - [x] `driver/lib_resolve.c` — `-lname` resolves `.tbd` first, then
+ `.dylib`, then `.so`, then `.a` under
+ `LIB_RESOLVE_DYNAMIC_PREFER`. `driver/cc.c` routes the result
+ to `dso_bytes[]` or `archives[]` by suffix and plumbs
+ `ndso_bytes` through `CfreeLinkInputs`.
+ - [x] `LinkImage.linker` back-pointer set by `link_resolve` so
+ format-specific emit can walk `LinkInputs` (e.g. resolving an
+ imported sym's `dso_input_id` to a DSO `install_name`).
+ - [x] `link_set_entry` defaults to `_main` on Mach-O (vs `_start`
+ for ELF), matching the LC_MAIN convention where dyld owns C
+ startup.
+ - [x] `link_layout.c::resolve_symbols` `is_def` widened to require
+ backing storage (section / abs / common) — `extern int f();`
+ from `decl.c` lands as `kind=SK_FUNC, section_id=0` and would
+ otherwise be misclassified as a definition, masking the import
+ and breaking CALL26 to libSystem on the in-memory pipeline.
+ ELF `.o` reads were unaffected because `read_elf` already
+ normalizes undefs to `SK_UNDEF`.
+ - [x] `link_layout.c::boundary_name` — Mach-O target prefixes every
+ linker-synthesized boundary symbol (`__init_array_start`, etc)
+ with `_` so the on-disk name matches what consumer code
+ compiles to under the leading-`_` mangling rule.
+ - [x] `decl.c` — Mach-O target prepends `_` to every C identifier
+ with linkage at obj_symbol creation time (unconditional, even
+ when source name already starts with `_`, matching Apple `cc`).
+ - [x] `pipeline.c::cfree_link_exe` — skip ELF `layout_dyn` for
+ Mach-O targets (their LC_LOAD_DYLIB / chained-fixups machinery
+ is synthesized in `link_emit_macho` instead, and ELF-shaped
+ `.plt` / `.got.plt` synthetic sections would only confuse the
+ Mach-O writer).
+ - [x] Smoke test — `cfree cc -target arm64-apple-macos hello.c -o
+ hello -lSystem` produces a runnable arm64-darwin Mach-O exe
+ that calls `printf` from libSystem and exits with the right
+ code (verified manually with `hello.c`, `multi.c` covering
+ multiple imports + globals + bss).
+ - [x] ELF test suite still green (`test-elf` 37/37).
---
@@ -679,3 +720,128 @@ A change in this plan is "done" when:
(ad-hoc codesigned by `link_macho.c` so the kernel will exec
it); per-milestone `test/cg/` cases green; ELF suite still
green.
+
+---
+
+## 9. Remaining work
+
+### 9.1 Phase 3.1 — `test-link` Path E on `aa64-macho`
+
+Phase 3 lit up the cc-driver path: `cfree cc -target arm64-apple-macos
+src.c -o exe -lSystem` produces a runnable binary. But
+`make test-link CFREE_TEST_OBJ=macho` reports **72 pass / 36 fail / 0
+skip** — the R (round-trip) and J (JIT) lanes are green across all 36
+test cases, while every E (exec) lane fails at link time with
+
+ fatal: link: undefined reference to '___fini_array_end'
+
+(`__fini_array_end` shown after the Mach-O leading-`_` strip in the
+diagnostic).
+
+The harness's `start.o` is built by host clang from
+`test/link/harness/start.c`, which references the array-boundary
+symbols (`__init_array_start/end`, `__fini_array_start/end`,
+`__preinit_array_start/end`, `__cfree_ifunc_init`,
+`__start_iplt_pairs`, `__stop_iplt_pairs`). On Mach-O, clang mangles
+those with a leading `_` so the .o carries `___fini_array_end` (3
+underscores).
+
+`link_layout.c::boundary_name` already prefixes every
+linker-synthesized boundary symbol on Mach-O. But when the runner is
+exercised in isolation, start.o's references to those boundary
+symbols **never get resolved** — either `emit_array_boundaries` runs
+but the resulting `LinkSymbol` is not matched against the per-input
+shadow's `defined=0` entry, or the link fails before it runs. Two
+suspects, in order:
+
+1. The fan-out in `emit_boundary_sym` matches by `Sym` equality. If
+ the start.o's per-input shadow interns
+ `___fini_array_end` to a different `Sym` than `boundary_name`
+ produces (e.g. one path goes through `pool_intern` and another
+ through `pool_intern_cstr` with a length mismatch), they wouldn't
+ match. Both call sites use the same global `Pool`, so this should
+ be a no-op — but worth confirming with a single byte-level
+ comparison instead of the Sym-equality short-circuit.
+2. `link-exe-runner` may panic in `resolve_symbols` (before
+ `emit_array_boundaries`) because the start.o's `__cfree_ifunc_init`
+ undef hits a code path that doesn't tolerate it. The earlier
+ widening of `is_def` (require backing storage) is the same change
+ that fixed the in-memory CALL26 case; it might be triggering a
+ different early panic now.
+
+Recommended next step: instrument `cfree_link_exe` to print every
+`compiler_panic` site and the LinkSymbol state at the moment of the
+first failure, then walk back from there. Stderr fprintfs from
+`link_layout.c` were observed not to reach the runner's captured
+stderr in one local repro — verify whether the runner's
+`cfree_writer` redirection is intercepting them, or use a
+`compiler_panic`-shaped marker that the runner does propagate.
+
+Other items the E lane will surface once it gets past the start.o
+link:
+
+- `21_fini_array` / `22_init_fini_both` — Mach-O destructors flow
+ through `__cxa_atexit`, not the `.fini_array` shape `start.c` walks.
+ Same `j_targets`-style restriction the J lane already uses; extend
+ to E.
+- `25a_gc_basic` / `25d_gc_chain` — `--gc-sections` granularity is
+ per-section, but Apple's clang emits a single `__TEXT,__text` per
+ `.o` (subsections-via-symbols is per-symbol). Same restriction.
+- `kernel_image` cases — freestanding ELF kernels with their own
+ linker scripts; not portable to Mach-O at all. `targets`
+ applicability marker should drop them on `aa64-macho`.
+- `bad/` cases that probe ELF-specific malformations (`shoff_oob`,
+ `wrong_class`) need either Mach-O analogues or `targets` exclusion.
+
+### 9.2 Phase 4 — `test-cg` Path E on `aa64-macho`
+
+`make test-cg CFREE_TEST_OBJ=macho` exercises every cg case end-to-end
+via Path E (compile + link + run). Phase 4 prerequisites:
+
+- `test/cg/run.sh` already routes Path E for Mach-O through
+ `link_macho_via_clang.sh` (per Phase 2 §7.4); switch to cfree's own
+ linker once Phase 3.1 is green.
+- `test/sdk/macos/` shim materializes `xcrun --show-sdk-path` for
+ `-isysroot` and `-lSystem` resolution. No-op on a Linux host —
+ cases requiring libc stay SKIP there.
+- `*.targets` audit: every cg case that's currently `aarch64-elf`-only
+ should either grow `arm64-macos` or document why it's restricted
+ (linker scripts, IFUNC, ELF-specific intrinsics).
+
+### 9.3 Phase 5 — x86_64-darwin
+
+Additive on top of Phases 3–4, gated on `MULTIARCH.md` Phase 3 (x64
+codegen) landing. Concrete scope:
+
+- `obj/macho_reloc_x86_64.c` — `RelocKind` ↔ `X86_64_RELOC_*`
+ (UNSIGNED, SIGNED, BRANCH, GOT, GOT_LOAD, SUBTRACTOR, TLV,
+ SIGNED_1/2/4). Mirror of `macho_reloc_aarch64.c`.
+- `link_emit_macho` arch dispatch — currently arm64-only at the
+ cputype/stub-encoding level. Add an x86_64 branch: 5-byte
+ `jmpq *got(%rip)` stubs (vs arm64's 12-byte adrp+ldr+br).
+- `apple_x64_vtable` — likely a literal re-export of `sysv_x64_vtable`
+ per §3.4 design; revisit if testing reveals a quirk.
+- `CFREE_TEST_ARCH=x64 CFREE_TEST_OBJ=macho` lane in CI on a
+ Darwin/x86_64 host (or skipped cleanly on Apple Silicon, since
+ Rosetta-emulation of cfree-emitted binaries isn't a goal).
+
+### 9.4 Phase 6 — universal (fat) binaries
+
+Optional. Fat header wrapping per-arch `MH_EXECUTE` slices. Defer
+until a user wants `lipo`-style multi-arch output. Implementation is
+shallow — a fat header prepended to the existing slice writer, plus
+matching multi-arch reader.
+
+### 9.5 Cleanup deferred from Phase 3
+
+- The Phase-2 deferred item (clang-emitted Mach-O round-trip via
+ `read_macho` → `emit_macho`) needs section-relative reloc and
+ `__compact_unwind` handling. Independent of linker work; lift out
+ into its own task.
+- `read_tbd` is a permissive token scanner (every `_id` becomes an
+ exported sym). Tighten to filter Obj-C metadata (`_OBJC_CLASS_$_*`)
+ and `R<rev>$_*` reverse-export markers if Apple ever adds a symbol
+ whose textual form would clash with a real C identifier.
+- `link_macho.c` carries a few oversize cleanup `free()` calls that
+ pass `0` for the byte size (the buffers came from `VEC_GROW` which
+ doesn't track capacity post-hand-off). Audit — leak-equivalent on
+ the panic path, harmless on success.
diff --git a/driver/lib_resolve.c b/driver/lib_resolve.c
@@ -77,6 +77,18 @@ int driver_lib_resolve(DriverEnv* env, const char* name, LibResolveMode mode,
* target keeps both side-by-side, so the difference is invisible
* for the cases the harness exercises. */
if (mode != LIB_RESOLVE_STATIC_ONLY) {
+ /* Apple .tbd / .dylib first — the macOS SDK ships .tbd stubs in
+ * place of full .dylib bytes for system libraries. */
+ if (try_suffix(env, name, ".tbd", search_dirs, nsearch_dirs, out_path,
+ out_size) == 0) {
+ if (out_kind) *out_kind = LIB_RESOLVE_KIND_TBD;
+ return 0;
+ }
+ if (try_suffix(env, name, ".dylib", search_dirs, nsearch_dirs, out_path,
+ out_size) == 0) {
+ if (out_kind) *out_kind = LIB_RESOLVE_KIND_SHARED;
+ return 0;
+ }
if (try_suffix(env, name, ".so", search_dirs, nsearch_dirs, out_path,
out_size) == 0) {
if (out_kind) *out_kind = LIB_RESOLVE_KIND_SHARED;
diff --git a/driver/lib_resolve.h b/driver/lib_resolve.h
@@ -21,6 +21,10 @@ typedef enum LibResolveMode {
typedef enum LibResolveKind {
LIB_RESOLVE_KIND_ARCHIVE = 0,
LIB_RESOLVE_KIND_SHARED = 1,
+ /* Apple .tbd text-based stub: same downstream routing as SHARED
+ * (feeds the linker as a DSO input), distinct so the cc driver can
+ * tell what suffix matched. */
+ LIB_RESOLVE_KIND_TBD = 2,
} LibResolveKind;
/* Resolve `-l<name>` against a list of `-L`-style search directories.
diff --git a/src/api/pipeline.c b/src/api/pipeline.c
@@ -400,8 +400,14 @@ int cfree_link_exe(CfreeCompiler* c, const CfreeLinkOptions* opts,
link_set_emit_static_exe(linker, 1);
/* PIE / dynamic-exe (Phase 4 + 6). Triggered by an explicit `pie`
* flag or by the presence of any DSO input — both shapes need
- * PT_INTERP / PT_DYNAMIC and the synthetic .dynsym machinery. */
- if (opts->pie || opts->inputs.ndso_bytes > 0) {
+ * PT_INTERP / PT_DYNAMIC and the synthetic .dynsym machinery.
+ *
+ * Mach-O has its own dyld machinery (LC_LOAD_DYLIB / chained fixups
+ * synthesized in link_emit_macho), so we leave emit_pie off there —
+ * layout_dyn is ELF-shaped and would generate spurious PLT/GOT
+ * sections the Mach-O writer doesn't know what to do with. */
+ if ((opts->pie || opts->inputs.ndso_bytes > 0) &&
+ c->target.obj != CFREE_OBJ_MACHO) {
link_set_pie(linker, 1);
link_set_interp_path(linker, opts->interp_path);
}
@@ -1042,6 +1048,8 @@ static const char* reloc_kind_name(u16 kind) {
return "R_AARCH64_COPY";
case R_X64_PC8:
return "R_X86_64_PC8";
+ case R_X64_32S:
+ return "R_X86_64_32S";
case R_X64_PLT32:
return "R_X86_64_PLT32";
case R_X64_GOTPCREL:
diff --git a/src/decl/decl.c b/src/decl/decl.c
@@ -89,7 +89,37 @@ DeclId decl_declare(DeclTable* t, const Decl* in) {
slot->storage != DS_REGISTER) {
SymBind bind = (slot->linkage == DL_EXTERNAL) ? SB_GLOBAL : SB_LOCAL;
SymKind k = (slot->type && slot->type->kind == TY_FUNC) ? SK_FUNC : SK_OBJ;
- slot->obj_sym = obj_symbol_ex(t->ob, slot->name, bind, (SymVis)slot->visibility,
+ Sym onwire = slot->name;
+ /* Mach-O C-symbol convention: every C identifier carries a leading
+ * `_` on disk (so `int main()` is exposed as `_main`). The cgtarget
+ * never sees the unmangled form again — emit_macho writes ObjSym
+ * names verbatim, and the linker's link_intern_c_name rewrites
+ * caller-supplied source-level names back to this on-disk form
+ * when matching entry symbols / JIT lookups. */
+ if (t->c->target.obj == CFREE_OBJ_MACHO &&
+ slot->linkage != DL_NONE) {
+ /* Apple's C-symbol convention is unconditional: every C
+ * identifier with linkage gets a leading `_` on disk, regardless
+ * of whether the source name already started with one
+ * (`extern int __init_array_start[];` becomes
+ * `___init_array_start`). The boundary_name helper in
+ * link_layout.c mirrors the same rule for linker-synthesized
+ * boundary symbols. */
+ size_t nl;
+ const char* nm = pool_str(t->c->global, slot->name, &nl);
+ if (nm) {
+ Heap* h = t->c->env->heap;
+ char* buf = (char*)h->alloc(h, nl + 2u, 1);
+ if (buf) {
+ buf[0] = '_';
+ if (nl) memcpy(buf + 1, nm, nl);
+ buf[nl + 1] = 0;
+ onwire = pool_intern(t->c->global, buf, (u32)(nl + 1u));
+ h->free(h, buf, nl + 2u);
+ }
+ }
+ }
+ slot->obj_sym = obj_symbol_ex(t->ob, onwire, bind, (SymVis)slot->visibility,
k, OBJ_SEC_NONE, 0, 0, 0);
}
return id;
diff --git a/src/link/link.c b/src/link/link.c
@@ -72,7 +72,12 @@ Linker* link_new(Compiler* c) {
l->heap = h;
LinkInputs_init(&l->inputs, h);
LinkArchives_init(&l->archives, h);
- l->entry_name = pool_intern_cstr(c->global, "_start");
+ /* Default entry: ELF/static convention uses `_start`. Mach-O's
+ * LC_MAIN names main directly (dyld owns the C runtime startup),
+ * so the on-disk symbol is `_main` (the mangled form of `main`). */
+ l->entry_name = (c->target.obj == CFREE_OBJ_MACHO)
+ ? pool_intern_cstr(c->global, "_main")
+ : pool_intern_cstr(c->global, "_start");
/* Match the rest of libcfree's lifetime story: the new'd Linker is
* registered for cleanup in case a panic fires before link_free. */
l->deferred = compiler_defer(c, linker_cleanup, l);
@@ -150,16 +155,44 @@ LinkInputId link_add_obj_bytes(Linker* l, const char* name, const u8* data,
LinkInputId link_add_dso_bytes(Linker* l, const char* name, const u8* data,
size_t len) {
- ObjBuilder* ob;
+ ObjBuilder* ob = NULL;
LinkInput* in;
LinkInputId id;
Sym soname = 0;
+ CfreeBinFmt fmt;
+ const char* reader_name;
if (!l || !data || !len) return LINK_INPUT_NONE;
- ob = read_elf_dso(l->c, name, data, len, &soname);
+ /* Three DSO surfaces are supported on input:
+ * - ELF ET_DYN .so (read_elf_dso)
+ * - Mach-O MH_DYLIB / MH_BUNDLE (read_macho_dso)
+ * - Apple .tbd text-based stubs (read_tbd; sniffed via leading "---")
+ * The first two are detected via cfree_detect_fmt; .tbd is a textual
+ * format with no binary magic, so we check for the leading "---"
+ * document marker before trying the binary formats. */
+ if (len >= 3 && data[0] == '-' && data[1] == '-' && data[2] == '-') {
+ ob = read_tbd(l->c, name, data, len, &soname);
+ reader_name = "read_tbd";
+ } else {
+ fmt = cfree_detect_fmt(data, len);
+ switch (fmt) {
+ case CFREE_BIN_ELF:
+ ob = read_elf_dso(l->c, name, data, len, &soname);
+ reader_name = "read_elf_dso";
+ break;
+ case CFREE_BIN_MACHO:
+ ob = read_macho_dso(l->c, name, data, len, &soname);
+ reader_name = "read_macho_dso";
+ break;
+ default:
+ compiler_panic(l->c, no_loc(),
+ "link_add_dso_bytes: unsupported DSO format "
+ "(fmt=%u) for '%s'",
+ (u32)fmt, name ? name : "(unnamed)");
+ }
+ }
if (!ob)
compiler_panic(l->c, no_loc(),
- "link_add_dso_bytes: read_elf_dso returned NULL for '%s'",
- name ? name : "(unnamed)");
+ "link_add_dso_bytes: %s returned NULL for '%s'",
+ reader_name, name ? name : "(unnamed)");
in = inputs_push(l, &id);
in->kind = LINK_INPUT_DSO_BYTES;
in->obj = ob;
@@ -483,14 +516,8 @@ void link_emit_image_writer(LinkImage* img, Writer* w) {
link_emit_elf(img, w);
return;
case CFREE_OBJ_MACHO:
- compiler_panic(img->c, no_loc(),
- "link_emit_image_writer: Mach-O exe emit not yet "
- "implemented (paused; see doc/MULTIOBJ.md Phase 3). "
- "Path R (round-trip) and path J (in-process JIT) "
- "work today on aa64-macho via read_macho + the "
- "format-agnostic LinkImage; path E needs the full "
- "dyld-loadable MH_EXECUTE linker (LC_LOAD_DYLIB "
- "libSystem, chained fixups, ad-hoc code signature).");
+ link_emit_macho(img, w);
+ return;
case CFREE_OBJ_COFF:
compiler_panic(img->c, no_loc(),
"link_emit_image_writer: COFF/PE linker emit not yet "
diff --git a/src/link/link_internal.h b/src/link/link_internal.h
@@ -241,6 +241,11 @@ struct LinkImage {
Compiler* c;
Heap* heap;
CompilerCleanup* deferred; /* registered by link_resolve */
+ /* Borrowed back-pointer set by link_resolve. The Linker is not
+ * mutated through this handle; it's used by the format-specific emit
+ * passes that need to walk LinkInputs (e.g. resolving an imported
+ * symbol's dso_input_id back to the providing dylib's install-name). */
+ struct Linker* linker;
LinkSyms syms; /* LinkSymId = slot index + 1 */
SymHash globals; /* name -> LinkSymId for global/weak */
@@ -311,6 +316,7 @@ void link_reloc_apply(Compiler*, RelocKind, u8* P_bytes, u64 S, i64 A, u64 P);
* peer (link_macho.c) and COFF peer arrive in later phases of
* doc/MULTIOBJ.md. */
void link_emit_elf(LinkImage*, Writer*);
+void link_emit_macho(LinkImage*, Writer*);
/* Format-agnostic 16-byte image identity, derived from per-segment
* post-shift bytes + vaddrs/sizes. ELF wraps it in a
diff --git a/src/link/link_layout.c b/src/link/link_layout.c
@@ -171,7 +171,16 @@ static void resolve_symbols(Linker* l, LinkImage* img) {
const ObjSym* s = e.sym;
LinkSymbol rec;
LinkSymId existing;
- int is_def = (s->kind != SK_UNDEF);
+ /* "Defined" means: not SK_UNDEF AND has backing storage — a
+ * containing section, an absolute value, or a COMMON reservation.
+ * cgtarget paths emit SK_FUNC / SK_OBJ for an `extern`
+ * declaration's bookkeeping symbol with section_id = 0; those are
+ * still undefs from the linker's perspective. ELF's read_elf
+ * already normalizes those to SK_UNDEF; this check unifies the
+ * in-memory pipeline with that. */
+ int is_def = (s->kind != SK_UNDEF) &&
+ (s->kind == SK_ABS || s->kind == SK_COMMON ||
+ s->section_id != OBJ_SEC_NONE);
memset(&rec, 0, sizeof(rec));
rec.name = s->name;
@@ -1015,6 +1024,28 @@ static u64 eval_link_expr(Linker* l, LinkImage* img, u64 dot,
}
}
+/* Intern the on-disk spelling of a linker-synthesized boundary symbol.
+ * For a Mach-O target the result is `name` with one extra leading `_`
+ * (so `__init_array_start` becomes `___init_array_start`, which is what
+ * `extern void (*__init_array_start[])(void);` compiles to a reference
+ * to). Every other object format interns `name` verbatim.
+ *
+ * NOTE(review): when the scratch allocation fails on Mach-O this
+ * silently interns the unprefixed name rather than reporting OOM —
+ * confirm that fallback is intended. */
+static Sym boundary_name(Linker* l, const char* name) {
+  Compiler* c = l->c;
+  Heap* h;
+  size_t name_len;
+  char* mangled;
+  Sym result;
+
+  if (c->target.obj != CFREE_OBJ_MACHO)
+    return pool_intern_cstr(c->global, name);
+
+  h = c->env->heap;
+  name_len = strlen(name);
+  mangled = (char*)h->alloc(h, name_len + 2u, 1);
+  if (!mangled) return pool_intern_cstr(c->global, name);
+
+  /* Scratch holds "_" + name + NUL; the NUL is not part of the
+   * interned length. */
+  mangled[0] = '_';
+  memcpy(mangled + 1, name, name_len);
+  mangled[name_len + 1] = 0;
+  result = pool_intern(c->global, mangled, (u32)(name_len + 1u));
+  h->free(h, mangled, name_len + 2u);
+  return result;
+}
+
static void emit_boundary_sym(Linker* l, LinkImage* img, const char* name,
u64 vaddr); /* defined below */
@@ -1497,7 +1528,7 @@ static void link_symbols_to_sections(Linker* l, LinkImage* img) {
static void emit_boundary_sym(Linker* l, LinkImage* img, const char* name,
u64 vaddr) {
- Sym sym = pool_intern_cstr(l->c->global, name);
+ Sym sym = boundary_name(l, name);
LinkSymId id = symhash_get(&img->globals, sym);
LinkSymbol rec;
u32 i, n;
@@ -1726,6 +1757,8 @@ static u8 reloc_width(RelocKind k) {
case R_PC32:
case R_GOT32:
case R_PLT32:
+ case R_X64_PLT32:
+ case R_X64_32S:
return 4;
case R_ABS64:
case R_REL64:
@@ -2762,6 +2795,7 @@ LinkImage* link_resolve(Linker* l) {
img = link_image_alloc(l->c);
h = img->heap;
+ img->linker = l;
/* Per-input map storage. */
img->ninput_maps = LinkInputs_count(&l->inputs);
diff --git a/src/link/link_macho.c b/src/link/link_macho.c
@@ -0,0 +1,2175 @@
+/* link_emit_macho — write a dyld-loadable arm64 MH_EXECUTE.
+ *
+ * Mach-O peer of link_emit_elf. Produces a position-independent
+ * MH_EXECUTE that links against libSystem.B.dylib (or any other
+ * dylib/.tbd input) via LC_LOAD_DYLIB + LC_DYLD_CHAINED_FIXUPS. The
+ * binary is ad-hoc codesigned at the tail so the kernel will exec it
+ * on macOS 11+.
+ *
+ * Layout (Apple's stock arm64 layout):
+ *
+ * __PAGEZERO vmaddr 0, vmsize 0x100000000, no file bytes
+ * __TEXT (R-X)
+ * mach_header_64
+ * load commands
+ * [SF_EXEC sections — .text]
+ * [SF_ALLOC R-only sections — .rodata, init/fini_array, etc.]
+ * __stubs (12B per import-func)
+ * __DATA_CONST (RW initially, dyld marks R-only after fixups)
+ * __got (8B per import — both data and func imports)
+ * __DATA (R-W)
+ * [SF_WRITE sections — .data, .bss]
+ * __LINKEDIT (R)
+ * dyld_chained_fixups blob
+ * dyld_exports_trie blob
+ * function starts (empty)
+ * data in code (empty)
+ * symtab
+ * indirect symbol table (one entry per __stubs and __got slot)
+ * strtab
+ * code signature
+ *
+ * Imports are routed:
+ * CALL26/JUMP26 against an imported function -> __stubs entry
+ * GOT_LOAD_PAGE21/PAGEOFF12 against any import -> __got slot
+ * ABS64 against an imported symbol -> chained-bind at site
+ * ABS64 against a defined internal symbol -> chained-rebase at site
+ *
+ * arm64-only. x86_64-macos arrives with x64 codegen. */
+
+#include <string.h>
+
+#include "core/bytes.h"
+#include "core/heap.h"
+#include "core/pool.h"
+#include "core/util.h"
+#include "core/vec.h"
+#include "link/link.h"
+#include "link/link_internal.h"
+#include "obj/macho.h"
+
+/* Placeholder source location (all fields zero), passed to
+ * compiler_panic by the call sites in this file. */
+static SrcLoc no_loc(void) {
+  return (SrcLoc){0, 0, 0};
+}
+
+/* ---- constants ---- */
+/* Matches the __PAGEZERO vmsize in the layout comment above (4 GiB). */
+#define MZ_PAGEZERO 0x100000000ULL
+/* 16 KiB. NOTE(review): presumably the segment alignment granule —
+ * confirm against the layout pass. */
+#define MZ_PAGE 0x4000ULL
+/* Bytes per __stubs entry (three 4-byte instructions, see encode_stub). */
+#define MZ_STUB_SIZE 12u
+/* Bytes per __got slot. */
+#define MZ_GOT_SIZE 8u
+
+/* Chained-fixups format selectors. NOTE(review): values presumably
+ * mirror Apple's <mach-o/fixup-chains.h> — confirm. */
+#define DYLD_CHAINED_PTR_64 2u
+#define DYLD_CHAINED_IMPORT 1u
+
+/* Segment protection bits (OR-able). */
+#define VM_PROT_READ 0x1u
+#define VM_PROT_WRITE 0x2u
+#define VM_PROT_EXECUTE 0x4u
+
+/* Code-signature blob constants. CS_PAGE_SIZE_LOG2 = 12 gives 4 KiB
+ * hash pages; CS_SHA256_LEN is the digest size in bytes. */
+#define CS_MAGIC_EMBEDDED_SIGNATURE 0xfade0cc0u
+#define CS_MAGIC_CODEDIRECTORY 0xfade0c02u
+#define CSSLOT_CODEDIRECTORY 0u
+#define CS_HASHTYPE_SHA256 2u
+#define CS_SHA256_LEN 32u
+#define CS_PAGE_SIZE_LOG2 12u
+#define CS_EXECSEG_MAIN_BINARY 1u
+
+/* extra LC ids */
+/* NOTE(review): the `_C` suffix presumably avoids clashing with names
+ * already declared in obj/macho.h — confirm. */
+#define LC_DYLD_INFO_ONLY (0x22u | 0x80000000u)
+#define LC_FUNCTION_STARTS_C 0x26u
+#define LC_DATA_IN_CODE_C 0x29u
+#define LC_CODE_SIGNATURE_C 0x1du
+
+/* ---- byte buffer ---- */
+
+/* Growable byte buffer used by the mbuf_* helpers below. */
+typedef struct MByte {
+ /* Allocator that backs `data`; mbuf_fini frees through it. */
+ Heap* heap;
+ /* Storage; NULL until the first growth. */
+ u8* data;
+ /* Bytes currently in use. */
+ u32 len;
+ /* Capacity handed to VEC_GROW and passed back to heap->free. */
+ u32 cap;
+} MByte;
+
+/* Put `b` into the empty state bound to heap `h`. Nothing is allocated
+ * here. */
+static void mbuf_init(MByte* b, Heap* h) {
+  b->data = NULL;
+  b->cap = 0;
+  b->len = 0;
+  b->heap = h;
+}
+/* Release the buffer's storage (if any) and return it to the empty
+ * state. The heap binding is left in place. */
+static void mbuf_fini(MByte* b) {
+  if (b->data != NULL) {
+    Heap* owner = b->heap;
+    owner->free(owner, b->data, b->cap);
+  }
+  b->len = 0;
+  b->cap = 0;
+  b->data = NULL;
+}
+/* Ensure capacity for at least `need` bytes; a no-op when the buffer is
+ * already that large.
+ *
+ * NOTE(review): the VEC_GROW result is discarded. collect_imports
+ * treats a non-zero VEC_GROW result as OOM, so a failed growth here
+ * would go unnoticed and the caller would write past `cap` — confirm
+ * whether VEC_GROW can fail on this heap. */
+static void mbuf_reserve(MByte* b, u32 need) {
+  if (need > b->cap) {
+    (void)VEC_GROW(b->heap, b->data, b->cap, need);
+  }
+}
+/* Zero-pad the buffer so its length is a multiple of `a`. Returns the
+ * (possibly unchanged) length afterwards. */
+static u32 mbuf_align(MByte* b, u32 a) {
+  const u32 aligned = (u32)ALIGN_UP((u64)b->len, (u64)a);
+  if (aligned <= b->len) return b->len;
+  mbuf_reserve(b, aligned);
+  memset(b->data + b->len, 0, aligned - b->len);
+  b->len = aligned;
+  return aligned;
+}
+/* Append `n` bytes from `src`. Returns the offset at which the bytes
+ * were placed (the length before the append). `src` is not read when
+ * `n` is 0. */
+static u32 mbuf_append(MByte* b, const void* src, u32 n) {
+  const u32 start = b->len;
+  const u32 end = start + n;
+  mbuf_reserve(b, end);
+  if (n != 0) memcpy(b->data + start, src, n);
+  b->len = end;
+  return start;
+}
+/* Append `v` as 4 little-endian bytes; returns the offset written at. */
+static u32 mbuf_u32(MByte* b, u32 v) {
+  u8 le[4];
+
+  wr_u32_le(le, v);
+  return mbuf_append(b, le, 4);
+}
+/* Append `v` as 2 little-endian bytes; returns the offset written at. */
+static u32 mbuf_u16(MByte* b, u16 v) {
+  u8 le[2];
+
+  wr_u16_le(le, v);
+  return mbuf_append(b, le, 2);
+}
+/* Append `v` as 8 little-endian bytes; returns the offset written at. */
+static u32 mbuf_u64(MByte* b, u64 v) {
+  u8 le[8];
+
+  wr_u64_le(le, v);
+  return mbuf_append(b, le, 8);
+}
+/* Append a single byte; returns the offset written at. */
+static u32 mbuf_u8(MByte* b, u8 v) {
+  return mbuf_append(b, &v, 1);
+}
+/* Append the `n` bytes at `s` followed by a NUL terminator. Returns the
+ * offset of the first byte; the buffer grows by n + 1. */
+static u32 mbuf_str(MByte* b, const char* s, u32 n) {
+  const u32 start = b->len;
+  u8* dst;
+
+  mbuf_reserve(b, start + n + 1u);
+  dst = b->data + start;
+  if (n != 0) memcpy(dst, s, n);
+  dst[n] = 0;
+  b->len = start + n + 1u;
+  return start;
+}
+
+/* ---- imports + dylibs ---- */
+
+/* One imported symbol, as recorded by collect_imports. */
+typedef struct MachImp {
+ /* Id of the LinkSymbol this import was created from. */
+ LinkSymId sym;
+ /* Interned symbol name, copied from the LinkSymbol. */
+ Sym name;
+ u32 dylib_ord; /* 1-based ordinal into LC_LOAD_DYLIB list */
+ u32 stub_idx; /* 1-based index into __stubs (0 if data import) */
+ u32 got_idx; /* 1-based index into __got */
+ u32 imports_strx; /* offset into chained-fixups symbol pool */
+ /* 1 when the symbol kind is SK_FUNC or SK_IFUNC, else 0. */
+ u8 is_func;
+ /* 1 when the symbol binding is SB_WEAK, else 0. */
+ u8 weak;
+ u8 pad[2];
+} MachImp;
+
+/* One dylib in the load list. dylib_ordinal_of derives the ordinal from
+ * the entry's position in the array plus one. */
+typedef struct MachDylib {
+ /* Interned install name; compared by Sym equality. */
+ Sym install;
+} MachDylib;
+
+/* ---- planned section ---- */
+
+/* One output section planned for the Mach-O image: where its bytes come
+ * from and where it is placed. */
+typedef struct MSec {
+ /* Source: either a LinkSection (link_sec_id != 0) or a synthetic
+ * pre-built byte buffer (data + size). */
+ LinkSectionId link_sec_id;
+ const u8* synth_data;
+ u32 synth_size;
+ /* Mach-O placement */
+ const char* segname;
+ const char* sectname;
+ u64 vaddr;
+ u64 file_offset;
+ u64 size;
+ u32 align;
+ u32 flags; /* S_TYPE | S_ATTR_* */
+ /* NOTE(review): presumably written to the section header's
+ * reserved1/reserved2 fields — confirm in the emit pass. */
+ u32 reserved1;
+ u32 reserved2;
+ u8 segidx; /* 1=__TEXT, 2=__DATA_CONST, 3=__DATA */
+ /* NOTE(review): presumably marks sections with no file bytes (.bss
+ * style) — confirm in the layout pass. */
+ u8 is_zerofill;
+ u8 pad[6];
+} MSec;
+
+/* One planned segment. MCtx holds five of these (PAGEZERO, TEXT,
+ * DATA_CONST, DATA, LINKEDIT). */
+typedef struct MSeg {
+ const char* name;
+ /* Protection masks; NOTE(review): presumably VM_PROT_* bits. */
+ u32 maxprot;
+ u32 initprot;
+ /* Placement in memory (vmaddr/vmsize) and in the file
+ * (fileoff/filesize). */
+ u64 vmaddr;
+ u64 vmsize;
+ u64 fileoff;
+ u64 filesize;
+ /* Number of MSec entries belonging to this segment. */
+ u32 nsects;
+ u32 first_sec; /* first index into MSec[] */
+} MSeg;
+
+/* ---- main context ---- */
+
+/* All state for one link_emit_macho run: the inputs being emitted, the
+ * collected imports and dylibs, the planned sections and segments, and
+ * the __LINKEDIT blobs with their offsets. */
+typedef struct MCtx {
+ LinkImage* img;
+ Compiler* c;
+ Heap* h;
+ Writer* w;
+ Linker* linker;
+
+ /* imports */
+ MachImp* imports;
+ u32 nimports;
+ u32 nimport_funcs;
+ MachDylib* dylibs;
+ u32 ndylibs;
+ /* sym->import index, 1-based, 0 = not an import. Sized to LinkSymId space + 1. */
+ u32* sym_to_imp;
+ u32 sym_to_imp_size;
+
+ /* sections + segments */
+ MSec* secs;
+ u32 nsecs;
+ MSeg segs[5]; /* PAGEZERO, TEXT, DATA_CONST, DATA, LINKEDIT */
+ u32 nsegs;
+
+ /* Synthetic byte buffers, owned. */
+ u8* stubs_bytes;
+ u32 stubs_size;
+ u8* got_bytes;
+ u32 got_size;
+
+ /* Final layout (computed during plan) */
+ u64 text_vaddr;
+ u64 text_filesz;
+ u64 stubs_vaddr;
+ u64 got_vaddr;
+ u64 data_const_vaddr;
+ u64 data_vaddr;
+ u64 data_const_filesz;
+ u64 data_filesz;
+ u64 data_memsz;
+ u64 linkedit_vaddr;
+ u64 linkedit_fileoff;
+ u32 entry_offset; /* offset of entry within __TEXT segment */
+
+ u64 headers_size; /* header + loadcmds */
+
+ /* LINKEDIT contents */
+ MByte chained_fixups;
+ MByte exports_trie;
+ MByte symtab; /* binary nlist_64 array */
+ MByte strtab;
+ MByte indirect; /* u32 array */
+ MByte fn_starts;
+ MByte data_in_code;
+ MByte codesig;
+
+ /* NOTE(review): presumably file offsets of the LINKEDIT blobs above —
+ * confirm in the layout pass. */
+ u32 chained_fixups_off;
+ u32 exports_trie_off;
+ u32 fn_starts_off;
+ u32 data_in_code_off;
+ u32 symtab_off;
+ u32 indirect_off;
+ u32 strtab_off;
+ u32 codesig_off;
+ u32 codesig_size;
+ u32 nsyms;
+
+ u8 uuid[16];
+} MCtx;
+
+/* ---- helpers for finding LinkSymbol vaddr ---- */
+
+/* Map a 1-based LinkSymId to its LinkSymbol. Returns NULL for
+ * LINK_SYM_NONE and for ids beyond the symbol table. */
+static LinkSymbol* sym_at(LinkImage* img, LinkSymId id) {
+  if (id == LINK_SYM_NONE) return NULL;
+  if (id > LinkSyms_count(&img->syms)) return NULL;
+  return LinkSyms_at(&img->syms, id - 1);
+}
+
+/* ---- pass: build __stubs and __got bytes ---- */
+
+/* Store `v` at `p` as 4 little-endian bytes. */
+static void write_u32(u8* p, u32 v) {
+  wr_u32_le(p, v);
+}
+
+/* Write one 12-byte arm64 import stub to `out`:
+ *   ADRP x16, __got_slot@PAGE
+ *   LDR  x16, [x16, #__got_slot@PAGEOFF]
+ *   BR   x16
+ * stub_vaddr is the address the ADRP executes at; got_slot_vaddr is the
+ * __got entry whose contents the stub jumps to.
+ */
+static void encode_stub(u8* out, u64 stub_vaddr, u64 got_slot_vaddr) {
+  const i64 page_mask = ~(i64)0xfff;
+  i64 slot_page = ((i64)got_slot_vaddr) & page_mask;
+  i64 stub_page = ((i64)stub_vaddr) & page_mask;
+  i64 page_delta = (slot_page - stub_page) >> 12;
+
+  /* ADRP x16: the 21-bit page delta is split into immlo (2 bits, at bit
+   * 29) and immhi (19 bits, at bit 5). */
+  u32 adrp = 0x90000010u;
+  adrp |= (u32)(page_delta & 0x3u) << 29;
+  adrp |= (u32)((page_delta >> 2) & 0x7ffffu) << 5;
+
+  /* LDR x16, [x16, #imm]: the low 12 address bits divided by 8 (the slot
+   * is 8-byte aligned), placed at bit 10. */
+  u32 slot_lo12 = (u32)(got_slot_vaddr & 0xfffu);
+  u32 ldr = 0xF9400210u | (((slot_lo12 >> 3) & 0xfffu) << 10);
+
+  write_u32(out + 0, adrp);
+  write_u32(out + 4, ldr);
+  write_u32(out + 8, 0xD61F0200u); /* BR x16 */
+}
+
+/* ---- pass: collect imports ---- */
+
+/* Look up `install` in x->dylibs. Returns its 1-based ordinal, or 0 when
+ * the install name has not been recorded. */
+static u32 dylib_ordinal_of(MCtx* x, Sym install) {
+  for (u32 ord = 1; ord <= x->ndylibs; ++ord) {
+    if (x->dylibs[ord - 1u].install == install) {
+      return ord;
+    }
+  }
+  return 0;
+}
+
+/* Build the import tables in `x` from the image's symbols and relocs:
+ *   1. one MachImp per canonical imported symbol, plus the sym_to_imp map;
+ *   2. mark as functions any import that is the target of a branch reloc;
+ *   3. assign each import the ordinal of the dylib that provides it and
+ *      make sure every DSO input has an entry in x->dylibs;
+ *   4. number the __got slots and (for functions) the __stubs entries.
+ * Panics through compiler_panic on allocation failure. */
+static void collect_imports(MCtx* x) {
+ LinkImage* img = x->img;
+ Heap* h = x->h;
+
+ /* Symbol ids are 1-based, so the map needs count + 1 entries. */
+ x->sym_to_imp_size = LinkSyms_count(&img->syms) + 1u;
+ x->sym_to_imp =
+ (u32*)h->alloc(h, sizeof(u32) * x->sym_to_imp_size, _Alignof(u32));
+ if (!x->sym_to_imp)
+ compiler_panic(x->c, no_loc(), "link_macho: oom on sym_to_imp");
+ memset(x->sym_to_imp, 0, sizeof(u32) * x->sym_to_imp_size);
+
+ /* Capacities for the two VEC_GROW-managed arrays (imports, dylibs). */
+ u32 cap = 0, cap_d = 0;
+ for (u32 i = 0; i < LinkSyms_count(&img->syms); ++i) {
+ LinkSymbol* s = LinkSyms_at(&img->syms, i);
+ if (!s->imported) continue;
+ if (s->name == 0) continue;
+ /* Only the symbol registered under this name in img->globals gets an
+ * import entry; other symbols with the same name are skipped here and
+ * resolved through that canonical id later. */
+ LinkSymId canon = symhash_get(&img->globals, s->name);
+ if (canon != LINK_SYM_NONE && canon != s->id) continue;
+ if (VEC_GROW(h, x->imports, cap, x->nimports + 1u))
+ compiler_panic(x->c, no_loc(), "link_macho: oom on imports");
+ MachImp* mi = &x->imports[x->nimports++];
+ memset(mi, 0, sizeof(*mi));
+ mi->sym = s->id;
+ mi->name = s->name;
+ mi->is_func = (s->kind == SK_FUNC || s->kind == SK_IFUNC) ? 1 : 0;
+ mi->weak = (s->bind == SB_WEAK) ? 1 : 0;
+ /* nimports was already incremented, so this stores the 1-based index. */
+ x->sym_to_imp[s->id] = x->nimports;
+ }
+
+ /* Back-classify: any CALL26/JUMP26 reloc target -> function. */
+ for (u32 i = 0; i < LinkRelocs_count(&img->relocs); ++i) {
+ LinkRelocApply* r = LinkRelocs_at(&img->relocs, i);
+ if (r->kind != R_AARCH64_CALL26 && r->kind != R_AARCH64_JUMP26) continue;
+ if (r->target == LINK_SYM_NONE || r->target >= x->sym_to_imp_size) continue;
+ u32 idx = x->sym_to_imp[r->target];
+ if (!idx) {
+ /* Resolve through canonical. */
+ LinkSymbol* tgt = LinkSyms_at(&img->syms, r->target - 1);
+ if (tgt->name == 0) continue;
+ LinkSymId canon = symhash_get(&img->globals, tgt->name);
+ if (canon == LINK_SYM_NONE || canon >= x->sym_to_imp_size) continue;
+ idx = x->sym_to_imp[canon];
+ if (!idx) continue;
+ /* Stash so future lookups skip this loop. */
+ x->sym_to_imp[r->target] = idx;
+ }
+ x->imports[idx - 1].is_func = 1;
+ }
+
+ /* Build dylib ordinal table. Pull soname from the providing DSO. */
+ for (u32 i = 0; i < x->nimports; ++i) {
+ MachImp* mi = &x->imports[i];
+ LinkSymbol* s = sym_at(img, mi->sym);
+ LinkInputId dso_id = s ? s->dso_input_id : LINK_INPUT_NONE;
+ Sym install = 0;
+ if (dso_id != LINK_INPUT_NONE && x->linker &&
+ dso_id - 1u < LinkInputs_count(&x->linker->inputs))
+ install = LinkInputs_at(&x->linker->inputs, dso_id - 1u)->soname;
+ /* No known provider: attribute the import to libSystem. */
+ if (install == 0)
+ install = pool_intern_cstr(x->c->global, "/usr/lib/libSystem.B.dylib");
+ u32 ord = dylib_ordinal_of(x, install);
+ if (!ord) {
+ if (VEC_GROW(h, x->dylibs, cap_d, x->ndylibs + 1u))
+ compiler_panic(x->c, no_loc(), "link_macho: oom on dylibs");
+ x->dylibs[x->ndylibs].install = install;
+ ++x->ndylibs;
+ ord = x->ndylibs;
+ }
+ mi->dylib_ord = ord;
+ }
+
+ /* Always include every DSO input's install-name. */
+ if (x->linker) {
+ for (u32 ii = 0; ii < LinkInputs_count(&x->linker->inputs); ++ii) {
+ LinkInput* in = LinkInputs_at(&x->linker->inputs, ii);
+ if (in->kind != LINK_INPUT_DSO_BYTES) continue;
+ if (in->soname == 0) continue;
+ if (dylib_ordinal_of(x, in->soname)) continue;
+ if (VEC_GROW(h, x->dylibs, cap_d, x->ndylibs + 1u))
+ compiler_panic(x->c, no_loc(), "link_macho: oom on dylibs");
+ x->dylibs[x->ndylibs].install = in->soname;
+ ++x->ndylibs;
+ }
+ }
+
+ /* Assign stub_idx + got_idx. Both are 1-based: every import gets a got
+ * slot in import order, only function imports get a stub. */
+ u32 stub_run = 0;
+ for (u32 i = 0; i < x->nimports; ++i) {
+ MachImp* mi = &x->imports[i];
+ mi->got_idx = i + 1u;
+ if (mi->is_func) mi->stub_idx = ++stub_run;
+ }
+ x->nimport_funcs = stub_run;
+}
+
+/* ---- pass: plan Mach-O sections ----
+ *
+ * Walks LinkImage sections. Each non-zero-size LinkSection becomes one
+ * MSec. Synthetic __stubs and __got are appended at the right segment
+ * boundaries. Vaddr and file_offset are assigned in a single forward
+ * pass starting at __TEXT base; __PAGEZERO and __LINKEDIT are special. */
+
+/* Reset *s to zero and record its name plus maximum/initial VM
+ * protections. Every other field is left at 0 for plan_layout to fill. */
+static void seg_init(MSeg* s, const char* name, u32 maxp, u32 initp) {
+  *s = (MSeg){
+      .name = name,
+      .maxprot = maxp,
+      .initprot = initp,
+  };
+}
+
+/* 1 when the section carries SF_WRITE, else 0. */
+static int sec_is_writable(const LinkSection* ls) {
+  return (ls->flags & SF_WRITE) ? 1 : 0;
+}
+/* 1 when the section carries SF_EXEC, else 0. */
+static int sec_is_exec(const LinkSection* ls) {
+  return (ls->flags & SF_EXEC) ? 1 : 0;
+}
+/* 1 when the section's semantic is SSEM_NOBITS (no file bytes), else 0. */
+static int sec_is_zerofill(const LinkSection* ls) {
+  if (ls->sem == SSEM_NOBITS) {
+    return 1;
+  }
+  return 0;
+}
+
+/* Pick (segname, sectname) for a LinkSection.
+ *
+ * A section name of the form "__SEG,__sect" (round-tripped from a Mach-O
+ * input) is split at the first comma and each half is truncated to the
+ * 16-character Mach-O name width. Any other name is ignored and the pair
+ * is derived from the section's flags.
+ *
+ * The returned strings outlive this call: callers store them in MSec
+ * entries, so the split halves are interned in the compiler's string
+ * pool rather than kept in function-static buffers (which every later
+ * call would overwrite, renaming all previously planned sections).
+ * NOTE(review): this relies on pool_str pointers staying valid for the
+ * lifetime of the pool — confirm against the pool implementation. */
+static void pick_macho_names(const LinkSection* ls, Compiler* c,
+                             const char** out_seg, const char** out_sect) {
+  size_t nlen = 0;
+  const char* nm = pool_str(c->global, ls->name, &nlen);
+  if (nm) {
+    for (size_t i = 0; i < nlen; ++i) {
+      if (nm[i] != ',') continue;
+
+      /* Copy both halves out before interning: interning may touch the
+       * pool that `nm` points into. */
+      char seg_buf[17], sect_buf[17];
+      size_t seg_n = i > 16 ? 16 : i;
+      size_t sect_n = (nlen - i - 1) > 16 ? 16 : (nlen - i - 1);
+      memcpy(seg_buf, nm, seg_n);
+      seg_buf[seg_n] = 0;
+      memcpy(sect_buf, nm + i + 1, sect_n);
+      sect_buf[sect_n] = 0;
+
+      Sym seg_sym = pool_intern_cstr(c->global, seg_buf);
+      Sym sect_sym = pool_intern_cstr(c->global, sect_buf);
+      size_t unused_len = 0;
+      const char* seg = pool_str(c->global, seg_sym, &unused_len);
+      const char* sect = pool_str(c->global, sect_sym, &unused_len);
+      /* An empty half may not have a pool string; callers strcmp the
+       * result, so never hand back NULL. */
+      *out_seg = seg ? seg : "";
+      *out_sect = sect ? sect : "";
+      return;
+    }
+  }
+  /* Derive from flags. */
+  if (sec_is_exec(ls)) {
+    *out_seg = "__TEXT";
+    *out_sect = "__text";
+  } else if (sec_is_writable(ls)) {
+    *out_seg = "__DATA";
+    *out_sect = sec_is_zerofill(ls) ? "__bss" : "__data";
+  } else {
+    *out_seg = "__TEXT";
+    *out_sect = "__const";
+  }
+}
+
+/* Plan the Mach-O image layout.
+ *
+ * Steps, in order:
+ *   1. initialise the five segments (PAGEZERO, TEXT, DATA_CONST, DATA,
+ *      LINKEDIT);
+ *   2. enumerate MSec entries: non-writable LinkSections plus synthetic
+ *      __stubs into __TEXT, synthetic __got into __DATA_CONST, writable
+ *      and zerofill LinkSections into __DATA;
+ *   3. compute the total load-command size, which fixes headers_size;
+ *   4. assign vaddr/file_offset to every section and size every segment;
+ *   5. record where __LINKEDIT starts (its size is not known yet);
+ *   6. encode the __stubs instructions now that __stubs and __got have
+ *      addresses.
+ * Panics through compiler_panic on allocation failure. */
+static void plan_layout(MCtx* x) {
+ LinkImage* img = x->img;
+ Heap* h = x->h;
+
+ /* PAGEZERO */
+ seg_init(&x->segs[0], "__PAGEZERO", 0, 0);
+ x->segs[0].vmaddr = 0;
+ x->segs[0].vmsize = MZ_PAGEZERO;
+ x->segs[0].fileoff = 0;
+ x->segs[0].filesize = 0;
+ x->segs[0].nsects = 0;
+ x->segs[0].first_sec = 0;
+
+ /* Segments 1..4 */
+ seg_init(&x->segs[1], "__TEXT",
+ VM_PROT_READ | VM_PROT_EXECUTE, VM_PROT_READ | VM_PROT_EXECUTE);
+ seg_init(&x->segs[2], "__DATA_CONST",
+ VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ | VM_PROT_WRITE);
+ seg_init(&x->segs[3], "__DATA",
+ VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ | VM_PROT_WRITE);
+ seg_init(&x->segs[4], "__LINKEDIT", VM_PROT_READ, VM_PROT_READ);
+ x->nsegs = 5;
+
+ /* Pre-allocate MSec capacity: every LinkSection + 2 synth (__stubs,
+ * __got). (LinkSections from the dynamic-link layer — .dynsym / .plt
+ * etc. — were synthesized by layout_dyn for ELF; we won't have them
+ * since pie wasn't set on this Linker. Still, oversize by a few.)
+ * NOTE(review): the reloc count is also added to the capacity; each
+ * LinkSection yields at most one MSec below, so that term looks like
+ * pure over-allocation. */
+ u32 cap = LinkRelocs_count(&img->relocs) + img->nsections + 4u;
+ x->secs = (MSec*)h->alloc(h, sizeof(MSec) * cap, _Alignof(MSec));
+ if (!x->secs) compiler_panic(x->c, no_loc(), "link_macho: oom on MSec");
+ memset(x->secs, 0, sizeof(MSec) * cap);
+ x->nsecs = 0;
+
+ /* __TEXT segment. The Mach-O header and load commands occupy the front
+ * of the segment, so section addresses cannot be assigned until the
+ * load-command size is known; sections are only enumerated here and
+ * placed further down. */
+
+ u64 text_vaddr = MZ_PAGEZERO;
+ /* We'll compute headers_size after plan; stash starting vaddr only. */
+ x->segs[1].vmaddr = text_vaddr;
+ x->segs[1].fileoff = 0;
+ x->text_vaddr = text_vaddr;
+
+ /* Collect: (a) exec sections, (b) read-only allocatable sections. */
+
+ u32 first_text_sec = x->nsecs;
+
+ for (u32 i = 0; i < img->nsections; ++i) {
+ LinkSection* ls = &img->sections[i];
+ if (!ls->size) continue;
+ if (sec_is_writable(ls)) continue;
+ if (sec_is_zerofill(ls)) continue; /* placed in __DATA */
+ MSec* m = &x->secs[x->nsecs++];
+ memset(m, 0, sizeof(*m));
+ m->link_sec_id = ls->id;
+ pick_macho_names(ls, x->c, &m->segname, &m->sectname);
+ /* Force into __TEXT. */
+ if (strcmp(m->segname, "__TEXT") != 0) m->segname = "__TEXT";
+ m->align = ls->align ? ls->align : 1u;
+ m->size = ls->size;
+ m->segidx = 1;
+ m->flags = sec_is_exec(ls)
+ ? (0x80000000u /*S_ATTR_PURE_INSTRUCTIONS*/ |
+ 0x00000400u /*S_ATTR_SOME_INSTRUCTIONS*/)
+ : 0u;
+ }
+
+ /* __stubs synthetic: one MZ_STUB_SIZE entry per function import. */
+ u32 stub_sec_idx = (u32)-1;
+ if (x->nimport_funcs) {
+ x->stubs_size = x->nimport_funcs * MZ_STUB_SIZE;
+ x->stubs_bytes = (u8*)h->alloc(h, x->stubs_size, 4);
+ if (!x->stubs_bytes)
+ compiler_panic(x->c, no_loc(), "link_macho: oom on stubs");
+ memset(x->stubs_bytes, 0, x->stubs_size);
+ MSec* m = &x->secs[x->nsecs++];
+ memset(m, 0, sizeof(*m));
+ m->synth_data = x->stubs_bytes;
+ m->synth_size = x->stubs_size;
+ m->segname = "__TEXT";
+ m->sectname = "__stubs";
+ m->align = 4u;
+ m->size = x->stubs_size;
+ m->segidx = 1;
+ m->flags = 0x80000000u | 0x00000400u | 0x00000008u /*S_SYMBOL_STUBS*/;
+ m->reserved1 = 0; /* fill in later: indirect-symtab base */
+ m->reserved2 = MZ_STUB_SIZE;
+ stub_sec_idx = x->nsecs - 1u;
+ }
+ x->segs[1].nsects = x->nsecs - first_text_sec;
+ x->segs[1].first_sec = first_text_sec;
+
+ /* __DATA_CONST: __got synth, one MZ_GOT_SIZE slot per import. */
+ u32 first_dc = x->nsecs;
+ u32 got_sec_idx = (u32)-1;
+ if (x->nimports) {
+ x->got_size = x->nimports * MZ_GOT_SIZE;
+ x->got_bytes = (u8*)h->alloc(h, x->got_size, 8);
+ if (!x->got_bytes)
+ compiler_panic(x->c, no_loc(), "link_macho: oom on got");
+ memset(x->got_bytes, 0, x->got_size);
+ MSec* m = &x->secs[x->nsecs++];
+ memset(m, 0, sizeof(*m));
+ m->synth_data = x->got_bytes;
+ m->synth_size = x->got_size;
+ m->segname = "__DATA_CONST";
+ m->sectname = "__got";
+ m->align = 8u;
+ m->size = x->got_size;
+ m->segidx = 2;
+ m->flags = 0x00000006u /*S_NON_LAZY_SYMBOL_POINTERS*/;
+ m->reserved1 = 0; /* indirect-symtab base */
+ got_sec_idx = x->nsecs - 1u;
+ }
+ x->segs[2].nsects = x->nsecs - first_dc;
+ x->segs[2].first_sec = first_dc;
+
+ /* __DATA segment: writable sections + zerofill. */
+ u32 first_d = x->nsecs;
+ for (u32 i = 0; i < img->nsections; ++i) {
+ LinkSection* ls = &img->sections[i];
+ if (!ls->size && !sec_is_zerofill(ls)) continue;
+ if (!sec_is_writable(ls)) continue;
+ MSec* m = &x->secs[x->nsecs++];
+ memset(m, 0, sizeof(*m));
+ m->link_sec_id = ls->id;
+ pick_macho_names(ls, x->c, &m->segname, &m->sectname);
+ if (strcmp(m->segname, "__DATA") != 0) m->segname = "__DATA";
+ m->align = ls->align ? ls->align : 1u;
+ m->size = ls->size;
+ m->segidx = 3;
+ m->is_zerofill = sec_is_zerofill(ls) ? 1 : 0;
+ m->flags = m->is_zerofill ? 0x00000001u /*S_ZEROFILL*/ : 0;
+ }
+ x->segs[3].nsects = x->nsecs - first_d;
+ x->segs[3].first_sec = first_d;
+
+ /* Compute load-command count + sizeofcmds, then back-fill section
+ * offsets. Layout pass 2. */
+ u32 nseg_real = 0;
+ for (u32 i = 0; i < x->nsegs; ++i) {
+ /* Skip __DATA_CONST or __DATA if no sections (edge case). */
+ if (i == 0) {
+ ++nseg_real;
+ continue;
+ } /* PAGEZERO */
+ if (i == 4) {
+ ++nseg_real;
+ continue;
+ } /* LINKEDIT always */
+ if (x->segs[i].nsects > 0) ++nseg_real;
+ }
+ /* Each LC_SEGMENT_64 carries 72 + 80*nsects bytes. Segments 1..3 with
+ * no sections contribute nothing, i.e. get no load command. */
+ u32 sizeofcmds = 0;
+ for (u32 i = 0; i < x->nsegs; ++i) {
+ if (i == 0 || i == 4) {
+ sizeofcmds += MACHO_SEGCMD64_SIZE; /* no sections */
+ continue;
+ }
+ if (x->segs[i].nsects == 0) continue;
+ sizeofcmds += MACHO_SEGCMD64_SIZE + x->segs[i].nsects * MACHO_SECT64_SIZE;
+ }
+ (void)nseg_real;
+ /* LC_DYLD_CHAINED_FIXUPS / LC_DYLD_EXPORTS_TRIE */
+ sizeofcmds += 16u + 16u;
+ /* LC_SYMTAB / LC_DYSYMTAB */
+ sizeofcmds += MACHO_SYMTAB_CMD_SIZE + MACHO_DYSYMTAB_CMD_SIZE;
+ /* LC_LOAD_DYLINKER: 12-byte command header + NUL-terminated path,
+ * padded to 8 bytes. */
+ {
+ u32 ld_size = 12u + (u32)strlen("/usr/lib/dyld") + 1u;
+ sizeofcmds += (u32)ALIGN_UP((u64)ld_size, 8u);
+ }
+ /* LC_UUID + LC_BUILD_VERSION + LC_MAIN */
+ sizeofcmds += 24u + 24u + 24u;
+ /* LC_LOAD_DYLIB per dylib: 24-byte command header + NUL-terminated
+ * install name, padded to 8 bytes. */
+ for (u32 i = 0; i < x->ndylibs; ++i) {
+ size_t nl;
+ pool_str(x->c->global, x->dylibs[i].install, &nl);
+ u32 sz = 24u + (u32)nl + 1u;
+ sizeofcmds += (u32)ALIGN_UP((u64)sz, 8u);
+ }
+ /* LC_FUNCTION_STARTS / LC_DATA_IN_CODE / LC_CODE_SIGNATURE */
+ sizeofcmds += 16u + 16u + 16u;
+
+ x->headers_size = MACHO_HDR64_SIZE + sizeofcmds;
+
+ /* Now place sections in __TEXT, __DATA_CONST, __DATA. vaddr/fileoff
+ * carry the running end of the previous segment into the next one. */
+ u64 vaddr = MZ_PAGEZERO + x->headers_size;
+ u64 fileoff = x->headers_size;
+ for (u32 i = 0; i < x->nsegs; ++i) {
+ if (i == 0 || i == 4) continue;
+ MSeg* sg = &x->segs[i];
+ if (i > 1) {
+ /* page-align the start of __DATA_CONST and __DATA */
+ vaddr = ALIGN_UP(vaddr, MZ_PAGE);
+ fileoff = ALIGN_UP(fileoff, MZ_PAGE);
+ }
+ sg->vmaddr = (i == 1) ? MZ_PAGEZERO : vaddr;
+ sg->fileoff = (i == 1) ? 0 : fileoff;
+ /* __TEXT carries the headers_size + sections. */
+ u64 seg_start_v = sg->vmaddr;
+ u64 seg_start_f = sg->fileoff;
+ /* For __TEXT, sections begin after the header area. */
+ u64 cur_v = (i == 1) ? (seg_start_v + x->headers_size) : seg_start_v;
+ u64 cur_f = (i == 1) ? (seg_start_f + x->headers_size) : seg_start_f;
+ u64 first_zerofill_v = 0;
+ int seen_zerofill = 0;
+ /* Non-zerofill first: these advance both the vaddr and file cursors. */
+ for (u32 j = 0; j < sg->nsects; ++j) {
+ MSec* m = &x->secs[sg->first_sec + j];
+ if (m->is_zerofill) continue;
+ cur_v = ALIGN_UP(cur_v, (u64)m->align);
+ cur_f = ALIGN_UP(cur_f, (u64)m->align);
+ m->vaddr = cur_v;
+ m->file_offset = cur_f;
+ cur_v += m->size;
+ cur_f += m->size;
+ }
+ /* End of file-backed content; zerofill sections start at or after it. */
+ first_zerofill_v = cur_v;
+ /* zerofill last (no file bytes) */
+ for (u32 j = 0; j < sg->nsects; ++j) {
+ MSec* m = &x->secs[sg->first_sec + j];
+ if (!m->is_zerofill) continue;
+ cur_v = ALIGN_UP(cur_v, (u64)m->align);
+ m->vaddr = cur_v;
+ m->file_offset = 0;
+ cur_v += m->size;
+ seen_zerofill = 1;
+ }
+ /* filesize covers only the file-backed part; vmsize covers everything
+ * and is rounded up to a whole page. */
+ sg->filesize = (i == 1) ? (cur_f - seg_start_f)
+ : (first_zerofill_v
+ ? (first_zerofill_v - seg_start_v)
+ : (cur_v - seg_start_v));
+ sg->vmsize = ALIGN_UP(cur_v - seg_start_v, MZ_PAGE);
+ if (sg->vmsize == 0 && sg->nsects > 0) sg->vmsize = MZ_PAGE;
+ if (i == 1) {
+ x->stubs_vaddr = 0;
+ for (u32 j = 0; j < sg->nsects; ++j) {
+ MSec* m = &x->secs[sg->first_sec + j];
+ if (strcmp(m->sectname, "__stubs") == 0) x->stubs_vaddr = m->vaddr;
+ }
+ x->text_filesz = sg->filesize;
+ }
+ if (i == 2) {
+ for (u32 j = 0; j < sg->nsects; ++j) {
+ MSec* m = &x->secs[sg->first_sec + j];
+ if (strcmp(m->sectname, "__got") == 0) x->got_vaddr = m->vaddr;
+ }
+ x->data_const_vaddr = sg->vmaddr;
+ x->data_const_filesz = sg->filesize;
+ }
+ if (i == 3) {
+ x->data_vaddr = sg->vmaddr;
+ x->data_filesz = sg->filesize;
+ x->data_memsz = sg->vmsize;
+ }
+ vaddr = sg->vmaddr + sg->vmsize;
+ fileoff = sg->fileoff + sg->filesize;
+ (void)seen_zerofill;
+ }
+ /* LINKEDIT placeholder; size is filled after blob assembly. */
+ vaddr = ALIGN_UP(vaddr, MZ_PAGE);
+ fileoff = ALIGN_UP(fileoff, MZ_PAGE);
+ x->segs[4].vmaddr = vaddr;
+ x->segs[4].fileoff = fileoff;
+ x->linkedit_vaddr = vaddr;
+ x->linkedit_fileoff = fileoff;
+
+ /* Encode __stubs bytes now that vaddrs are settled. stub_idx and
+ * got_idx are 1-based, hence the "- 1u". */
+ for (u32 i = 0; i < x->nimports; ++i) {
+ MachImp* mi = &x->imports[i];
+ if (!mi->is_func) continue;
+ u64 stub_v = x->stubs_vaddr + (mi->stub_idx - 1u) * MZ_STUB_SIZE;
+ u64 got_v = x->got_vaddr + (mi->got_idx - 1u) * MZ_GOT_SIZE;
+ encode_stub(x->stubs_bytes + (mi->stub_idx - 1u) * MZ_STUB_SIZE, stub_v,
+ got_v);
+ }
+ (void)stub_sec_idx;
+ (void)got_sec_idx;
+}
+
+/* ---- pass: shift LinkImage into final vaddrs/file_offsets ----
+ *
+ * img->sections still hold link_layout's coordinates. For every MSec
+ * backed by a LinkSection, move that section to the planned Mach-O
+ * vaddr/file_offset and carry along the reloc write sites and the
+ * defined symbols that live in it, so reloc-apply sees final addresses. */
+
+static void shift_sections(MCtx* x) {
+  LinkImage* img = x->img;
+  for (u32 mi = 0; mi < x->nsecs; ++mi) {
+    const MSec* planned = &x->secs[mi];
+    /* Synthetic sections (__stubs, __got) have no LinkSection to move. */
+    if (planned->link_sec_id == 0) continue;
+
+    LinkSection* ls = &img->sections[planned->link_sec_id - 1u];
+    const u64 from_v = ls->vaddr;
+    const u64 from_f = ls->file_offset;
+    const u64 to_v = planned->vaddr;
+    const u64 to_f = planned->file_offset;
+    if (from_v == to_v && from_f == to_f) continue; /* already in place */
+
+    ls->vaddr = to_v;
+    ls->file_offset = to_f;
+
+    /* Reloc sites written inside this section keep their offset from the
+     * section start. */
+    u32 ri = 0;
+    while (ri < LinkRelocs_count(&img->relocs)) {
+      LinkRelocApply* r = LinkRelocs_at(&img->relocs, ri++);
+      if (r->link_section_id == ls->id) {
+        r->write_vaddr = to_v + (r->write_vaddr - from_v);
+        r->write_file_offset = to_f + (r->write_file_offset - from_f);
+      }
+    }
+
+    /* Symbols are matched by section_id, not by vaddr range: several
+     * input sections may share the same pre-shift vaddr (each bucket in
+     * link_layout starts at offset 0). */
+    u32 si = 0;
+    while (si < LinkSyms_count(&img->syms)) {
+      LinkSymbol* s = LinkSyms_at(&img->syms, si++);
+      if (s->defined && s->kind != SK_ABS && s->section_id == ls->id) {
+        s->vaddr = to_v + (s->vaddr - from_v);
+      }
+    }
+  }
+}
+
+/* ---- pass: apply relocations + collect chained-fixup sites ----
+ *
+ * Reloc dispatch:
+ * target=imported func + CALL26/JUMP26 -> S = stub vaddr
+ * target=import + GOT_LOAD_PAGE21/PAGEOFF12 -> S = got slot vaddr
+ * target=import + ABS64 -> write 0; collect bind site
+ * target=internal + ABS64 -> write target VA; collect rebase site
+ * everything else -> standard apply
+ *
+ * Patch sites for chained fixups are 8-byte slots; for ABS32 we do not
+ * support fixups (no chained-fixup format for 32-bit pointers in
+ * standard arm64 — would need DYLD_CHAINED_PTR_32). Internal R_ABS32
+ * still works (no slide adjustment is wrong technically, but for
+ * compile-time-known offsets it suffices).
+ */
+
+/* One 8-byte pointer slot that dyld must fix up at load time. apply_relocs
+ * records them; build_chained_fixups sorts them by vaddr and threads them
+ * into per-page chains. */
+typedef struct FixSite {
+ u8 segidx; /* 2 = __DATA_CONST, 3 = __DATA */
+ u8 is_bind; /* 0 = rebase, 1 = bind */
+ u8 pad[2];
+ u32 import_idx; /* 1-based import index for binds, 0 for rebases */
+ u64 vaddr; /* absolute VA of the slot */
+ u64 rebase_target; /* unslid target VA; only used for rebases */
+} FixSite;
+
+/* Growable array of FixSite. Set up with fix_init, appended to with
+ * fix_push, released with fix_fini. */
+typedef struct FixList {
+ Heap* heap; /* allocator used to grow and free `a` */
+ FixSite* a; /* the sites; NULL until the first push */
+ u32 n; /* number of sites stored */
+ u32 cap; /* allocated capacity of `a`, in elements */
+} FixList;
+
+/* Make *fl an empty list that will allocate from `h`. */
+static void fix_init(FixList* fl, Heap* h) {
+  *fl = (FixList){.heap = h, .a = NULL, .n = 0, .cap = 0};
+}
+/* Release the list's storage (if any) and leave it empty. */
+static void fix_fini(FixList* fl) {
+  if (fl->a != NULL) {
+    Heap* owner = fl->heap;
+    owner->free(owner, fl->a, sizeof(*fl->a) * fl->cap);
+  }
+  fl->a = NULL;
+  fl->cap = 0;
+  fl->n = 0;
+}
+/* Append a copy of *s. If VEC_GROW reports failure the site is dropped
+ * and the list is left unchanged. */
+static void fix_push(FixList* fl, const FixSite* s) {
+  if (!VEC_GROW(fl->heap, fl->a, fl->cap, fl->n + 1u)) {
+    fl->a[fl->n] = *s;
+    fl->n += 1u;
+  }
+}
+
+/* Return the first planned section whose [vaddr, vaddr + size) range
+ * contains the absolute address v, or NULL when none does. */
+static MSec* msec_for_vaddr(MCtx* x, u64 v) {
+  u32 idx = 0;
+  while (idx < x->nsecs) {
+    MSec* cand = &x->secs[idx++];
+    if (v < cand->vaddr || v >= cand->vaddr + cand->size) continue;
+    return cand;
+  }
+  return NULL;
+}
+
+/* Return the base of the byte buffer that backs section `m`.
+ *
+ * Synthetic sections map to x->stubs_bytes or x->got_bytes (NULL for any
+ * other synthetic buffer). LinkSection-backed sections map to the buffer
+ * of the LinkSegment they were laid out in, or NULL when the section has
+ * no segment. The result is the start of the buffer, not of the section. */
+static u8* bytes_for_section(MCtx* x, MSec* m, LinkImage* img) {
+  if (m->synth_data) {
+    if (m->synth_data == x->got_bytes) return x->got_bytes;
+    if (m->synth_data == x->stubs_bytes) return x->stubs_bytes;
+    return NULL;
+  }
+  /* link_layout.c stored the input section bytes in the segment buffer. */
+  u32 segid = img->sections[m->link_sec_id - 1u].segment_id;
+  return (segid == LINK_SEG_NONE) ? NULL : img->segment_bytes[segid - 1u];
+}
+
+/* Locate the bytes a relocation writes to.
+ *
+ * The reloc names its LinkSection; the MSec planned for that section
+ * gives the backing buffer. shift_sections moved vaddrs and file offsets
+ * but did not reshuffle the segment buffers, so the position inside the
+ * buffer is the section's original input_offset plus the reloc's offset
+ * from the section's (post-shift) start.
+ *
+ * Returns NULL when the reloc has no section, no MSec was planned for it,
+ * or the section has no backing buffer. On success *out_msec (if
+ * non-NULL) receives the owning MSec. */
+static u8* patch_ptr(MCtx* x, LinkImage* img, const LinkRelocApply* r,
+                     MSec** out_msec) {
+  if (r->link_section_id == LINK_SEC_NONE) return NULL;
+  LinkSection* ls = &img->sections[r->link_section_id - 1u];
+
+  MSec* owner = NULL;
+  for (u32 k = 0; k < x->nsecs && !owner; ++k) {
+    if (x->secs[k].link_sec_id == ls->id) owner = &x->secs[k];
+  }
+  if (!owner) return NULL;
+
+  u8* seg_bytes = bytes_for_section(x, owner, img);
+  if (!seg_bytes) return NULL;
+
+  u32 sec_rel = (u32)(r->write_vaddr - owner->vaddr);
+  u32 buf_off = (u32)(ls->input_offset + sec_rel);
+  if (out_msec) *out_msec = owner;
+  return seg_bytes + buf_off;
+}
+
+/* Resolve the reloc target `id`.
+ *
+ * Returns 1 when the symbol is imported; *out_imp_idx then holds its
+ * 1-based import index (0 if none was found, looked up by id first and by
+ * the canonical global of the same name second) and *out_S stays 0.
+ * Returns 0 otherwise, with *out_S set to the symbol's vaddr, or 0 when
+ * `id` is LINK_SYM_NONE or out of range. */
+static int sym_S(MCtx* x, LinkImage* img, LinkSymId id, u64* out_S,
+                 int* out_imp_idx) {
+  *out_S = 0;
+  *out_imp_idx = 0;
+
+  LinkSymbol* s = (id == LINK_SYM_NONE) ? NULL : sym_at(img, id);
+  if (!s) return 0;
+
+  if (!s->imported) {
+    /* Absolute and section-relative symbols alike report their vaddr. */
+    *out_S = s->vaddr;
+    return 0;
+  }
+
+  u32 idx = (id < x->sym_to_imp_size) ? x->sym_to_imp[id] : 0;
+  if (idx == 0 && s->name != 0) {
+    LinkSymId canon = symhash_get(&img->globals, s->name);
+    if (canon != LINK_SYM_NONE && canon < x->sym_to_imp_size) {
+      idx = x->sym_to_imp[canon];
+    }
+  }
+  *out_imp_idx = (int)idx;
+  return 1;
+}
+
+/* Apply every relocation in the image and record the pointer slots dyld
+ * has to fix up at load time.
+ *
+ * Against an imported symbol:
+ *   CALL26/JUMP26           -> branch to the import's __stubs entry
+ *   GOT page / lo12 relocs  -> address of the import's __got slot
+ *   direct page / lo12      -> also resolved against the __got slot
+ *   ABS64                   -> slot zeroed, bind site recorded
+ *   anything else           -> panic
+ * Against any other symbol:
+ *   ABS64                   -> target VA written, rebase site recorded
+ *                              (no rebase for SK_ABS targets: an absolute
+ *                              value must not be slid by dyld)
+ *   anything else           -> link_reloc_apply with the symbol's vaddr
+ * Finally one bind site is recorded for every __got slot.
+ *
+ * Relocs with no target, or whose write site cannot be located by
+ * patch_ptr, are skipped. */
+static void apply_relocs(MCtx* x, FixList* fl) {
+  LinkImage* img = x->img;
+  for (u32 i = 0; i < LinkRelocs_count(&img->relocs); ++i) {
+    LinkRelocApply* r = LinkRelocs_at(&img->relocs, i);
+    if (r->target == LINK_SYM_NONE) continue;
+    MSec* msec = NULL;
+    u8* P_bytes = patch_ptr(x, img, r, &msec);
+    if (!P_bytes) continue;
+    u64 P = r->write_vaddr;
+
+    u64 S;
+    int imp_idx;
+    int is_imp = sym_S(x, img, r->target, &S, &imp_idx);
+
+    if (is_imp) {
+      MachImp* mi = (imp_idx > 0) ? &x->imports[imp_idx - 1] : NULL;
+      switch (r->kind) {
+        case R_AARCH64_CALL26:
+        case R_AARCH64_JUMP26: {
+          if (!mi || !mi->stub_idx)
+            compiler_panic(x->c, no_loc(),
+                           "link_macho: import has no stub for branch");
+          u64 stub_v = x->stubs_vaddr + (mi->stub_idx - 1u) * MZ_STUB_SIZE;
+          link_reloc_apply(x->c, r->kind, P_bytes, stub_v, r->addend, P);
+          continue;
+        }
+        case R_AARCH64_ADR_GOT_PAGE:
+        case R_AARCH64_LD64_GOT_LO12_NC: {
+          if (!mi)
+            compiler_panic(x->c, no_loc(),
+                           "link_macho: GOT reloc for unknown import");
+          u64 got_v = x->got_vaddr + (mi->got_idx - 1u) * MZ_GOT_SIZE;
+          link_reloc_apply(x->c, r->kind, P_bytes, got_v, r->addend, P);
+          continue;
+        }
+        case R_AARCH64_ADR_PREL_PG_HI21:
+        case R_AARCH64_ADR_PREL_PG_HI21_NC:
+        case R_AARCH64_ADD_ABS_LO12_NC:
+        case R_AARCH64_LDST8_ABS_LO12_NC:
+        case R_AARCH64_LDST16_ABS_LO12_NC:
+        case R_AARCH64_LDST32_ABS_LO12_NC:
+        case R_AARCH64_LDST64_ABS_LO12_NC:
+        case R_AARCH64_LDST128_ABS_LO12_NC: {
+          /* Direct page/lo12 against an import: resolve the reloc against
+           * the import's __got slot. The reloc kind (and therefore the
+           * patched instruction) is left as is. */
+          if (!mi)
+            compiler_panic(x->c, no_loc(),
+                           "link_macho: PAGE/LO12 against unknown import");
+          u64 got_v = x->got_vaddr + (mi->got_idx - 1u) * MZ_GOT_SIZE;
+          link_reloc_apply(x->c, r->kind, P_bytes, got_v, r->addend, P);
+          continue;
+        }
+        case R_ABS64: {
+          /* Direct 8-byte absolute against an import: bind the slot.
+           * Without a resolved import the bind ordinal would be garbage,
+           * and the bind site carries no addend, so refuse both instead
+           * of emitting a pointer to the wrong place. */
+          if (!mi)
+            compiler_panic(x->c, no_loc(),
+                           "link_macho: ABS64 against unknown import");
+          if (r->addend != 0)
+            compiler_panic(x->c, no_loc(),
+                           "link_macho: ABS64 against imported symbol with "
+                           "non-zero addend is not supported");
+          wr_u64_le(P_bytes, 0);
+          FixSite fs = {(u8)msec->segidx, 1, {0}, (u32)imp_idx, P, 0};
+          fix_push(fl, &fs);
+          continue;
+        }
+        default:
+          compiler_panic(x->c, no_loc(),
+                         "link_macho: unhandled reloc kind %u against "
+                         "imported symbol",
+                         (u32)r->kind);
+      }
+    }
+
+    /* Internal relocs. */
+    if (r->kind == R_ABS64) {
+      u64 value = S + (u64)r->addend;
+      wr_u64_le(P_bytes, value);
+      /* An absolute symbol's value does not move with the image, so the
+       * slot keeps the value written above and gets no rebase. */
+      LinkSymbol* tgt = sym_at(img, r->target);
+      if (tgt && tgt->kind == SK_ABS) continue;
+      /* Rebase site. */
+      FixSite fs = {(u8)msec->segidx, 0, {0}, 0, P, value};
+      fix_push(fl, &fs);
+      continue;
+    }
+    /* Generic apply. */
+    link_reloc_apply(x->c, r->kind, P_bytes, S, r->addend, P);
+  }
+
+  /* Add a chained-bind site for every __got slot (segment 2,
+   * __DATA_CONST). The slot bytes are already zero; dyld fills them in
+   * when it walks the chain. */
+  for (u32 i = 0; i < x->nimports; ++i) {
+    MachImp* mi = &x->imports[i];
+    u64 slot_v = x->got_vaddr + (mi->got_idx - 1u) * MZ_GOT_SIZE;
+    FixSite fs = {2u, 1, {0}, i + 1u, slot_v, 0};
+    fix_push(fl, &fs);
+  }
+}
+
+/* ---- chained fixups blob assembler ----
+ *
+ * For each segment that has fixups, build a dyld_chained_starts_in_segment
+ * with one chain per page (MZ_PAGE). Within a page, sort sites by
+ * offset, encode each as DYLD_CHAINED_PTR_64, and link via the `next`
+ * field (4-byte units, 0 = end of chain).
+ */
+
+/* Summary of one page's fixup chain. NOTE(review): nothing in this part
+ * of the file references this type — build_chained_fixups tracks the same
+ * information in local variables; confirm whether it is still needed. */
+typedef struct PageChain {
+ u32 first_offset_in_page; /* relative to page start */
+ u32 nsites;
+ u32 first_site_idx; /* into a per-segment site array */
+} PageChain;
+
+/* Three-way comparison of two FixSite records by vaddr: -1, 0 or 1. */
+static int site_cmp_by_vaddr(const void* a, const void* b) {
+  const FixSite* lhs = a;
+  const FixSite* rhs = b;
+  return (lhs->vaddr > rhs->vaddr) - (lhs->vaddr < rhs->vaddr);
+}
+
+/* Sort a[0..n) by ascending vaddr with a stable insertion sort (kept
+ * local to avoid pulling in qsort). */
+static void sort_sites(FixSite* a, u32 n) {
+  for (u32 i = 1; i < n; ++i) {
+    FixSite held = a[i];
+    u32 hole = i;
+    /* Slide larger elements up until `held` fits at `hole`. */
+    for (; hole > 0; --hole) {
+      if (site_cmp_by_vaddr(&a[hole - 1], &held) <= 0) break;
+      a[hole] = a[hole - 1];
+    }
+    a[hole] = held;
+  }
+}
+
+/* Write one DYLD_CHAINED_PTR_64 word into `slot`.
+ *
+ *   bind  : ordinal:24, addend:8, reserved:19, next:12, bind:1 = 1
+ *   rebase: target:36 (vmaddr), high8:8, reserved:7, next:12, bind:1 = 0
+ *
+ * For a bind, ord_or_target_lo is the import ordinal and
+ * high_or_target_hi is ignored. For a rebase the two u32 arguments are
+ * the low and high halves of the target address, of which the low 36
+ * bits are kept. addend and high8 are always written as 0. next4 is the
+ * distance to the next fixup in 4-byte units (0 ends the chain). */
+static void emit_pointer(u8* slot, int is_bind, u32 ord_or_target_lo,
+                         u32 high_or_target_hi, u32 next4) {
+  const u64 next_field = ((u64)next4 & 0xfffull) << 51;
+  u64 word;
+  if (is_bind) {
+    u64 ordinal = (u64)ord_or_target_lo & 0xffffffull;
+    word = ordinal | next_field | ((u64)1 << 63);
+  } else {
+    u64 target = ((u64)high_or_target_hi << 32) | (u64)ord_or_target_lo;
+    word = (target & (((u64)1 << 36) - 1u)) | next_field;
+  }
+  wr_u64_le(slot, word);
+}
+
+/* Assemble the LC_DYLD_CHAINED_FIXUPS payload into x->chained_fixups and
+ * thread the fixup chains through the pointer slots themselves.
+ *
+ * Blob layout, in order:
+ *   dyld_chained_fixups_header
+ *   dyld_chained_starts_in_image      (one offset per emitted segment)
+ *   dyld_chained_starts_in_segment    (one per segment that has fixups)
+ *   dyld_chained_import[nimports]
+ *   symbol strings (NUL-terminated, offset 0 is an empty string)
+ *
+ * fl is sorted in place by vaddr. Panics if a fixup site lies in a
+ * segment whose bytes this function cannot reach (anything other than
+ * __DATA_CONST's __got or a __DATA section). */
+static void build_chained_fixups(MCtx* x, FixList* fl) {
+  Heap* h = x->h;
+  MByte* out = &x->chained_fixups;
+  mbuf_init(out, h);
+
+  /* dyld_chained_fixups_header (7 x u32 = 28 bytes):
+   *   fixups_version (=0), starts_offset, imports_offset, symbols_offset,
+   *   imports_count, imports_format, symbols_format (=0, uncompressed)
+   */
+  mbuf_u32(out, 0); /* fixups_version */
+  u32 starts_offset_pos = mbuf_u32(out, 0);
+  u32 imports_offset_pos = mbuf_u32(out, 0);
+  u32 symbols_offset_pos = mbuf_u32(out, 0);
+  mbuf_u32(out, x->nimports);
+  mbuf_u32(out, DYLD_CHAINED_IMPORT);
+  mbuf_u32(out, 0); /* symbols uncompressed */
+  /* The starts table is kept 8-byte aligned (the per-segment records it
+   * leads to contain a u64), which pads the header to 32 bytes. */
+  mbuf_align(out, 8);
+
+  /* Segment indices in the fixup tables must match the LC_SEGMENT_64
+   * commands actually present in the file. plan_layout sizes the load
+   * commands so that __PAGEZERO and __LINKEDIT are always present and
+   * segments 1..3 only when they own sections; mirror that rule here. */
+  u32 seg_slot[sizeof(x->segs) / sizeof(x->segs[0])];
+  u32 nseg_emitted = 0;
+  for (u32 i = 0; i < x->nsegs; ++i) {
+    int emitted = (i == 0 || i == 4 || x->segs[i].nsects > 0);
+    seg_slot[i] = emitted ? nseg_emitted++ : (u32)-1;
+  }
+
+  /* dyld_chained_starts_in_image:
+   *   uint32 seg_count
+   *   uint32 seg_info_offset[seg_count]   (0 = no fixups in that segment)
+   */
+  u32 starts_off = out->len;
+  wr_u32_le(out->data + starts_offset_pos, starts_off);
+  mbuf_u32(out, nseg_emitted);
+  u32 seg_info_offsets_pos = out->len;
+  for (u32 i = 0; i < nseg_emitted; ++i) mbuf_u32(out, 0);
+
+  /* Sort fixsites by vaddr globally. Segments occupy disjoint address
+   * ranges, so each segment's sites end up in one contiguous run. */
+  sort_sites(fl->a, fl->n);
+
+  /* Per segment, emit dyld_chained_starts_in_segment when fixups present. */
+  for (u32 si = 0; si < x->nsegs; ++si) {
+    /* Locate this segment's run of sites. */
+    u32 first = (u32)-1, count = 0;
+    for (u32 k = 0; k < fl->n; ++k) {
+      if (fl->a[k].segidx == si) {
+        if (first == (u32)-1) first = k;
+        ++count;
+      }
+    }
+    if (!count) continue;
+    if (seg_slot[si] == (u32)-1)
+      compiler_panic(x->c, no_loc(),
+                     "link_macho: chained fixups in segment %u, which has "
+                     "no load command",
+                     si);
+
+    mbuf_align(out, 8);
+    u32 sis_off = out->len;
+    wr_u32_le(out->data + seg_info_offsets_pos + seg_slot[si] * 4u,
+              sis_off - starts_off);
+
+    /* Compute page count for this segment. */
+    u64 seg_va = x->segs[si].vmaddr;
+    u64 seg_size = x->segs[si].vmsize ? x->segs[si].vmsize : MZ_PAGE;
+    u32 page_count = (u32)((seg_size + MZ_PAGE - 1u) / MZ_PAGE);
+
+    /* dyld_chained_starts_in_segment:
+     *   uint32 size              (of this struct incl. page_start[])
+     *   uint16 page_size
+     *   uint16 pointer_format
+     *   uint64 segment_offset    (in-memory offset of the segment from
+     *                             the mach_header, i.e. from __TEXT's
+     *                             vmaddr)
+     *   uint32 max_valid_pointer (0 for 64-bit)
+     *   uint16 page_count
+     *   uint16 page_start[page_count] (0xFFFF = no fixups in page)
+     */
+    u32 sis_size_pos = mbuf_u32(out, 0); /* patched below */
+    mbuf_u16(out, (u16)MZ_PAGE);
+    mbuf_u16(out, (u16)DYLD_CHAINED_PTR_64);
+    mbuf_u64(out, x->segs[si].vmaddr - x->segs[1].vmaddr);
+    mbuf_u32(out, 0);
+    mbuf_u16(out, (u16)page_count);
+    u32 page_starts_pos = out->len;
+    for (u32 p = 0; p < page_count; ++p) mbuf_u16(out, 0xFFFFu);
+    wr_u32_le(out->data + sis_size_pos, out->len - sis_off);
+
+    /* Walk the segment's sites page by page: page_start[page] gets the
+     * offset of the page's first site, and each site's `next` field links
+     * to the following site on the same page. */
+    u32 cur = first;
+    while (cur < first + count) {
+      u32 page_idx = (u32)((fl->a[cur].vaddr - seg_va) / MZ_PAGE);
+      u32 offset_in_page = (u32)((fl->a[cur].vaddr - seg_va) % MZ_PAGE);
+      wr_u16_le(out->data + page_starts_pos + page_idx * 2u,
+                (u16)offset_in_page);
+      /* Find the last site that is still on this page. */
+      u32 last_in_page = cur;
+      while (last_in_page + 1 < first + count) {
+        u64 nv = fl->a[last_in_page + 1].vaddr;
+        if (nv >= seg_va + (u64)(page_idx + 1) * MZ_PAGE) break;
+        ++last_in_page;
+      }
+      /* Encode chain pointers. */
+      for (u32 k = cur; k <= last_in_page; ++k) {
+        FixSite* s = &fl->a[k];
+        u32 next4 = 0; /* 0 terminates the chain */
+        if (k < last_in_page) {
+          u64 dist = fl->a[k + 1].vaddr - s->vaddr;
+          next4 = (u32)(dist / 4u);
+        }
+        /* Find the slot's bytes: __DATA_CONST holds only __got, so its
+         * slots live in x->got_bytes; __DATA slots live in the segment
+         * buffer of the section that covers the address. */
+        u8* slot = NULL;
+        if (s->segidx == 2) {
+          slot = x->got_bytes + (s->vaddr - x->got_vaddr);
+        } else if (s->segidx == 3) {
+          MSec* m = msec_for_vaddr(x, s->vaddr);
+          if (m) {
+            u8* base = bytes_for_section(x, m, x->img);
+            if (base) {
+              LinkSection* ls = &x->img->sections[m->link_sec_id - 1u];
+              u32 in_off = (u32)(ls->input_offset + (s->vaddr - m->vaddr));
+              slot = base + in_off;
+            }
+          }
+        }
+        if (!slot)
+          compiler_panic(x->c, no_loc(),
+                         "link_macho: chained-fixup slot for vaddr 0x%llx not "
+                         "in any segment buffer",
+                         (unsigned long long)s->vaddr);
+        if (s->is_bind) {
+          /* import_idx is 1-based; the chained-import ordinal is 0-based. */
+          u32 ord = s->import_idx - 1u;
+          emit_pointer(slot, 1, ord, 0, next4);
+        } else {
+          /* rebase target = unslid vmaddr */
+          u32 lo = (u32)(s->rebase_target & 0xffffffffu);
+          u32 hi = (u32)(s->rebase_target >> 32);
+          emit_pointer(slot, 0, lo, hi, next4);
+        }
+      }
+      cur = last_in_page + 1u;
+    }
+  }
+
+  /* Imports table: one dyld_chained_import (4B) per import.
+   * Layout: lib_ordinal:8, weak:1, name_offset:23 */
+  mbuf_align(out, 4);
+  u32 imports_off = out->len;
+  wr_u32_le(out->data + imports_offset_pos, imports_off);
+  /* Reserve the import records; they are patched once the name offsets
+   * are known. */
+  for (u32 i = 0; i < x->nimports; ++i) mbuf_u32(out, 0);
+  /* The symbol strings follow the import records directly. */
+  u32 symbols_off = out->len;
+  wr_u32_le(out->data + symbols_offset_pos, symbols_off);
+  /* Leading NUL for offset 0. */
+  mbuf_u8(out, 0);
+  for (u32 i = 0; i < x->nimports; ++i) {
+    MachImp* mi = &x->imports[i];
+    size_t nl;
+    const char* nm = pool_str(x->c->global, mi->name, &nl);
+    u32 off = out->len - symbols_off; /* name_offset is symbols-relative */
+    if (nm && nl) mbuf_str(out, nm, (u32)nl);
+    else mbuf_u8(out, 0);
+    /* Patch the import slot. */
+    u32 packed = ((u32)mi->dylib_ord & 0xffu) |
+                 ((u32)(mi->weak ? 1u : 0u) << 8) |
+                 ((off & 0x7fffffu) << 9);
+    wr_u32_le(out->data + imports_off + i * 4u, packed);
+  }
+}
+
+/* ---- exports trie ---- *
+ *
+ * Minimal trie: a root node with a single child edge, labelled with the
+ * entry symbol's name (normally "_main"), leading to one leaf that
+ * records the entry's offset from the __TEXT base. This is enough for
+ * dyld; binaries with a real exports trie include more data but we
+ * don't need it. */
+
+/* Append `v` to `out` as an unsigned LEB128 value: seven payload bits
+ * per byte, least-significant group first, with bit 7 set on every
+ * byte except the last. Zero is encoded as the single byte 0x00. */
+static void uleb128(MByte* out, u64 v) {
+  for (;;) {
+    u8 low7 = (u8)(v & 0x7fu);
+    v >>= 7;
+    if (v == 0) {
+      /* Final group: no continuation bit. */
+      mbuf_u8(out, low7);
+      return;
+    }
+    mbuf_u8(out, (u8)(low7 | 0x80u));
+  }
+}
+
+/* Fill x->exports_trie with a two-node export trie for the entry symbol.
+ *
+ * Node format:
+ *   node = (terminal_size: uleb128) (export_data)? (children_count: u8)
+ *          (children: [(label NUL) (offset uleb128)]*)
+ *
+ * Emitted layout:
+ *   root: terminal_size=0, children_count=1, <entry name>, child offset
+ *   leaf: terminal_size, flags=0, offset = entry vaddr - x->text_vaddr,
+ *         children_count=0
+ *
+ * When the image has no defined entry symbol, or its name is empty, the
+ * trie is just an empty root (two zero bytes) and is NOT padded. */
+static void build_exports_trie(MCtx* x) {
+  MByte* trie = &x->exports_trie;
+  mbuf_init(trie, x->h);
+
+  LinkSymbol* entry = sym_at(x->img, x->img->entry_sym);
+  const char* name = NULL;
+  size_t name_len = 0;
+  if (entry && entry->defined)
+    name = pool_str(x->c->global, entry->name, &name_len);
+  if (!name || name_len == 0) {
+    mbuf_u8(trie, 0); /* terminal_size 0 */
+    mbuf_u8(trie, 0); /* children 0 */
+    return;
+  }
+
+  /* Export address is stored relative to the __TEXT base. */
+  u64 entry_off = entry->vaddr - x->text_vaddr;
+
+  /* Leaf payload = uleb(flags = 0), always one byte, plus uleb(entry_off). */
+  u32 payload_len = 1;
+  {
+    u64 rest = entry_off;
+    do {
+      ++payload_len;
+      rest >>= 7;
+    } while (rest);
+  }
+
+  /* Root node. The child offset is an absolute position inside the trie
+   * that is only known once the leaf has been placed, so five bytes
+   * (the longest uleb128 of a u32) are reserved and patched below. */
+  mbuf_u8(trie, 0); /* root terminal size */
+  mbuf_u8(trie, 1); /* children_count */
+  mbuf_str(trie, name, (u32)name_len); /* presumably also writes the NUL */
+  u32 slot_pos = trie->len;
+  for (u32 i = 0; i < 5; ++i) mbuf_u8(trie, 0);
+
+  /* Leaf node. */
+  u32 leaf_pos = trie->len;
+  mbuf_u8(trie, (u8)payload_len); /* terminal_size */
+  uleb128(trie, 0);               /* flags: regular export */
+  uleb128(trie, entry_off);
+  mbuf_u8(trie, 0);               /* children_count */
+
+  /* Patch the reserved slot. The uleb128 of leaf_pos is written in its
+   * shortest form, so it terminates on its own last byte. The unused
+   * tail of the five-byte slot is then filled with 0x80 bytes and a
+   * final 0x00; those filler bytes sit between the child entry and the
+   * leaf and are not part of the encoded offset. */
+  u8* slot = trie->data + slot_pos;
+  u32 rest = leaf_pos;
+  u32 used = 0;
+  do {
+    u8 b = (u8)(rest & 0x7fu);
+    rest >>= 7;
+    if (rest) b |= 0x80u;
+    slot[used++] = b;
+  } while (rest && used < 5);
+  if (used < 5) {
+    for (u32 j = used; j < 4; ++j) slot[j] = 0x80;
+    slot[4] = 0x00;
+  }
+
+  /* Pad trie to 8 bytes. */
+  mbuf_align(trie, 8);
+}
+
+/* ---- symtab + strtab + indirect symtab ---- */
+
+typedef struct NlistRec { /* One symbol-table record. The field set matches
+ * the 16-byte entry build_symtab serializes by hand (strx at byte 0,
+ * type at 4, sect at 5, desc at 6, value at 8).
+ * NOTE(review): no use of this struct is visible in this part of the
+ * file -- confirm it is still needed. */
+ u32 strx; /* offset of the symbol's name in the string table */
+ u8 type; /* n_type bits, e.g. N_SECT | N_EXT or N_UNDF | N_EXT */
+ u8 sect; /* 1-based section index (Mach-O); 0 = no section */
+ u16 desc; /* n_desc flags, e.g. N_WEAK_DEF / N_WEAK_REF */
+ u64 value; /* symbol address; 0 for undefined imports */
+} NlistRec;
+
+/* Return the 1-based section ordinal (index into x->secs plus one) for
+ * `vaddr`, or 0 when no planned section matches.
+ *
+ * A section that actually contains the address always wins. Only when
+ * none does is a section whose END equals the address accepted (this
+ * covers end-of-section marker symbols and zero-sized sections). Doing
+ * the end test eagerly would mis-assign the first symbol of a section
+ * that starts exactly where the previous one ends. */
+static u8 symtab_nsect_for_vaddr(MCtx* x, u64 vaddr) {
+  u32 end_match = 0;
+  for (u32 k = 0; k < x->nsecs; ++k) {
+    MSec* m = &x->secs[k];
+    u64 lo = m->vaddr;
+    u64 hi = m->vaddr + m->size;
+    if (vaddr >= lo && vaddr < hi) return (u8)(k + 1u);
+    if (!end_match && vaddr == hi) end_match = k + 1u;
+  }
+  return (u8)end_match;
+}
+
+/* Append a symbol name to the string table and return its offset. A
+ * missing or empty name yields offset 0, the leading NUL every table
+ * built here starts with, instead of an offset that would alias the
+ * next string or point past the end of the table. */
+static u32 symtab_add_name(MByte* strtab, const char* nm, size_t nl) {
+  if (!nm || nl == 0) return 0;
+  u32 strx = strtab->len;
+  mbuf_str(strtab, nm, (u32)nl);
+  return strx;
+}
+
+/* Append one 16-byte little-endian symbol record:
+ * strx(4) type(1) sect(1) desc(2) value(8). */
+static void symtab_push_nlist(MByte* symtab, u32 strx, u8 type, u8 sect,
+                              u16 desc, u64 value) {
+  u8 rec[16];
+  wr_u32_le(rec + 0, strx);
+  rec[4] = type;
+  rec[5] = sect;
+  wr_u16_le(rec + 6, desc);
+  wr_u64_le(rec + 8, value);
+  mbuf_append(symtab, rec, 16);
+}
+
+/* Build x->symtab, x->strtab and x->indirect, and set x->nsyms.
+ *
+ * Symbol order is: defined externals first, then undefined imports (no
+ * local symbols are emitted), matching the ranges LC_DYSYMTAB reports.
+ *
+ *  - Defined externals: every defined, named, non-absolute LinkSymbol
+ *    with global or weak binding. Weak definitions get N_WEAK_DEF in
+ *    n_desc.
+ *  - Imports: one N_UNDF|N_EXT record per x->imports entry, with the
+ *    dylib ordinal in the high byte of n_desc and N_WEAK_REF for weak
+ *    imports.
+ *  - Indirect table: one entry per import that has a stub (stub_idx
+ *    non-zero), then one entry per import for the __got section. Each
+ *    synthetic section's reserved1 is set to the index of its first
+ *    entry in the indirect table. */
+static void build_symtab(MCtx* x) {
+  Heap* h = x->h;
+  LinkImage* img = x->img;
+  mbuf_init(&x->symtab, h);
+  mbuf_init(&x->strtab, h);
+  mbuf_init(&x->indirect, h);
+
+  /* strtab leading NUL: offset 0 is the empty string. */
+  mbuf_u8(&x->strtab, 0);
+
+  /* Pass A: defined externals. */
+  u32 n_extdef = 0;
+  for (u32 i = 0; i < LinkSyms_count(&img->syms); ++i) {
+    LinkSymbol* s = LinkSyms_at(&img->syms, i);
+    if (!s->defined) continue;
+    if (s->bind != SB_GLOBAL && s->bind != SB_WEAK) continue;
+    if (s->name == 0) continue;
+    if (s->kind == SK_ABS) continue; /* skip abs externs */
+
+    size_t nl = 0;
+    const char* nm = pool_str(x->c->global, s->name, &nl);
+    u32 strx = symtab_add_name(&x->strtab, nm, nl);
+
+    /* N_WEAK_DEF lives in n_desc, it is not an n_type flag. */
+    u16 desc = (u16)(s->bind == SB_WEAK ? N_WEAK_DEF : 0);
+    symtab_push_nlist(&x->symtab, strx, (u8)(N_SECT | N_EXT),
+                      symtab_nsect_for_vaddr(x, s->vaddr), desc, s->vaddr);
+    ++n_extdef;
+  }
+
+  /* Pass B: undefined imports, placed directly after the extdefs so
+   * import k is symbol (imp_first_symtab_idx + k). */
+  u32 imp_first_symtab_idx = n_extdef;
+  u32 n_undef = 0;
+  for (u32 i = 0; i < x->nimports; ++i) {
+    MachImp* mi = &x->imports[i];
+    size_t nl = 0;
+    const char* nm = pool_str(x->c->global, mi->name, &nl);
+    u32 strx = symtab_add_name(&x->strtab, nm, nl);
+
+    /* High byte of n_desc carries the dylib ordinal. */
+    u16 desc = (u16)(((u16)mi->dylib_ord & 0xff) << 8);
+    if (mi->weak) desc |= N_WEAK_REF;
+    symtab_push_nlist(&x->symtab, strx, (u8)(N_UNDF | N_EXT), 0, desc, 0);
+    ++n_undef;
+  }
+
+  /* Indirect symtab: __stubs entries first, then __got entries. */
+  u32 indirect_next = 0;
+  for (u32 i = 0; i < x->nsecs; ++i) {
+    MSec* m = &x->secs[i];
+    if (m->synth_data != x->stubs_bytes || !m->synth_size) continue;
+    m->reserved1 = indirect_next;
+    for (u32 k = 0; k < x->nimports; ++k) {
+      if (!x->imports[k].stub_idx) continue; /* import has no stub */
+      mbuf_u32(&x->indirect, imp_first_symtab_idx + k);
+      ++indirect_next;
+    }
+  }
+  for (u32 i = 0; i < x->nsecs; ++i) {
+    MSec* m = &x->secs[i];
+    if (m->synth_data != x->got_bytes || !m->synth_size) continue;
+    m->reserved1 = indirect_next;
+    for (u32 k = 0; k < x->nimports; ++k) {
+      mbuf_u32(&x->indirect, imp_first_symtab_idx + k);
+      ++indirect_next;
+    }
+  }
+
+  /* No locals are emitted, so the total is extdefs + imports. */
+  x->nsyms = n_extdef + n_undef;
+}
+
+/* ---- LINKEDIT layout assembly ----
+ *
+ * Place blobs in the order Apple prefers:
+ * chained_fixups, exports_trie, fn_starts, data_in_code,
+ * symtab, indirect, strtab, codesig
+ */
+
+static void layout_linkedit(MCtx* x) {
+ /* fn_starts and data_in_code are both empty. */
+ mbuf_init(&x->fn_starts, x->h);
+ mbuf_init(&x->data_in_code, x->h);
+ mbuf_init(&x->codesig, x->h);
+
+ u64 cur = x->linkedit_fileoff;
+ /* chained fixups */
+ cur = ALIGN_UP(cur, 8u);
+ x->chained_fixups_off = (u32)cur;
+ cur += x->chained_fixups.len;
+ /* exports trie */
+ cur = ALIGN_UP(cur, 8u);
+ x->exports_trie_off = (u32)cur;
+ cur += x->exports_trie.len;
+ /* function starts (empty placeholder, but allocate one byte) */
+ cur = ALIGN_UP(cur, 8u);
+ x->fn_starts_off = (u32)cur;
+ /* data in code */
+ cur = ALIGN_UP(cur, 8u);
+ x->data_in_code_off = (u32)cur;
+ /* symtab */
+ cur = ALIGN_UP(cur, 8u);
+ x->symtab_off = (u32)cur;
+ cur += x->symtab.len;
+ /* indirect symtab */
+ cur = ALIGN_UP(cur, 4u);
+ x->indirect_off = (u32)cur;
+ cur += x->indirect.len;
+ /* strtab */
+ cur = ALIGN_UP(cur, 8u);
+ x->strtab_off = (u32)cur;
+ cur += x->strtab.len;
+ /* code signature: end-aligned to 16 */
+ cur = ALIGN_UP(cur, 16u);
+ x->codesig_off = (u32)cur;
+
+ /* Linkedit segment file_size includes everything up to (but not yet
+ * including) codesig. Codesig is computed below. */
+ u64 le_size = cur - x->linkedit_fileoff;
+ /* Set linkedit segment size; will be increased after codesig. */
+ x->segs[4].filesize = le_size;
+ x->segs[4].vmsize = ALIGN_UP(le_size, MZ_PAGE);
+ if (!x->segs[4].vmsize) x->segs[4].vmsize = MZ_PAGE;
+}
+
+/* ---- ad-hoc code signature (CodeDirectory + SuperBlob) ----
+ *
+ * Produces a minimal embedded SuperBlob with a single CodeDirectory.
+ * The CD holds one sha256 hash per (1 << CS_PAGE_SIZE_LOG2) = 4096-byte
+ * page of the file (excluding the codesig itself). The kernel verifies the
+ * CD's hash chain on exec.
+ *
+ * Output format (in big-endian for SuperBlob/CodeDirectory headers):
+ * [SuperBlob]
+ * u32 magic (0xfade0cc0)
+ * u32 length
+ * u32 count (=1)
+ * [Slot]
+ * u32 type (=0 CSSLOT_CODEDIRECTORY)
+ * u32 offset (=20) -- relative to start of SuperBlob
+ * [CodeDirectory]
+ * u32 magic (0xfade0c02)
+ * u32 length (bytes including all hashes)
+ * u32 version (>=0x20400 for execSeg fields)
+ * u32 flags (=0x2, kSecCodeSignatureAdhoc: ad-hoc signature)
+ * u32 hashOffset (offset of first slot hash)
+ * u32 identOffset (offset of identifier string)
+ * u32 nSpecialSlots (=0)
+ * u32 nCodeSlots
+ * u32 codeLimit (file bytes covered)
+ * u8 hashSize (=32)
+ * u8 hashType (=2 sha256)
+ * u8 platform (=0)
+ * u8 pageSize (=12 for 4096)
+ * u32 spare2 (=0)
+ * u32 scatterOffset (=0)
+ * u32 teamOffset (=0)
+ * u32 spare3 (=0)
+ * u64 codeLimit64 (=0)
+ * u64 execSegBase (=__TEXT.fileoff)
+ * u64 execSegLimit (=__TEXT.filesize)
+ * u64 execSegFlags (=1 main binary)
+ * [identifier bytes "a.out\0"]
+ * [codeslot hashes nCodeSlots * 32 B]
+ *
+ * Hashes computed AFTER everything else is final — including the codesig
+ * blob's own offset in the file (the hash range stops just before
+ * codeLimit). */
+
+/* Tiny SHA-256 implementation.
+ *
+ * SHA256_K holds the 64 per-round constants; sha256_block adds
+ * SHA256_K[i] into round i of the compression function. The values are
+ * the standard SHA-256 round constants from FIPS 180-4. */
+
+static const u32 SHA256_K[64] = {
+ 0x428a2f98u, 0x71374491u, 0xb5c0fbcfu, 0xe9b5dba5u, 0x3956c25bu,
+ 0x59f111f1u, 0x923f82a4u, 0xab1c5ed5u, 0xd807aa98u, 0x12835b01u,
+ 0x243185beu, 0x550c7dc3u, 0x72be5d74u, 0x80deb1feu, 0x9bdc06a7u,
+ 0xc19bf174u, 0xe49b69c1u, 0xefbe4786u, 0x0fc19dc6u, 0x240ca1ccu,
+ 0x2de92c6fu, 0x4a7484aau, 0x5cb0a9dcu, 0x76f988dau, 0x983e5152u,
+ 0xa831c66du, 0xb00327c8u, 0xbf597fc7u, 0xc6e00bf3u, 0xd5a79147u,
+ 0x06ca6351u, 0x14292967u, 0x27b70a85u, 0x2e1b2138u, 0x4d2c6dfcu,
+ 0x53380d13u, 0x650a7354u, 0x766a0abbu, 0x81c2c92eu, 0x92722c85u,
+ 0xa2bfe8a1u, 0xa81a664bu, 0xc24b8b70u, 0xc76c51a3u, 0xd192e819u,
+ 0xd6990624u, 0xf40e3585u, 0x106aa070u, 0x19a4c116u, 0x1e376c08u,
+ 0x2748774cu, 0x34b0bcb5u, 0x391c0cb3u, 0x4ed8aa4au, 0x5b9cca4fu,
+ 0x682e6ff3u, 0x748f82eeu, 0x78a5636fu, 0x84c87814u, 0x8cc70208u,
+ 0x90befffau, 0xa4506cebu, 0xbef9a3f7u, 0xc67178f2u};
+
+/* Rotate the 32-bit value `v` right by `n` bits. Callers in this file
+ * pass n in 2..25; n == 0 would shift by the full width and is not
+ * handled. */
+static u32 rotr32(u32 v, u32 n) {
+  u32 dropped = v << (32 - n); /* low n bits, moved to the top */
+  u32 kept = v >> n;           /* remaining bits, moved down */
+  return kept | dropped;
+}
+
+typedef struct Sha256 { /* Streaming SHA-256 state: set up by sha256_init,
+ * fed by sha256_update, finished by sha256_final. */
+ u32 h[8]; /* running hash words; sha256_block folds each block in */
+ u8 buf[64]; /* bytes received but not yet forming a full 64-byte block */
+ u32 buflen; /* number of valid bytes in buf (reset to 0 after a block) */
+ u64 total; /* running count of bytes passed to sha256_update */
+} Sha256;
+
+/* Reset `s` to the start of a new hash: load the eight initial hash
+ * words and clear the pending-byte and total-byte counters. The
+ * contents of s->buf are left untouched (buflen = 0 marks it empty). */
+static void sha256_init(Sha256* s) {
+  static const u32 initial[8] = {0x6a09e667u, 0xbb67ae85u, 0x3c6ef372u,
+                                 0xa54ff53au, 0x510e527fu, 0x9b05688cu,
+                                 0x1f83d9abu, 0x5be0cd19u};
+  for (u32 i = 0; i < 8; ++i) s->h[i] = initial[i];
+  s->total = 0;
+  s->buflen = 0;
+}
+
+/* Compress one 64-byte block at `p` into the running hash s->h.
+ *
+ * The block is read as sixteen big-endian words, expanded to a 64-word
+ * message schedule, run through 64 rounds on a working copy of the
+ * state, and the result is added back into s->h word by word. */
+static void sha256_block(Sha256* s, const u8* p) {
+  u32 sched[64];
+  u32 t;
+
+  /* Words 0..15: the block itself, big-endian. */
+  for (t = 0; t < 16; ++t) {
+    const u8* q = p + t * 4;
+    sched[t] = ((u32)q[0] << 24) | ((u32)q[1] << 16) | ((u32)q[2] << 8) |
+               (u32)q[3];
+  }
+  /* Words 16..63: schedule expansion. */
+  for (t = 16; t < 64; ++t) {
+    u32 w15 = sched[t - 15];
+    u32 w2 = sched[t - 2];
+    u32 sig0 = rotr32(w15, 7) ^ rotr32(w15, 18) ^ (w15 >> 3);
+    u32 sig1 = rotr32(w2, 17) ^ rotr32(w2, 19) ^ (w2 >> 10);
+    sched[t] = sched[t - 16] + sig0 + sched[t - 7] + sig1;
+  }
+
+  /* Working variables a..h live in v[0]..v[7]. */
+  u32 v[8];
+  for (t = 0; t < 8; ++t) v[t] = s->h[t];
+
+  for (t = 0; t < 64; ++t) {
+    u32 a = v[0], b = v[1], c = v[2];
+    u32 e = v[4], f = v[5], g = v[6];
+    u32 big1 = rotr32(e, 6) ^ rotr32(e, 11) ^ rotr32(e, 25);
+    u32 choose = (e & f) ^ ((~e) & g);
+    u32 t1 = v[7] + big1 + choose + SHA256_K[t] + sched[t];
+    u32 big0 = rotr32(a, 2) ^ rotr32(a, 13) ^ rotr32(a, 22);
+    u32 major = (a & b) ^ (a & c) ^ (b & c);
+    u32 t2 = big0 + major;
+    /* Shift the state down one slot, injecting t1 / t1+t2. */
+    v[7] = v[6];
+    v[6] = v[5];
+    v[5] = v[4];
+    v[4] = v[3] + t1;
+    v[3] = v[2];
+    v[2] = v[1];
+    v[1] = v[0];
+    v[0] = t1 + t2;
+  }
+
+  for (t = 0; t < 8; ++t) s->h[t] += v[t];
+}
+
+/* Feed `n` bytes from `data` into the hash. Bytes accumulate in s->buf;
+ * each time the buffer reaches 64 bytes it is compressed with
+ * sha256_block and emptied. s->total grows by `n` on every call. */
+static void sha256_update(Sha256* s, const u8* data, u32 n) {
+  const u8* src = data;
+  u32 left = n;
+
+  s->total += n;
+  while (left != 0) {
+    u32 room = 64u - s->buflen;
+    u32 chunk = (left < room) ? left : room;
+    memcpy(&s->buf[s->buflen], src, chunk);
+    s->buflen += chunk;
+    src += chunk;
+    left -= chunk;
+    if (s->buflen != 64) continue; /* block not full yet */
+    sha256_block(s, s->buf);
+    s->buflen = 0;
+  }
+}
+
+/* Finish the hash and write the 32-byte digest to `out`.
+ *
+ * Padding is a 0x80 byte, then zero bytes until 56 bytes are pending,
+ * then the message length in bits as a 64-bit big-endian integer. The
+ * bit count is captured first because the padding itself goes through
+ * sha256_update and therefore also bumps s->total. The digest is the
+ * eight state words in big-endian byte order. */
+static void sha256_final(Sha256* s, u8 out[32]) {
+  const u64 bit_count = s->total * 8u;
+
+  u8 marker = 0x80;
+  sha256_update(s, &marker, 1);
+  for (;;) {
+    if (s->buflen == 56) break;
+    u8 zero = 0;
+    sha256_update(s, &zero, 1);
+  }
+
+  u8 len_be[8];
+  for (u32 idx = 0; idx < 8; ++idx)
+    len_be[idx] = (u8)(bit_count >> ((7 - idx) * 8));
+  sha256_update(s, len_be, 8);
+
+  for (u32 w = 0; w < 8; ++w) {
+    u32 word = s->h[w];
+    u8* dst = out + w * 4;
+    dst[0] = (u8)(word >> 24);
+    dst[1] = (u8)(word >> 16);
+    dst[2] = (u8)(word >> 8);
+    dst[3] = (u8)word;
+  }
+}
+
+/* Store `v` at `p` as eight bytes, most-significant byte first. */
+static void wr_u64_be(u8* p, u64 v) {
+  for (int i = 7; i >= 0; --i) {
+    p[i] = (u8)v; /* lowest remaining byte goes to the highest index */
+    v >>= 8;
+  }
+}
+
+/* Create x->codesig as a complete SuperBlob + CodeDirectory with every
+ * header field filled in and all page-hash slots zeroed, and record its
+ * byte size in x->codesig_size. Because the size is exact, the file
+ * layout is final once this returns; compute_codesig later overwrites
+ * the zero hashes.
+ *
+ *   code_limit  number of file bytes the signature covers
+ *   ident       NUL-terminated identifier string stored in the CD
+ *
+ * All multi-byte header fields are written big-endian. */
+static void build_codesig_skeleton(MCtx* x, u32 code_limit, const char* ident) {
+  const u32 sb_hdr = 12u;  /* SuperBlob: magic, length, count */
+  const u32 sb_slot = 8u;  /* one index entry: type, offset */
+  const u32 cd_hdr = 88u;  /* CodeDirectory header through execSegFlags */
+
+  u32 page_bytes = 1u << CS_PAGE_SIZE_LOG2; /* 4096 */
+  u32 nslots = (code_limit + page_bytes - 1u) / page_bytes;
+  u32 ident_len = (u32)strlen(ident) + 1u; /* includes the NUL */
+
+  /* CodeDirectory = header + identifier + one hash per page. */
+  u32 cd_size = cd_hdr + ident_len + nslots * CS_SHA256_LEN;
+  u32 sb_size = sb_hdr + sb_slot + cd_size;
+
+  /* Allocate the whole blob up front, zero-filled. */
+  MByte* blob = &x->codesig;
+  mbuf_init(blob, x->h);
+  mbuf_reserve(blob, sb_size);
+  memset(blob->data, 0, sb_size);
+  blob->len = sb_size;
+
+  /* SuperBlob header followed by its single index slot. */
+  u8* sb = blob->data;
+  wr_u32_be(sb + 0, CS_MAGIC_EMBEDDED_SIGNATURE);
+  wr_u32_be(sb + 4, sb_size);
+  wr_u32_be(sb + 8, 1);                     /* count */
+  wr_u32_be(sb + 12, CSSLOT_CODEDIRECTORY); /* slot type */
+  wr_u32_be(sb + 16, 20u);                  /* slot offset from SuperBlob */
+
+  /* CodeDirectory header. */
+  u8* cd = sb + 20;
+  wr_u32_be(cd + 0, CS_MAGIC_CODEDIRECTORY);
+  wr_u32_be(cd + 4, cd_size);
+  wr_u32_be(cd + 8, 0x20400u);            /* version with execSeg */
+  wr_u32_be(cd + 12, 0x2u);               /* flags = adhoc */
+  wr_u32_be(cd + 16, cd_hdr + ident_len); /* hashOffset */
+  wr_u32_be(cd + 20, cd_hdr);             /* identOffset */
+  wr_u32_be(cd + 24, 0);                  /* nSpecialSlots */
+  wr_u32_be(cd + 28, nslots);             /* nCodeSlots */
+  wr_u32_be(cd + 32, code_limit);         /* codeLimit */
+  cd[36] = (u8)CS_SHA256_LEN;             /* hashSize */
+  cd[37] = (u8)CS_HASHTYPE_SHA256;        /* hashType */
+  cd[38] = 0;                             /* platform */
+  cd[39] = (u8)CS_PAGE_SIZE_LOG2;         /* pageSize (log2) */
+  wr_u32_be(cd + 40, 0);                  /* spare2 */
+  wr_u32_be(cd + 44, 0);                  /* scatterOffset */
+  wr_u32_be(cd + 48, 0);                  /* teamOffset */
+  wr_u32_be(cd + 52, 0);                  /* spare3 */
+  wr_u64_be(cd + 56, 0);                  /* codeLimit64 */
+  wr_u64_be(cd + 64, x->segs[1].fileoff);  /* execSegBase */
+  wr_u64_be(cd + 72, x->segs[1].filesize); /* execSegLimit */
+  wr_u64_be(cd + 80, CS_EXECSEG_MAIN_BINARY);
+
+  /* Identifier string (with its NUL) sits right after the header. */
+  memcpy(cd + cd_hdr, ident, ident_len);
+
+  x->codesig_size = sb_size;
+}
+
+/* Fill the page-hash slots of the CodeDirectory already laid out in
+ * x->codesig by build_codesig_skeleton.
+ *
+ *   full_file         the file image to hash
+ *   file_len_excl_cs  number of leading bytes to cover (the signature
+ *                     itself is excluded)
+ *   ident             the identifier used for the skeleton; only its
+ *                     length is needed, to locate the first hash slot
+ *
+ * One SHA-256 is stored per 2^CS_PAGE_SIZE_LOG2-byte page. A short
+ * final page is hashed over just its own bytes, with no zero padding. */
+static void compute_codesig(MCtx* x, const u8* full_file, u32 file_len_excl_cs,
+                            const char* ident) {
+  const u32 page_bytes = 1u << CS_PAGE_SIZE_LOG2;
+  const u32 nslots = (file_len_excl_cs + page_bytes - 1u) / page_bytes;
+
+  /* Hashes follow: SuperBlob header (12) + slot (8), then the 88-byte
+   * CodeDirectory header and the NUL-terminated identifier. */
+  u8* slot_hash = x->codesig.data + 12 + 8;
+  slot_hash += 88u + ((u32)strlen(ident) + 1u);
+
+  u32 off = 0;
+  for (u32 page = 0; page < nslots; ++page) {
+    u32 span;
+    if (off + page_bytes <= file_len_excl_cs)
+      span = page_bytes;
+    else
+      span = file_len_excl_cs - off; /* partial tail page */
+
+    Sha256 ctx;
+    sha256_init(&ctx);
+    sha256_update(&ctx, full_file + off, span);
+    sha256_final(&ctx, slot_hash);
+
+    slot_hash += CS_SHA256_LEN;
+    off += page_bytes;
+  }
+}
+
+/* ---- final emission ---- */
+
+/* Append one LC_SEGMENT_64 load command for x->segs[segidx] to `lc`:
+ * the segment_command_64 header followed by one section_64 record for
+ * each of the segment's nsects sections (taken from x->secs starting at
+ * first_sec). Names are copied into 16-byte zero-padded fields and
+ * truncated at 16 bytes. Relocation fields and reserved3 are written
+ * as zero. */
+static void emit_load_command_segment(MByte* lc, MCtx* x, u32 segidx) {
+  MSeg* seg = &x->segs[segidx];
+
+  /* 16-byte padded segment name; reused for every section record,
+   * since a section's segname must match its segment. */
+  u8 segname16[16];
+  memset(segname16, 0, sizeof segname16);
+  size_t seg_len = strlen(seg->name);
+  if (seg_len > 16) seg_len = 16;
+  memcpy(segname16, seg->name, seg_len);
+
+  /* segment_command_64 */
+  mbuf_u32(lc, LC_SEGMENT_64);
+  mbuf_u32(lc, MACHO_SEGCMD64_SIZE + seg->nsects * MACHO_SECT64_SIZE);
+  mbuf_append(lc, segname16, 16);
+  mbuf_u64(lc, seg->vmaddr);
+  mbuf_u64(lc, seg->vmsize);
+  mbuf_u64(lc, seg->fileoff);
+  mbuf_u64(lc, seg->filesize);
+  mbuf_u32(lc, seg->maxprot);
+  mbuf_u32(lc, seg->initprot);
+  mbuf_u32(lc, seg->nsects);
+  mbuf_u32(lc, 0); /* flags */
+
+  /* section_64 records */
+  MSec* sec = &x->secs[seg->first_sec];
+  for (u32 n = seg->nsects; n != 0; --n, ++sec) {
+    u8 sectname16[16];
+    memset(sectname16, 0, sizeof sectname16);
+    if (sec->sectname) {
+      size_t len = strlen(sec->sectname);
+      if (len > 16) len = 16;
+      if (len) memcpy(sectname16, sec->sectname, len);
+    }
+
+    /* Alignment is stored as log2: smallest shift with 2^shift >= align
+     * (an align of 0 is treated as 1). */
+    u32 want = sec->align ? sec->align : 1u;
+    u32 log2_align = 0;
+    for (; (1u << log2_align) < want; ++log2_align) {
+    }
+
+    mbuf_append(lc, sectname16, 16);
+    mbuf_append(lc, segname16, 16);
+    mbuf_u64(lc, sec->vaddr);
+    mbuf_u64(lc, sec->size);
+    mbuf_u32(lc, (u32)sec->file_offset);
+    mbuf_u32(lc, log2_align);
+    mbuf_u32(lc, 0); /* reloff */
+    mbuf_u32(lc, 0); /* nreloc */
+    mbuf_u32(lc, sec->flags);
+    mbuf_u32(lc, sec->reserved1);
+    mbuf_u32(lc, sec->reserved2);
+    mbuf_u32(lc, 0); /* reserved3 */
+  }
+}
+
+void link_emit_macho(LinkImage* img, Writer* w);
+
+/* Write `img` to `w` as an arm64 Mach-O MH_EXECUTE (PIE) file.
+ *
+ * Steps, in order:
+ *   1. collect imports, plan the layout, shift sections;
+ *   2. resolve the entry symbol and compute its offset from __TEXT;
+ *   3. apply relocations, collecting the chained-fixup sites;
+ *   4. build the __LINKEDIT blobs (chained fixups, exports trie,
+ *      symtab/strtab/indirect) and lay them out;
+ *   5. size the ad-hoc code signature and grow __LINKEDIT to hold it;
+ *   6. build the load commands and check their size against the
+ *      headers_size predicted during layout;
+ *   7. assemble the whole file in memory, hash it page by page into
+ *      the code signature, append the signature and write it out.
+ *
+ * Panics (compiler_panic) when the target is not arm64, when there is
+ * no resolved/defined entry symbol, when the entry lies below the
+ * __TEXT base, or when the load-command size does not match the
+ * prediction. */
+void link_emit_macho(LinkImage* img, Writer* w) {
+  MCtx x;
+  memset(&x, 0, sizeof(x));
+  x.img = img;
+  x.c = img->c;
+  x.h = img->heap;
+  x.w = w;
+  x.linker = img->linker;
+
+  if (x.c->target.arch != CFREE_ARCH_ARM_64)
+    compiler_panic(x.c, no_loc(),
+                   "link_emit_macho: only arm64 supported (arch=%u)",
+                   (u32)x.c->target.arch);
+  if (img->entry_sym == LINK_SYM_NONE)
+    compiler_panic(x.c, no_loc(), "link_emit_macho: no resolved entry");
+
+  collect_imports(&x);
+  plan_layout(&x);
+  shift_sections(&x);
+
+  /* entry offset within __TEXT segment. */
+  LinkSymbol* esym = sym_at(img, img->entry_sym);
+  if (!esym || !esym->defined)
+    compiler_panic(x.c, no_loc(), "link_emit_macho: entry symbol undefined");
+  if (esym->vaddr < x.text_vaddr)
+    compiler_panic(x.c, no_loc(),
+                   "link_emit_macho: entry symbol below __TEXT base");
+  x.entry_offset = (u32)(esym->vaddr - x.text_vaddr);
+
+  /* image-id UUID: first 16 bytes of the computed image id. */
+  u8 image_id[LINK_IMAGE_ID_BYTES];
+  link_image_id_compute(img, image_id);
+  memcpy(x.uuid, image_id, 16);
+
+  /* Reloc apply collects fixsites. */
+  FixList fl;
+  fix_init(&fl, x.h);
+  apply_relocs(&x, &fl);
+
+  /* Build LINKEDIT contents. */
+  build_chained_fixups(&x, &fl);
+  build_exports_trie(&x);
+  build_symtab(&x);
+  layout_linkedit(&x);
+
+  /* Compute code-sig skeleton sized to file bytes excluding sig. */
+  u32 code_limit = x.codesig_off;
+  build_codesig_skeleton(&x, code_limit, "a.out");
+  /* Now extend linkedit segment to include codesig. */
+  u64 le_size = (u64)x.codesig_off + (u64)x.codesig_size - x.linkedit_fileoff;
+  x.segs[4].filesize = le_size;
+  x.segs[4].vmsize = ALIGN_UP(le_size, MZ_PAGE);
+
+  /* Build load commands buffer. */
+  MByte lc;
+  mbuf_init(&lc, x.h);
+
+  /* LC_SEGMENT_64: PAGEZERO, TEXT and LINKEDIT always; DATA_CONST and
+   * DATA only when they have sections. */
+  emit_load_command_segment(&lc, &x, 0); /* PAGEZERO */
+  emit_load_command_segment(&lc, &x, 1); /* TEXT */
+  if (x.segs[2].nsects > 0)
+    emit_load_command_segment(&lc, &x, 2); /* DATA_CONST */
+  if (x.segs[3].nsects > 0)
+    emit_load_command_segment(&lc, &x, 3); /* DATA */
+  emit_load_command_segment(&lc, &x, 4); /* LINKEDIT */
+
+  /* LC_DYLD_CHAINED_FIXUPS (linkedit_data_command: 16B) */
+  mbuf_u32(&lc, LC_DYLD_CHAINED_FIXUPS);
+  mbuf_u32(&lc, 16);
+  mbuf_u32(&lc, x.chained_fixups_off);
+  mbuf_u32(&lc, x.chained_fixups.len);
+
+  /* LC_DYLD_EXPORTS_TRIE */
+  mbuf_u32(&lc, LC_DYLD_EXPORTS_TRIE);
+  mbuf_u32(&lc, 16);
+  mbuf_u32(&lc, x.exports_trie_off);
+  mbuf_u32(&lc, x.exports_trie.len);
+
+  /* LC_SYMTAB */
+  mbuf_u32(&lc, LC_SYMTAB);
+  mbuf_u32(&lc, MACHO_SYMTAB_CMD_SIZE);
+  mbuf_u32(&lc, x.symtab_off);
+  mbuf_u32(&lc, x.nsyms);
+  mbuf_u32(&lc, x.strtab_off);
+  mbuf_u32(&lc, x.strtab.len);
+
+  /* LC_DYSYMTAB. The symtab holds no locals; imports come last, so the
+   * defined externals are whatever remains of nsyms after the imports. */
+  u32 nlocal = 0;
+  u32 nundef = x.nimports;
+  u32 nextdef = (x.nsyms > nundef) ? x.nsyms - nundef - nlocal : 0;
+  mbuf_u32(&lc, LC_DYSYMTAB);
+  mbuf_u32(&lc, MACHO_DYSYMTAB_CMD_SIZE);
+  mbuf_u32(&lc, 0); /* ilocalsym */
+  mbuf_u32(&lc, nlocal);
+  mbuf_u32(&lc, nlocal); /* iextdefsym */
+  mbuf_u32(&lc, nextdef);
+  mbuf_u32(&lc, nlocal + nextdef); /* iundefsym */
+  mbuf_u32(&lc, nundef);
+  mbuf_u32(&lc, 0); mbuf_u32(&lc, 0); /* tocoff, ntoc */
+  mbuf_u32(&lc, 0); mbuf_u32(&lc, 0); /* modtaboff, nmodtab */
+  mbuf_u32(&lc, 0); mbuf_u32(&lc, 0); /* extrefsymoff, nextrefsyms */
+  mbuf_u32(&lc, x.indirect_off);
+  mbuf_u32(&lc, x.indirect.len / 4u);
+  mbuf_u32(&lc, 0); mbuf_u32(&lc, 0); /* extreloff, nextrel */
+  mbuf_u32(&lc, 0); mbuf_u32(&lc, 0); /* locreloff, nlocrel */
+
+  /* LC_LOAD_DYLINKER: 12-byte header, NUL-terminated path, zero-padded
+   * to an 8-byte multiple. Padding is measured from the command's real
+   * start, the same way as LC_LOAD_DYLIB below. */
+  {
+    const char* dyld = "/usr/lib/dyld";
+    u32 dyld_len = (u32)strlen(dyld);
+    u32 cmd_size = (u32)ALIGN_UP((u64)(12u + dyld_len + 1u), 8u);
+    u32 cmd_start = lc.len;
+    mbuf_u32(&lc, LC_LOAD_DYLINKER);
+    mbuf_u32(&lc, cmd_size);
+    mbuf_u32(&lc, 12u); /* name offset within cmd */
+    mbuf_str(&lc, dyld, dyld_len);
+    while (lc.len - cmd_start < cmd_size) mbuf_u8(&lc, 0);
+  }
+
+  /* LC_UUID */
+  mbuf_u32(&lc, LC_UUID);
+  mbuf_u32(&lc, 24);
+  mbuf_append(&lc, x.uuid, 16);
+
+  /* LC_BUILD_VERSION */
+  mbuf_u32(&lc, LC_BUILD_VERSION);
+  mbuf_u32(&lc, 24);
+  mbuf_u32(&lc, 1);               /* PLATFORM_MACOS */
+  mbuf_u32(&lc, (12u << 16) | 0); /* minos 12.0.0 */
+  mbuf_u32(&lc, (12u << 16) | 0); /* sdk 12.0.0 */
+  mbuf_u32(&lc, 0);               /* ntools */
+
+  /* LC_MAIN — entryoff is offset within __TEXT segment from its file
+   * start (0). */
+  mbuf_u32(&lc, LC_MAIN);
+  mbuf_u32(&lc, 24);
+  mbuf_u64(&lc, (u64)x.entry_offset); /* entryoff = vaddr - __TEXT.vmaddr */
+  mbuf_u64(&lc, 0);                   /* stacksize */
+
+  /* LC_LOAD_DYLIB per dylib. */
+  for (u32 i = 0; i < x.ndylibs; ++i) {
+    size_t nl = 0; /* stays 0 if pool_str does not report a length */
+    const char* nm = pool_str(x.c->global, x.dylibs[i].install, &nl);
+    u32 cmd_size = (u32)ALIGN_UP((u64)(24u + (u32)nl + 1u), 8u);
+    u32 cmd_start = lc.len;
+    mbuf_u32(&lc, LC_LOAD_DYLIB);
+    mbuf_u32(&lc, cmd_size);
+    mbuf_u32(&lc, 24u);        /* name offset */
+    mbuf_u32(&lc, 0);          /* timestamp */
+    mbuf_u32(&lc, (1u << 16)); /* current_version 1.0 */
+    mbuf_u32(&lc, (1u << 16)); /* compat_version 1.0 */
+    mbuf_str(&lc, nm ? nm : "", (u32)nl);
+    while (lc.len - cmd_start < cmd_size) mbuf_u8(&lc, 0);
+  }
+
+  /* LC_FUNCTION_STARTS / LC_DATA_IN_CODE — empty. */
+  mbuf_u32(&lc, LC_FUNCTION_STARTS_C);
+  mbuf_u32(&lc, 16);
+  mbuf_u32(&lc, x.fn_starts_off);
+  mbuf_u32(&lc, 0);
+
+  mbuf_u32(&lc, LC_DATA_IN_CODE_C);
+  mbuf_u32(&lc, 16);
+  mbuf_u32(&lc, x.data_in_code_off);
+  mbuf_u32(&lc, 0);
+
+  /* LC_CODE_SIGNATURE */
+  mbuf_u32(&lc, LC_CODE_SIGNATURE_C);
+  mbuf_u32(&lc, 16);
+  mbuf_u32(&lc, x.codesig_off);
+  mbuf_u32(&lc, x.codesig_size);
+
+  /* Sanity: lc.len + MACHO_HDR64_SIZE must equal headers_size we
+   * predicted in plan_layout. If not, we mis-sized — panic. */
+  if ((u64)lc.len + MACHO_HDR64_SIZE != x.headers_size) {
+    compiler_panic(x.c, no_loc(),
+                   "link_macho: load-cmd size mismatch: predicted %llu got %u",
+                   (unsigned long long)(x.headers_size - MACHO_HDR64_SIZE),
+                   lc.len);
+  }
+
+  /* ---- now stream the file ---- */
+  /* Build the whole file in one flat buffer first (so it can be hashed
+   * for the code signature) and write it out at the end. */
+  MByte file;
+  mbuf_init(&file, x.h);
+
+  /* mach_header_64. ncmds must mirror the commands emitted above:
+   * PAGEZERO + TEXT + maybe DATA_CONST + maybe DATA + LINKEDIT, then
+   * (chained, exports_trie, symtab, dysymtab, dyld, uuid, build_version,
+   * main, fn_starts, data_in_code, codesig) = 11, plus one per dylib. */
+  u32 ncmds = 0;
+  ncmds += 2; /* PAGEZERO + TEXT */
+  if (x.segs[2].nsects > 0) ncmds++;
+  if (x.segs[3].nsects > 0) ncmds++;
+  ncmds++; /* LINKEDIT */
+  ncmds += 11 + x.ndylibs;
+
+  mbuf_u32(&file, MH_MAGIC_64);
+  mbuf_u32(&file, CPU_TYPE_ARM64);
+  mbuf_u32(&file, CPU_SUBTYPE_ARM64_ALL);
+  mbuf_u32(&file, MH_EXECUTE);
+  mbuf_u32(&file, ncmds);
+  mbuf_u32(&file, lc.len);
+  mbuf_u32(&file,
+           MH_DYLDLINK | MH_TWOLEVEL | MH_NOUNDEFS | MH_PIE);
+  mbuf_u32(&file, 0); /* reserved */
+  mbuf_append(&file, lc.data, lc.len);
+
+  /* Header + load commands end exactly at headers_size (checked above),
+   * so no explicit pad is needed here; each MSec's file_offset says
+   * where its bytes go. */
+
+  /* Now emit segment payload bytes per MSec. */
+  for (u32 i = 0; i < x.nsecs; ++i) {
+    MSec* m = &x.secs[i];
+    if (m->is_zerofill || m->size == 0) continue;
+    /* Pad up to m->file_offset. */
+    while (file.len < m->file_offset) mbuf_u8(&file, 0);
+    if (m->synth_data) {
+      /* Linker-synthesized section (e.g. stubs / got bytes). */
+      mbuf_append(&file, m->synth_data, m->synth_size);
+    } else {
+      LinkSection* ls = &img->sections[m->link_sec_id - 1u];
+      u32 segid = ls->segment_id;
+      u8* base = (segid != LINK_SEG_NONE) ? img->segment_bytes[segid - 1u]
+                                          : NULL;
+      if (base && ls->size) {
+        mbuf_append(&file, base + ls->input_offset, (u32)ls->size);
+      } else if (ls->size) {
+        /* No backing bytes: emit zeros of the section's size. */
+        for (u64 k = 0; k < ls->size; ++k) mbuf_u8(&file, 0);
+      }
+    }
+  }
+
+  /* Pad to LINKEDIT start. */
+  while (file.len < x.linkedit_fileoff) mbuf_u8(&file, 0);
+
+  /* LINKEDIT contents in declared order, zero-padding up to each
+   * blob's planned offset. */
+  while (file.len < x.chained_fixups_off) mbuf_u8(&file, 0);
+  mbuf_append(&file, x.chained_fixups.data, x.chained_fixups.len);
+  while (file.len < x.exports_trie_off) mbuf_u8(&file, 0);
+  mbuf_append(&file, x.exports_trie.data, x.exports_trie.len);
+  while (file.len < x.fn_starts_off) mbuf_u8(&file, 0);
+  /* fn_starts is empty */
+  while (file.len < x.data_in_code_off) mbuf_u8(&file, 0);
+  /* data_in_code is empty */
+  while (file.len < x.symtab_off) mbuf_u8(&file, 0);
+  mbuf_append(&file, x.symtab.data, x.symtab.len);
+  while (file.len < x.indirect_off) mbuf_u8(&file, 0);
+  mbuf_append(&file, x.indirect.data, x.indirect.len);
+  while (file.len < x.strtab_off) mbuf_u8(&file, 0);
+  mbuf_append(&file, x.strtab.data, x.strtab.len);
+  while (file.len < x.codesig_off) mbuf_u8(&file, 0);
+
+  /* Compute codesig hashes over file bytes [0, codesig_off). */
+  /* The codesig blob currently has zero hashes; hash now. */
+  compute_codesig(&x, file.data, x.codesig_off, "a.out");
+  /* Append codesig. */
+  mbuf_append(&file, x.codesig.data, x.codesig.len);
+
+  /* Stream out. */
+  cfree_writer_seek(w, 0);
+  cfree_writer_write(w, file.data, file.len);
+
+  /* Cleanup. */
+  fix_fini(&fl);
+  mbuf_fini(&lc);
+  mbuf_fini(&file);
+  mbuf_fini(&x.chained_fixups);
+  mbuf_fini(&x.exports_trie);
+  mbuf_fini(&x.symtab);
+  mbuf_fini(&x.strtab);
+  mbuf_fini(&x.indirect);
+  mbuf_fini(&x.fn_starts);
+  mbuf_fini(&x.data_in_code);
+  mbuf_fini(&x.codesig);
+  if (x.imports) x.h->free(x.h, x.imports, 0); /* VEC_GROW: cap unknown */
+  if (x.dylibs) x.h->free(x.h, x.dylibs, 0);
+  if (x.sym_to_imp)
+    x.h->free(x.h, x.sym_to_imp, sizeof(u32) * x.sym_to_imp_size);
+  if (x.secs) x.h->free(x.h, x.secs, 0);
+  if (x.stubs_bytes) x.h->free(x.h, x.stubs_bytes, x.stubs_size);
+  if (x.got_bytes) x.h->free(x.h, x.got_bytes, x.got_size);
+}
diff --git a/src/obj/macho_read.c b/src/obj/macho_read.c
@@ -381,3 +381,119 @@ ObjBuilder* read_macho(Compiler* c, const char* name, const u8* data,
obj_finalize(ob);
return ob;
}
+
+/* ---- read_macho_dso ----
+ *
+ * MH_DYLIB reader. Walks load commands once to find LC_ID_DYLIB
+ * (install-name) and LC_SYMTAB (symbol table + string table), then
+ * emits one defined ObjSym per externally-visible nlist entry.
+ *
+ * Like read_elf_dso, the produced ObjBuilder carries no sections /
+ * relocations / groups — only symbol definitions in OBJ_SEC_NONE. The
+ * consumer's resolve_undefs sees these as defined globals and marks the
+ * matching consumer-side undef as `imported`. The dylib's own undefs
+ * (its imports of other dylibs) are filtered: they don't satisfy any
+ * undef in the consumer. */
+
+ObjBuilder* read_macho_dso(Compiler* c, const char* name, const u8* data,
+                           size_t len, Sym* install_name_out) {
+  (void)name;
+  if (install_name_out) *install_name_out = 0;
+  if (len < MACHO_HDR64_SIZE)
+    compiler_panic(c, no_loc(), "read_macho_dso: input shorter than header");
+
+  u32 magic = rd_u32_le(data + 0);
+  if (magic != MH_MAGIC_64)
+    compiler_panic(c, no_loc(), "read_macho_dso: bad magic 0x%x", magic);
+
+  u32 cputype = rd_u32_le(data + 4);
+  u32 filetype = rd_u32_le(data + 12);
+  u32 ncmds = rd_u32_le(data + 16);
+  u32 sizeofcmds = rd_u32_le(data + 20);
+
+  if (cputype != CPU_TYPE_ARM64)
+    compiler_panic(c, no_loc(),
+                   "read_macho_dso: unsupported cputype 0x%x (arm64 only)",
+                   cputype);
+  if (filetype != MH_DYLIB && filetype != MH_BUNDLE)
+    compiler_panic(c, no_loc(),
+                   "read_macho_dso: not MH_DYLIB/MH_BUNDLE (filetype=%u)",
+                   filetype);
+  if ((u64)MACHO_HDR64_SIZE + sizeofcmds > len)
+    compiler_panic(c, no_loc(), "read_macho_dso: load commands exceed file");
+
+  u32 symoff = 0, nsyms = 0, stroff = 0, strsize = 0;
+  Sym install_name = 0;
+
+  u64 pos = MACHO_HDR64_SIZE;
+  u64 end = pos + sizeofcmds;
+  for (u32 ci = 0; ci < ncmds && pos + 8 <= end; ++ci) {
+    u32 cmd = rd_u32_le(data + pos);
+    u32 cmdsize = rd_u32_le(data + pos + 4);
+    if (cmdsize < 8 || pos + cmdsize > end)
+      compiler_panic(c, no_loc(), "read_macho_dso: malformed load command");
+    if (cmd == LC_ID_DYLIB) {
+      /* dylib_command: cmd, cmdsize, name(lc_str: 4-byte offset within
+       * the cmd), timestamp, current_version, compat_version. */
+      if (cmdsize < 24) goto next;
+      u32 nm_off = rd_u32_le(data + pos + 8);
+      if (nm_off >= cmdsize) goto next;
+      const char* p = (const char*)(data + pos + nm_off);
+      u32 nlen = 0;
+      while (nlen < cmdsize - nm_off && p[nlen]) ++nlen;
+      if (nlen) install_name = pool_intern(c->global, p, nlen);
+    } else if (cmd == LC_SYMTAB) {
+      if (cmdsize < 24) goto next; /* fields below span bytes 8..23 */
+      symoff = rd_u32_le(data + pos + 8);
+      nsyms = rd_u32_le(data + pos + 12);
+      stroff = rd_u32_le(data + pos + 16);
+      strsize = rd_u32_le(data + pos + 20);
+    }
+  next:
+    pos += cmdsize;
+  }
+  if (install_name_out) *install_name_out = install_name;
+
+  if (stroff + (u64)strsize > len)
+    compiler_panic(c, no_loc(), "read_macho_dso: string table out of range");
+  if (symoff + (u64)nsyms * MACHO_NLIST64_SIZE > len)
+    compiler_panic(c, no_loc(), "read_macho_dso: symbol table out of range");
+
+  ObjBuilder* ob = obj_new(c);
+  if (!ob) compiler_panic(c, no_loc(), "read_macho_dso: obj_new failed");
+
+  const u8* strtab = data + stroff;
+  const u8* sbase = data + symoff;
+  for (u32 i = 0; i < nsyms; ++i) {
+    const u8* p = sbase + (u64)i * MACHO_NLIST64_SIZE;
+    u32 strx = rd_u32_le(p + 0);
+    u8 n_type = p[4];
+    u16 n_desc = rd_u16_le(p + 6);
+
+    u8 type_field = (u8)(n_type & N_TYPE);
+    u8 ext = (u8)(n_type & N_EXT);
+    /* Skip non-external (locals) and undef refs (the dylib's own imports). */
+    if (!ext) continue;
+    if (type_field == N_UNDF) continue;
+    /* Skip N_STAB (debug) entries. N_INDR / N_PBUD are NOT filtered. */
+    if (n_type & N_STAB) continue;
+
+    if (strx >= strsize) continue;
+    const char* nm = (const char*)(strtab + strx);
+    u32 nlen = 0;
+    while (strx + nlen < strsize && nm[nlen]) ++nlen;
+    if (!nlen) continue;
+    Sym sn = pool_intern(c->global, nm, nlen);
+
+    SymBind bind = (n_desc & (N_WEAK_DEF | N_WEAK_REF)) ? SB_WEAK : SB_GLOBAL;
+    SymKind kind = SK_NOTYPE;
+    /* Mach-O dylib nlist doesn't carry STT_FUNC / STT_OBJECT cleanly —
+     * default to NOTYPE. The consuming linker uses dso_export_is_func
+     * to peek at this for ELF; for Mach-O the `imported` decision flows
+     * through synthetic __got / __stubs regardless of kind. */
+    obj_symbol_ex(ob, sn, bind, SV_DEFAULT, kind, OBJ_SEC_NONE, 0, 0, 0);
+  }
+
+  obj_finalize(ob);
+  return ob;
+}
diff --git a/src/obj/obj.h b/src/obj/obj.h
@@ -151,6 +151,7 @@ typedef enum RelocKind {
* R_PC entries; the few here are the x86_64-only encodings (8-bit
* displacements, GOT/PLT, dynamic linker-only entries). */
R_X64_PC8,
+ R_X64_32S,
R_X64_PLT32,
R_X64_GOTPCREL,
R_X64_GOTPCRELX,
@@ -406,6 +407,26 @@ ObjBuilder* read_elf_dso(Compiler*, const char* name, const u8* data,
size_t len, Sym* soname_out);
ObjBuilder* read_coff(Compiler*, const char* name, const u8* data, size_t len);
ObjBuilder* read_macho(Compiler*, const char* name, const u8* data, size_t len);
+/* Mach-O MH_DYLIB reader. Produces an ObjBuilder containing only the
+ * dylib's exported symbols (as defined OBJ_SEC_NONE entries — the
+ * peer of read_elf_dso). LC_ID_DYLIB's install-name is interned and
+ * returned via *install_name_out (the Mach-O analogue of DT_SONAME).
+ *
+ * arm64-only for v1; other cputypes panic. */
+ObjBuilder* read_macho_dso(Compiler*, const char* name, const u8* data,
+ size_t len, Sym* install_name_out);
+/* Apple `.tbd` (text-based stub) reader. Accepts the YAML-shaped TAPI
+ * format produced by Apple's SDKs (see /usr/lib/lib*.tbd in
+ * `xcrun --show-sdk-path`). Extracts the install-name from the first
+ * `install-name:` line and, via a token scan of the whole file, every
+ * `_`-prefixed identifier as an exported symbol; no per-`targets:`
+ * filtering is done. Symbols are emitted into the ObjBuilder verbatim
+ * (they already include the leading `_` Apple uses for C symbols), so
+ * resolve_undefs matches them against the Mach-O on-disk symbol names.
+ *
+ * Compiler.target.arch must be arm64 or x86_64; anything else panics. */
+ObjBuilder* read_tbd(Compiler*, const char* name, const u8* data, size_t len,
+ Sym* install_name_out);
ObjBuilder* read_wasm(Compiler*, const char* name, const u8* data, size_t len);
#endif
diff --git a/src/obj/tbd_read.c b/src/obj/tbd_read.c
@@ -0,0 +1,150 @@
+/* Apple `.tbd` (text-based stub) reader.
+ *
+ * `.tbd` files describe a dylib's ABI surface in a YAML-shaped TAPI
+ * format — Apple ships them in the macOS SDK as a substitute for the
+ * actual .dylib bytes. cfree's linker treats them as a peer of
+ * read_macho_dso: extract install-name + the exported symbol set, and
+ * surface that as an ObjBuilder full of defined OBJ_SEC_NONE entries.
+ *
+ * The TAPI grammar is intricate (per-target `exports:` blocks, weak
+ * symbols, Obj-C metadata, re-exports, ...). Rather than re-implement
+ * a YAML parser, this reader takes the conservative approach: it
+ * extracts the FIRST document's `install-name:` (the umbrella the
+ * consumer records in LC_LOAD_DYLIB) and then **scans the entire file
+ * for symbol-looking tokens** — sequences starting with `_` followed by
+ * identifier chars. The result is the union of every C, Obj-C class,
+ * weak, and re-exported symbol declared anywhere in the file.
+ *
+ * Why the union is safe:
+ * - cfree's linker only consults the DSO's exported set to satisfy
+ * undefs. Including a symbol the consumer never references is
+ * harmless — the symbol simply never appears in our output's
+ * LC_LOAD_DYLIB chain.
+ * - dyld at runtime walks libSystem's full re-export graph to bind
+ * each name; our static-link decision (which dylib provides it)
+ * reduces to "the umbrella" anyway. We only need to convince the
+ * static linker that the name is bindable, then write
+ * LC_LOAD_DYLIB against the umbrella's install-name.
+ *
+ * The scanner does not special-case the `install-name:` line: every
+ * byte of the file, that line's path included, goes through the same
+ * token scan as the symbol lists.
+ *
+ * Identifier alphabet: A-Z, a-z, 0-9, `_`, `$`, `.`. This matches
+ * Apple's C / Obj-C symbol mangling (e.g. `'_OBJC_CLASS_$_NSString'`,
+ * `'_pause$NOCANCEL'`). Tokens may be surrounded by single or double
+ * quotes — the scanner doesn't see those, since they aren't in the
+ * identifier alphabet, so a token like `'_pause$NOCANCEL'` matches as
+ * just `_pause$NOCANCEL`. */
+
+#include <string.h>
+
+#include "core/heap.h"
+#include "core/pool.h"
+#include "obj/obj.h"
+
+static SrcLoc no_loc(void) {
+  /* All-zero SrcLoc, used as the location of this file's panics. */
+  return (SrcLoc){0, 0, 0};
+}
+
+static int is_id_start(u8 ch) { return '_' == ch; } /* tokens open on `_` */
+static int is_id_cont(u8 ch) { /* alphabet: A-Z a-z 0-9 `_` `$` `.` */
+  if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')) return 1;
+  if (ch >= '0' && ch <= '9') return 1;
+  return ch == '_' || ch == '$' || ch == '.';
+}
+
+/* Returns the interned value of the first line that begins with
+ * "install-name:" — blanks trimmed, one enclosing quote pair removed —
+ * or 0 when no such line exists or its value is empty. */
+static Sym extract_install_name(Compiler* c, const u8* data, size_t len) {
+  static const char KEY[] = "install-name:";
+  const size_t klen = sizeof(KEY) - 1u;
+  size_t at = 0;
+  /* Locate KEY at offset 0 or directly after a '\n'. */
+  for (;; ++at) {
+    if (at + klen > len) return 0; /* ran out of room for KEY */
+    int line_start = (at == 0) || (data[at - 1] == '\n');
+    if (line_start && memcmp(data + at, KEY, klen) == 0) break;
+  }
+  /* Value: past KEY and the blanks after it, up to the first CR or LF. */
+  size_t lo = at + klen;
+  while (lo < len && (data[lo] == ' ' || data[lo] == '\t')) ++lo;
+  size_t hi = lo;
+  while (hi < len && data[hi] != '\n' && data[hi] != '\r') ++hi;
+  /* Trim trailing blanks. */
+  while (hi > lo) {
+    u8 tail = data[hi - 1];
+    if (tail != ' ' && tail != '\t' && tail != '\r') break;
+    --hi;
+  }
+  /* Remove one matching pair of enclosing quotes (single or double). */
+  if (hi > lo + 1u) {
+    u8 q = data[lo];
+    if ((q == '\'' || q == '"') && data[hi - 1] == q) {
+      ++lo;
+      --hi;
+    }
+  }
+  if (hi <= lo) return 0; /* empty value */
+  return pool_intern(c->global, (const char*)(data + lo), (u32)(hi - lo));
+}
+
+ObjBuilder* read_tbd(Compiler* c, const char* name, const u8* data, size_t len,
+                     Sym* install_name_out) {
+  (void)name;
+  if (install_name_out) *install_name_out = 0;
+  if (!data || !len)
+    compiler_panic(c, no_loc(), "read_tbd: empty input");
+
+  /* Validate magic: a tbd starts with `--- !tapi-tbd` (or any `---`). */
+  if (len < 4 || data[0] != '-' || data[1] != '-' || data[2] != '-')
+    compiler_panic(c, no_loc(), "read_tbd: not a tbd file (missing '---')");
+
+  /* Reject obviously-wrong target arches up front so we don't stream a
+   * bunch of irrelevant symbols in. */
+  switch (c->target.arch) {
+    case CFREE_ARCH_ARM_64:
+    case CFREE_ARCH_X86_64:
+      break;
+    default:
+      compiler_panic(c, no_loc(),
+                     "read_tbd: unsupported target arch %u for tbd lookup",
+                     (u32)c->target.arch);
+  }
+
+  ObjBuilder* ob = obj_new(c);
+  if (!ob) compiler_panic(c, no_loc(), "read_tbd: obj_new failed");
+
+  if (install_name_out) *install_name_out = extract_install_name(c, data, len);
+
+  /* Token scanner: walk the file and emit every identifier run that
+   * begins with `_` as a defined external ObjSymbol. Tracking
+   * already-seen names via a linear dedup list would be quadratic on a
+   * multi-MB tbd; instead we rely on the pool's intern de-dup
+   * downstream — duplicate ObjSymbol names are tolerated by the
+   * linker's hash, which resolves a second insert to the existing entry. */
+  size_t i = 0;
+  while (i < len) {
+    /* Skip bytes outside the identifier alphabet. */
+    while (i < len && !is_id_cont(data[i])) ++i;
+    if (i >= len) break;
+    size_t start = i;
+    while (i < len && is_id_cont(data[i])) ++i;
+    size_t tlen = i - start;
+    /* Only a maximal identifier run that BEGINS with `_` is a symbol.
+     * Runs that merely contain one (`x86_64`, `libsystem_c.dylib`) are
+     * YAML values / paths; starting a token at their interior `_`
+     * would fabricate names such as `_64` or `_c.dylib`. */
+    if (!is_id_start(data[start])) continue;
+    /* A lone `_` is never an exported symbol. */
+    if (tlen < 2u) continue;
+    Sym sn = pool_intern(c->global, (const char*)(data + start), (u32)tlen);
+    obj_symbol_ex(ob, sn, SB_GLOBAL, SV_DEFAULT, SK_NOTYPE, OBJ_SEC_NONE, 0,
+                  0, 0);
+  }
+
+  obj_finalize(ob);
+  return ob;
+}