commit 118e679911158143ee59dfa081e68731e362b1e9
parent 1bd5bf26465edcb99077f453fc6fd9e50b067ebe
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sun, 10 May 2026 16:07:38 -0700
link/macho: test-link Path E green on aa64-macho — 103/103
Two fixes close out the failures tracked in doc/MACHO.md §1 + §2:
- layout_commons synthesizes a NOBITS .bss.common LinkSection
wrapping every common symbol, so shift_sections has a section_id
to rebase against on Mach-O. emit_segment_bytes skips synthetic
sections (input_id == NONE). ELF behavior unchanged.
- plan_layout stamps S_MOD_INIT_FUNC_POINTERS (0x9) /
S_MOD_TERM_FUNC_POINTERS (0xa) on __mod_init_func / __mod_term_func
sections so dyld actually invokes them.
Doc rewritten to mark §1 + §2 resolved and add §3 with TODOs for
the 8 remaining Path J failures (cross-TU data reloc value-vs-address
mix-up, ADRP ±4 GiB range, IFUNC under Mach-O JIT, extern resolver).
Diffstat:
3 files changed, 177 insertions(+), 21 deletions(-)
diff --git a/doc/MACHO.md b/doc/MACHO.md
@@ -5,11 +5,14 @@ bringing `test-link` Path E green on `aa64-macho`. Items listed here are the
known gaps in `link_emit_macho` (and its read-side / layout dependencies) that
need work before the suite is fully green.
-Current state (2026-05-10): `make test-link CFREE_TEST_OBJ=macho` is
-**101 pass / 2 fail / 0 skip**. Both failures surface as runtime
-SIGSEGV after a successful link, and each traces back to one of the
-issues below. `33_ifunc_in_init/E` is `e_targets`-restricted to ELF
-tuples (§3) since IFUNC has no Mach-O analogue.
+Current state (2026-05-10): Path E on `aa64-macho` is **103/103 pass**
+after the §1 and §2 fixes below landed. `33_ifunc_in_init/E` remains
+`e_targets`-restricted to ELF tuples (§4) since IFUNC has no Mach-O
+analogue.
+
+Path J on `aa64-macho` has a separate set of 8 pre-existing
+failures (11/14/16/17/28/32/33/34); see §3 for the per-case
+breakdown.
ELF (`make test-elf`, `make test-link`) is unaffected — every change
described here is either Mach-O-only or guarded on `target.obj ==
@@ -17,7 +20,7 @@ CFREE_OBJ_MACHO`.
---
-## 1. Common symbols (case `17_common_coalesce`)
+## 1. Common symbols (case `17_common_coalesce`) — RESOLVED
**Symptom.** clang on Mach-O emits `int shared_val;` (tentative def) as
`N_UNDF | N_EXT` with `n_value = size`. cfree's `read_macho` translates
@@ -49,7 +52,18 @@ shows the LDR pair targeting GOT slot 1 (`0x100004008`), which
contains the literal `0x4000` rebase target — that's the stale
common-symbol vaddr.
-## 2. Init / fini ctors (case `23_init_order`)
+**Resolution.** `layout_commons` (option 1 above) now appends a
+synthetic `.bss.common` NOBITS LinkSection that wraps every common
+symbol, with each common's `section_id` pointing at it and `value`
+set to the per-symbol offset. `link_symbols_to_sections` then
+recomputes `s->vaddr` from `section.vaddr + value` after Mach-O's
+`shift_sections` rebases the synthetic section into `__DATA,__bss`.
+`emit_segment_bytes` skips synthetic sections (input_id == NONE).
+ELF behavior is unchanged — the segment's `mem_size` extension and
+the symbols' final vaddrs are the same as before, only the
+intervening representation has a backing section_id.
+
+## 2. Init / fini ctors (case `23_init_order`) — RESOLVED
**Symptom.** With `-ffreestanding -O1 -fno-inline`, clang on Mach-O
*does not emit* `__mod_init_func` for `__attribute__((constructor))`
@@ -68,6 +82,93 @@ the iteration. Bridging the two:
Needs the clang-emit issue solved first,
otherwise the input objects don't carry the ctor pointers.
+**Resolution.** Two-part fix:
+
+1. `link_macho.c::plan_layout` now stamps the section-type byte to
+ `S_MOD_INIT_FUNC_POINTERS` (0x9) on every `__mod_init_func` MSec
+ (and `S_MOD_TERM_FUNC_POINTERS` 0xa on `__mod_term_func`). Without
+ the right type byte, dyld walks past the section silently — the
+ section was structurally present in the output, but its entries
+ never ran. Pass-through of the clang-emitted `__mod_init_func`
+ from input objects is sufficient; no synthesis from `__init_array`
+ is needed since Mach-O inputs carry `__mod_init_func` directly.
+2. `test/link/harness/start.c` short-circuits the `__init_array` /
+ `__fini_array` walks under `__APPLE__`. Boundary symbols on
+ Mach-O land in the `__got` region (no real init-array section);
+ dyld already invokes `__mod_init_func` entries before `_start`,
+ so the harness loop would otherwise fault on the synthesized
+ boundaries.
+
+The clang-emit observation is real but turned out not to block the
+test: at `-O1` clang pre-evaluates the constructor's effect into the
+initial values of static data (e.g. `g_pos = 1`, `g_seq[0] = 1` in
+this case), so the test still observes the expected end state once
+the remaining ctor (the cross-TU one whose effect can't be
+pre-evaluated) actually runs.
+
+---
+
+## 3. Path J on `aa64-macho` — TODO
+
+`make test-link CFREE_TEST_OBJ=macho` Path J currently fails on 8
+cases: most SIGSEGV / SIGBUS at runtime (the link succeeds, the
+JIT-mapped code faults); `16_weak_undef/J` is a range error (§3.2).
+Path E covers the same surface and is green, so the divergence is
+in the JIT-only code paths (`link_jit.c`, in-process mmap /
+reloc-apply) rather than the shared resolver / layout passes. Each
+group below is reachable via `CFREE_TEST_OBJ=macho build/test/jit-runner <objs>`.
+
+- **§3.1 Cross-TU data via ADRP/ADD/LDR — value vs. address mix-up.**
+ Cases: `11_data_cross_tu/J`, `14_weak_present/J`,
+ `17_common_coalesce/J`, `34_ifunc_addr_taken/J`. Witness
+ (`11_data_cross_tu`): `test_main` is JIT-mapped, and the load of
+ `g_val` faults at address `0xdeadbeefcafebabe` — the literal value
+ of `g_val`. Reloc-apply is patching the ADRP/ADD pair with the
+ *value* of the cross-TU symbol instead of its *address*. Likely
+ in the Mach-O JIT path's `ARM64_RELOC_GOT_LOAD_PAGE21` /
+ `PAGEOFF12` apply, or in how internal-GOT slots are seeded for the
+ JIT (the exe path seeds them via chained-fixup REBASEs at dyld
+ load time — JIT has no dyld and must seed in-process). Start by
+ comparing the apply-time S/A/P inputs against the exe path for
+ `_g_val` and following where the cross-TU symbol's vaddr comes
+ from.
+
+- **§3.2 Weak-undef out of ±4 GiB range.** Case: `16_weak_undef/J`.
+ jit-runner errors with `link: ADR_PREL_PG_HI21 out of range (need
+ ±4GiB)`. The JIT maps code at a host VA that's more than 4 GiB
+ away from the slot the weak-undef ADRP targets (a NULL sentinel,
+ or a synthetic landing in some other segment). ELF JIT side-steps
+ this by colocating .got with .text; the Mach-O JIT needs the same
+ guarantee — either place the weak-undef sentinel inside the same
+ 4 GiB window as the patched code, or rewrite the ADRP into an
+ absolute MOV/LDR sequence when out-of-range.
+
+- **§3.3 IFUNC under Mach-O JIT.** Cases: `32_ifunc/J`,
+ `33_ifunc_in_init/J` (also `34_ifunc_addr_taken/J`, which overlaps
+ §3.1). IFUNC is ELF-only at the format level, so Mach-O has no
+  `__mod_init_func` equivalent for the iplt synthesis. Path E green
+ here is coincidental — `e_targets`-excluded on `aa64-macho` (§4).
+ Decide whether `j_targets` should likewise exclude these or
+ whether the JIT path should emulate the ELF iplt scheme inside
+ the JIT mapping (call resolver in-process and patch igot.plt
+ slots, mirroring `cfree_link_jit`'s existing IFUNC handling for
+ ELF inputs).
+
+- **§3.4 Extern resolver mismatch.** Case: `28_extern_resolver/J`.
+ SEGVs after link — the resolver returned a host pointer for
+ `external_value`, and JIT reloc-apply tried to encode it as a
+ PC-relative ADRP/ADD pair. Same underlying issue as §3.2 (host
+ pointer far from the JIT mapping). Either route resolver-supplied
+ symbols through an internal-GOT slot inside the JIT mapping
+ (already the exe shape) or extend the JIT reloc-apply to handle
+ >±4 GiB targets via an indirect load.
+
+These are all reachable with the doc's `make test-link
+CFREE_TEST_OBJ=macho` invocation; the test reporter currently prints
+`Segmentation fault: 11` lines from the harness wrapper, with no
+J-specific markers. Cleaning up the J path is the natural next
+slice for finishing aa64-macho.
+
---
## 4. Other gaps surfaced en route
@@ -131,14 +232,7 @@ guardrail:
make test-elf # 37/37 — unaffected
make test-link # 119/119 — ELF baseline
- make test-link CFREE_TEST_OBJ=macho # 101 pass / 2 fail (this doc)
-
-Both remaining Mach-O failures involve C↔Mach-O ABI mismatches at the
-runtime-conventions layer, not bugs in cfree's writer or applier:
-
- 17_common_coalesce/E — §1, common symbols need a synthetic LinkSection
- 23_init_order/E — §2, __mod_init_func ↔ __init_array bridging
-
-`33_ifunc_in_init/E` is `e_targets`-excluded on `aa64-macho` (§3).
+ make test-link CFREE_TEST_OBJ=macho # Path E: 103/103
+ # Path J: 8 fail (see §3)
-When §1 and §2 land, the suite will be at 103/103 on `aa64-macho`.
+`33_ifunc_in_init/E` is `e_targets`-excluded on `aa64-macho` (§4).
diff --git a/src/link/link_layout.c b/src/link/link_layout.c
@@ -1424,20 +1424,72 @@ static void layout_commons(Linker* l, LinkImage* img) {
img->nsegments++;
}
- /* Allocate BSS space for each COMMON symbol after file_size. */
+ /* Synthesize a single NOBITS LinkSection that wraps every COMMON
+ * symbol. Without a backing section, COMMON symbols carry a vaddr
+ * but no section_id — Mach-O's shift_sections rebases by
+ * section_id and would leave their vaddrs at pre-shift coordinates,
+ * with the __got slot pointing into the wrong segment. Giving each
+ * common a section_id lets link_symbols_to_sections recompute vaddr
+ * from section.vaddr + value, and plan_layout / shift_sections
+ * picks it up like any other writable zerofill section. */
{
- u64 bss_cursor = rw_seg->vaddr + rw_seg->mem_size;
+ Heap* h = img->heap;
+ u64 bss_start = rw_seg->vaddr + rw_seg->mem_size;
+ u64 bss_cursor = bss_start;
+ u32 max_align = 1u;
+ LinkSection* commsec;
+ LinkSectionId comm_lsid;
+
+ /* First sweep computes layout + max alignment. */
for (i = 0; i < LinkSyms_count(&img->syms); ++i) {
LinkSymbol* s = LinkSyms_at(&img->syms, i);
u32 align;
if (s->kind != SK_COMMON || !s->defined) continue;
align = s->common_align ? s->common_align : 1u;
+ if (align > max_align) max_align = align;
bss_cursor = ALIGN_UP(bss_cursor, (u64)(align));
- s->vaddr = bss_cursor;
+ s->value = bss_cursor - bss_start;
bss_cursor += s->size ? s->size : 1u;
+ }
+
+ /* Append the synthetic NOBITS LinkSection. */
+ {
+ u32 new_nsec = img->nsections + 1u;
+ LinkSection* nsec = (LinkSection*)h->realloc(
+ h, img->sections, sizeof(*img->sections) * img->nsections,
+ sizeof(*img->sections) * new_nsec, _Alignof(LinkSection));
+ if (!nsec)
+ compiler_panic(img->c, no_loc(), "link: oom on common section");
+ img->sections = nsec;
+ }
+ commsec = &img->sections[img->nsections];
+ memset(commsec, 0, sizeof(*commsec));
+ comm_lsid = (LinkSectionId)(img->nsections + 1u);
+ commsec->id = comm_lsid;
+ commsec->input_id = LINK_INPUT_NONE;
+ commsec->obj_section_id = OBJ_SEC_NONE;
+ commsec->segment_id = rw_seg->id;
+ commsec->input_offset = 0;
+ commsec->file_offset = bss_start;
+ commsec->vaddr = bss_start;
+ commsec->size = bss_cursor - bss_start;
+ commsec->flags = SF_ALLOC | SF_WRITE;
+ commsec->align = max_align;
+ commsec->name = pool_intern_cstr(img->c->global, ".bss.common");
+ commsec->sem = SSEM_NOBITS;
+ img->nsections++;
+
+ /* Second sweep wires each common to the synthetic section. */
+ for (i = 0; i < LinkSyms_count(&img->syms); ++i) {
+ LinkSymbol* s = LinkSyms_at(&img->syms, i);
+ if (s->kind != SK_COMMON || !s->defined) continue;
+ s->section_id = comm_lsid;
+ s->vaddr = bss_start + s->value;
s->kind = SK_OBJ; /* no longer COMMON once placed */
}
+
rw_seg->mem_size = bss_cursor - rw_seg->vaddr;
+ rw_seg->nsections++;
}
}
@@ -1447,7 +1499,9 @@ static void emit_segment_bytes(Linker* l, LinkImage* img) {
u32 j;
for (j = 0; j < img->nsections; ++j) {
LinkSection* ls = &img->sections[j];
- ObjBuilder* ob = LinkInputs_at(&l->inputs, ls->input_id - 1)->obj;
+ ObjBuilder* ob;
+ if (ls->input_id == LINK_INPUT_NONE) continue; /* synthetic (e.g. .bss.common) */
+ ob = LinkInputs_at(&l->inputs, ls->input_id - 1)->obj;
const Section* s = obj_section_get(ob, ls->obj_section_id);
LinkSegment* seg = &img->segments[ls->segment_id - 1];
u8* dst;
diff --git a/src/link/link_macho.c b/src/link/link_macho.c
@@ -674,6 +674,14 @@ static void plan_layout(MCtx* x) {
m->segidx = 3;
m->is_zerofill = sec_is_zerofill(ls) ? 1 : 0;
m->flags = m->is_zerofill ? 0x00000001u /*S_ZEROFILL*/ : 0;
+ /* dyld dispatches on the section type byte (low 8 bits of flags).
+ * __mod_init_func / __mod_term_func sections must carry the
+ * S_MOD_INIT_FUNC_POINTERS / S_MOD_TERM_FUNC_POINTERS type or dyld
+ * skips them entirely — leaving constructors unrun at startup. */
+ if (strcmp(m->sectname, "__mod_init_func") == 0)
+ m->flags = 0x00000009u /*S_MOD_INIT_FUNC_POINTERS*/;
+ else if (strcmp(m->sectname, "__mod_term_func") == 0)
+ m->flags = 0x0000000au /*S_MOD_TERM_FUNC_POINTERS*/;
}
x->segs[3].nsects = x->nsecs - first_d;
x->segs[3].first_sec = first_d;