commit 068c1134505dfa9b6fcfa4de52f918f9a973e797
parent 28c94cf5c588425e4ebd5a30bfc8a18e61794a8c
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Tue, 19 May 2026 14:56:56 -0700
Fix Mach-O OutSec drift and clang non-extern reloc support
link_macho: in-memory ObjBuilders use ELF-style section names
(`.text`, `.rodata`) and `.o` inputs use Mach-O comma-form
(`__TEXT,__text`); both map to the same Mach-O `(segname, sectname)`
in pick_macho_names but link_layout groups them by raw name, so
same-mapped MSecs got interleaved with sections of a different mapped
name. Phase B's adjacency-based OutSec coalescing then split the run,
mismatching Phase A's distinct-name count. Sort MSecs by
`(segname, sectname)` within each segment before vaddr placement.
macho_read: clang emits section-relative relocations (`r_extern == 0`)
in `__LD,__compact_unwind` (and DWARF/EH sections); cfree's IR only
modelled symbol-relative relocs. Lazily synthesize one
`.Lcfree.macho_secstart.<idx>` local symbol per referenced section and
re-express the reloc as `target = sec_start_sym,
addend = inplace_value - section.addr_in_obj`. The linker then resolves
it to `target.vaddr + addend`, matching the original referent.
Verified: all 7 test/libc/cases/*.c on the darwin cell now pass; a
clang-emitted Mach-O .o links through cfree ld and runs.
Diffstat:
3 files changed, 111 insertions(+), 9 deletions(-)
diff --git a/doc/BUGS.md b/doc/BUGS.md
@@ -12,3 +12,24 @@ Format as:
- [x] function declarator with an inline function-pointer return type (no typedef): `6_7_6_20_func_returning_funcptr_no_typedef`
- [x] static initializer accepts unary `-` on a floating constant: `6_7_9_30_static_init_neg_float`
- [x] `#warning` preprocessing directive (non-fatal, parsing continues): `6_10_warning_directive`
+
+Known bugs caught by other harnesses
+
+- [x] Mach-O `OutSec count drift` when `cfree cc` compiles a source and links it together with a precompiled `.o` input in one step: every `test/libc/cases/*.c` on the `darwin` cell of `test/libc/run.sh` (was 7/7 red, now 7/7 green). Root cause: in-memory ObjBuilders use ELF-style section names (`.text`, `.rodata`) and `.o` inputs use Mach-O comma-form (`__TEXT,__text`); both map to the same Mach-O `(segname, sectname)` in `pick_macho_names` but `link_layout` groups them by raw name, so same-mapped MSecs got interleaved with sections of a different mapped name. Phase B's adjacency-based OutSec coalescing then split the run, mismatching Phase A's distinct-name count. Fixed in `src/link/link_macho.c` by sorting MSecs by `(segname, sectname)` within each segment before vaddr placement.
+
+- [x] clang-emitted Mach-O `.o` rejected by `cfree ld` reader (`read_macho: non-extern reloc not supported`). Root cause: clang emits section-relative relocations (`r_extern == 0`) in `__LD,__compact_unwind` (and DWARF/EH sections); cfree's IR only modelled symbol-relative relocs. Fixed in `src/obj/macho_read.c` by lazily synthesizing one `.Lcfree.macho_secstart.<idx>` local symbol per referenced section and re-expressing the reloc as `target = sec_start_sym, addend = inplace_value - section.addr_in_obj`. The linker then resolves it to `target.vaddr + addend`, matching the original referent. Verified by linking `xcrun clang -c hello.c -o hello.o` output through `cfree ld -lSystem` and running.
+
+- [ ] segfault compiling `lua-5.4.7/src/lparser.c` (no diagnostic, no minimal reduction yet): no red test yet
+
+ ```sh
+ # Source lives at tmp/projects/lua/src/lparser.c (lua-5.4.7).
+ # First clear B3 (parenthesized declarator names in lua.h / lauxlib.h)
+ # so the parser gets far enough to crash:
+ sed -i.bak -E 's/^([A-Z_]+API[[:space:]][^*]+\*?)[[:space:]]*\(([a-zA-Z0-9_]+)\)[[:space:]]*\(/\1 \2(/' \
+ tmp/projects/lua/src/lua.h tmp/projects/lua/src/lauxlib.h
+ build/cfree cc -target aarch64-darwin \
+ -isystem rt/include/libc -isystem rt/include -DLUA_USE_POSIX \
+ -c tmp/projects/lua/src/lparser.c -o tmp/lparser.o
+ # → Segmentation fault: 11 (exit 139, no diagnostic)
+ # Needs reduction before it becomes a parse-case-sized repro.
+ ```
diff --git a/src/link/link_macho.c b/src/link/link_macho.c
@@ -830,6 +830,35 @@ static void plan_layout(MCtx* x) {
x->segs[3].nsects = x->nsecs - first_d;
x->segs[3].first_sec = first_d;
+ /* Group MSecs by (segname, sectname) within each segment so vaddr
+ * placement keeps same-named runs contiguous. Otherwise Phase B's
+ * adjacency-based coalescing splits a single Mach-O section into
+ * multiple OutSecs (e.g. `.text` from an in-memory ObjBuilder and
+ * `__TEXT,__text` from a Mach-O .o input both map to `__TEXT,__text`
+ * but arrive in separate link_layout groups, interleaved with other
+ * sections from each input). The insertion sort is stable, so input
+ * order within a name is preserved; that matters for the position of
+ * synthesized __stubs/__thread_ptrs relative to same-named peers. */
+ for (u32 i = 0; i < x->nsegs; ++i) {
+ MSeg* sg = &x->segs[i];
+ if (sg->nsects < 2) continue;
+ u32 base = sg->first_sec;
+ u32 n = sg->nsects;
+ for (u32 a = 1; a < n; ++a) {
+ MSec key = x->secs[base + a];
+ u32 j = a;
+ while (j > 0) {
+ MSec* prev = &x->secs[base + j - 1];
+ int cmp = strcmp(prev->segname, key.segname);
+ if (cmp == 0) cmp = strcmp(prev->sectname, key.sectname);
+ if (cmp <= 0) break;
+ x->secs[base + j] = x->secs[base + j - 1];
+ --j;
+ }
+ x->secs[base + j] = key;
+ }
+ }
+
/* Phase A: count OutSecs per segment (distinct sectnames) so we can
* size the load commands before placing vaddrs. Phase B builds the
* actual OutSec[] after placement, when vaddrs are final. */
diff --git a/src/obj/macho_read.c b/src/obj/macho_read.c
@@ -319,6 +319,12 @@ ObjBuilder* read_macho(Compiler* c, const char* name, const u8* data,
* Mach-O encodes addends out-of-band as a leading
* ARM64_RELOC_ADDEND followed by the real reloc; the
* reader collapses the pair on the way in. */
+ /* Lazily-populated section-start local symbols, for clang-emitted
+ * non-extern (section-relative) relocations. See the r_extern==0
+ * branch below for the encoding. */
+ ObjSymId* sec_start_sym =
+ arena_zarray(c->scratch, ObjSymId, nmsecs ? nmsecs : 1);
+ for (u32 i = 0; i < nmsecs; ++i) sec_start_sym[i] = OBJ_SYM_NONE;
for (u32 i = 0; i < nmsecs; ++i) {
MSecRec* m = &msecs[i];
if (!m->nreloc) continue;
@@ -417,24 +423,70 @@ ObjBuilder* read_macho(Compiler* c, const char* name, const u8* data,
}
ObjSymId target = OBJ_SYM_NONE;
+ i64 inplace_addend_override = 0;
+ int use_inplace_addend = 0;
if (r_extern) {
if (r_symbolnum < nsyms) target = sym_macho_to_obj[r_symbolnum];
} else {
- /* Section-relative reloc — cfree's IR doesn't model these
- * cleanly. Drop a panic so we notice if a real input drives
- * us here. */
- compiler_panic(c, no_loc(),
- "read_macho: non-extern reloc not supported "
- "(sec=%u offset=%u)",
- (u32)m->obj_sec, r_address);
+ /* Section-relative reloc — clang emits these for compact unwind,
+ * EH frame, and DWARF debug info. r_symbolnum is the 1-based
+ * section index; the in-place value at r_address is read as the
+ * referent's address in the .o (assumes a pointer-style reloc, not
+ * an instruction encoding — TODO confirm). Lazily synthesize one
+ * local symbol per target section, at its start, and re-express as
+ * target = sec_start_sym, addend = inplace - section.addr. */
+ if (r_symbolnum == 0 || r_symbolnum > nmsecs)
+ compiler_panic(c, no_loc(),
+ "read_macho: section-relative reloc references "
+ "invalid section index %u",
+ r_symbolnum);
+ u32 sec_idx = r_symbolnum - 1u;
+ MSecRec* tm = &msecs[sec_idx];
+ if (sec_start_sym[sec_idx] == OBJ_SYM_NONE) {
+ /* Build ".Lcfree.macho_secstart.<sec_idx>" without snprintf
+ * (the freestanding build doesn't pull in stdio). */
+ static const char prefix[] = ".Lcfree.macho_secstart.";
+ char nmbuf[sizeof(prefix) + 10];
+ u32 nlen = (u32)(sizeof(prefix) - 1);
+ memcpy(nmbuf, prefix, nlen);
+ char dec[10];
+ u32 dn = 0;
+ u32 v = sec_idx;
+ do {
+ dec[dn++] = (char)('0' + (v % 10u));
+ v /= 10u;
+ } while (v);
+ for (u32 k = 0; k < dn; ++k) nmbuf[nlen + k] = dec[dn - 1 - k];
+ nlen += dn;
+ Sym sn = pool_intern(c->global, nmbuf, nlen);
+ u16 sk = (tm->flags & S_ATTR_PURE_INSTRUCTIONS) ? SK_FUNC : SK_OBJ;
+ sec_start_sym[sec_idx] = obj_symbol(ob, sn, SB_LOCAL, (SymKind)sk,
+ tm->obj_sec, 0, 0);
+ }
+ target = sec_start_sym[sec_idx];
+ u32 rsz = 1u << r_length;
+ if ((u64)m->fileoff + r_address + rsz > len)
+ compiler_panic(c, no_loc(),
+ "read_macho: non-extern reloc r_address out of range");
+ u64 inplace;
+ const u8* pv = data + m->fileoff + r_address;
+ if (r_length == 3) inplace = rd_u64_le(pv);
+ else if (r_length == 2) inplace = (u64)rd_u32_le(pv);
+ else if (r_length == 1) inplace = (u64)rd_u16_le(pv);
+ else inplace = (u64)pv[0];
+ inplace_addend_override = (i64)inplace - (i64)tm->addr;
+ use_inplace_addend = 1;
}
- i64 addend = have_pending ? pending_addend : 0;
+ i64 addend = have_pending ? pending_addend
+ : (use_inplace_addend ? inplace_addend_override
+ : 0);
+ int has_explicit = have_pending || use_inplace_addend || addend != 0;
have_pending = 0;
pending_addend = 0;
obj_reloc_ex(ob, m->obj_sec, r_address, (RelocKind)kind, target,
- addend, addend ? 1 : 0, 0);
+ addend, has_explicit, 0);
if (r_type == ARM64_RELOC_SUBTRACTOR) {
pending_subtractor = 1;
pending_subtractor_offset = r_address;