kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 23338340287823ed9e87979bca0dd7a261a48b4b
parent 1c1cdd6474b2490108ebf52f932fe4d0405370c9
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Mon, 11 May 2026 16:10:19 -0700

link/macho: coalesce same-named sections into one section_64 per (segname,sectname)

plan_layout previously emitted one section_64 per input LinkSection,
producing sibling __TEXT,__text records that violate the Mach-O spec.
Add an OutSec[] layer alongside MSec[], built after vaddr placement by
sorting MSecs by (segidx, vaddr) and coalescing adjacent same-name
runs — mirrors the OutShdr pattern in link_elf.c. Reloc-apply still
walks MSec[]; only the on-disk view changes.

Diffstat:
M doc/MACHO.md | 17 +----------------
M src/link/link_macho.c | 200 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------
2 files changed, 165 insertions(+), 52 deletions(-)

diff --git a/doc/MACHO.md b/doc/MACHO.md @@ -18,8 +18,6 @@ What still doesn't work: `j_targets` excludelist. §1. 3. **`33_ifunc_in_init/E` on `aa64-macho`** — IFUNC has no Mach-O representation; permanent `e_targets` exclusion, not a fix target. -4. **Section coalescing in `link_macho.plan_layout`** — cosmetic, see - §2. ELF lanes are the regression guardrail; every Mach-O change must keep `make test-elf` and `make test-link` (ELF) green. @@ -65,20 +63,7 @@ state before the entry call. --- -## 2. Section coalescing in `plan_layout` - -`link_macho.plan_layout` emits one `__TEXT,__text` `MSec` per input -`__text` `LinkSection`. Mach-O wants one section per -`(segname, sectname)` within a segment. `llvm-objdump` rejects the -result ("bad section index"); the kernel and dyld don't care and -binaries still execute — that's why this hasn't bitten a test. - -Fix: merge equal-named MSecs in `plan_layout` before writing -`section_64` records. - ---- - -## 3. Cosmetic divergences worth flagging +## 2. Cosmetic divergences worth flagging These do not block tests but are documented so a future "why doesn't our output byte-match clang's?" doesn't re-debug them: diff --git a/src/link/link_macho.c b/src/link/link_macho.c @@ -227,10 +227,31 @@ typedef struct MSeg { u64 vmsize; u64 fileoff; u64 filesize; - u32 nsects; + u32 nsects; /* MSec count in segment — internal layout */ u32 first_sec; /* first index into MSec[] */ + u32 nouts; /* OutSec count in segment — what hits the file */ + u32 first_out; /* first index into OutSec[] */ } MSeg; +/* On-disk section view: one record per (segname, sectname) within a + * segment. Mach-O requires this — emitting one section_64 per input + * MSec yields sibling __TEXT,__text records that violate the spec. + * Built from MSec[] after vaddr placement; reloc-apply still uses + * MSec[] for byte-buffer addressing. 
*/ +typedef struct OutSec { + const char* segname; + const char* sectname; + u64 vaddr; + u64 file_offset; + u64 size; + u32 align; + u32 flags; + u32 reserved1; + u32 reserved2; + u8 segidx; + u8 is_zerofill; +} OutSec; + /* ---- main context ---- */ typedef struct MCtx { @@ -255,6 +276,8 @@ typedef struct MCtx { /* sections + segments */ MSec* secs; u32 nsecs; + OutSec* outs; + u32 nouts; MSeg segs[5]; /* PAGEZERO, TEXT, DATA_CONST, DATA, LINKEDIT */ u32 nsegs; @@ -720,7 +743,6 @@ static void plan_layout(MCtx* x) { } /* __stubs synthetic */ - u32 stub_sec_idx = (u32)-1; if (x->nimport_funcs) { x->stubs_size = x->nimport_funcs * MZ_STUB_SIZE; x->stubs_bytes = (u8*)h->alloc(h, x->stubs_size, 4); @@ -739,14 +761,12 @@ static void plan_layout(MCtx* x) { m->flags = 0x80000000u | 0x00000400u | 0x00000008u /*S_SYMBOL_STUBS*/; m->reserved1 = 0; /* fill in later: indirect-symtab base */ m->reserved2 = MZ_STUB_SIZE; - stub_sec_idx = x->nsecs - 1u; } x->segs[1].nsects = x->nsecs - first_text_sec; x->segs[1].first_sec = first_text_sec; /* __DATA_CONST: __got synth */ u32 first_dc = x->nsecs; - u32 got_sec_idx = (u32)-1; if (x->nimports) { x->got_size = x->nimports * MZ_GOT_SIZE; x->got_bytes = (u8*)h->alloc(h, x->got_size, 8); @@ -764,7 +784,6 @@ static void plan_layout(MCtx* x) { m->segidx = 2; m->flags = 0x00000006u /*S_NON_LAZY_SYMBOL_POINTERS*/; m->reserved1 = 0; /* indirect-symtab base */ - got_sec_idx = x->nsecs - 1u; } x->segs[2].nsects = x->nsecs - first_dc; x->segs[2].first_sec = first_dc; @@ -839,6 +858,27 @@ static void plan_layout(MCtx* x) { x->segs[3].nsects = x->nsecs - first_d; x->segs[3].first_sec = first_d; + /* Phase A: count OutSecs per segment (distinct sectnames) so we can + * size the load commands before placing vaddrs. Phase B builds the + * actual OutSec[] after placement, when vaddrs are final. 
*/ + for (u32 i = 0; i < x->nsegs; ++i) { + MSeg* sg = &x->segs[i]; + u32 cnt = 0; + for (u32 a = sg->first_sec; a < sg->first_sec + sg->nsects; ++a) { + int seen = 0; + for (u32 b = sg->first_sec; b < a; ++b) { + if (strcmp(x->secs[a].sectname, x->secs[b].sectname) == 0 && + strcmp(x->secs[a].segname, x->secs[b].segname) == 0) { + seen = 1; + break; + } + } + if (!seen) ++cnt; + } + sg->nouts = cnt; + sg->first_out = 0; /* assigned in Phase B */ + } + /* Compute load-command count + sizeofcmds, then back-fill section * offsets. Layout pass 2. */ u32 nseg_real = 0; @@ -854,7 +894,8 @@ static void plan_layout(MCtx* x) { } /* LINKEDIT always */ if (x->segs[i].nsects > 0) ++nseg_real; } - /* Each LC_SEGMENT_64 carries 72 + 80*nsects bytes. */ + /* Each LC_SEGMENT_64 carries 72 + 80*nouts bytes (one section_64 + * record per coalesced (segname,sectname), not per MSec). */ u32 sizeofcmds = 0; for (u32 i = 0; i < x->nsegs; ++i) { if (i == 0 || i == 4) { @@ -862,7 +903,7 @@ static void plan_layout(MCtx* x) { continue; } if (x->segs[i].nsects == 0) continue; - sizeofcmds += MACHO_SEGCMD64_SIZE + x->segs[i].nsects * MACHO_SECT64_SIZE; + sizeofcmds += MACHO_SEGCMD64_SIZE + x->segs[i].nouts * MACHO_SECT64_SIZE; } (void)nseg_real; /* LC_DYLD_CHAINED_FIXUPS / LC_DYLD_EXPORTS_TRIE */ @@ -997,8 +1038,90 @@ static void plan_layout(MCtx* x) { encode_stub(x->stubs_bytes + (mi->stub_idx - 1u) * MZ_STUB_SIZE, stub_v, got_v); } - (void)stub_sec_idx; - (void)got_sec_idx; + + /* Phase B: build OutSec[] now that all MSec vaddrs are final. Walk + * MSecs sorted by (segidx, vaddr) and coalesce adjacent same-name + * runs. Mirrors link_elf.c's OutShdr build at link_elf.c:879. */ + { + u32* order = + (u32*)h->alloc(h, sizeof(u32) * (x->nsecs + 1u), _Alignof(u32)); + if (!order && x->nsecs) + compiler_panic(x->c, no_loc(), "link_macho: oom on outsec sort"); + for (u32 i = 0; i < x->nsecs; ++i) order[i] = i; + /* Insertion sort — section count is small. 
*/ + for (u32 i = 1; i < x->nsecs; ++i) { + u32 cur = order[i]; + MSec* a = &x->secs[cur]; + u32 j = i; + while (j > 0) { + MSec* b = &x->secs[order[j - 1]]; + if ((b->segidx < a->segidx) || + (b->segidx == a->segidx && b->vaddr <= a->vaddr)) + break; + order[j] = order[j - 1]; + --j; + } + order[j] = cur; + } + u32 cap = x->nsecs + 1u; + x->outs = (OutSec*)h->alloc(h, sizeof(OutSec) * cap, _Alignof(OutSec)); + if (!x->outs) + compiler_panic(x->c, no_loc(), "link_macho: oom on OutSec"); + memset(x->outs, 0, sizeof(OutSec) * cap); + x->nouts = 0; + for (u32 i = 0; i < x->nsecs; ++i) { + MSec* m = &x->secs[order[i]]; + OutSec* tail = x->nouts ? &x->outs[x->nouts - 1] : NULL; + int merge = tail && tail->segidx == m->segidx && + strcmp(tail->sectname, m->sectname) == 0 && + strcmp(tail->segname, m->segname) == 0; + if (merge) { + if (tail->flags != m->flags || tail->is_zerofill != m->is_zerofill) + compiler_panic( + x->c, no_loc(), + "link_macho: coalesce mismatch on %s,%s (flags/zerofill)", + m->segname, m->sectname); + u64 end = m->vaddr + m->size; + u64 prev_end = tail->vaddr + tail->size; + if (end > prev_end) tail->size = end - tail->vaddr; + if (m->align > tail->align) tail->align = m->align; + } else { + OutSec* o = &x->outs[x->nouts++]; + o->segname = m->segname; + o->sectname = m->sectname; + o->vaddr = m->vaddr; + o->file_offset = m->file_offset; + o->size = m->size; + o->align = m->align; + o->flags = m->flags; + o->reserved1 = m->reserved1; + o->reserved2 = m->reserved2; + o->segidx = m->segidx; + o->is_zerofill = m->is_zerofill; + } + } + h->free(h, order, sizeof(u32) * (x->nsecs + 1u)); + /* Recompute per-segment OutSec span; Phase A's count was for + * sizeofcmds sizing — recompute it here as the source of truth and + * assert agreement. 
*/ + for (u32 i = 0; i < x->nsegs; ++i) { + x->segs[i].first_out = 0; + } + u32 prev_nouts[5]; + for (u32 i = 0; i < x->nsegs; ++i) prev_nouts[i] = x->segs[i].nouts; + for (u32 i = 0; i < x->nsegs; ++i) x->segs[i].nouts = 0; + for (u32 i = 0; i < x->nouts; ++i) { + u8 sx = x->outs[i].segidx; + if (x->segs[sx].nouts == 0) x->segs[sx].first_out = i; + ++x->segs[sx].nouts; + } + for (u32 i = 0; i < x->nsegs; ++i) { + if (prev_nouts[i] != x->segs[i].nouts) + compiler_panic(x->c, no_loc(), + "link_macho: OutSec count drift seg %u (%u vs %u)", + (u32)i, prev_nouts[i], x->segs[i].nouts); + } + } } /* ---- pass: shift LinkImage into final vaddrs/file_offsets ---- @@ -1764,15 +1887,18 @@ static void build_symtab(MCtx* x) { if (s->bind != SB_GLOBAL && s->bind != SB_WEAK) continue; if (s->name == 0) continue; if (s->kind == SK_ABS) continue; /* skip abs externs */ - /* Locate which MSec contains this vaddr to figure out n_sect. */ + /* Locate which OutSec contains this vaddr to figure out n_sect. + * n_sect is the 1-based index into the flat section_64 table the + * file actually contains (post-coalesce), matching what we emit + * in emit_load_command_segment. */ u8 n_sect = 0; - for (u32 k = 0; k < x->nsecs; ++k) { - MSec* m = &x->secs[k]; - if (s->vaddr >= m->vaddr && s->vaddr < m->vaddr + m->size) { + for (u32 k = 0; k < x->nouts; ++k) { + OutSec* o = &x->outs[k]; + if (s->vaddr >= o->vaddr && s->vaddr < o->vaddr + o->size) { n_sect = (u8)(k + 1u); break; } - if (s->vaddr == m->vaddr + m->size) { + if (s->vaddr == o->vaddr + o->size) { n_sect = (u8)(k + 1u); break; } @@ -1824,11 +1950,13 @@ static void build_symtab(MCtx* x) { * slot. Internal-GOT slots use INDIRECT_SYMBOL_LOCAL (0x80000000) * since they have no nlist entry. */ u32 indirect_start = 0; - /* Patch reserved1 of each synth section. 
*/ - for (u32 i = 0; i < x->nsecs; ++i) { - MSec* m = &x->secs[i]; - if (m->synth_data == x->stubs_bytes && m->synth_size) { - m->reserved1 = indirect_start; + /* Patch reserved1 of each synth OutSec. __stubs and __got are each + * singleton OutSecs (synth sections never coalesce with user input), + * so a sectname match identifies them unambiguously. */ + for (u32 i = 0; i < x->nouts; ++i) { + OutSec* o = &x->outs[i]; + if (strcmp(o->sectname, "__stubs") == 0 && o->size) { + o->reserved1 = indirect_start; for (u32 k = 0; k < x->nimports; ++k) { MachImp* mi = &x->imports[k]; if (!mi->stub_idx) continue; @@ -1838,10 +1966,10 @@ static void build_symtab(MCtx* x) { } } } - for (u32 i = 0; i < x->nsecs; ++i) { - MSec* m = &x->secs[i]; - if (m->synth_data == x->got_bytes && m->synth_size) { - m->reserved1 = indirect_start; + for (u32 i = 0; i < x->nouts; ++i) { + OutSec* o = &x->outs[i]; + if (strcmp(o->sectname, "__got") == 0 && o->size) { + o->reserved1 = indirect_start; for (u32 k = 0; k < x->nimports; ++k) { MachImp* mi = &x->imports[k]; u32 sym_idx = mi->internal ? 
0x80000000u /* INDIRECT_SYMBOL_LOCAL */ @@ -2159,7 +2287,7 @@ static void compute_codesig(MCtx* x, const u8* full_file, u32 file_len_excl_cs, static void emit_load_command_segment(MByte* lc, MCtx* x, u32 segidx) { MSeg* sg = &x->segs[segidx]; u32 seg_cmd_size = - MACHO_SEGCMD64_SIZE + sg->nsects * MACHO_SECT64_SIZE; + MACHO_SEGCMD64_SIZE + sg->nouts * MACHO_SECT64_SIZE; u32 base = lc->len; mbuf_u32(lc, LC_SEGMENT_64); mbuf_u32(lc, seg_cmd_size); @@ -2176,35 +2304,35 @@ static void emit_load_command_segment(MByte* lc, MCtx* x, u32 segidx) { mbuf_u64(lc, sg->filesize); mbuf_u32(lc, sg->maxprot); mbuf_u32(lc, sg->initprot); - mbuf_u32(lc, sg->nsects); + mbuf_u32(lc, sg->nouts); mbuf_u32(lc, 0); /* flags */ - for (u32 j = 0; j < sg->nsects; ++j) { - MSec* m = &x->secs[sg->first_sec + j]; + for (u32 j = 0; j < sg->nouts; ++j) { + OutSec* o = &x->outs[sg->first_out + j]; u8 sname[16], gname[16]; memset(sname, 0, 16); memset(gname, 0, 16); - size_t sl = m->sectname ? strlen(m->sectname) : 0; + size_t sl = o->sectname ? strlen(o->sectname) : 0; if (sl > 16) sl = 16; - if (sl) memcpy(sname, m->sectname, sl); + if (sl) memcpy(sname, o->sectname, sl); size_t gl = strlen(sg->name); /* segname must match */ if (gl > 16) gl = 16; memcpy(gname, sg->name, gl); mbuf_append(lc, sname, 16); mbuf_append(lc, gname, 16); - mbuf_u64(lc, m->vaddr); - mbuf_u64(lc, m->size); - mbuf_u32(lc, (u32)m->file_offset); + mbuf_u64(lc, o->vaddr); + mbuf_u64(lc, o->size); + mbuf_u32(lc, (u32)o->file_offset); /* align is power of 2; encode as log2. */ - u32 a = m->align ? m->align : 1u; + u32 a = o->align ? o->align : 1u; u32 al = 0; while ((1u << al) < a) ++al; mbuf_u32(lc, al); mbuf_u32(lc, 0); /* reloff */ mbuf_u32(lc, 0); /* nreloc */ - mbuf_u32(lc, m->flags); - mbuf_u32(lc, m->reserved1); - mbuf_u32(lc, m->reserved2); + mbuf_u32(lc, o->flags); + mbuf_u32(lc, o->reserved1); + mbuf_u32(lc, o->reserved2); mbuf_u32(lc, 0); /* reserved3 */ } (void)base;