kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

link.c (99433B)


      1 /* link_emit_macho — write a dyld-loadable arm64 MH_EXECUTE.
      2  *
      3  * Mach-O peer of link_emit_elf.  Produces a position-independent
      4  * MH_EXECUTE that links against libSystem.B.dylib (or any other
      5  * dylib/.tbd input) via LC_LOAD_DYLIB + LC_DYLD_CHAINED_FIXUPS.  The
      6  * binary is ad-hoc codesigned at the tail so the kernel will exec it
      7  * on macOS 11+.
      8  *
      9  * Layout (Apple's stock arm64 layout):
     10  *
     11  *   __PAGEZERO  vmaddr 0, vmsize 0x100000000, no file bytes
     12  *   __TEXT  (R-X)
     13  *     mach_header_64
     14  *     load commands
     15  *     [SF_EXEC sections — .text]
     16  *     [SF_ALLOC R-only sections — .rodata, init/fini_array, etc.]
     17  *     __stubs (12B per import-func)
     18  *   __DATA_CONST  (RW initially, dyld marks R-only after fixups)
     19  *     __got    (8B per import — both data and func imports)
     20  *   __DATA  (R-W)
     21  *     [SF_WRITE sections — .data, .bss]
     22  *   __LINKEDIT  (R)
     23  *     dyld_chained_fixups blob
     24  *     dyld_exports_trie blob
     25  *     function starts (empty)
     26  *     data in code (empty)
     27  *     symtab
     28  *     indirect symbol table (one entry per __stubs and __got slot)
     29  *     strtab
     30  *     code signature
     31  *
     32  * Imports are routed:
     33  *   CALL26/JUMP26 against an imported function -> __stubs entry
     34  *   GOT_LOAD_PAGE21/PAGEOFF12 against any import -> __got slot
     35  *   ABS64 against an imported symbol            -> chained-bind at site
     36  *   ABS64 against a defined internal symbol     -> chained-rebase at site
     37  *
     38  * arm64-only.  x86_64-macos arrives with x64 codegen. */
     39 
     40 #include "link/link.h"
     41 
     42 #include <string.h>
     43 
     44 #include "core/bytes.h"
     45 #include "core/heap.h"
     46 #include "core/pool.h"
     47 #include "core/sha256.h"
     48 #include "core/slice.h"
     49 #include "core/util.h"
     50 #include "core/vec.h"
     51 #include "link/link_arch.h"
     52 #include "link/link_internal.h"
     53 #include "link/link_reloc_desc.h"
     54 #include "obj/format.h"
     55 #include "obj/macho/macho.h"
     56 
     57 /* ---- constants ---- */
/* Size of __PAGEZERO (vmaddr 0, no file bytes); __TEXT starts at this
 * vmaddr. */
#define MZ_PAGEZERO 0x100000000ULL
/* 16 KiB.  NOTE(review): presumably the segment page-alignment granule
 * for arm64 — confirm against the layout code. */
#define MZ_PAGE 0x4000ULL
/* Bytes per __got slot (one 8-byte pointer per import). */
#define MZ_GOT_SIZE 8u
/* __DATA,__thread_ptrs slot size — one pointer per unique TLV referenced
 * via TLVP_LOAD_PAGE21/PAGEOFF12. Each slot holds the address of the
 * matching TLV descriptor in __DATA,__thread_vars. */
#define MZ_TLVP_SIZE 8u

/* Values used when building the chained-fixups blob.  NOTE(review):
 * expected to match the pointer-format / imports-format constants in
 * Apple's <mach-o/fixup-chains.h> — confirm. */
#define DYLD_CHAINED_PTR_64 2u
#define DYLD_CHAINED_IMPORT 1u

/* Segment protection bits; combined into the maxprot/initprot arguments
 * of seg_init. */
#define VM_PROT_READ 0x1u
#define VM_PROT_WRITE 0x2u
#define VM_PROT_EXECUTE 0x4u

/* Constants for the ad-hoc code signature appended at the tail of the
 * file.  NOTE(review): values presumably mirror Apple's cs_blobs.h —
 * confirm.  CS_SHA256_LEN aliases SHA256_DIGEST_LEN; CS_PAGE_SIZE_LOG2
 * of 12 corresponds to 1 << 12 = 4096 bytes. */
#define CS_MAGIC_EMBEDDED_SIGNATURE 0xfade0cc0u
#define CS_MAGIC_CODEDIRECTORY 0xfade0c02u
#define CSSLOT_CODEDIRECTORY 0u
#define CS_HASHTYPE_SHA256 2u
#define CS_SHA256_LEN SHA256_DIGEST_LEN
#define CS_PAGE_SIZE_LOG2 12u
#define CS_EXECSEG_MAIN_BINARY 1u

/* extra LC ids (load-command numbers defined locally in this file;
 * LC_DYLD_INFO_ONLY carries the 0x80000000 high bit). */
#define LC_DYLD_INFO_ONLY (0x22u | 0x80000000u)
#define LC_FUNCTION_STARTS_C 0x26u
#define LC_DATA_IN_CODE_C 0x29u
#define LC_CODE_SIGNATURE_C 0x1du
     86 
     87 /* ---- byte buffer ---- */
     88 
/* Growable byte buffer backed by a Heap.  Used for the LINKEDIT blobs
 * kept in MCtx (chained fixups, symtab, strtab, ...). */
typedef struct MByte {
  Heap* heap; /* allocator used to grow and to free `data` */
  u8* data;   /* storage; NULL while nothing has been reserved */
  u32 len;    /* bytes currently in use */
  u32 cap;    /* current capacity; also the size handed to heap->free */
} MByte;
     95 
     96 static void mbuf_init(MByte* b, Heap* h) {
     97   b->heap = h;
     98   b->data = NULL;
     99   b->len = 0;
    100   b->cap = 0;
    101 }
    102 static void mbuf_fini(MByte* b) {
    103   if (b->data) b->heap->free(b->heap, b->data, b->cap);
    104   b->data = NULL;
    105   b->cap = b->len = 0;
    106 }
    107 static void mbuf_reserve(MByte* b, u32 need) {
    108   if (need <= b->cap) return;
    109   (void)VEC_GROW(b->heap, b->data, b->cap, need);
    110 }
    111 static u32 mbuf_align(MByte* b, u32 a) {
    112   u32 n = (u32)ALIGN_UP((u64)b->len, (u64)a);
    113   if (n > b->len) {
    114     mbuf_reserve(b, n);
    115     memset(b->data + b->len, 0, n - b->len);
    116     b->len = n;
    117   }
    118   return b->len;
    119 }
    120 static u32 mbuf_append(MByte* b, const void* src, u32 n) {
    121   u32 off = b->len;
    122   mbuf_reserve(b, b->len + n);
    123   if (n) memcpy(b->data + b->len, src, n);
    124   b->len += n;
    125   return off;
    126 }
    127 static u32 mbuf_u32(MByte* b, u32 v) {
    128   u8 t[4];
    129   wr_u32_le(t, v);
    130   return mbuf_append(b, t, 4);
    131 }
    132 static u32 mbuf_u16(MByte* b, u16 v) {
    133   u8 t[2];
    134   wr_u16_le(t, v);
    135   return mbuf_append(b, t, 2);
    136 }
    137 static u32 mbuf_u64(MByte* b, u64 v) {
    138   u8 t[8];
    139   wr_u64_le(t, v);
    140   return mbuf_append(b, t, 8);
    141 }
    142 static u32 mbuf_u8(MByte* b, u8 v) { return mbuf_append(b, &v, 1); }
    143 static u32 mbuf_str(MByte* b, const char* s, u32 n) {
    144   u32 off = b->len;
    145   mbuf_reserve(b, b->len + n + 1u);
    146   if (n) memcpy(b->data + b->len, s, n);
    147   b->data[b->len + n] = 0;
    148   b->len += n + 1u;
    149   return off;
    150 }
    151 
    152 /* ---- imports + dylibs ---- */
    153 
/* One entry of the import table built by collect_imports.  Either a real
 * dylib import (internal=0) or an in-image symbol that only needs a
 * __got slot (internal=1). */
typedef struct MachImp {
  LinkSymId sym; /* canonical LinkSymId this entry stands for */
  Sym name;      /* interned symbol name */
  u32 dylib_ord;    /* 1-based ordinal into LC_LOAD_DYLIB list */
  u32 stub_idx;     /* 1-based index into __stubs (0 if data import) */
  u32 got_idx;      /* 1-based index into __got */
  u32 imports_strx; /* offset into chained-fixups symbol pool */
  u8 is_func; /* 1 if kind is SK_FUNC/SK_IFUNC, or (real imports) the
                 symbol is the target of a branch reloc */
  u8 weak;    /* 1 if the symbol's bind is SB_WEAK */
  /* internal=1 means this entry is an in-image symbol that's referenced
   * via GOT_LOAD_PAGE21 / LD64_GOT_LO12_NC (clang emits these for any
   * extern global so a single static-link can later become PIC).  The
   * GOT slot stores the symbol's image-relative vaddr and gets a
   * chained-fixup rebase entry (or no entry at all for a weak-undef
   * resolving to NULL).  No dylib_ord / stub_idx / chained-fixup bind. */
  u8 internal;
  u8 pad[1]; /* explicit padding */
  u64 internal_vaddr; /* image-relative target vaddr; meaningful only when
                         internal=1 */
} MachImp;
    174 
/* One dylib dependency.  A MachImp's dylib_ord is the 1-based position
 * of its provider in the MCtx.dylibs array. */
typedef struct MachDylib {
  Sym install; /* interned install name: the providing input's soname, or
                  the /usr/lib/libSystem.B.dylib fallback */
} MachDylib;
    178 
    179 /* One slot in the synthetic __DATA,__thread_ptrs section per unique TLV
    180  * descriptor referenced via TLVP_LOAD_PAGE21/PAGEOFF12.  Modeled after
    181  * MachImp's internal-GOT entries: the slot holds the descriptor address
    182  * (REBASE for internal-to-image descriptors, BIND for dylib-imported
    183  * ones).  The descriptor itself is laid out in __DATA,__thread_vars by
    184  * either the input objects (internal) or the providing dylib (imported). */
typedef struct MachTlv {
  LinkSymId sym; /* canonical descriptor LinkSymId */
  u32 tlv_idx;   /* 1-based slot index in __thread_ptrs */
  u8 imported;   /* 1 == descriptor lives in a dylib (BIND), 0 == internal
                    (REBASE) */
  u8 pad[3];     /* explicit padding */
  u32 import_idx; /* 1-based MachImp index when imported (for chained-bind
                     ordinal); 0 when not imported or when collect_tlv
                     found no matching import */
} MachTlv;
    194 
    195 /* ---- planned section ---- */
    196 
/* One planned section: an input LinkSection or a synthetic buffer,
 * together with its Mach-O name and placement. */
typedef struct MSec {
  /* Source: either a LinkSection (link_sec_id != 0) or a synthetic
   * pre-built byte buffer (data + size). */
  LinkSectionId link_sec_id;
  const u8* synth_data;
  u32 synth_size;
  /* Mach-O placement */
  const char* segname;
  const char* sectname;
  /* Inline storage for segname/sectname when split from a Mach-O
   * `__SEG,__sect`-form LinkSection name.  Names from string literals
   * (synth sections, derived-from-flags defaults) point at .rodata
   * and don't use these.  17 bytes: the on-disk field is a fixed 16
   * (no NUL needed there), but these are read as C strings, so a full
   * 16-char name (e.g. __debug_line_str) needs the extra NUL slot. */
  char segname_buf[17];
  char sectname_buf[17];
  u64 vaddr;
  u64 file_offset;
  u64 size;
  u32 align; /* alignment in bytes; 1 when the LinkSection gives none */
  u32 flags; /* S_TYPE | S_ATTR_* */
  u32 reserved1; /* __stubs: indirect-symtab base, filled in later */
  u32 reserved2; /* __stubs: size of one stub */
  u8 segidx; /* 1=__TEXT, 2=__DATA_CONST, 3=__DATA */
  u8 is_zerofill;
  u8 pad[6]; /* explicit padding */
} MSec;
    225 
    226 static void msec_repair_name_ptrs(MSec* m) {
    227   if (m->segname_buf[0]) m->segname = m->segname_buf;
    228   if (m->sectname_buf[0]) m->sectname = m->sectname_buf;
    229 }
    230 
    231 /* Segment slot indices in MCtx.segs[].  __DWARF carries the file-only
    232  * .debug_* sections (debug-info retention); it sits before __LINKEDIT so
    233  * the ad-hoc code signature stays the last bytes of the file. */
enum {
  MSEG_PAGEZERO = 0,
  MSEG_TEXT = 1,
  MSEG_DATA_CONST = 2,
  MSEG_DATA = 3,
  MSEG_DWARF = 4,    /* file-only debug sections */
  MSEG_LINKEDIT = 5, /* kept last so the code signature ends the file */
  MSEG_COUNT = 6,    /* number of slots; sizes MCtx.segs[] */
};
    243 
/* One planned segment.  seg_init sets name and protections and zeroes
 * the rest; layout fills in addresses, sizes and section ranges. */
typedef struct MSeg {
  const char* name; /* segment name, e.g. "__TEXT" */
  u32 maxprot;      /* VM_PROT_* mask */
  u32 initprot;     /* VM_PROT_* mask */
  u64 vmaddr;
  u64 vmsize;
  u64 fileoff;
  u64 filesize;
  u32 nsects;    /* MSec count in segment — internal layout */
  u32 first_sec; /* first index into MSec[] */
  u32 nouts;     /* OutSec count in segment — what hits the file */
  u32 first_out; /* first index into OutSec[] */
} MSeg;
    257 
    258 /* On-disk section view: one record per (segname, sectname) within a
    259  * segment.  Mach-O requires this — emitting one section_64 per input
    260  * MSec yields sibling __TEXT,__text records that violate the spec.
    261  * Built from MSec[] after vaddr placement; reloc-apply still uses
    262  * MSec[] for byte-buffer addressing. */
typedef struct OutSec {
  /* Field names parallel the like-named MSec placement fields. */
  const char* segname;
  const char* sectname;
  u64 vaddr;
  u64 file_offset;
  u64 size;
  u32 align;
  u32 flags;
  u32 reserved1;
  u32 reserved2;
  u8 segidx;
  u8 is_zerofill;
} OutSec;
    276 
    277 /* ---- main context ---- */
    278 
/* All state for one Mach-O emit: inputs, the import / dylib / TLV
 * tables, planned sections and segments, synthetic section bytes, final
 * layout numbers and the LINKEDIT blobs. */
typedef struct MCtx {
  LinkImage* img;
  Compiler* c; /* used for compiler_panic and the global string pool */
  Heap* h;     /* allocator for every table owned by this context */
  Writer* w;
  Linker* linker; /* may be NULL; consulted for DSO inputs when set */
  const LinkArchDesc* link_arch;
  const ObjMachoArchOps* macho; /* per-arch parameters such as stub_size */

  /* imports */
  MachImp* imports;
  u32 nimports;
  u32 nimports_real; /* count of imports with internal=0 (== prefix length;
                      * collect_imports appends internal=1 entries last) */
  u32 nimport_funcs; /* number of stub slots assigned (is_func && !internal) */
  MachDylib* dylibs; /* ordinal order: ordinal == index + 1 */
  u32 ndylibs;
  /* sym->import index, 1-based, 0 = not an import. Sized to LinkSymId space
   * + 1. */
  u32* sym_to_imp;
  u32 sym_to_imp_size;

  /* sections + segments */
  MSec* secs;
  u32 nsecs;
  OutSec* outs;
  u32 nouts;
  MSeg segs[MSEG_COUNT]; /* PAGEZERO, TEXT, DATA_CONST, DATA, DWARF, LINKEDIT */
  u32 nsegs;

  /* Synthetic byte buffers, owned. */
  u8* stubs_bytes;
  u32 stubs_size; /* nimport_funcs * macho->stub_size */
  u8* got_bytes;
  u32 got_size; /* nimports * MZ_GOT_SIZE */
  /* TLV pointer slots — one entry in __DATA,__thread_ptrs per unique
   * descriptor referenced via TLVP_LOAD_PAGE21/PAGEOFF12.  sym_to_tlv
   * maps LinkSymId → 1-based slot index (parallel to sym_to_imp).  Slot
   * bytes are populated at apply_relocs time once shift_sections has
   * pinned descriptor vaddrs. */
  MachTlv* tlv_slots;
  u32 ntlv;
  u32* sym_to_tlv;
  u32 sym_to_tlv_size;
  u8* tlv_ptrs_bytes;
  u32 tlv_ptrs_size;
  u64 tlv_ptrs_vaddr;
  /* Vaddr of the first thread-local-storage section
   * (__thread_data / __thread_bss).  Each TLV descriptor's word 2
   * stores the symbol's offset within this image rather than an
   * absolute address — see apply_relocs's S_THREAD_LOCAL_VARIABLES
   * ABS64 special case. */
  u64 tls_image_vaddr;
  u8 has_tls_image;

  /* Final layout (computed during plan) */
  u64 text_vaddr; /* set to MZ_PAGEZERO by plan_layout */
  u64 text_filesz;
  u64 stubs_vaddr;
  u64 got_vaddr;
  u64 data_const_vaddr;
  u64 data_vaddr;
  u64 data_const_filesz;
  u64 data_filesz;
  u64 data_memsz;
  u64 linkedit_vaddr;
  u64 linkedit_fileoff;
  u32 entry_offset; /* offset of entry within __TEXT segment */

  u64 headers_size; /* header + loadcmds */

  /* LINKEDIT contents */
  MByte chained_fixups;
  MByte exports_trie;
  MByte symtab; /* binary nlist_64 array */
  MByte strtab;
  MByte indirect; /* u32 array */
  MByte fn_starts;
  MByte data_in_code;
  MByte codesig;

  /* Offsets of the blobs above.  NOTE(review): presumably file offsets
   * within __LINKEDIT — confirm where they are assigned. */
  u32 chained_fixups_off;
  u32 exports_trie_off;
  u32 fn_starts_off;
  u32 data_in_code_off;
  u32 symtab_off;
  u32 indirect_off;
  u32 strtab_off;
  u32 codesig_off;
  u32 codesig_size;
  u32 nsyms;

  u8 uuid[16];
} MCtx;
    373 
    374 /* ---- helpers for finding LinkSymbol vaddr ---- */
    375 
    376 static LinkSymbol* sym_at(LinkImage* img, LinkSymId id) {
    377   if (id == LINK_SYM_NONE || id > LinkSyms_count(&img->syms)) return NULL;
    378   return LinkSyms_at(&img->syms, id - 1);
    379 }
    380 
    381 /* ---- pass: collect imports ---- */
    382 
    383 static u32 dylib_ordinal_of(MCtx* x, Sym install) {
    384   for (u32 j = 0; j < x->ndylibs; ++j)
    385     if (x->dylibs[j].install == install) return j + 1u;
    386   return 0;
    387 }
    388 
    389 static void collect_imports(MCtx* x) {
    390   LinkImage* img = x->img;
    391   Heap* h = x->h;
    392 
    393   x->sym_to_imp_size = LinkSyms_count(&img->syms) + 1u;
    394   x->sym_to_imp =
    395       (u32*)h->alloc(h, sizeof(u32) * x->sym_to_imp_size, _Alignof(u32));
    396   if (!x->sym_to_imp)
    397     compiler_panic(x->c, SRCLOC_NONE, "link_macho: oom on sym_to_imp");
    398   memset(x->sym_to_imp, 0, sizeof(u32) * x->sym_to_imp_size);
    399 
    400   u32 cap = 0, cap_d = 0;
    401   for (u32 i = 0; i < LinkSyms_count(&img->syms); ++i) {
    402     LinkSymbol* s = LinkSyms_at(&img->syms, i);
    403     if (!s->imported) continue;
    404     if (s->name == 0) continue;
    405     LinkSymId canon = symhash_get(&img->globals, s->name);
    406     if (canon != LINK_SYM_NONE && canon != s->id) continue;
    407     if (VEC_GROW(h, x->imports, cap, x->nimports + 1u))
    408       compiler_panic(x->c, SRCLOC_NONE, "link_macho: oom on imports");
    409     MachImp* mi = &x->imports[x->nimports++];
    410     memset(mi, 0, sizeof(*mi));
    411     mi->sym = s->id;
    412     mi->name = s->name;
    413     mi->is_func = (s->kind == SK_FUNC || s->kind == SK_IFUNC) ? 1 : 0;
    414     mi->weak = (s->bind == SB_WEAK) ? 1 : 0;
    415     x->sym_to_imp[s->id] = x->nimports;
    416   }
    417 
    418   /* Back-classify: any CALL26/JUMP26 reloc target -> function. */
    419   for (u32 i = 0; i < LinkRelocs_count(&img->relocs); ++i) {
    420     LinkRelocApply* r = LinkRelocs_at(&img->relocs, i);
    421     if (!reloc_kind_is_branch(x->c, r->kind)) continue;
    422     if (r->target == LINK_SYM_NONE || r->target >= x->sym_to_imp_size) continue;
    423     u32 idx = x->sym_to_imp[r->target];
    424     if (!idx) {
    425       /* Resolve through canonical. */
    426       LinkSymbol* tgt = LinkSyms_at(&img->syms, r->target - 1);
    427       if (tgt->name == 0) continue;
    428       LinkSymId canon = symhash_get(&img->globals, tgt->name);
    429       if (canon == LINK_SYM_NONE || canon >= x->sym_to_imp_size) continue;
    430       idx = x->sym_to_imp[canon];
    431       if (!idx) continue;
    432       /* Stash so future lookups skip this loop. */
    433       x->sym_to_imp[r->target] = idx;
    434     }
    435     x->imports[idx - 1].is_func = 1;
    436   }
    437 
    438   /* Build dylib ordinal table.  Pull soname from the providing DSO. */
    439   for (u32 i = 0; i < x->nimports; ++i) {
    440     MachImp* mi = &x->imports[i];
    441     LinkSymbol* s = sym_at(img, mi->sym);
    442     LinkInputId dso_id = s ? s->dso_input_id : LINK_INPUT_NONE;
    443     Sym install = 0;
    444     if (dso_id != LINK_INPUT_NONE && x->linker &&
    445         dso_id - 1u < LinkInputs_count(&x->linker->inputs)) {
    446       LinkInput* in = LinkInputs_at(&x->linker->inputs, dso_id - 1u);
    447       if (in->kind == LINK_INPUT_DSO_BYTES) install = in->soname;
    448     }
    449     if (install == 0)
    450       install = pool_intern_slice(x->c->global,
    451                                   SLICE_LIT("/usr/lib/libSystem.B.dylib"));
    452     u32 ord = dylib_ordinal_of(x, install);
    453     if (!ord) {
    454       if (VEC_GROW(h, x->dylibs, cap_d, x->ndylibs + 1u))
    455         compiler_panic(x->c, SRCLOC_NONE, "link_macho: oom on dylibs");
    456       x->dylibs[x->ndylibs].install = install;
    457       ++x->ndylibs;
    458       ord = x->ndylibs;
    459     }
    460     mi->dylib_ord = ord;
    461   }
    462 
    463   /* Always include every DSO input's install-name. */
    464   if (x->linker) {
    465     for (u32 ii = 0; ii < LinkInputs_count(&x->linker->inputs); ++ii) {
    466       LinkInput* in = LinkInputs_at(&x->linker->inputs, ii);
    467       if (in->kind != LINK_INPUT_DSO_BYTES) continue;
    468       if (in->soname == 0) continue;
    469       if (dylib_ordinal_of(x, in->soname)) continue;
    470       if (VEC_GROW(h, x->dylibs, cap_d, x->ndylibs + 1u))
    471         compiler_panic(x->c, SRCLOC_NONE, "link_macho: oom on dylibs");
    472       x->dylibs[x->ndylibs].install = in->soname;
    473       ++x->ndylibs;
    474     }
    475   }
    476 
    477   /* All entries so far are real imports; remember the partition point
    478    * so import/symtab table emit loops can skip the appended internals. */
    479   x->nimports_real = x->nimports;
    480 
    481   /* Internal GOT pass.  clang on Mach-O routes every extern-global
    482    * reference through the GOT (GOT_LOAD_PAGE21 / LD64_GOT_LO12_NC), so
    483    * even a common symbol or weak-undef that ends up resolved within the
    484    * image still needs a __got slot.  For each such reloc whose target
    485    * isn't an existing import, materialize a MachImp with internal=1.
    486    * The slot's contents are filled at write time and a chained-fixup
    487    * REBASE entry (or none, for weak undef → NULL) keeps it valid
    488    * post-ASLR. */
    489   for (u32 i = 0; i < LinkRelocs_count(&img->relocs); ++i) {
    490     LinkRelocApply* r = LinkRelocs_at(&img->relocs, i);
    491     if (!reloc_kind_is_got_load(x->c, r->kind)) continue;
    492     if (r->target == LINK_SYM_NONE || r->target >= x->sym_to_imp_size) continue;
    493     if (x->sym_to_imp[r->target]) continue;
    494     LinkSymbol* t = sym_at(img, r->target);
    495     if (!t) continue;
    496     /* Resolve through canonical so we share a single slot per symbol. */
    497     LinkSymId canon = r->target;
    498     if (t->name != 0) {
    499       LinkSymId hit = symhash_get(&img->globals, t->name);
    500       if (hit != LINK_SYM_NONE) {
    501         canon = hit;
    502         if (x->sym_to_imp[canon]) {
    503           x->sym_to_imp[r->target] = x->sym_to_imp[canon];
    504           continue;
    505         }
    506         t = sym_at(img, canon);
    507         if (!t) continue;
    508       }
    509     }
    510     if (VEC_GROW(h, x->imports, cap, x->nimports + 1u))
    511       compiler_panic(x->c, SRCLOC_NONE, "link_macho: oom on internal got");
    512     MachImp* mi = &x->imports[x->nimports++];
    513     memset(mi, 0, sizeof(*mi));
    514     mi->sym = canon;
    515     mi->name = t->name;
    516     mi->is_func = (t->kind == SK_FUNC || t->kind == SK_IFUNC) ? 1 : 0;
    517     mi->weak = (t->bind == SB_WEAK) ? 1 : 0;
    518     mi->internal = 1;
    519     /* internal_vaddr is read fresh from the LinkSymbol when the slot
    520      * gets initialized — collect_imports runs before shift_sections
    521      * rebases section vaddrs to Mach-O layout, so capturing here would
    522      * be stale by the time __got bytes are written. */
    523     mi->internal_vaddr = 0;
    524     x->sym_to_imp[canon] = x->nimports;
    525     if (canon != r->target) x->sym_to_imp[r->target] = x->nimports;
    526   }
    527 
    528   /* Assign stub_idx + got_idx.  Internal entries get a slot but no stub:
    529    * the call site (CALL26) on internal funcs goes direct, not via stub. */
    530   u32 stub_run = 0;
    531   for (u32 i = 0; i < x->nimports; ++i) {
    532     MachImp* mi = &x->imports[i];
    533     mi->got_idx = i + 1u;
    534     if (mi->is_func && !mi->internal) mi->stub_idx = ++stub_run;
    535   }
    536   x->nimport_funcs = stub_run;
    537 }
    538 
    539 /* ---- pass: collect TLV pointer slots ----
    540  *
    541  * Mirror of collect_imports' internal-GOT pass, but for TLV descriptors:
    542  * each unique descriptor referenced via ARM64_RELOC_TLVP_LOAD_PAGE21 /
    543  * PAGEOFF12 gets one slot in the synthetic __DATA,__thread_ptrs section.
    544  * The slot's runtime value is the descriptor's address; we patch it at
    545  * apply_relocs time (REBASE for in-image descriptors, BIND for ones in
    546  * a dylib).
    547  *
    548  * Slots are deduplicated by canonical LinkSymId so a single descriptor
    549  * referenced from N call sites shares one __thread_ptrs entry. */
    550 static void collect_tlv(MCtx* x) {
    551   LinkImage* img = x->img;
    552   Heap* h = x->h;
    553   x->sym_to_tlv_size = LinkSyms_count(&img->syms) + 1u;
    554   x->sym_to_tlv =
    555       (u32*)h->alloc(h, sizeof(u32) * x->sym_to_tlv_size, _Alignof(u32));
    556   if (!x->sym_to_tlv)
    557     compiler_panic(x->c, SRCLOC_NONE, "link_macho: oom on sym_to_tlv");
    558   memset(x->sym_to_tlv, 0, sizeof(u32) * x->sym_to_tlv_size);
    559 
    560   u32 cap = 0;
    561   for (u32 i = 0; i < LinkRelocs_count(&img->relocs); ++i) {
    562     LinkRelocApply* r = LinkRelocs_at(&img->relocs, i);
    563     if (!reloc_kind_is_tlvp(x->c, r->kind)) continue;
    564     if (r->target == LINK_SYM_NONE || r->target >= x->sym_to_tlv_size) continue;
    565     /* Resolve through canonical so multiple per-input duplicate undefs
    566      * collapse onto one __thread_ptrs slot. */
    567     LinkSymId canon = r->target;
    568     LinkSymbol* t = sym_at(img, r->target);
    569     if (!t) continue;
    570     if (t->name != 0) {
    571       LinkSymId hit = symhash_get(&img->globals, t->name);
    572       if (hit != LINK_SYM_NONE) {
    573         canon = hit;
    574         t = sym_at(img, canon);
    575         if (!t) continue;
    576       }
    577     }
    578     if (x->sym_to_tlv[canon]) {
    579       if (canon != r->target) x->sym_to_tlv[r->target] = x->sym_to_tlv[canon];
    580       continue;
    581     }
    582     if (VEC_GROW(h, x->tlv_slots, cap, x->ntlv + 1u))
    583       compiler_panic(x->c, SRCLOC_NONE, "link_macho: oom on tlv_slots");
    584     MachTlv* ts = &x->tlv_slots[x->ntlv++];
    585     memset(ts, 0, sizeof(*ts));
    586     ts->sym = canon;
    587     ts->tlv_idx = x->ntlv;
    588     ts->imported = t->imported ? 1u : 0u;
    589     /* If the descriptor is imported we route the bind through the
    590      * symbol's MachImp slot — that's where dyld's chained-import index
    591      * comes from.  When this loop fires the imp pass has already
    592      * materialized the entry (real imports were processed first); the
    593      * lookup may also have stashed an alias for non-canonical ids. */
    594     if (ts->imported) {
    595       u32 idx = (canon < x->sym_to_imp_size) ? x->sym_to_imp[canon] : 0u;
    596       if (!idx && t->name != 0) {
    597         LinkSymId hit2 = symhash_get(&img->globals, t->name);
    598         if (hit2 != LINK_SYM_NONE && hit2 < x->sym_to_imp_size)
    599           idx = x->sym_to_imp[hit2];
    600       }
    601       ts->import_idx = idx;
    602     }
    603     x->sym_to_tlv[canon] = x->ntlv;
    604     if (canon != r->target) x->sym_to_tlv[r->target] = x->ntlv;
    605   }
    606 }
    607 
    608 /* ---- pass: plan Mach-O sections ----
    609  *
    610  * Walks LinkImage sections.  Each non-zero-size LinkSection becomes one
    611  * MSec.  Synthetic __stubs and __got are appended at the right segment
    612  * boundaries.  Vaddr and file_offset are assigned in a single forward
    613  * pass starting at __TEXT base; __PAGEZERO and __LINKEDIT are special. */
    614 
    615 static void seg_init(MSeg* s, const char* name, u32 maxp, u32 initp) {
    616   memset(s, 0, sizeof(*s));
    617   s->name = name;
    618   s->maxprot = maxp;
    619   s->initprot = initp;
    620 }
    621 
    622 static int sec_is_writable(const LinkSection* ls) {
    623   return (ls->flags & SF_WRITE) != 0u;
    624 }
    625 static int sec_is_exec(const LinkSection* ls) {
    626   return (ls->flags & SF_EXEC) != 0u;
    627 }
    628 static int sec_is_zerofill(const LinkSection* ls) {
    629   return ls->sem == SSEM_NOBITS;
    630 }
    631 
    632 static int section_has_abs64_reloc(const LinkImage* img, LinkSectionId id) {
    633   for (u32 i = 0; i < LinkRelocs_count(&img->relocs); ++i) {
    634     const LinkRelocApply* r = LinkRelocs_at(&img->relocs, i);
    635     if (r->link_section_id == id && r->kind == R_ABS64) return 1;
    636   }
    637   return 0;
    638 }
    639 
    640 static int sec_needs_data_const(const LinkImage* img, const LinkSection* ls) {
    641   if (!ls || !ls->size || sec_is_exec(ls) || sec_is_writable(ls) ||
    642       sec_is_zerofill(ls)) {
    643     return 0;
    644   }
    645   return section_has_abs64_reloc(img, ls->id);
    646 }
    647 
    648 /* Pick (segname, sectname) for a LinkSection.  Comma-form Mach-O names
    649  * round-trip into MSec's inline 16-byte buffers; literal defaults point
    650  * at .rodata strings.  Caller passes the MSec for per-section storage —
    651  * a previous version used a shared static buffer which aliased all
    652  * sections to whichever name was set last. */
    653 static void pick_macho_names(const LinkSection* ls, Compiler* c, MSec* m) {
    654   Slice nm_s = pool_slice(c->global, ls->name);
    655   const char* nm = nm_s.s;
    656   size_t nlen = nm_s.len;
    657   if (nm) {
    658     /* Comma-form: "__SEG,__sect" round-tripped from a Mach-O input. */
    659     for (size_t i = 0; i < nlen; ++i) {
    660       if (nm[i] == ',') {
    661         u32 seg_n = (u32)(i > 16 ? 16 : i);
    662         memcpy(m->segname_buf, nm, seg_n);
    663         m->segname_buf[seg_n] = 0;
    664         u32 sect_n = (u32)((nlen - i - 1) > 16 ? 16 : (nlen - i - 1));
    665         memcpy(m->sectname_buf, nm + i + 1, sect_n);
    666         m->sectname_buf[sect_n] = 0;
    667         m->segname = m->segname_buf;
    668         m->sectname = m->sectname_buf;
    669         return;
    670       }
    671     }
    672   }
    673   /* Derive from flags. */
    674   if (sec_is_exec(ls)) {
    675     m->segname = "__TEXT";
    676     m->sectname = "__text";
    677   } else if (sec_is_writable(ls)) {
    678     m->segname = "__DATA";
    679     m->sectname = sec_is_zerofill(ls) ? "__bss" : "__data";
    680   } else {
    681     m->segname = "__TEXT";
    682     m->sectname = "__const";
    683   }
    684 }
    685 
    686 static void plan_layout(MCtx* x) {
    687   LinkImage* img = x->img;
    688   Heap* h = x->h;
    689 
    690   /* PAGEZERO */
    691   seg_init(&x->segs[0], "__PAGEZERO", 0, 0);
    692   x->segs[0].vmaddr = 0;
    693   x->segs[0].vmsize = MZ_PAGEZERO;
    694   x->segs[0].fileoff = 0;
    695   x->segs[0].filesize = 0;
    696   x->segs[0].nsects = 0;
    697   x->segs[0].first_sec = 0;
    698 
    699   /* Segments 1..4 */
    700   seg_init(&x->segs[1], "__TEXT", VM_PROT_READ | VM_PROT_EXECUTE,
    701            VM_PROT_READ | VM_PROT_EXECUTE);
    702   seg_init(&x->segs[2], "__DATA_CONST", VM_PROT_READ | VM_PROT_WRITE,
    703            VM_PROT_READ | VM_PROT_WRITE);
    704   seg_init(&x->segs[3], "__DATA", VM_PROT_READ | VM_PROT_WRITE,
    705            VM_PROT_READ | VM_PROT_WRITE);
    706   /* __DWARF holds the file-only .debug_* sections; mapped R but never
    707    * referenced at runtime. Empty (nsects 0) when there's no debug info. */
    708   seg_init(&x->segs[MSEG_DWARF], "__DWARF", VM_PROT_READ, VM_PROT_READ);
    709   seg_init(&x->segs[MSEG_LINKEDIT], "__LINKEDIT", VM_PROT_READ, VM_PROT_READ);
    710   x->nsegs = MSEG_COUNT;
    711 
    712   /* Pre-allocate MSec capacity: every LinkSection + 2 synth (__stubs,
    713    * __got).  (LinkSections from the dynamic-link layer — .dynsym / .plt
    714    * etc. — were synthesized by layout_dyn for ELF; we won't have them
    715    * since pie wasn't set on this Linker.  Still, oversize by a few.) */
    716   u32 cap = LinkRelocs_count(&img->relocs) + img->nsections + 4u;
    717   x->secs = (MSec*)h->alloc(h, sizeof(MSec) * cap, _Alignof(MSec));
    718   if (!x->secs) compiler_panic(x->c, SRCLOC_NONE, "link_macho: oom on MSec");
    719   memset(x->secs, 0, sizeof(MSec) * cap);
    720   x->nsecs = 0;
    721 
    722   /* Pass 1: __TEXT segment.  Header + loadcmds reserve front. */
    723   /* We need the exact header_size to set first sec's file_offset.  We'll
    724    * compute it later, but reserve a placeholder; for now use 0 and patch
    725    * in pass 4 (offsets get bumped). */
    726 
    727   u64 text_vaddr = MZ_PAGEZERO;
    728   /* We'll compute headers_size after plan; stash starting vaddr only. */
    729   x->segs[1].vmaddr = text_vaddr;
    730   x->segs[1].fileoff = 0;
    731   x->text_vaddr = text_vaddr;
    732 
    733   /* Collect: (a) exec sections, (b) read-only allocatable sections. */
    734   /* (cursor advances per-segment in pass 2; nothing to track here) */
    735 
    736   /* We don't know the header size yet; walk sections first to enumerate
    737    * MSec entries, then back-fill file_offset/vaddr after we know the
    738    * load-command count. */
    739 
    740   u32 first_text_sec = x->nsecs;
    741 
    742   for (u32 i = 0; i < img->nsections; ++i) {
    743     LinkSection* ls = &img->sections[i];
    744     if (!ls->size) continue;
    745     if (ls->file_only) continue; /* .debug_* → __DWARF segment below */
    746     if (sec_is_writable(ls)) continue;
    747     if (sec_is_zerofill(ls)) continue; /* placed in __DATA */
    748     if (sec_needs_data_const(img, ls)) continue;
    749     MSec* m = &x->secs[x->nsecs++];
    750     memset(m, 0, sizeof(*m));
    751     m->link_sec_id = ls->id;
    752     pick_macho_names(ls, x->c, m);
    753     /* Force into __TEXT. */
    754     if (!slice_eq_cstr(slice_from_cstr(m->segname), "__TEXT"))
    755       m->segname = "__TEXT";
    756     m->align = ls->align ? ls->align : 1u;
    757     m->size = ls->size;
    758     m->segidx = 1;
    759     m->flags = sec_is_exec(ls) ? (0x80000000u /*S_ATTR_PURE_INSTRUCTIONS*/ |
    760                                   0x00000400u /*S_ATTR_SOME_INSTRUCTIONS*/)
    761                                : 0u;
    762   }
    763 
    764   /* __stubs synthetic */
    765   if (x->nimport_funcs) {
    766     x->stubs_size = x->nimport_funcs * x->macho->stub_size;
    767     x->stubs_bytes = (u8*)h->alloc(h, x->stubs_size, 4);
    768     if (!x->stubs_bytes)
    769       compiler_panic(x->c, SRCLOC_NONE, "link_macho: oom on stubs");
    770     memset(x->stubs_bytes, 0, x->stubs_size);
    771     MSec* m = &x->secs[x->nsecs++];
    772     memset(m, 0, sizeof(*m));
    773     m->synth_data = x->stubs_bytes;
    774     m->synth_size = x->stubs_size;
    775     m->segname = "__TEXT";
    776     m->sectname = "__stubs";
    777     m->align = 4u;
    778     m->size = x->stubs_size;
    779     m->segidx = 1;
    780     m->flags = 0x80000000u | 0x00000400u | 0x00000008u /*S_SYMBOL_STUBS*/;
    781     m->reserved1 = 0; /* fill in later: indirect-symtab base */
    782     m->reserved2 = x->macho->stub_size;
    783   }
    784   x->segs[1].nsects = x->nsecs - first_text_sec;
    785   x->segs[1].first_sec = first_text_sec;
    786 
    787   /* __DATA_CONST: __got synth */
    788   u32 first_dc = x->nsecs;
    789   if (x->nimports) {
    790     x->got_size = x->nimports * MZ_GOT_SIZE;
    791     x->got_bytes = (u8*)h->alloc(h, x->got_size, 8);
    792     if (!x->got_bytes)
    793       compiler_panic(x->c, SRCLOC_NONE, "link_macho: oom on got");
    794     memset(x->got_bytes, 0, x->got_size);
    795     MSec* m = &x->secs[x->nsecs++];
    796     memset(m, 0, sizeof(*m));
    797     m->synth_data = x->got_bytes;
    798     m->synth_size = x->got_size;
    799     m->segname = "__DATA_CONST";
    800     m->sectname = "__got";
    801     m->align = 8u;
    802     m->size = x->got_size;
    803     m->segidx = 2;
    804     m->flags = 0x00000006u /*S_NON_LAZY_SYMBOL_POINTERS*/;
    805     m->reserved1 = 0; /* indirect-symtab base */
    806   }
    807   for (u32 i = 0; i < img->nsections; ++i) {
    808     LinkSection* ls = &img->sections[i];
    809     if (ls->file_only) continue; /* .debug_* → __DWARF (has ABS64 relocs) */
    810     if (!sec_needs_data_const(img, ls)) continue;
    811     MSec* m = &x->secs[x->nsecs++];
    812     memset(m, 0, sizeof(*m));
    813     m->link_sec_id = ls->id;
    814     pick_macho_names(ls, x->c, m);
    815     m->segname = "__DATA_CONST";
    816     m->align = ls->align ? ls->align : 1u;
    817     m->size = ls->size;
    818     m->segidx = 2;
    819     m->flags = 0;
    820   }
    821   x->segs[2].nsects = x->nsecs - first_dc;
    822   x->segs[2].first_sec = first_dc;
    823 
    824   /* __DATA segment: writable sections + zerofill. */
    825   u32 first_d = x->nsecs;
    826   for (u32 i = 0; i < img->nsections; ++i) {
    827     LinkSection* ls = &img->sections[i];
    828     if (ls->file_only) continue; /* .debug_* → __DWARF */
    829     if (!ls->size && !sec_is_zerofill(ls)) continue;
    830     if (!sec_is_writable(ls)) continue;
    831     MSec* m = &x->secs[x->nsecs++];
    832     memset(m, 0, sizeof(*m));
    833     m->link_sec_id = ls->id;
    834     pick_macho_names(ls, x->c, m);
    835     if (!slice_eq_cstr(slice_from_cstr(m->segname), "__DATA"))
    836       m->segname = "__DATA";
    837     m->align = ls->align ? ls->align : 1u;
    838     m->size = ls->size;
    839     m->segidx = 3;
    840     m->is_zerofill = sec_is_zerofill(ls) ? 1 : 0;
    841     m->flags = m->is_zerofill ? 0x00000001u /*S_ZEROFILL*/ : 0;
    842     /* dyld dispatches on the section type byte (low 8 bits of flags).
    843      * __mod_init_func / __mod_term_func sections must carry the
    844      * S_MOD_INIT_FUNC_POINTERS / S_MOD_TERM_FUNC_POINTERS type or dyld
    845      * skips them entirely — leaving constructors unrun at startup. */
    846     if (slice_eq_cstr(slice_from_cstr(m->sectname), "__mod_init_func"))
    847       m->flags = 0x00000009u /*S_MOD_INIT_FUNC_POINTERS*/;
    848     else if (slice_eq_cstr(slice_from_cstr(m->sectname), "__mod_term_func"))
    849       m->flags = 0x0000000au /*S_MOD_TERM_FUNC_POINTERS*/;
    850     else if (ls->flags & SF_TLS) {
    851       /* TLV sections: dyld dispatches by section type, not name.  Map
    852        * __thread_vars → S_THREAD_LOCAL_VARIABLES (descriptor records),
    853        * __thread_data → S_THREAD_LOCAL_REGULAR (initial data),
    854        * __thread_bss → S_THREAD_LOCAL_ZEROFILL (zero-init data).  Done
    855        * by sectname so per-TU inputs without a Mach-O ext_type still
    856        * get the right section type. */
    857       if (slice_eq_cstr(slice_from_cstr(m->sectname), "__thread_vars")) {
    858         m->flags = S_THREAD_LOCAL_VARIABLES;
    859         /* Each descriptor is three pointers (24B) whose first word is
    860          * dyld's _tlv_bootstrap thunk pointer.  Clang/llvm emit
    861          * __thread_vars with on-disk alignment 1 (relying on layout to
    862          * land it on 8); force 8-alignment here so the descriptor
    863          * pointers fall on 8-byte boundaries — dyld's chained-fixup
    864          * processing assumes that. */
    865         if (m->align < 8u) m->align = 8u;
    866       } else if (m->is_zerofill)
    867         m->flags = S_THREAD_LOCAL_ZEROFILL;
    868       else
    869         m->flags = S_THREAD_LOCAL_REGULAR;
    870     }
    871   }
    872   /* __thread_ptrs synthetic (TLV pointer slots).  Emitted into __DATA
    873    * after the user's TLV input sections so descriptors and their
    874    * pointers share the same segment.  Each slot's runtime initial
    875    * value (= TLV descriptor address) is patched during apply_relocs. */
    876   if (x->ntlv) {
    877     x->tlv_ptrs_size = x->ntlv * MZ_TLVP_SIZE;
    878     x->tlv_ptrs_bytes = (u8*)h->alloc(h, x->tlv_ptrs_size, 8);
    879     if (!x->tlv_ptrs_bytes)
    880       compiler_panic(x->c, SRCLOC_NONE, "link_macho: oom on tlv_ptrs");
    881     memset(x->tlv_ptrs_bytes, 0, x->tlv_ptrs_size);
    882     MSec* m = &x->secs[x->nsecs++];
    883     memset(m, 0, sizeof(*m));
    884     m->synth_data = x->tlv_ptrs_bytes;
    885     m->synth_size = x->tlv_ptrs_size;
    886     m->segname = "__DATA";
    887     m->sectname = "__thread_ptrs";
    888     m->align = 8u;
    889     m->size = x->tlv_ptrs_size;
    890     m->segidx = 3;
    891     m->flags = S_THREAD_LOCAL_VARIABLE_POINTERS;
    892   }
    893   x->segs[3].nsects = x->nsecs - first_d;
    894   x->segs[3].first_sec = first_d;
    895 
    896   /* __DWARF: file-only .debug_* sections (debug-info retention). Each
    897    * contribution becomes a synth MSec whose bytes are the per-image
    898    * debug registry buffer (relocs applied in place at apply_relocs).
    899    * Iterated in registry order (= input order) so same-name runs land
    900    * adjacent and the per-input DWARF-relative bases line up with the
    901    * coalesced section's byte layout. */
    902   u32 first_dw = x->nsecs;
    903   for (u32 i = 0; i < img->nsections; ++i) {
    904     LinkSection* ls = &img->sections[i];
    905     u8* dbg;
    906     if (!ls->file_only || !ls->size) continue;
    907     dbg = link_fileonly_bytes(img, ls->id);
    908     if (!dbg) continue;
    909     MSec* m = &x->secs[x->nsecs++];
    910     memset(m, 0, sizeof(*m));
    911     m->synth_data = dbg;
    912     m->synth_size = (u32)ls->size;
    913     /* Section name: a Mach-O input already carries "__DWARF,__debug_*";
    914      * an in-process .debug_* maps via obj_macho_debug_sectname. */
    915     {
    916       Slice nm = pool_slice(x->c->global, ls->name);
    917       const char* comma = nm.s ? memchr(nm.s, ',', nm.len) : NULL;
    918       char sect[17];
    919       if (comma) {
    920         u32 sgn = (u32)(comma - nm.s);
    921         if (sgn > 16u) sgn = 16u;
    922         memcpy(m->segname_buf, nm.s, sgn);
    923         m->segname_buf[sgn] = 0;
    924         u32 stn = (u32)(nm.len - (comma - nm.s) - 1);
    925         if (stn > 16u) stn = 16u;
    926         memcpy(m->sectname_buf, comma + 1, stn);
    927         m->sectname_buf[stn] = 0;
    928       } else if (obj_macho_debug_sectname(nm.s, nm.len, sect)) {
    929         memcpy(m->segname_buf, "__DWARF", 8);
    930         memcpy(m->sectname_buf, sect, slice_from_cstr(sect).len + 1);
    931       } else {
    932         memcpy(m->segname_buf, "__DWARF", 8);
    933         u32 stn = nm.len > 16u ? 16u : (u32)nm.len;
    934         memcpy(m->sectname_buf, nm.s, stn);
    935         m->sectname_buf[stn] = 0;
    936       }
    937       m->segname = m->segname_buf;
    938       m->sectname = m->sectname_buf;
    939     }
    940     /* align 1: contributions concatenate gap-free so the DWARF-relative
    941      * bases (assigned without padding in link_layout_debug) stay valid. */
    942     m->align = 1u;
    943     m->size = ls->size;
    944     m->segidx = MSEG_DWARF;
    945     m->flags = 0; /* S_REGULAR */
    946   }
    947   x->segs[MSEG_DWARF].nsects = x->nsecs - first_dw;
    948   x->segs[MSEG_DWARF].first_sec = first_dw;
    949 
    950   /* Group MSecs by (segname, sectname) within each segment so vaddr
    951    * placement keeps same-named runs contiguous.  Otherwise Phase B's
    952    * adjacency-based coalescing splits a single Mach-O section into
    953    * multiple OutSecs (e.g. `.text` from an in-memory ObjBuilder and
    954    * `__TEXT,__text` from a Mach-O .o input both map to `__TEXT,__text`
    955    * but arrive in separate link_layout groups, interleaved with other
    956    * sections from each input).  Stable insertion sort preserves input
    957    * order within a name, which matters for synth __stubs/__thread_ptrs
    958    * order relative to peers. */
    959   for (u32 i = 0; i < x->nsegs; ++i) {
    960     MSeg* sg = &x->segs[i];
    961     if (sg->nsects < 2) continue;
    962     u32 base = sg->first_sec;
    963     u32 n = sg->nsects;
    964     for (u32 a = 1; a < n; ++a) {
    965       MSec key = x->secs[base + a];
    966       msec_repair_name_ptrs(&key);
    967       u32 j = a;
    968       while (j > 0) {
    969         MSec* prev = &x->secs[base + j - 1];
    970         /* Ordering compare for stable sort: slices don't order, keep strcmp. */
    971         int cmp = strcmp(prev->segname, key.segname);             /* ordering */
    972         if (cmp == 0) cmp = strcmp(prev->sectname, key.sectname); /* ordering */
    973         if (cmp <= 0) break;
    974         x->secs[base + j] = x->secs[base + j - 1];
    975         msec_repair_name_ptrs(&x->secs[base + j]);
    976         --j;
    977       }
    978       x->secs[base + j] = key;
    979       msec_repair_name_ptrs(&x->secs[base + j]);
    980     }
    981   }
    982 
    983   /* Phase A: count OutSecs per segment (distinct sectnames) so we can
    984    * size the load commands before placing vaddrs.  Phase B builds the
    985    * actual OutSec[] after placement, when vaddrs are final. */
    986   for (u32 i = 0; i < x->nsegs; ++i) {
    987     MSeg* sg = &x->segs[i];
    988     u32 cnt = 0;
    989     for (u32 a = sg->first_sec; a < sg->first_sec + sg->nsects; ++a) {
    990       int seen = 0;
    991       for (u32 b = sg->first_sec; b < a; ++b) {
    992         if (slice_eq_cstr(slice_from_cstr(x->secs[a].sectname),
    993                           x->secs[b].sectname) &&
    994             slice_eq_cstr(slice_from_cstr(x->secs[a].segname),
    995                           x->secs[b].segname)) {
    996           seen = 1;
    997           break;
    998         }
    999       }
   1000       if (!seen) ++cnt;
   1001     }
   1002     sg->nouts = cnt;
   1003     sg->first_out = 0; /* assigned in Phase B */
   1004   }
   1005 
   1006   /* Compute load-command count + sizeofcmds, then back-fill section
   1007    * offsets.  Layout pass 2. */
   1008   u32 nseg_real = 0;
   1009   for (u32 i = 0; i < x->nsegs; ++i) {
   1010     /* Skip __DATA_CONST or __DATA if no sections (edge case). */
   1011     if (i == 0) {
   1012       ++nseg_real;
   1013       continue;
   1014     } /* PAGEZERO */
   1015     if (i == MSEG_LINKEDIT) {
   1016       ++nseg_real;
   1017       continue;
   1018     } /* LINKEDIT always */
   1019     if (x->segs[i].nsects > 0) ++nseg_real; /* incl. __DWARF when present */
   1020   }
   1021   /* Each LC_SEGMENT_64 carries 72 + 80*nouts bytes (one section_64
   1022    * record per coalesced (segname,sectname), not per MSec). */
   1023   u32 sizeofcmds = 0;
   1024   for (u32 i = 0; i < x->nsegs; ++i) {
   1025     if (i == 0 || i == MSEG_LINKEDIT) {
   1026       sizeofcmds += MACHO_SEGCMD64_SIZE; /* no sections */
   1027       continue;
   1028     }
   1029     if (x->segs[i].nsects == 0) continue;
   1030     sizeofcmds += MACHO_SEGCMD64_SIZE + x->segs[i].nouts * MACHO_SECT64_SIZE;
   1031   }
   1032   (void)nseg_real;
   1033   /* LC_DYLD_CHAINED_FIXUPS / LC_DYLD_EXPORTS_TRIE */
   1034   sizeofcmds += 16u + 16u;
   1035   /* LC_SYMTAB / LC_DYSYMTAB */
   1036   sizeofcmds += MACHO_SYMTAB_CMD_SIZE + MACHO_DYSYMTAB_CMD_SIZE;
   1037   /* LC_LOAD_DYLINKER */
   1038   {
   1039     u32 ld_size = 12u + (u32)(sizeof("/usr/lib/dyld") - 1u) + 1u;
   1040     sizeofcmds += (u32)ALIGN_UP((u64)ld_size, 8u);
   1041   }
   1042   /* LC_UUID + LC_BUILD_VERSION + LC_MAIN */
   1043   sizeofcmds += 24u + 24u + 24u;
   1044   /* LC_LOAD_DYLIB per dylib */
   1045   for (u32 i = 0; i < x->ndylibs; ++i) {
   1046     size_t nl = pool_slice(x->c->global, x->dylibs[i].install).len;
   1047     u32 sz = 24u + (u32)nl + 1u;
   1048     sizeofcmds += (u32)ALIGN_UP((u64)sz, 8u);
   1049   }
   1050   /* LC_FUNCTION_STARTS / LC_DATA_IN_CODE / LC_CODE_SIGNATURE */
   1051   sizeofcmds += 16u + 16u + 16u;
   1052 
   1053   x->headers_size = MACHO_HDR64_SIZE + sizeofcmds;
   1054 
   1055   /* Now place sections in __TEXT, __DATA_CONST, __DATA. */
   1056   u64 vaddr = MZ_PAGEZERO + x->headers_size;
   1057   u64 fileoff = x->headers_size;
   1058   /* Pad __TEXT sections to natural alignment. */
   1059   for (u32 i = 0; i < x->nsegs; ++i) {
   1060     if (i == 0 || i == MSEG_LINKEDIT) continue; /* DWARF placed here too */
   1061     MSeg* sg = &x->segs[i];
   1062     if (i > 1) {
   1063       /* page-align the start of __DATA_CONST and __DATA */
   1064       vaddr = ALIGN_UP(vaddr, MZ_PAGE);
   1065       fileoff = ALIGN_UP(fileoff, MZ_PAGE);
   1066     }
   1067     sg->vmaddr = (i == 1) ? MZ_PAGEZERO : vaddr;
   1068     sg->fileoff = (i == 1) ? 0 : fileoff;
   1069     /* __TEXT carries the headers_size + sections. */
   1070     u64 seg_start_v = sg->vmaddr;
   1071     u64 seg_start_f = sg->fileoff;
   1072     /* For __TEXT, sections begin after the header area. */
   1073     u64 cur_v = (i == 1) ? (seg_start_v + x->headers_size) : seg_start_v;
   1074     u64 cur_f = (i == 1) ? (seg_start_f + x->headers_size) : seg_start_f;
   1075     u64 first_zerofill_v = 0;
   1076     int seen_zerofill = 0;
   1077     /* Non-zerofill first */
   1078     for (u32 j = 0; j < sg->nsects; ++j) {
   1079       MSec* m = &x->secs[sg->first_sec + j];
   1080       if (m->is_zerofill) continue;
   1081       cur_v = ALIGN_UP(cur_v, (u64)m->align);
   1082       cur_f = ALIGN_UP(cur_f, (u64)m->align);
   1083       m->vaddr = cur_v;
   1084       m->file_offset = cur_f;
   1085       cur_v += m->size;
   1086       cur_f += m->size;
   1087     }
   1088     first_zerofill_v = cur_v;
   1089     /* zerofill last (no file bytes) */
   1090     for (u32 j = 0; j < sg->nsects; ++j) {
   1091       MSec* m = &x->secs[sg->first_sec + j];
   1092       if (!m->is_zerofill) continue;
   1093       cur_v = ALIGN_UP(cur_v, (u64)m->align);
   1094       m->vaddr = cur_v;
   1095       m->file_offset = 0;
   1096       cur_v += m->size;
   1097       seen_zerofill = 1;
   1098     }
   1099     sg->filesize = (i == 1)
   1100                        ? (cur_f - seg_start_f)
   1101                        : (first_zerofill_v ? (first_zerofill_v - seg_start_v)
   1102                                            : (cur_v - seg_start_v));
   1103     sg->vmsize = ALIGN_UP(cur_v - seg_start_v, MZ_PAGE);
   1104     if (sg->vmsize == 0 && sg->nsects > 0) sg->vmsize = MZ_PAGE;
   1105     if (i == 1) {
   1106       x->stubs_vaddr = 0;
   1107       for (u32 j = 0; j < sg->nsects; ++j) {
   1108         MSec* m = &x->secs[sg->first_sec + j];
   1109         if (slice_eq_cstr(slice_from_cstr(m->sectname), "__stubs"))
   1110           x->stubs_vaddr = m->vaddr;
   1111       }
   1112       x->text_filesz = sg->filesize;
   1113     }
   1114     if (i == 2) {
   1115       for (u32 j = 0; j < sg->nsects; ++j) {
   1116         MSec* m = &x->secs[sg->first_sec + j];
   1117         if (slice_eq_cstr(slice_from_cstr(m->sectname), "__got"))
   1118           x->got_vaddr = m->vaddr;
   1119       }
   1120       x->data_const_vaddr = sg->vmaddr;
   1121       x->data_const_filesz = sg->filesize;
   1122     }
   1123     if (i == 3) {
   1124       for (u32 j = 0; j < sg->nsects; ++j) {
   1125         MSec* m = &x->secs[sg->first_sec + j];
   1126         if (slice_eq_cstr(slice_from_cstr(m->sectname), "__thread_ptrs"))
   1127           x->tlv_ptrs_vaddr = m->vaddr;
   1128         /* TLS storage image base: min vaddr across __thread_data and
   1129          * __thread_bss sections.  __thread_vars is excluded — it holds
   1130          * the descriptors, not the data that maps into the per-thread
   1131          * block. */
   1132         if ((slice_eq_cstr(slice_from_cstr(m->sectname), "__thread_data") ||
   1133              slice_eq_cstr(slice_from_cstr(m->sectname), "__thread_bss")) &&
   1134             (!x->has_tls_image || m->vaddr < x->tls_image_vaddr)) {
   1135           x->tls_image_vaddr = m->vaddr;
   1136           x->has_tls_image = 1;
   1137         }
   1138       }
   1139       x->data_vaddr = sg->vmaddr;
   1140       x->data_filesz = sg->filesize;
   1141       x->data_memsz = sg->vmsize;
   1142     }
   1143     vaddr = sg->vmaddr + sg->vmsize;
   1144     /* Mach-O segments are mapped in page units.  If a segment's memory
   1145      * image extends past its initialized file bytes (for example
   1146      * __DATA,__bss), the following segment's fileoff must not reuse those
   1147      * pages or the kernel can map later file contents into the zero-fill
   1148      * tail. */
   1149     fileoff = sg->fileoff + ((sg->vmsize > ALIGN_UP(sg->filesize, MZ_PAGE))
   1150                                  ? sg->vmsize
   1151                                  : sg->filesize);
   1152     (void)seen_zerofill;
   1153   }
   1154   /* LINKEDIT placeholder; size is filled after blob assembly. */
   1155   vaddr = ALIGN_UP(vaddr, MZ_PAGE);
   1156   fileoff = ALIGN_UP(fileoff, MZ_PAGE);
   1157   x->segs[MSEG_LINKEDIT].vmaddr = vaddr;
   1158   x->segs[MSEG_LINKEDIT].fileoff = fileoff;
   1159   x->linkedit_vaddr = vaddr;
   1160   x->linkedit_fileoff = fileoff;
   1161 
   1162   /* Encode __stubs bytes now that vaddrs are settled.  Internal-GOT
   1163    * entries have stub_idx=0 (direct CALL26, no stub) and must be
   1164    * skipped so the (stub_idx - 1u) arithmetic doesn't wrap. */
   1165   for (u32 i = 0; i < x->nimports; ++i) {
   1166     MachImp* mi = &x->imports[i];
   1167     if (!mi->is_func || !mi->stub_idx) continue;
   1168     u64 stub_v = x->stubs_vaddr + (mi->stub_idx - 1u) * x->macho->stub_size;
   1169     u64 got_v = x->got_vaddr + (mi->got_idx - 1u) * MZ_GOT_SIZE;
   1170     x->macho->emit_stub(
   1171         x->stubs_bytes + (mi->stub_idx - 1u) * x->macho->stub_size, stub_v,
   1172         got_v);
   1173   }
   1174 
   1175   /* Phase B: build OutSec[] now that all MSec vaddrs are final.  Walk
   1176    * MSecs sorted by (segidx, vaddr) and coalesce adjacent same-name
   1177    * runs.  Mirrors link_elf.c's OutShdr build at link_elf.c:879. */
   1178   {
   1179     u32* order =
   1180         (u32*)h->alloc(h, sizeof(u32) * (x->nsecs + 1u), _Alignof(u32));
   1181     if (!order && x->nsecs)
   1182       compiler_panic(x->c, SRCLOC_NONE, "link_macho: oom on outsec sort");
   1183     for (u32 i = 0; i < x->nsecs; ++i) order[i] = i;
   1184     /* Insertion sort — section count is small. */
   1185     for (u32 i = 1; i < x->nsecs; ++i) {
   1186       u32 cur = order[i];
   1187       MSec* a = &x->secs[cur];
   1188       u32 j = i;
   1189       while (j > 0) {
   1190         MSec* b = &x->secs[order[j - 1]];
   1191         if ((b->segidx < a->segidx) ||
   1192             (b->segidx == a->segidx && b->vaddr <= a->vaddr))
   1193           break;
   1194         order[j] = order[j - 1];
   1195         --j;
   1196       }
   1197       order[j] = cur;
   1198     }
   1199     u32 cap = x->nsecs + 1u;
   1200     x->outs = (OutSec*)h->alloc(h, sizeof(OutSec) * cap, _Alignof(OutSec));
   1201     if (!x->outs)
   1202       compiler_panic(x->c, SRCLOC_NONE, "link_macho: oom on OutSec");
   1203     memset(x->outs, 0, sizeof(OutSec) * cap);
   1204     x->nouts = 0;
   1205     for (u32 i = 0; i < x->nsecs; ++i) {
   1206       MSec* m = &x->secs[order[i]];
   1207       OutSec* tail = x->nouts ? &x->outs[x->nouts - 1] : NULL;
   1208       int merge = tail && tail->segidx == m->segidx &&
   1209                   slice_eq_cstr(slice_from_cstr(tail->sectname), m->sectname) &&
   1210                   slice_eq_cstr(slice_from_cstr(tail->segname), m->segname);
   1211       if (merge) {
   1212         if (tail->flags != m->flags || tail->is_zerofill != m->is_zerofill)
   1213           compiler_panic(
   1214               x->c, SRCLOC_NONE,
   1215               "link_macho: coalesce mismatch on %.*s,%.*s (flags/zerofill)",
   1216               SLICE_ARG(slice_from_cstr(m->segname)),
   1217               SLICE_ARG(slice_from_cstr(m->sectname)));
   1218         u64 end = m->vaddr + m->size;
   1219         u64 prev_end = tail->vaddr + tail->size;
   1220         if (end > prev_end) tail->size = end - tail->vaddr;
   1221         if (m->align > tail->align) tail->align = m->align;
   1222       } else {
   1223         OutSec* o = &x->outs[x->nouts++];
   1224         o->segname = m->segname;
   1225         o->sectname = m->sectname;
   1226         o->vaddr = m->vaddr;
   1227         o->file_offset = m->file_offset;
   1228         o->size = m->size;
   1229         o->align = m->align;
   1230         o->flags = m->flags;
   1231         o->reserved1 = m->reserved1;
   1232         o->reserved2 = m->reserved2;
   1233         o->segidx = m->segidx;
   1234         o->is_zerofill = m->is_zerofill;
   1235       }
   1236     }
   1237     h->free(h, order, sizeof(u32) * (x->nsecs + 1u));
   1238     /* Recompute per-segment OutSec span; Phase A's count was for
   1239      * sizeofcmds sizing — recompute it here as the source of truth and
   1240      * assert agreement. */
   1241     for (u32 i = 0; i < x->nsegs; ++i) {
   1242       x->segs[i].first_out = 0;
   1243     }
   1244     u32 prev_nouts[MSEG_COUNT];
   1245     for (u32 i = 0; i < x->nsegs; ++i) prev_nouts[i] = x->segs[i].nouts;
   1246     for (u32 i = 0; i < x->nsegs; ++i) x->segs[i].nouts = 0;
   1247     for (u32 i = 0; i < x->nouts; ++i) {
   1248       u8 sx = x->outs[i].segidx;
   1249       if (x->segs[sx].nouts == 0) x->segs[sx].first_out = i;
   1250       ++x->segs[sx].nouts;
   1251     }
   1252     for (u32 i = 0; i < x->nsegs; ++i) {
   1253       if (prev_nouts[i] != x->segs[i].nouts)
   1254         compiler_panic(x->c, SRCLOC_NONE,
   1255                        "link_macho: OutSec count drift seg %u (%u vs %u)",
   1256                        (u32)i, prev_nouts[i], x->segs[i].nouts);
   1257     }
   1258   }
   1259 }
   1260 
   1261 /* ---- pass: shift LinkImage into final vaddrs/file_offsets ----
   1262  *
   1263  * The sections in img->sections are still in their original
   1264  * link_layout coordinates.  Map each LinkSection -> its MSec and copy
   1265  * the final vaddr/file_offset so reloc-apply walks correctly. */
   1266 
   1267 static void shift_sections(MCtx* x) {
   1268   LinkImage* img = x->img;
   1269   /* Build a quick lookup: link_sec_id -> MSec*. */
   1270   for (u32 i = 0; i < x->nsecs; ++i) {
   1271     MSec* m = &x->secs[i];
   1272     if (!m->link_sec_id) continue;
   1273     /* Walk link_section_id slot. */
   1274     LinkSection* ls = &img->sections[m->link_sec_id - 1u];
   1275     /* shift relocs whose write_vaddr/file_offset live within this
   1276      * section's original [old_vaddr, old_vaddr+size). */
   1277     u64 old_v = ls->vaddr;
   1278     u64 old_f = ls->file_offset;
   1279     u64 new_v = m->vaddr;
   1280     u64 new_f = m->file_offset;
   1281     if (old_v == new_v && old_f == new_f) continue;
   1282     /* Update the LinkSection itself. */
   1283     ls->vaddr = new_v;
   1284     ls->file_offset = new_f;
   1285     /* Update relocs that target this section. */
   1286     for (u32 ri = 0; ri < LinkRelocs_count(&img->relocs); ++ri) {
   1287       LinkRelocApply* r = LinkRelocs_at(&img->relocs, ri);
   1288       if (r->link_section_id != ls->id) continue;
   1289       r->write_vaddr = new_v + (r->write_vaddr - old_v);
   1290       r->write_file_offset = new_f + (r->write_file_offset - old_f);
   1291     }
   1292     /* Update LinkSyms that belong to this LinkSection.  Match by
   1293      * section_id rather than vaddr range — multiple input sections
   1294      * may share the same pre-shift vaddr (each bucket in
   1295      * link_layout starts at offset 0). */
   1296     for (u32 si = 0; si < LinkSyms_count(&img->syms); ++si) {
   1297       LinkSymbol* s = LinkSyms_at(&img->syms, si);
   1298       if (!s->defined) continue;
   1299       if (s->kind == SK_ABS) continue;
   1300       if (s->section_id != ls->id) continue;
   1301       s->vaddr = new_v + (s->vaddr - old_v);
   1302     }
   1303   }
   1304 }
   1305 
   1306 /* ---- pass: apply relocations + collect chained-fixup sites ----
   1307  *
   1308  * Reloc dispatch:
   1309  *   target=imported func + CALL26/JUMP26 -> S = stub vaddr
   1310  *   target=import + GOT_LOAD_PAGE21/PAGEOFF12 -> S = got slot vaddr
   1311  *   target=import + ABS64 -> write 0; collect bind site
   1312  *   target=internal + ABS64 -> write target VA; collect rebase site
   1313  *   everything else -> standard apply
   1314  *
   1315  * Patch sites for chained fixups are 8-byte slots; for ABS32 we do not
   1316  * support fixups (no chained-fixup format for 32-bit pointers in
   1317  * standard arm64 — would need DYLD_CHAINED_PTR_32).  Internal R_ABS32
   1318  * is still applied, but with no slide adjustment; that is technically
   1319  * wrong, yet it suffices for compile-time-known offsets.
   1320  */
   1321 
   1322 typedef struct FixSite {
   1323   u8 segidx;  /* 2 = __DATA_CONST, 3 = __DATA */
   1324   u8 is_bind; /* 0 = rebase, 1 = bind */
   1325   u8 pad[2];
   1326   u32 import_idx;    /* 1-based import index for binds, 0 for rebases */
   1327   u64 vaddr;         /* absolute VA of the slot */
   1328   u64 rebase_target; /* unslid target VA; only used for rebases */
   1329 } FixSite;
   1330 
   1331 typedef struct FixList {
   1332   Heap* heap;
   1333   FixSite* a;
   1334   u32 n;
   1335   u32 cap;
   1336 } FixList;
   1337 
   1338 static void fix_init(FixList* fl, Heap* h) {
   1339   fl->heap = h;
   1340   fl->a = NULL;
   1341   fl->n = 0;
   1342   fl->cap = 0;
   1343 }
   1344 static void fix_fini(FixList* fl) {
   1345   if (fl->a) fl->heap->free(fl->heap, fl->a, sizeof(*fl->a) * fl->cap);
   1346   fl->a = NULL;
   1347   fl->n = fl->cap = 0;
   1348 }
   1349 static void fix_push(FixList* fl, const FixSite* s) {
   1350   if (VEC_GROW(fl->heap, fl->a, fl->cap, fl->n + 1u)) return;
   1351   fl->a[fl->n++] = *s;
   1352 }
   1353 
   1354 /* find MSec covering an absolute vaddr */
   1355 static MSec* msec_for_vaddr(MCtx* x, u64 v) {
   1356   for (u32 i = 0; i < x->nsecs; ++i) {
   1357     MSec* m = &x->secs[i];
   1358     if (v >= m->vaddr && v < m->vaddr + m->size) return m;
   1359   }
   1360   return NULL;
   1361 }
   1362 
   1363 static u8* bytes_for_section(MCtx* x, MSec* m, LinkImage* img) {
   1364   if (m->synth_data) {
   1365     /* Synthetic — caller reads/writes via x->stubs_bytes / x->got_bytes. */
   1366     if (m->synth_data == x->stubs_bytes) return x->stubs_bytes;
   1367     if (m->synth_data == x->got_bytes) return x->got_bytes;
   1368     return NULL;
   1369   }
   1370   /* Backed by a LinkSection: find the LinkSegment buffer that section
   1371    * sits in (link_layout.c stored input section bytes there). */
   1372   LinkSection* ls = &img->sections[m->link_sec_id - 1u];
   1373   u32 segid = ls->segment_id;
   1374   if (segid == LINK_SEG_NONE) return NULL;
   1375   return img->segment_bytes[segid - 1u];
   1376 }
   1377 
   1378 /* Map the LinkSection that backs a write_vaddr to an MSec, then to the
   1379  * underlying byte buffer. */
   1380 static u8* patch_ptr(MCtx* x, LinkImage* img, const LinkRelocApply* r,
   1381                      MSec** out_msec) {
   1382   /* Look up via the LinkSection.  After shift_sections the section
   1383    * vaddr is the Mach-O vaddr; the corresponding MSec backs it. */
   1384   if (r->link_section_id == LINK_SEC_NONE) return NULL;
   1385   LinkSection* ls = &img->sections[r->link_section_id - 1u];
   1386   /* Find the MSec by link_sec_id. */
   1387   MSec* m = NULL;
   1388   for (u32 i = 0; i < x->nsecs; ++i) {
   1389     if (x->secs[i].link_sec_id == ls->id) {
   1390       m = &x->secs[i];
   1391       break;
   1392     }
   1393   }
   1394   if (!m) return NULL;
   1395   /* The LinkSegment's bytes are valid (not shifted), but the offset
   1396    * within them is the original input_offset.  Use input_offset for
   1397    * the byte offset, since the LinkSegment buffer wasn't reshuffled. */
   1398   /* link_layout.c set ls->file_offset = seg.file_offset + input_offset
   1399    * originally.  ls->vaddr similarly.  After our shift, they're new.
   1400    * The byte offset within the segment buffer is still input_offset. */
   1401   u8* base = bytes_for_section(x, m, img);
   1402   if (!base) return NULL;
   1403   u32 within_section = (u32)(r->write_vaddr - m->vaddr);
   1404   /* The segment buffer's first byte corresponds to ls->input_offset==0
   1405    * for the FIRST section in the segment.  But that's a complication.
   1406    * For simplicity we recompute the segment-relative byte offset by
   1407    * (file_offset - segment.file_offset) where segment.file_offset is
   1408    * unchanged.  Wait: the original layout produced `ls->file_offset =
   1409    * seg.file_offset + input_offset`, and we may have changed
   1410    * ls->file_offset.  Let's just use input_offset stored on the
   1411    * LinkSection. */
   1412   u32 in_off = (u32)(ls->input_offset + within_section);
   1413   if (out_msec) *out_msec = m;
   1414   return base + in_off;
   1415 }
   1416 
   1417 /* Symbol-relative resolved-address S, accounting for imports. */
   1418 static int sym_S(MCtx* x, LinkImage* img, LinkSymId id, u64* out_S,
   1419                  int* out_imp_idx) {
   1420   *out_S = 0;
   1421   *out_imp_idx = 0;
   1422   if (id == LINK_SYM_NONE) return 0;
   1423   LinkSymbol* s = sym_at(img, id);
   1424   if (!s) return 0;
   1425   /* Look up the import index — real imports plus internal-GOT entries
   1426    * the collect_imports pass materialized for GOT-routed internal refs. */
   1427   u32 idx = 0;
   1428   if (id < x->sym_to_imp_size) idx = x->sym_to_imp[id];
   1429   if (!idx && s->name != 0) {
   1430     LinkSymId canon = symhash_get(&img->globals, s->name);
   1431     if (canon != LINK_SYM_NONE && canon < x->sym_to_imp_size)
   1432       idx = x->sym_to_imp[canon];
   1433   }
   1434   if (s->imported) {
   1435     *out_imp_idx = (int)idx;
   1436     return 1;
   1437   }
   1438   /* Internal symbol that has a GOT slot — surface the import index so
   1439    * the GOT_LOAD reloc paths in apply_relocs find it, but also expose
   1440    * S=vaddr so non-GOT relocs (CALL26 etc.) still apply directly. */
   1441   *out_imp_idx = (int)idx;
   1442   *out_S = s->vaddr;
   1443   return 0;
   1444 }
   1445 
   1446 static void apply_relocs(MCtx* x, FixList* fl) {
   1447   LinkImage* img = x->img;
   1448   for (u32 i = 0; i < LinkRelocs_count(&img->relocs); ++i) {
   1449     LinkRelocApply* r = LinkRelocs_at(&img->relocs, i);
   1450     if (r->target == LINK_SYM_NONE) continue;
   1451     /* File-only .debug_* section: patch the registry buffer in place (no
   1452      * __got/stub/chained-fixup — debug bytes aren't loaded or slid). A
   1453      * SK_SECTION target resolves to its DWARF-section-relative base; a
   1454      * code/data symbol to its final (absolute) vaddr for low_pc. Mach-O
   1455      * vaddrs are already absolute, so there's no extra image base. */
   1456     {
   1457       const LinkSection* sec = &img->sections[r->link_section_id - 1u];
   1458       if (sec->file_only) {
   1459         u8* dbg = link_fileonly_bytes(img, r->link_section_id);
   1460         const LinkSymbol* tgt = sym_at(img, r->target);
   1461         if (dbg && tgt)
   1462           link_reloc_apply(x->c, r->kind, dbg + r->offset, tgt->vaddr,
   1463                            r->addend, 0);
   1464         continue;
   1465       }
   1466     }
   1467     MSec* msec = NULL;
   1468     u8* P_bytes = patch_ptr(x, img, r, &msec);
   1469     if (!P_bytes) continue;
   1470     u64 P = r->write_vaddr;
   1471 
   1472     u64 S;
   1473     int imp_idx;
   1474     int is_imp = sym_S(x, img, r->target, &S, &imp_idx);
   1475 
   1476     /* TLVP relocs route through a __thread_ptrs slot regardless of
   1477      * whether the descriptor target is in-image or imported.  Resolved
   1478      * before the import / internal split because an imported TLV
   1479      * descriptor doesn't use the __got slot (its address lives in
   1480      * __thread_ptrs with its own chained bind). */
   1481     if (reloc_kind_is_tlvp(x->c, r->kind)) {
   1482       u32 tlv_idx =
   1483           (r->target < x->sym_to_tlv_size) ? x->sym_to_tlv[r->target] : 0u;
   1484       if (!tlv_idx)
   1485         compiler_panic(x->c, SRCLOC_NONE,
   1486                        "link_macho: TLVP reloc has no __thread_ptrs slot");
   1487       u64 slot_v = x->tlv_ptrs_vaddr + (tlv_idx - 1u) * MZ_TLVP_SIZE;
   1488       link_reloc_apply(x->c, r->kind, P_bytes, slot_v, r->addend, P);
   1489       continue;
   1490     }
   1491 
   1492     if (is_imp) {
   1493       MachImp* mi = (imp_idx > 0) ? &x->imports[imp_idx - 1] : NULL;
   1494       if (reloc_kind_is_branch(x->c, r->kind)) {
   1495         if (!mi || !mi->stub_idx)
   1496           compiler_panic(x->c, SRCLOC_NONE,
   1497                          "link_macho: import has no stub for branch");
   1498         u64 stub_v = x->stubs_vaddr + (mi->stub_idx - 1u) * x->macho->stub_size;
   1499         link_reloc_apply(x->c, r->kind, P_bytes, stub_v, r->addend, P);
   1500         continue;
   1501       }
   1502       if (reloc_kind_is_got_load(x->c, r->kind)) {
   1503         if (!mi)
   1504           compiler_panic(x->c, SRCLOC_NONE,
   1505                          "link_macho: GOT reloc for unknown import");
   1506         u64 got_v = x->got_vaddr + (mi->got_idx - 1u) * MZ_GOT_SIZE;
   1507         link_reloc_apply(x->c, r->kind, P_bytes, got_v, r->addend, P);
   1508         continue;
   1509       }
   1510       if (reloc_kind_is_direct_page(x->c, r->kind)) {
   1511         /* Direct page/lo12 against an import: route through __got. */
   1512         if (!mi)
   1513           compiler_panic(x->c, SRCLOC_NONE,
   1514                          "link_macho: PAGE/LO12 against unknown import");
   1515         u64 got_v = x->got_vaddr + (mi->got_idx - 1u) * MZ_GOT_SIZE;
   1516         link_reloc_apply(x->c, r->kind, P_bytes, got_v, r->addend, P);
   1517         continue;
   1518       }
   1519       if (r->kind == R_ABS64) {
   1520         /* Direct 8-byte absolute against an import: bind the slot. */
   1521         wr_u64_le(P_bytes, 0);
   1522         FixSite fs = {(u8)msec->segidx, 1, {0}, (u32)imp_idx, P, 0};
   1523         fix_push(fl, &fs);
   1524         continue;
   1525       }
   1526       compiler_panic(x->c, SRCLOC_NONE,
   1527                      "link_macho: unhandled reloc kind %u against imported "
   1528                      "symbol",
   1529                      (u32)r->kind);
   1530     }
   1531 
   1532     /* Internal relocs. */
   1533     if (r->kind == R_ABS64) {
   1534       /* Special case: ABS64 reloc inside a TLV descriptor record
   1535        * (__thread_vars section) targeting in-image TLS storage.  This
   1536        * is the descriptor's word-2 "offset" field — dyld interprets it
   1537        * as the per-thread offset of the storage within the TLS image,
   1538        * NOT as an absolute address.  Apple's ld writes the literal
   1539        * offset and emits no chained-fixup entry; replicate that so the
   1540        * chain skips over this slot (chained_fixups already does the
   1541        * right thing: no fixsite -> no chain link). */
   1542       if (msec && (msec->flags & SECTION_TYPE) == S_THREAD_LOCAL_VARIABLES &&
   1543           x->has_tls_image) {
   1544         u64 offset = (S + (u64)r->addend) - x->tls_image_vaddr;
   1545         wr_u64_le(P_bytes, offset);
   1546         continue;
   1547       }
   1548       /* Rebase site. */
   1549       wr_u64_le(P_bytes, S + (u64)r->addend);
   1550       FixSite fs = {(u8)msec->segidx, 0, {0}, 0, P, S + (u64)r->addend};
   1551       fix_push(fl, &fs);
   1552       continue;
   1553     }
   1554     /* Internal symbol routed through __got (clang emits GOT_LOAD_PAGE21
   1555      * for any extern global, even if the def is in-image).  imp_idx
   1556      * was populated by collect_imports' internal-GOT pass; redirect
   1557      * the page/lo12 reloc to the GOT slot's vaddr. */
   1558     if (imp_idx > 0 && reloc_kind_is_got_load(x->c, r->kind)) {
   1559       MachImp* mi = &x->imports[imp_idx - 1];
   1560       u64 got_v = x->got_vaddr + (mi->got_idx - 1u) * MZ_GOT_SIZE;
   1561       link_reloc_apply(x->c, r->kind, P_bytes, got_v, r->addend, P);
   1562       continue;
   1563     }
   1564     /* Generic apply. */
   1565     link_reloc_apply(x->c, r->kind, P_bytes, S, r->addend, P);
   1566   }
   1567 
   1568   /* Per-slot chained fixup.  Real imports → bind (dyld resolves at
   1569    * load).  Internal GOT entries → rebase pointing at the symbol's
   1570    * image-relative vaddr; a target vaddr of 0 (weak undef → NULL) gets
   1571    * no fixup, just a literal zero slot — chained fixups treat 0 as a
   1572    * gap and won't disturb it. */
   1573   for (u32 i = 0; i < x->nimports; ++i) {
   1574     MachImp* mi = &x->imports[i];
   1575     u64 slot_v = x->got_vaddr + (mi->got_idx - 1u) * MZ_GOT_SIZE;
   1576     if (mi->internal) {
   1577       /* Re-read the symbol's final vaddr now that shift_sections has
   1578        * rebased every defined symbol into the Mach-O image layout
   1579        * (collect_imports snapshotted too early). */
   1580       LinkSymbol* s = sym_at(img, mi->sym);
   1581       u64 tgt_v = s ? s->vaddr : 0;
   1582       u8* slot = x->got_bytes + (mi->got_idx - 1u) * MZ_GOT_SIZE;
   1583       wr_u64_le(slot, tgt_v);
   1584       if (tgt_v == 0) continue; /* weak-undef → NULL */
   1585       FixSite fs = {2u, 0, {0}, 0, slot_v, tgt_v};
   1586       fix_push(fl, &fs);
   1587     } else {
   1588       /* clear slot bytes (already zero) — dyld writes via chain */
   1589       FixSite fs = {2u, 1, {0}, i + 1u, slot_v, 0};
   1590       fix_push(fl, &fs);
   1591     }
   1592   }
   1593 
   1594   /* Per-slot TLV pointer fixups.  Mirror of the __got loop above: each
   1595    * __thread_ptrs slot points at the descriptor record.  When the
   1596    * descriptor is in-image (internal) we REBASE to its final vaddr; when
   1597    * it lives in a dylib we BIND through the descriptor's MachImp.  The
   1598    * slot itself lives in __DATA (segidx=3), distinct from __got's
   1599    * __DATA_CONST (segidx=2). */
   1600   for (u32 i = 0; i < x->ntlv; ++i) {
   1601     MachTlv* ts = &x->tlv_slots[i];
   1602     u64 slot_v = x->tlv_ptrs_vaddr + (ts->tlv_idx - 1u) * MZ_TLVP_SIZE;
   1603     u8* slot = x->tlv_ptrs_bytes + (ts->tlv_idx - 1u) * MZ_TLVP_SIZE;
   1604     if (ts->imported) {
   1605       if (!ts->import_idx)
   1606         compiler_panic(x->c, SRCLOC_NONE,
   1607                        "link_macho: imported TLV without matching import slot");
   1608       wr_u64_le(slot, 0);
   1609       FixSite fs = {3u, 1, {0}, ts->import_idx, slot_v, 0};
   1610       fix_push(fl, &fs);
   1611     } else {
   1612       LinkSymbol* s = sym_at(img, ts->sym);
   1613       u64 tgt_v = s ? s->vaddr : 0;
   1614       wr_u64_le(slot, tgt_v);
   1615       if (tgt_v == 0) continue; /* weak-undef descriptor → NULL */
   1616       FixSite fs = {3u, 0, {0}, 0, slot_v, tgt_v};
   1617       fix_push(fl, &fs);
   1618     }
   1619   }
   1620 }
   1621 
   1622 /* ---- chained fixups blob assembler ----
   1623  *
   1624  * For each segment that has fixups, build a dyld_chained_starts_in_segment
   1625  * with one chain per page (MZ_PAGE).  Within a page, sort sites by
   1626  * offset, encode each as DYLD_CHAINED_PTR_64, and link via the `next`
   1627  * field (4-byte units, 0 = end of chain).
   1628  */
   1629 
/* Summary of one page's fixup chain: where the chain starts inside the
 * page and which run of sites it covers.
 * NOTE(review): build_chained_fixups walks the sorted site array directly
 * and does not reference this record; confirm whether anything else in the
 * file still uses it. */
typedef struct PageChain {
  u32 first_offset_in_page; /* relative to page start */
  u32 nsites;               /* presumably the number of sites in the page */
  u32 first_site_idx;       /* into a per-segment site array */
} PageChain;
   1635 
   1636 static int site_cmp_by_vaddr(const void* a, const void* b) {
   1637   const FixSite* x = a;
   1638   const FixSite* y = b;
   1639   if (x->vaddr < y->vaddr) return -1;
   1640   if (x->vaddr > y->vaddr) return 1;
   1641   return 0;
   1642 }
   1643 
   1644 /* tiny insertion sort to avoid pulling qsort */
   1645 static void sort_sites(FixSite* a, u32 n) {
   1646   for (u32 i = 1; i < n; ++i) {
   1647     FixSite tmp = a[i];
   1648     u32 j = i;
   1649     while (j > 0 && site_cmp_by_vaddr(&a[j - 1], &tmp) > 0) {
   1650       a[j] = a[j - 1];
   1651       --j;
   1652     }
   1653     a[j] = tmp;
   1654   }
   1655 }
   1656 
   1657 static void emit_pointer(u8* slot, int is_bind, u32 ord_or_target_lo,
   1658                          u32 high_or_target_hi, u32 next4) {
   1659   /* DYLD_CHAINED_PTR_64:
   1660    *   bind  : ordinal:24, addend:8, reserved:19, next:12, bind:1=1
   1661    *   rebase: target:36 (vmaddr), high8:8, reserved:7, next:12, bind:1=0
   1662    */
   1663   u64 v = 0;
   1664   if (is_bind) {
   1665     u64 ordinal = (u64)ord_or_target_lo & 0xffffffull; /* 24 bits */
   1666     u64 addend = 0;
   1667     u64 next = (u64)next4 & 0xfffull;
   1668     v = ordinal | (addend << 24) | (0ull /* reserved */ << 32) | (next << 51) |
   1669         ((u64)1 << 63);
   1670   } else {
   1671     /* rebase: target is full vmaddr; we get hi:lo split. */
   1672     u64 target = ((u64)high_or_target_hi << 32) | (u64)ord_or_target_lo;
   1673     target &= ((u64)1 << 36) - 1u; /* 36 bits */
   1674     u64 high8 = 0;
   1675     u64 next = (u64)next4 & 0xfffull;
   1676     v = target | (high8 << 36) | (0ull /* reserved */ << 44) | (next << 51) |
   1677         ((u64)0 << 63);
   1678   }
   1679   wr_u64_le(slot, v);
   1680 }
   1681 
   1682 static void build_chained_fixups(MCtx* x, FixList* fl) {
   1683   Heap* h = x->h;
   1684   MByte* out = &x->chained_fixups;
   1685   mbuf_init(out, h);
   1686 
   1687   /* Header (32 B):
   1688    *   uint32 fixups_version (=0)
   1689    *   uint32 starts_offset
   1690    *   uint32 imports_offset
   1691    *   uint32 symbols_offset
   1692    *   uint32 imports_count
   1693    *   uint32 imports_format (=1)
   1694    *   uint32 symbols_format (=0)
   1695    */
   1696   u32 hdr_pos = mbuf_u32(out, 0); /* fixups_version */
   1697   (void)hdr_pos;
   1698   u32 starts_offset_pos = mbuf_u32(out, 0);
   1699   u32 imports_offset_pos = mbuf_u32(out, 0);
   1700   u32 symbols_offset_pos = mbuf_u32(out, 0);
   1701   mbuf_u32(out, x->nimports_real);
   1702   mbuf_u32(out, DYLD_CHAINED_IMPORT);
   1703   mbuf_u32(out, 0); /* symbols uncompressed */
   1704   /* dyld expects 8-byte alignment of the starts table. */
   1705   mbuf_align(out, 4);
   1706 
   1707   /* dyld_chained_starts_in_image:
   1708    *   uint32 seg_count
   1709    *   uint32 seg_info_offset[seg_count]
   1710    *
   1711    * seg_count must equal mach-O segment count (5).
   1712    * seg_info_offset[i] = 0 means no fixups in that segment.
   1713    */
   1714   u32 starts_off = out->len;
   1715   wr_u32_le(out->data + starts_offset_pos, starts_off);
   1716   mbuf_u32(out, x->nsegs);
   1717   /* Reserve seg_info_offset[]. */
   1718   u32 seg_info_offsets_pos = out->len;
   1719   for (u32 i = 0; i < x->nsegs; ++i) mbuf_u32(out, 0);
   1720 
   1721   /* Sort fixsites by vaddr globally. */
   1722   sort_sites(fl->a, fl->n);
   1723 
   1724   /* Per segment, emit dyld_chained_starts_in_segment when fixups present. */
   1725   for (u32 si = 0; si < x->nsegs; ++si) {
   1726     /* count sites in this segment */
   1727     u32 first = (u32)-1, count = 0;
   1728     for (u32 k = 0; k < fl->n; ++k) {
   1729       if (fl->a[k].segidx == si) {
   1730         if (first == (u32)-1) first = k;
   1731         ++count;
   1732       }
   1733     }
   1734     if (!count) continue;
   1735     /* Page-align this struct to 4. */
   1736     mbuf_align(out, 4);
   1737     u32 sis_off = out->len;
   1738     /* Patch seg_info_offset[si] to (sis_off - starts_off). */
   1739     wr_u32_le(out->data + seg_info_offsets_pos + si * 4u, sis_off - starts_off);
   1740 
   1741     /* Compute page count for this segment. */
   1742     u64 seg_va = x->segs[si].vmaddr;
   1743     u64 seg_size = x->segs[si].vmsize ? x->segs[si].vmsize : MZ_PAGE;
   1744     u32 page_count = (u32)((seg_size + MZ_PAGE - 1u) / MZ_PAGE);
   1745 
   1746     /* dyld_chained_starts_in_segment:
   1747      *   uint32 size
   1748      *   uint16 page_size
   1749      *   uint16 pointer_format
   1750      *   uint64 segment_offset    (offset of segment's first byte from
   1751      *                             mach_header)
   1752      *   uint32 max_valid_pointer (0 for 64-bit)
   1753      *   uint16 page_count
   1754      *   uint16 page_start[page_count]  (0xFFFF = no fixups in page)
   1755      */
   1756     u32 sis_size_pos = mbuf_u32(out, 0); /* fill below */
   1757     mbuf_u16(out, (u16)MZ_PAGE);
   1758     mbuf_u16(out, (u16)DYLD_CHAINED_PTR_64);
   1759     mbuf_u64(out, (u64)x->segs[si].fileoff); /* segment file offset */
   1760     mbuf_u32(out, 0);
   1761     mbuf_u16(out, (u16)page_count);
   1762     u32 page_starts_pos = out->len;
   1763     for (u32 p = 0; p < page_count; ++p) mbuf_u16(out, 0xFFFFu);
   1764     /* size includes the page_start array */
   1765     u32 sis_size = out->len - sis_size_pos + 4u;
   1766     /* Hmm, the `size` field is the size of *this* struct. We measure
   1767      * from sis_off through end of page_starts. */
   1768     sis_size = out->len - sis_off;
   1769     wr_u32_le(out->data + sis_size_pos, sis_size);
   1770 
   1771     /* Now: walk sites in this segment, group by page, write
   1772      * page_start[i] = offset_in_page of first site, and chain via
   1773      * next-field in the actual segment's bytes. */
   1774     /* Sites are sorted globally; collect contiguous run for this seg. */
   1775     u32 cur = first;
   1776     while (cur < first + count) {
   1777       u32 page_idx = (u32)((fl->a[cur].vaddr - seg_va) / MZ_PAGE);
   1778       u32 offset_in_page = (u32)((fl->a[cur].vaddr - seg_va) % MZ_PAGE);
   1779       wr_u16_le(out->data + page_starts_pos + page_idx * 2u,
   1780                 (u16)offset_in_page);
   1781       /* Walk this page's chain. */
   1782       u32 next_in_page = cur;
   1783       while (next_in_page + 1 < first + count) {
   1784         u64 nv = fl->a[next_in_page + 1].vaddr;
   1785         if (nv >= seg_va + (u64)(page_idx + 1) * MZ_PAGE) break;
   1786         ++next_in_page;
   1787       }
   1788       /* Encode chain pointers. */
   1789       for (u32 k = cur; k <= next_in_page; ++k) {
   1790         FixSite* s = &fl->a[k];
   1791         u32 next4 = 0;
   1792         if (k < next_in_page) {
   1793           u64 dist = fl->a[k + 1].vaddr - s->vaddr;
   1794           next4 = (u32)(dist / 4u);
   1795         }
   1796         /* Find segment bytes.  Synthetic pointer sections have private
   1797          * buffers; file-backed sections can live in any segment, including
   1798          * pointer-bearing read-only constants in __TEXT. */
   1799         u8* slot = NULL;
   1800         if (s->segidx == 2 && x->got_bytes && s->vaddr >= x->got_vaddr &&
   1801             s->vaddr < x->got_vaddr + x->got_size) {
   1802           /* __DATA_CONST: __got slot. */
   1803           slot = x->got_bytes + (s->vaddr - x->got_vaddr);
   1804         } else if (x->tlv_ptrs_bytes && s->vaddr >= x->tlv_ptrs_vaddr &&
   1805                    s->vaddr < x->tlv_ptrs_vaddr + x->tlv_ptrs_size) {
   1806           slot = x->tlv_ptrs_bytes + (s->vaddr - x->tlv_ptrs_vaddr);
   1807         } else {
   1808           MSec* m = msec_for_vaddr(x, s->vaddr);
   1809           if (m && m->link_sec_id) {
   1810             u8* base = bytes_for_section(x, m, x->img);
   1811             if (base) {
   1812               LinkSection* ls = &x->img->sections[m->link_sec_id - 1u];
   1813               u32 in_off = (u32)(ls->input_offset + (s->vaddr - m->vaddr));
   1814               slot = base + in_off;
   1815             }
   1816           }
   1817         }
   1818         if (!slot)
   1819           compiler_panic(x->c, SRCLOC_NONE,
   1820                          "link_macho: chained-fixup slot for vaddr 0x%llx not "
   1821                          "in any segment buffer",
   1822                          (unsigned long long)s->vaddr);
   1823         if (s->is_bind) {
   1824           /* ordinal is import index (1-based) - 1; chained-import format
   1825            * uses 0-based. */
   1826           if (s->import_idx == 0 || s->import_idx > x->nimports_real) {
   1827             compiler_panic(
   1828                 x->c, SRCLOC_NONE,
   1829                 "link_macho: chained bind for vaddr 0x%llx uses import index "
   1830                 "%u outside real import table size %u",
   1831                 (unsigned long long)s->vaddr, (unsigned)s->import_idx,
   1832                 (unsigned)x->nimports_real);
   1833           }
   1834           u32 ord = s->import_idx - 1u;
   1835           emit_pointer(slot, 1, ord, 0, next4);
   1836         } else {
   1837           /* rebase target = unslid vmaddr */
   1838           u32 lo = (u32)(s->rebase_target & 0xffffffffu);
   1839           u32 hi = (u32)(s->rebase_target >> 32);
   1840           emit_pointer(slot, 0, lo, hi, next4);
   1841         }
   1842       }
   1843       cur = next_in_page + 1u;
   1844     }
   1845   }
   1846 
   1847   /* Imports table: one dyld_chained_import (4B) per real import.
   1848    * Layout: lib_ordinal:8, weak:1, name_offset:23.  Internal-GOT
   1849    * entries are not bound by dyld so they're omitted here. */
   1850   mbuf_align(out, 4);
   1851   u32 imports_off = out->len;
   1852   wr_u32_le(out->data + imports_offset_pos, imports_off);
   1853   /* We need to first build the symbol pool to know name offsets. */
   1854   u32 symbols_off = imports_off + x->nimports_real * 4u;
   1855   /* Reserve imports area. */
   1856   for (u32 i = 0; i < x->nimports_real; ++i) mbuf_u32(out, 0);
   1857   /* Emit symbols (each NUL-terminated). Set name_offset on each import. */
   1858   wr_u32_le(out->data + symbols_offset_pos, out->len);
   1859   /* Leading NUL for offset 0. */
   1860   mbuf_u8(out, 0);
   1861   for (u32 i = 0; i < x->nimports_real; ++i) {
   1862     MachImp* mi = &x->imports[i];
   1863     Slice nm_s = pool_slice(x->c->global, mi->name);
   1864     const char* nm = nm_s.s;
   1865     size_t nl = nm_s.len;
   1866     if (!nm || !nl || mi->dylib_ord == 0 || mi->dylib_ord > x->ndylibs) {
   1867       compiler_panic(x->c, SRCLOC_NONE,
   1868                      "link_macho: invalid chained import %u "
   1869                      "(name=%u dylib_ord=%u ndylibs=%u)",
   1870                      (unsigned)i, (unsigned)mi->name, (unsigned)mi->dylib_ord,
   1871                      (unsigned)x->ndylibs);
   1872     }
   1873     u32 off = out->len - symbols_off;
   1874     mbuf_str(out, nm, (u32)nl);
   1875     /* Patch the import slot. */
   1876     u32 packed = ((u32)mi->dylib_ord & 0xffu) |
   1877                  ((u32)(mi->weak ? 1u : 0u) << 8) | ((off & 0x7fffffu) << 9);
   1878     wr_u32_le(out->data + imports_off + i * 4u, packed);
   1879   }
   1880   (void)symbols_off;
   1881 }
   1882 
/* ---- exports trie ---- *
 *
 * Minimal trie: a root node with a single child edge labelled with the
 * entry symbol's name (normally "_main"), leading to one terminal node
 * that carries the entry's offset from the __TEXT base.  This is enough
 * for dyld; binaries with a real exports trie include more data but we
 * don't need it. */
   1888 
   1889 static void uleb128(MByte* out, u64 v) {
   1890   do {
   1891     u8 byte = v & 0x7fu;
   1892     v >>= 7;
   1893     if (v) byte |= 0x80u;
   1894     mbuf_u8(out, byte);
   1895   } while (v);
   1896 }
   1897 
   1898 static u32 uleb128_size(u64 v) {
   1899   u32 n = 0;
   1900   do {
   1901     ++n;
   1902     v >>= 7;
   1903   } while (v);
   1904   return n;
   1905 }
   1906 
   1907 static void build_exports_trie(MCtx* x) {
   1908   /* Format:
   1909    *   node = (terminal_size: uleb128) (export_data)? (children_count: u8)
   1910    *          (children: [(label NUL) (offset uleb128)]*)
   1911    *
   1912    * We emit a trie with a single leaf at "_main" with offset
   1913    * entry_offset (from __TEXT base).
   1914    *
   1915    * Easiest: single root node with children_count=1, child label = "_main",
   1916    * child offset points to a leaf node.
   1917    */
   1918   MByte* out = &x->exports_trie;
   1919   mbuf_init(out, x->h);
   1920 
   1921   LinkImage* img = x->img;
   1922   LinkSymbol* esym = sym_at(img, img->entry_sym);
   1923   if (!esym || !esym->defined) {
   1924     /* No entry — emit a single empty terminal trie. */
   1925     mbuf_u8(out, 0); /* terminal_size 0 */
   1926     mbuf_u8(out, 0); /* children 0 */
   1927     return;
   1928   }
   1929   Slice nm_s = pool_slice(x->c->global, esym->name);
   1930   const char* nm = nm_s.s;
   1931   size_t nl = nm_s.len;
   1932   if (!nm || nl == 0) {
   1933     mbuf_u8(out, 0);
   1934     mbuf_u8(out, 0);
   1935     return;
   1936   }
   1937   /* leaf node: terminal_size = sizeof(uleb(flags)+uleb(offset))
   1938    * flags = 0 (regular export); offset = vaddr - __TEXT.vmaddr */
   1939   u64 entry_off = esym->vaddr - x->text_vaddr;
   1940 
   1941   /* Compute leaf-node bytes length: uleb(flags=0) + uleb(offset). */
   1942   u32 flags = 0;
   1943   u32 leaf_payload_len = uleb128_size(flags) + uleb128_size(entry_off);
   1944   /* Layout: root node first, then leaf.  The root node's child entry
   1945    * carries the absolute offset of the leaf within the trie. */
   1946 
   1947   /* root: terminal_size=0, children_count=1, "_main"\0, child_offset=
   1948    *        (leaf-position uleb).
   1949    *
   1950    * The child offset's own ULEB width contributes to the leaf position, so
   1951    * solve for the fixed point before emitting. */
   1952   u32 leaf_pos = 2u + (u32)nl + 1u + 1u;
   1953   for (;;) {
   1954     u32 n = uleb128_size(leaf_pos);
   1955     u32 next = 2u + (u32)nl + 1u + n;
   1956     if (next == leaf_pos) break;
   1957     leaf_pos = next;
   1958   }
   1959 
   1960   mbuf_u8(out, 0); /* root terminal size */
   1961   mbuf_u8(out, 1); /* children_count */
   1962   mbuf_str(out, nm, (u32)nl);
   1963   uleb128(out, leaf_pos);
   1964   /* leaf node */
   1965   if (out->len != leaf_pos)
   1966     compiler_panic(x->c, SRCLOC_NONE,
   1967                    "macho: exports trie leaf offset mismatch");
   1968   /* terminal_size byte then payload */
   1969   mbuf_u8(out, (u8)leaf_payload_len);
   1970   uleb128(out, flags);
   1971   uleb128(out, entry_off);
   1972   mbuf_u8(out, 0); /* children_count */
   1973   /* Pad trie to 8 bytes. */
   1974   mbuf_align(out, 8);
   1975 }
   1976 
   1977 /* ---- symtab + strtab + indirect symtab ---- */
   1978 
/* One symbol-table record.  The fields have the same widths and order as
 * the 16-byte entries build_symtab serialises (u32, u8, u8, u16, u64),
 * i.e. Mach-O's nlist_64: n_strx, n_type, n_sect, n_desc, n_value. */
typedef struct NlistRec {
  u32 strx;  /* offset of the symbol's name in the string table */
  u8 type;   /* n_type bits, e.g. N_SECT | N_EXT or N_UNDF | N_EXT */
  u8 sect;   /* 1-based section index (Mach-O) */
  u16 desc;  /* n_desc flags, e.g. N_WEAK_DEF / N_WEAK_REF */
  u64 value; /* symbol value (the vaddr for defined symbols) */
} NlistRec;
   1986 
   1987 static void build_symtab(MCtx* x) {
   1988   Heap* h = x->h;
   1989   LinkImage* img = x->img;
   1990   mbuf_init(&x->symtab, h);
   1991   mbuf_init(&x->strtab, h);
   1992   mbuf_init(&x->indirect, h);
   1993 
   1994   /* strtab leading NUL */
   1995   mbuf_u8(&x->strtab, 0);
   1996 
   1997   /* Approach:
   1998    * - Add one local nlist per defined LinkSymbol (locals + non-imported
   1999    *   externs) — but to keep things simple we only emit external defined
   2000    *   syms (mainly _main), plus all imports as N_UNDF|N_EXT.
   2001    *
   2002    * Mach-O dyld requires the symtab order: locals first, ext-defs next,
   2003    * undef last (matched by LC_DYSYMTAB ranges).
   2004    */
   2005 
   2006   /* Pass A: defined externals. */
   2007   u32 n_local = 0;
   2008   u32 n_extdef = 0;
   2009   u32 n_undef = 0;
   2010 
   2011   /* For now we emit only externals + imports.  No locals. */
   2012   /* extdef pass */
   2013   for (u32 i = 0; i < LinkSyms_count(&img->syms); ++i) {
   2014     LinkSymbol* s = LinkSyms_at(&img->syms, i);
   2015     if (!s->defined) continue;
   2016     if (s->bind != SB_GLOBAL && s->bind != SB_WEAK) continue;
   2017     if (s->name == 0) continue;
   2018     if (s->kind == SK_ABS) continue; /* skip abs externs */
   2019     /* Locate which OutSec contains this vaddr to figure out n_sect.
   2020      * n_sect is the 1-based index into the flat section_64 table the
   2021      * file actually contains (post-coalesce), matching what we emit
   2022      * in emit_load_command_segment. */
   2023     u8 n_sect = 0;
   2024     /* Prefer the section whose half-open [vaddr, vaddr+size) range contains
   2025      * the symbol. This must win over the end-boundary fallback below: when
   2026      * two sections abut (A ends exactly where B begins), a symbol at the
   2027      * boundary is the *start* of B, not the end of A. */
   2028     for (u32 k = 0; k < x->nouts; ++k) {
   2029       OutSec* o = &x->outs[k];
   2030       if (s->vaddr >= o->vaddr && s->vaddr < o->vaddr + o->size) {
   2031         n_sect = (u8)(k + 1u);
   2032         break;
   2033       }
   2034     }
   2035     /* Fallback: a symbol sitting exactly one-past-the-end of a section with
   2036      * no following section covering it (e.g. an end-of-section marker) is
   2037      * attributed to the section that ends there. */
   2038     if (n_sect == 0) {
   2039       for (u32 k = 0; k < x->nouts; ++k) {
   2040         OutSec* o = &x->outs[k];
   2041         if (s->vaddr == o->vaddr + o->size) {
   2042           n_sect = (u8)(k + 1u);
   2043           break;
   2044         }
   2045       }
   2046     }
   2047     if (n_sect == 0) continue;
   2048     Slice nm_s = pool_slice(x->c->global, s->name);
   2049     const char* nm = nm_s.s;
   2050     size_t nl = nm_s.len;
   2051     u32 strx = x->strtab.len;
   2052     if (nm && nl) mbuf_str(&x->strtab, nm, (u32)nl);
   2053 
   2054     u8 t[16];
   2055     u8 nt = N_SECT | N_EXT;
   2056     if (s->bind == SB_WEAK) {
   2057       /* N_WEAK_DEF in n_desc (not a flag in n_type) */
   2058     }
   2059     wr_u32_le(t + 0, strx);
   2060     t[4] = nt;
   2061     t[5] = n_sect;
   2062     wr_u16_le(t + 6, s->bind == SB_WEAK ? N_WEAK_DEF : 0);
   2063     wr_u64_le(t + 8, s->vaddr);
   2064     mbuf_append(&x->symtab, t, 16);
   2065     ++n_extdef;
   2066   }
   2067 
   2068   /* undef imports — real imports only.  Internal-GOT entries don't get
   2069    * N_UNDF nlist records since they're defined in the image. */
   2070   u32 imp_first_symtab_idx = n_extdef;
   2071   for (u32 i = 0; i < x->nimports_real; ++i) {
   2072     MachImp* mi = &x->imports[i];
   2073     Slice nm_s = pool_slice(x->c->global, mi->name);
   2074     const char* nm = nm_s.s;
   2075     size_t nl = nm_s.len;
   2076     u32 strx = x->strtab.len;
   2077     if (nm && nl) mbuf_str(&x->strtab, nm, (u32)nl);
   2078 
   2079     u8 t[16];
   2080     wr_u32_le(t + 0, strx);
   2081     t[4] = N_UNDF | N_EXT;
   2082     t[5] = 0;
   2083     /* n_desc carries dylib ordinal in high byte (REFERENCED_DYNAMICALLY etc.)
   2084      */
   2085     u16 desc = (u16)(((u16)mi->dylib_ord & 0xff) << 8);
   2086     if (mi->weak) desc |= N_WEAK_REF;
   2087     wr_u16_le(t + 6, desc);
   2088     wr_u64_le(t + 8, 0);
   2089     mbuf_append(&x->symtab, t, 16);
   2090     ++n_undef;
   2091   }
   2092 
   2093   /* indirect symtab: one entry per __stubs slot, then one per __got
   2094    * slot.  Internal-GOT slots use INDIRECT_SYMBOL_LOCAL (0x80000000)
   2095    * since they have no nlist entry. */
   2096   u32 indirect_start = 0;
   2097   /* Patch reserved1 of each synth OutSec.  __stubs and __got are each
   2098    * singleton OutSecs (synth sections never coalesce with user input),
   2099    * so a sectname match identifies them unambiguously. */
   2100   for (u32 i = 0; i < x->nouts; ++i) {
   2101     OutSec* o = &x->outs[i];
   2102     if (slice_eq_cstr(slice_from_cstr(o->sectname), "__stubs") && o->size) {
   2103       o->reserved1 = indirect_start;
   2104       for (u32 k = 0; k < x->nimports; ++k) {
   2105         MachImp* mi = &x->imports[k];
   2106         if (!mi->stub_idx) continue;
   2107         u32 sym_idx = imp_first_symtab_idx + k;
   2108         mbuf_u32(&x->indirect, sym_idx);
   2109         ++indirect_start;
   2110       }
   2111     }
   2112   }
   2113   for (u32 i = 0; i < x->nouts; ++i) {
   2114     OutSec* o = &x->outs[i];
   2115     if (slice_eq_cstr(slice_from_cstr(o->sectname), "__got") && o->size) {
   2116       o->reserved1 = indirect_start;
   2117       for (u32 k = 0; k < x->nimports; ++k) {
   2118         MachImp* mi = &x->imports[k];
   2119         u32 sym_idx = mi->internal ? 0x80000000u /* INDIRECT_SYMBOL_LOCAL */
   2120                                    : (imp_first_symtab_idx + k);
   2121         mbuf_u32(&x->indirect, sym_idx);
   2122         ++indirect_start;
   2123       }
   2124     }
   2125   }
   2126 
   2127   x->nsyms = n_local + n_extdef + n_undef;
   2128   (void)n_local;
   2129   (void)imp_first_symtab_idx;
   2130 }
   2131 
   2132 /* ---- LINKEDIT layout assembly ----
   2133  *
   2134  * Place blobs in the order Apple prefers:
   2135  *   chained_fixups, exports_trie, fn_starts, data_in_code,
   2136  *   symtab, indirect, strtab, codesig
   2137  */
   2138 
   2139 static void layout_linkedit(MCtx* x) {
   2140   /* LC_FUNCTION_STARTS is a ULEB128 stream terminated by a zero byte.  Keep a
   2141    * real empty table here so tools that rewrite LINKEDIT preserve the
   2142    * canonical blob order between exports and the symbol table. */
   2143   mbuf_init(&x->fn_starts, x->h);
   2144   mbuf_u8(&x->fn_starts, 0);
   2145   mbuf_init(&x->data_in_code, x->h);
   2146   mbuf_init(&x->codesig, x->h);
   2147 
   2148   u64 cur = x->linkedit_fileoff;
   2149   /* chained fixups */
   2150   cur = ALIGN_UP(cur, 8u);
   2151   x->chained_fixups_off = (u32)cur;
   2152   cur += x->chained_fixups.len;
   2153   /* exports trie. Keep LINKEDIT data blobs contiguous; Apple strip rejects
   2154    * padding between chained fixups and the exports trie. */
   2155   x->exports_trie_off = (u32)cur;
   2156   cur += x->exports_trie.len;
   2157   /* function starts */
   2158   x->fn_starts_off = (u32)cur;
   2159   cur += x->fn_starts.len;
   2160   /* data in code */
   2161   cur = ALIGN_UP(cur, 8u);
   2162   x->data_in_code_off = (u32)cur;
   2163   /* symtab */
   2164   cur = ALIGN_UP(cur, 8u);
   2165   x->symtab_off = (u32)cur;
   2166   cur += x->symtab.len;
   2167   /* indirect symtab */
   2168   cur = ALIGN_UP(cur, 4u);
   2169   x->indirect_off = (u32)cur;
   2170   cur += x->indirect.len;
   2171   /* strtab */
   2172   cur = ALIGN_UP(cur, 8u);
   2173   x->strtab_off = (u32)cur;
   2174   cur += x->strtab.len;
   2175   /* code signature: end-aligned to 16 */
   2176   cur = ALIGN_UP(cur, 16u);
   2177   x->codesig_off = (u32)cur;
   2178 
   2179   /* Linkedit segment file_size includes everything up to (but not yet
   2180    * including) codesig.  Codesig is computed below. */
   2181   u64 le_size = cur - x->linkedit_fileoff;
   2182   /* Set linkedit segment size; will be increased after codesig. */
   2183   x->segs[MSEG_LINKEDIT].filesize = le_size;
   2184   x->segs[MSEG_LINKEDIT].vmsize = ALIGN_UP(le_size, MZ_PAGE);
   2185   if (!x->segs[MSEG_LINKEDIT].vmsize) x->segs[MSEG_LINKEDIT].vmsize = MZ_PAGE;
   2186 }
   2187 
   2188 /* ---- ad-hoc code signature (CodeDirectory + SuperBlob) ----
   2189  *
   2190  * Produces a minimal embedded SuperBlob with a single CodeDirectory.
 * The CD holds one sha256 hash per 4096-byte page (1 << CS_PAGE_SIZE_LOG2)
 * of the file (excluding the codesig itself).  The kernel verifies the
   2193  * CD's hash chain on exec.
   2194  *
   2195  * Output format (in big-endian for SuperBlob/CodeDirectory headers):
   2196  *   [SuperBlob]
   2197  *     u32 magic    (0xfade0cc0)
   2198  *     u32 length
   2199  *     u32 count    (=1)
   2200  *     [Slot]
   2201  *       u32 type (=0 CSSLOT_CODEDIRECTORY)
   2202  *       u32 offset (=20)  -- relative to start of SuperBlob
   2203  *   [CodeDirectory]
   2204  *     u32 magic    (0xfade0c02)
   2205  *     u32 length   (bytes including all hashes)
   2206  *     u32 version  (>=0x20400 for execSeg fields)
 *     u32 flags         (=0x2 kSecCodeSignatureAdhoc; ad-hoc signatures must
 *                        set this bit, so the field is not 0)
 *     u32 hashOffset    (offset of first slot hash)
 *     u32 identOffset   (offset of identifier string)
 *     u32 nSpecialSlots (=0)
 *     u32 nCodeSlots
 *     u32 codeLimit     (file bytes covered)
 *     u8  hashSize      (=32)
 *     u8  hashType      (=2 sha256)
 *     u8  platform      (=0)
 *     u8  pageSize      (=12 for 4096)
 *     u32 spare2        (=0)
 *     u32 scatterOffset (=0)
 *     u32 teamOffset    (=0)
 *     u32 spare3        (=0)
   2213  *     u64 codeLimit64   (=0)
   2214  *     u64 execSegBase   (=__TEXT.fileoff)
   2215  *     u64 execSegLimit  (=__TEXT.filesize)
   2216  *     u64 execSegFlags  (=1 main binary)
   2217  *     [identifier bytes "a.out\0"]
   2218  *     [codeslot hashes  nCodeSlots * 32 B]
   2219  *
   2220  * Hashes computed AFTER everything else is final — including the codesig
   2221  * blob's own offset in the file (the hash range stops just before
   2222  * codeLimit). */
   2223 
   2224 static void wr_u64_be(u8* p, u64 v) {
   2225   for (u32 i = 0; i < 8; ++i) p[7 - i] = (u8)(v >> (i * 8));
   2226 }
   2227 
   2228 /* Build the codesig blob with placeholder hashes; size is precise so
   2229  * file layout is final after this. */
   2230 static void build_codesig_skeleton(MCtx* x, u32 code_limit, const char* ident) {
   2231   u32 code_page = 1u << CS_PAGE_SIZE_LOG2; /* 4096 */
   2232   u32 nslots = (code_limit + code_page - 1u) / code_page;
   2233 
   2234   /* CodeDirectory size:
   2235    *   header 88 bytes through execSegFlags
   2236    *   identifier (ident_len + 1)
   2237    *   hashes (nslots * 32)
   2238    */
   2239   u32 ident_len = (u32)slice_from_cstr(ident).len + 1u;
   2240   u32 cd_hdr = 88u;
   2241   u32 cd_size = cd_hdr + ident_len + nslots * CS_SHA256_LEN;
   2242   /* SuperBlob: 12 hdr + 8 slot + cd. */
   2243   u32 sb_size = 12u + 8u + cd_size;
   2244 
   2245   MByte* out = &x->codesig;
   2246   mbuf_init(out, x->h);
   2247   mbuf_reserve(out, sb_size);
   2248   memset(out->data, 0, sb_size);
   2249   out->len = sb_size;
   2250 
   2251   u8* sb = out->data;
   2252   /* SuperBlob header */
   2253   wr_u32_be(sb + 0, CS_MAGIC_EMBEDDED_SIGNATURE);
   2254   wr_u32_be(sb + 4, sb_size);
   2255   wr_u32_be(sb + 8, 1); /* count */
   2256   /* slot 0: type=CSSLOT_CODEDIRECTORY, offset=20 */
   2257   wr_u32_be(sb + 12, CSSLOT_CODEDIRECTORY);
   2258   wr_u32_be(sb + 16, 20u);
   2259 
   2260   /* CodeDirectory */
   2261   u8* cd = sb + 20;
   2262   wr_u32_be(cd + 0, CS_MAGIC_CODEDIRECTORY);
   2263   wr_u32_be(cd + 4, cd_size);
   2264   wr_u32_be(cd + 8, 0x20400u);            /* version with execSeg */
   2265   wr_u32_be(cd + 12, 0x2u);               /* flags = adhoc */
   2266   wr_u32_be(cd + 16, cd_hdr + ident_len); /* hashOffset */
   2267   wr_u32_be(cd + 20, cd_hdr);             /* identOffset */
   2268   wr_u32_be(cd + 24, 0);                  /* nSpecialSlots */
   2269   wr_u32_be(cd + 28, nslots);
   2270   wr_u32_be(cd + 32, code_limit);
   2271   cd[36] = (u8)CS_SHA256_LEN;
   2272   cd[37] = (u8)CS_HASHTYPE_SHA256;
   2273   cd[38] = 0; /* platform */
   2274   cd[39] = (u8)CS_PAGE_SIZE_LOG2;
   2275   wr_u32_be(cd + 40, 0);                   /* spare2 */
   2276   wr_u32_be(cd + 44, 0);                   /* scatterOffset */
   2277   wr_u32_be(cd + 48, 0);                   /* teamOffset */
   2278   wr_u32_be(cd + 52, 0);                   /* spare3 */
   2279   wr_u64_be(cd + 56, 0);                   /* codeLimit64 */
   2280   wr_u64_be(cd + 64, x->segs[1].fileoff);  /* execSegBase */
   2281   wr_u64_be(cd + 72, x->segs[1].filesize); /* execSegLimit */
   2282   wr_u64_be(cd + 80, CS_EXECSEG_MAIN_BINARY);
   2283 
   2284   /* identifier */
   2285   memcpy(cd + cd_hdr, ident, ident_len);
   2286 
   2287   x->codesig_size = sb_size;
   2288 }
   2289 
   2290 static void compute_codesig(MCtx* x, const u8* full_file, u32 file_len_excl_cs,
   2291                             const char* ident) {
   2292   u32 code_page = 1u << CS_PAGE_SIZE_LOG2;
   2293   u32 nslots = (file_len_excl_cs + code_page - 1u) / code_page;
   2294   u32 ident_len = (u32)slice_from_cstr(ident).len + 1u;
   2295   u8* cd = x->codesig.data + 12 + 8;
   2296   u8* hashes = cd + 88u + ident_len;
   2297 
   2298   for (u32 i = 0; i < nslots; ++i) {
   2299     u32 off = i * code_page;
   2300     u32 take = (off + code_page <= file_len_excl_cs) ? code_page
   2301                                                      : (file_len_excl_cs - off);
   2302     Sha256 s;
   2303     sha256_init(&s);
   2304     sha256_update(&s, full_file + off, take);
   2305     /* Pages shorter than code_page get the standard SHA over the
   2306      * partial bytes — Apple's tools do exactly this (no zero padding
   2307      * on the tail). */
   2308     sha256_final(&s, hashes + i * CS_SHA256_LEN);
   2309   }
   2310 }
   2311 
   2312 /* ---- final emission ---- */
   2313 
   2314 static void emit_load_command_segment(MByte* lc, MCtx* x, u32 segidx) {
   2315   MSeg* sg = &x->segs[segidx];
   2316   u32 seg_cmd_size = MACHO_SEGCMD64_SIZE + sg->nouts * MACHO_SECT64_SIZE;
   2317   u32 base = lc->len;
   2318   mbuf_u32(lc, LC_SEGMENT_64);
   2319   mbuf_u32(lc, seg_cmd_size);
   2320   /* segname: 16 bytes zero-padded */
   2321   u8 nm[16];
   2322   memset(nm, 0, 16);
   2323   size_t nlen = slice_from_cstr(sg->name).len;
   2324   if (nlen > 16) nlen = 16;
   2325   memcpy(nm, sg->name, nlen);
   2326   mbuf_append(lc, nm, 16);
   2327   mbuf_u64(lc, sg->vmaddr);
   2328   mbuf_u64(lc, sg->vmsize);
   2329   mbuf_u64(lc, sg->fileoff);
   2330   mbuf_u64(lc, sg->filesize);
   2331   mbuf_u32(lc, sg->maxprot);
   2332   mbuf_u32(lc, sg->initprot);
   2333   mbuf_u32(lc, sg->nouts);
   2334   mbuf_u32(lc, 0); /* flags */
   2335 
   2336   for (u32 j = 0; j < sg->nouts; ++j) {
   2337     OutSec* o = &x->outs[sg->first_out + j];
   2338     u8 sname[16], gname[16];
   2339     memset(sname, 0, 16);
   2340     memset(gname, 0, 16);
   2341     size_t sl = o->sectname ? slice_from_cstr(o->sectname).len : 0;
   2342     if (sl > 16) sl = 16;
   2343     if (sl) memcpy(sname, o->sectname, sl);
   2344     size_t gl = slice_from_cstr(sg->name).len; /* segname must match */
   2345     if (gl > 16) gl = 16;
   2346     memcpy(gname, sg->name, gl);
   2347     mbuf_append(lc, sname, 16);
   2348     mbuf_append(lc, gname, 16);
   2349     mbuf_u64(lc, o->vaddr);
   2350     mbuf_u64(lc, o->size);
   2351     mbuf_u32(lc, (u32)o->file_offset);
   2352     /* align is power of 2; encode as log2. */
   2353     u32 a = o->align ? o->align : 1u;
   2354     u32 al = 0;
   2355     while ((1u << al) < a) ++al;
   2356     mbuf_u32(lc, al);
   2357     mbuf_u32(lc, 0); /* reloff */
   2358     mbuf_u32(lc, 0); /* nreloc */
   2359     mbuf_u32(lc, o->flags);
   2360     mbuf_u32(lc, o->reserved1);
   2361     mbuf_u32(lc, o->reserved2);
   2362     mbuf_u32(lc, 0); /* reserved3 */
   2363   }
   2364   (void)base;
   2365 }
   2366 
   2367 void link_emit_macho(LinkImage* img, Writer* w);
   2368 
   2369 void link_emit_macho(LinkImage* img, Writer* w) {
   2370   MCtx x;
   2371   memset(&x, 0, sizeof(x));
   2372   x.img = img;
   2373   x.c = img->c;
   2374   x.h = img->heap;
   2375   x.w = w;
   2376   x.linker = img->linker;
   2377   x.link_arch = link_arch_desc_for(img->c);
   2378   {
   2379     const ObjFormatImpl* fmt = obj_format_lookup(KIT_OBJ_MACHO);
   2380     x.macho =
   2381         fmt && fmt->macho_arch ? fmt->macho_arch(img->c->target.arch) : NULL;
   2382   }
   2383 
   2384   if (!x.link_arch || !x.macho || !x.macho->cputype || !x.macho->emit_stub ||
   2385       !x.macho->stub_size)
   2386     compiler_panic(x.c, SRCLOC_NONE,
   2387                    "link_emit_macho: no Mach-O descriptor for target");
   2388   if (img->entry_sym == LINK_SYM_NONE)
   2389     compiler_panic(x.c, SRCLOC_NONE, "link_emit_macho: no resolved entry");
   2390 
   2391   collect_imports(&x);
   2392   collect_tlv(&x);
   2393   plan_layout(&x);
   2394   shift_sections(&x);
   2395 
   2396   /* entry offset within __TEXT segment. */
   2397   LinkSymbol* esym = sym_at(img, img->entry_sym);
   2398   if (!esym || !esym->defined)
   2399     compiler_panic(x.c, SRCLOC_NONE, "link_emit_macho: entry symbol undefined");
   2400   if (esym->vaddr < x.text_vaddr)
   2401     compiler_panic(x.c, SRCLOC_NONE,
   2402                    "link_emit_macho: entry symbol below __TEXT base");
   2403   x.entry_offset = (u32)(esym->vaddr - x.text_vaddr);
   2404 
   2405   /* image-id UUID. */
   2406   u8 image_id[LINK_IMAGE_ID_BYTES];
   2407   link_image_id_compute(img, image_id);
   2408   memcpy(x.uuid, image_id, 16);
   2409 
   2410   /* Reloc apply collects fixsites. */
   2411   FixList fl;
   2412   fix_init(&fl, x.h);
   2413   apply_relocs(&x, &fl);
   2414 
   2415   /* Build LINKEDIT contents. */
   2416   build_chained_fixups(&x, &fl);
   2417   build_exports_trie(&x);
   2418   build_symtab(&x);
   2419   layout_linkedit(&x);
   2420 
   2421   /* Compute code-sig skeleton sized to file bytes excluding sig. */
   2422   u32 code_limit = x.codesig_off;
   2423   build_codesig_skeleton(&x, code_limit, "a.out");
   2424   /* Now extend linkedit segment to include codesig. */
   2425   u64 le_size = (u64)x.codesig_off + (u64)x.codesig_size - x.linkedit_fileoff;
   2426   x.segs[MSEG_LINKEDIT].filesize = le_size;
   2427   x.segs[MSEG_LINKEDIT].vmsize = ALIGN_UP(le_size, MZ_PAGE);
   2428 
   2429   /* Build load commands buffer. */
   2430   MByte lc;
   2431   mbuf_init(&lc, x.h);
   2432 
   2433   /* LC_SEGMENT_64 for each segment with sections (and PAGEZERO/LINKEDIT). */
   2434   emit_load_command_segment(&lc, &x, 0); /* PAGEZERO */
   2435   emit_load_command_segment(&lc, &x, 1); /* TEXT */
   2436   if (x.segs[2].nsects > 0)
   2437     emit_load_command_segment(&lc, &x, 2); /* DATA_CONST */
   2438   if (x.segs[3].nsects > 0) emit_load_command_segment(&lc, &x, 3); /* DATA */
   2439   if (x.segs[MSEG_DWARF].nsects > 0)
   2440     emit_load_command_segment(&lc, &x, MSEG_DWARF);  /* DWARF (debug info) */
   2441   emit_load_command_segment(&lc, &x, MSEG_LINKEDIT); /* LINKEDIT */
   2442 
   2443   /* LC_DYLD_CHAINED_FIXUPS  (linkedit_data_command: 16B) */
   2444   mbuf_u32(&lc, LC_DYLD_CHAINED_FIXUPS);
   2445   mbuf_u32(&lc, 16);
   2446   mbuf_u32(&lc, x.chained_fixups_off);
   2447   mbuf_u32(&lc, x.chained_fixups.len);
   2448 
   2449   /* LC_DYLD_EXPORTS_TRIE */
   2450   mbuf_u32(&lc, LC_DYLD_EXPORTS_TRIE);
   2451   mbuf_u32(&lc, 16);
   2452   mbuf_u32(&lc, x.exports_trie_off);
   2453   mbuf_u32(&lc, x.exports_trie.len);
   2454 
   2455   /* LC_SYMTAB */
   2456   mbuf_u32(&lc, LC_SYMTAB);
   2457   mbuf_u32(&lc, MACHO_SYMTAB_CMD_SIZE);
   2458   mbuf_u32(&lc, x.symtab_off);
   2459   mbuf_u32(&lc, x.nsyms);
   2460   mbuf_u32(&lc, x.strtab_off);
   2461   mbuf_u32(&lc, x.strtab.len);
   2462 
   2463   /* LC_DYSYMTAB */
   2464   /* nlocal=0, nextdef=#defined-globals, nundef=#imports.  We tracked
   2465    * those during build_symtab; recompute by inspecting strtab... easier
   2466    * to recount: defined globals are total - imports. */
   2467   u32 nlocal = 0;
   2468   u32 nundef = x.nimports_real;
   2469   u32 nextdef = (x.nsyms > nundef) ? x.nsyms - nundef - nlocal : 0;
   2470   mbuf_u32(&lc, LC_DYSYMTAB);
   2471   mbuf_u32(&lc, MACHO_DYSYMTAB_CMD_SIZE);
   2472   mbuf_u32(&lc, 0); /* ilocalsym */
   2473   mbuf_u32(&lc, nlocal);
   2474   mbuf_u32(&lc, nlocal);
   2475   mbuf_u32(&lc, nextdef);
   2476   mbuf_u32(&lc, nlocal + nextdef);
   2477   mbuf_u32(&lc, nundef);
   2478   mbuf_u32(&lc, 0);
   2479   mbuf_u32(&lc, 0); /* tocoff, ntoc */
   2480   mbuf_u32(&lc, 0);
   2481   mbuf_u32(&lc, 0); /* modtaboff, nmodtab */
   2482   mbuf_u32(&lc, 0);
   2483   mbuf_u32(&lc, 0); /* extrefsymoff, nextrefsyms */
   2484   mbuf_u32(&lc, x.indirect_off);
   2485   mbuf_u32(&lc, x.indirect.len / 4u);
   2486   mbuf_u32(&lc, 0);
   2487   mbuf_u32(&lc, 0); /* extreloff, nextrel */
   2488   mbuf_u32(&lc, 0);
   2489   mbuf_u32(&lc, 0); /* locreloff, nlocrel */
   2490 
   2491   /* LC_LOAD_DYLINKER */
   2492   {
   2493     const char* dyld = "/usr/lib/dyld";
   2494     u32 dyld_len = (u32)slice_from_cstr(dyld).len;
   2495     u32 cmd_size = (u32)ALIGN_UP((u64)(12u + dyld_len + 1u), 8u);
   2496     mbuf_u32(&lc, LC_LOAD_DYLINKER);
   2497     mbuf_u32(&lc, cmd_size);
   2498     mbuf_u32(&lc, 12u); /* name offset within cmd */
   2499     u32 wrote = mbuf_str(&lc, dyld, dyld_len);
   2500     (void)wrote;
   2501     /* Pad to cmd_size. */
   2502     while (lc.len < (u32)((u64)mbuf_align(&lc, 1) + 0)) {
   2503       /* no-op */
   2504       break;
   2505     }
   2506     /* Re-align to cmd_size. */
   2507     u32 want = (u32)(lc.len);
   2508     /* Walk back: lc grew by 12 + (strlen+1).  Pad to cmd_size. */
   2509     u32 cmd_start_back = lc.len - (12u + dyld_len + 1u);
   2510     u32 pad_needed = cmd_size - (lc.len - cmd_start_back);
   2511     while (pad_needed-- > 0) mbuf_u8(&lc, 0);
   2512     (void)want;
   2513   }
   2514 
   2515   /* LC_UUID */
   2516   mbuf_u32(&lc, LC_UUID);
   2517   mbuf_u32(&lc, 24);
   2518   mbuf_append(&lc, x.uuid, 16);
   2519 
   2520   /* LC_BUILD_VERSION */
   2521   mbuf_u32(&lc, LC_BUILD_VERSION);
   2522   mbuf_u32(&lc, 24);
   2523   mbuf_u32(&lc, 1);               /* PLATFORM_MACOS */
   2524   mbuf_u32(&lc, (12u << 16) | 0); /* minos 12.0.0 */
   2525   mbuf_u32(&lc, (12u << 16) | 0); /* sdk   12.0.0 */
   2526   mbuf_u32(&lc, 0);               /* ntools */
   2527 
   2528   /* LC_MAIN — entryoff is offset within __TEXT segment from its file
   2529    * start (0). */
   2530   mbuf_u32(&lc, LC_MAIN);
   2531   mbuf_u32(&lc, 24);
   2532   mbuf_u64(&lc, (u64)x.entry_offset); /* entryoff = vaddr - __TEXT.vmaddr */
   2533   mbuf_u64(&lc, 0);                   /* stacksize */
   2534 
   2535   /* LC_LOAD_DYLIB per dylib. */
   2536   for (u32 i = 0; i < x.ndylibs; ++i) {
   2537     Slice nm_s = pool_slice(x.c->global, x.dylibs[i].install);
   2538     const char* nm = nm_s.s;
   2539     size_t nl = nm_s.len;
   2540     u32 cmd_size = (u32)ALIGN_UP((u64)(24u + (u32)nl + 1u), 8u);
   2541     u32 cmd_start = lc.len;
   2542     mbuf_u32(&lc, LC_LOAD_DYLIB);
   2543     mbuf_u32(&lc, cmd_size);
   2544     mbuf_u32(&lc, 24u);        /* name offset */
   2545     mbuf_u32(&lc, 0);          /* timestamp */
   2546     mbuf_u32(&lc, (1u << 16)); /* current_version 1.0 */
   2547     mbuf_u32(&lc, (1u << 16)); /* compat_version 1.0 */
   2548     mbuf_str(&lc, nm ? nm : "", (u32)nl);
   2549     while (lc.len - cmd_start < cmd_size) mbuf_u8(&lc, 0);
   2550   }
   2551 
   2552   /* LC_FUNCTION_STARTS / LC_DATA_IN_CODE */
   2553   mbuf_u32(&lc, LC_FUNCTION_STARTS_C);
   2554   mbuf_u32(&lc, 16);
   2555   mbuf_u32(&lc, x.fn_starts_off);
   2556   mbuf_u32(&lc, x.fn_starts.len);
   2557 
   2558   mbuf_u32(&lc, LC_DATA_IN_CODE_C);
   2559   mbuf_u32(&lc, 16);
   2560   mbuf_u32(&lc, x.data_in_code_off);
   2561   mbuf_u32(&lc, 0);
   2562 
   2563   /* LC_CODE_SIGNATURE */
   2564   mbuf_u32(&lc, LC_CODE_SIGNATURE_C);
   2565   mbuf_u32(&lc, 16);
   2566   mbuf_u32(&lc, x.codesig_off);
   2567   mbuf_u32(&lc, x.codesig_size);
   2568 
   2569   /* Sanity: lc.len + MACHO_HDR64_SIZE must equal headers_size we
   2570    * predicted in plan_layout.  If not, we mis-sized — panic. */
   2571   if ((u64)lc.len + MACHO_HDR64_SIZE != x.headers_size) {
   2572     compiler_panic(x.c, SRCLOC_NONE,
   2573                    "link_macho: load-cmd size mismatch: predicted %llu got %u",
   2574                    (unsigned long long)(x.headers_size - MACHO_HDR64_SIZE),
   2575                    lc.len);
   2576   }
   2577 
   2578   /* ---- now stream the file ---- */
   2579   /* The Writer in kit allows seek; we'll write a flat buffer first
   2580    * (so we can hash it for codesig) and flush at the end. */
   2581   MByte file;
   2582   mbuf_init(&file, x.h);
   2583 
   2584   /* mach_header_64 */
   2585   u32 ncmds = 0;
   2586   /* Recount: PAGEZERO + TEXT + maybe DATA_CONST + maybe DATA + LINKEDIT
   2587    * + chained + exports_trie + symtab + dysymtab + dyld + uuid +
   2588    * build_version + main + nDylibs + fn_starts + data_in_code +
   2589    * codesig. */
   2590   ncmds += 2; /* PAGEZERO + TEXT */
   2591   if (x.segs[2].nsects > 0) ncmds++;
   2592   if (x.segs[3].nsects > 0) ncmds++;
   2593   if (x.segs[MSEG_DWARF].nsects > 0) ncmds++; /* __DWARF (debug info) */
   2594   ncmds++;                                    /* LINKEDIT */
   2595   ncmds += 11 + x.ndylibs;
   2596   /* (chained, exports_trie, symtab, dysymtab, dyld, uuid, build_version,
   2597    *  main, fn_starts, data_in_code, codesig) = 11 */
   2598 
   2599   mbuf_u32(&file, MH_MAGIC_64);
   2600   mbuf_u32(&file, x.macho->cputype);
   2601   mbuf_u32(&file, x.macho->cpusubtype);
   2602   mbuf_u32(&file, MH_EXECUTE);
   2603   mbuf_u32(&file, ncmds);
   2604   mbuf_u32(&file, lc.len);
   2605   {
   2606     u32 mh_flags = MH_DYLDLINK | MH_TWOLEVEL | MH_NOUNDEFS | MH_PIE;
   2607     /* dyld scans __thread_vars and allocates a pthread_key for each
   2608      * descriptor only when this flag is set; without it the descriptor's
   2609      * thunk pointer is silently patched to _tlv_bootstrap_error.  Apple's
   2610      * ld sets it whenever the image contains S_THREAD_LOCAL_* sections. */
   2611     if (x.ntlv) mh_flags |= MH_HAS_TLV_DESCRIPTORS;
   2612     mbuf_u32(&file, mh_flags);
   2613   }
   2614   mbuf_u32(&file, 0); /* reserved */
   2615   mbuf_append(&file, lc.data, lc.len);
   2616 
   2617   /* Pad to first section's file offset. */
   2618   /* __TEXT first section begins at headers_size; we wrote header+lc =
   2619    * headers_size, so no pad needed.  Then each MSec's file_offset
   2620    * tells us where to write its bytes. */
   2621 
   2622   /* Now emit segment payload bytes per MSec. */
   2623   for (u32 i = 0; i < x.nsecs; ++i) {
   2624     MSec* m = &x.secs[i];
   2625     if (m->is_zerofill || m->size == 0) continue;
   2626     /* Pad up to m->file_offset. */
   2627     while (file.len < m->file_offset) mbuf_u8(&file, 0);
   2628     if (m->synth_data) {
   2629       mbuf_append(&file, m->synth_data, m->synth_size);
   2630     } else {
   2631       LinkSection* ls = &img->sections[m->link_sec_id - 1u];
   2632       u32 segid = ls->segment_id;
   2633       u8* base =
   2634           (segid != LINK_SEG_NONE) ? img->segment_bytes[segid - 1u] : NULL;
   2635       if (base && ls->size) {
   2636         mbuf_append(&file, base + ls->input_offset, (u32)ls->size);
   2637       } else if (ls->size) {
   2638         for (u64 k = 0; k < ls->size; ++k) mbuf_u8(&file, 0);
   2639       }
   2640     }
   2641   }
   2642 
   2643   /* Pad to LINKEDIT start. */
   2644   while (file.len < x.linkedit_fileoff) mbuf_u8(&file, 0);
   2645 
   2646   /* LINKEDIT contents in declared order. */
   2647   while (file.len < x.chained_fixups_off) mbuf_u8(&file, 0);
   2648   mbuf_append(&file, x.chained_fixups.data, x.chained_fixups.len);
   2649   while (file.len < x.exports_trie_off) mbuf_u8(&file, 0);
   2650   mbuf_append(&file, x.exports_trie.data, x.exports_trie.len);
   2651   while (file.len < x.fn_starts_off) mbuf_u8(&file, 0);
   2652   mbuf_append(&file, x.fn_starts.data, x.fn_starts.len);
   2653   while (file.len < x.data_in_code_off) mbuf_u8(&file, 0);
   2654   /* empty */
   2655   while (file.len < x.symtab_off) mbuf_u8(&file, 0);
   2656   mbuf_append(&file, x.symtab.data, x.symtab.len);
   2657   while (file.len < x.indirect_off) mbuf_u8(&file, 0);
   2658   mbuf_append(&file, x.indirect.data, x.indirect.len);
   2659   while (file.len < x.strtab_off) mbuf_u8(&file, 0);
   2660   mbuf_append(&file, x.strtab.data, x.strtab.len);
   2661   while (file.len < x.codesig_off) mbuf_u8(&file, 0);
   2662 
   2663   /* Compute codesig hashes over file bytes [0, codesig_off). */
   2664   /* The codesig blob currently has zero hashes; hash now. */
   2665   compute_codesig(&x, file.data, x.codesig_off, "a.out");
   2666   /* Append codesig. */
   2667   mbuf_append(&file, x.codesig.data, x.codesig.len);
   2668 
   2669   /* Stream out. */
   2670   kit_writer_seek(w, 0);
   2671   kit_writer_write(w, file.data, file.len);
   2672 
   2673   /* Cleanup. */
   2674   fix_fini(&fl);
   2675   mbuf_fini(&lc);
   2676   mbuf_fini(&file);
   2677   mbuf_fini(&x.chained_fixups);
   2678   mbuf_fini(&x.exports_trie);
   2679   mbuf_fini(&x.symtab);
   2680   mbuf_fini(&x.strtab);
   2681   mbuf_fini(&x.indirect);
   2682   mbuf_fini(&x.fn_starts);
   2683   mbuf_fini(&x.data_in_code);
   2684   mbuf_fini(&x.codesig);
   2685   if (x.imports) x.h->free(x.h, x.imports, 0); /* VEC_GROW: cap unknown */
   2686   if (x.dylibs) x.h->free(x.h, x.dylibs, 0);
   2687   if (x.sym_to_imp)
   2688     x.h->free(x.h, x.sym_to_imp, sizeof(u32) * x.sym_to_imp_size);
   2689   if (x.secs) x.h->free(x.h, x.secs, 0);
   2690   if (x.stubs_bytes) x.h->free(x.h, x.stubs_bytes, x.stubs_size);
   2691   if (x.got_bytes) x.h->free(x.h, x.got_bytes, x.got_size);
   2692   if (x.tlv_ptrs_bytes) x.h->free(x.h, x.tlv_ptrs_bytes, x.tlv_ptrs_size);
   2693   if (x.tlv_slots) x.h->free(x.h, x.tlv_slots, 0);
   2694   if (x.sym_to_tlv)
   2695     x.h->free(x.h, x.sym_to_tlv, sizeof(u32) * x.sym_to_tlv_size);
   2696 }