kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

read.c (40604B)


      1 /* Mach-O MH_OBJECT reader.  Parses a 64-bit little-endian relocatable
      2  * object back into a fresh ObjBuilder.  The post-finalize ObjBuilder
      3  * shape is the canonical superset of the writer's input:
      4  * read_macho of an emit_macho output produces an ObjBuilder
      5  * shape-equivalent to the writer's input, modulo the synthesized
      6  * "__SEG,__sect"-form section names.
      7  *
      8  * Scope: AArch64 little-endian.  MH_OBJECT parses to the section/symbol/
      9  * reloc view; MH_EXECUTE / MH_DYLIB additionally get the linked-image view
     10  * (read_macho_image: segments, dylibs, entry, dynamic symbols + relocs).
     11  * read_macho_dso remains the linker's DSO-only input path.  Other archs /
     12  * endianness produce a compiler_panic with a diagnostic. */
     13 
     14 #include <stdlib.h>
     15 #include <string.h>
     16 
     17 #include "core/arena.h"
     18 #include "core/bytes.h"
     19 #include "core/heap.h"
     20 #include "core/pool.h"
     21 #include "core/slice.h"
     22 #include "core/util.h"
     23 #include "obj/format.h"
     24 #include "obj/macho/macho.h"
     25 
     26 /* ---- mach-section scratch struct ---- */
     27 
/* Scratch copy of one Mach-O section header, collected while walking the
 * LC_SEGMENT_64 load commands and later mapped onto an ObjBuilder section. */
typedef struct MSecRec {
  char segname[16];  /* fixed 16-byte segment name; not necessarily NUL-terminated */
  char sectname[16]; /* fixed 16-byte section name; not necessarily NUL-terminated */
  u32 seg_len;       /* length of segname as measured by fixed16_len (0..16) */
  u32 sect_len;      /* length of sectname as measured by fixed16_len (0..16) */
  u64 addr;          /* section address field from the header */
  u64 size;          /* section size in bytes */
  u32 fileoff;       /* file offset of the section's bytes */
  u32 align_log2;    /* alignment as a power-of-two exponent */
  u32 reloff;        /* file offset of the section's relocation entries */
  u32 nreloc;        /* number of relocation entries at reloff */
  u32 flags;         /* raw section flags (SECTION_TYPE bits plus S_ATTR_* bits) */
  u32 reserved2;     /* raw reserved2 field, forwarded to obj_section_ex */
  ObjSecId obj_sec;  /* zeroed in pass 1; assigned in pass 2 when the section is created */
} MSecRec;
     43 
/* Candidate atom start: one entry per section-defined symbol, recorded when
 * the image has MH_SUBSECTIONS_VIA_SYMBOLS set.  The candidates are sorted
 * with matom_cand_cmp and each one becomes an obj_atom_define call that
 * extends to the next candidate in the same section (or the section end). */
typedef struct MAtomCand {
  ObjSecId sec; /* section the symbol is defined in */
  ObjSymId sym; /* the ObjBuilder symbol created for it */
  u32 offset;   /* symbol value truncated to u32; used as the atom start */
  u32 flags;    /* OBJ_ATOM_* flags (OBJ_ATOM_RETAIN for no-dead-strip) */
} MAtomCand;
     50 
     51 static int matom_cand_cmp(const void* av, const void* bv) {
     52   const MAtomCand* a = (const MAtomCand*)av;
     53   const MAtomCand* b = (const MAtomCand*)bv;
     54   if (a->sec < b->sec) return -1;
     55   if (a->sec > b->sec) return 1;
     56   if (a->offset < b->offset) return -1;
     57   if (a->offset > b->offset) return 1;
     58   if (a->sym < b->sym) return -1;
     59   if (a->sym > b->sym) return 1;
     60   return 0;
     61 }
     62 
     63 static u32 fixed16_len(const char* s) {
     64   u32 n = 0;
     65   while (n < 16 && s[n] != 0) ++n;
     66   return n;
     67 }
     68 
     69 static u16 sec_kind_from_seg_sect(const char* segname, u32 seg_len,
     70                                   const char* sectname, u32 sect_len,
     71                                   u32 flags) {
     72   u32 stype = flags & SECTION_TYPE;
     73   if (stype == S_ZEROFILL || stype == S_THREAD_LOCAL_ZEROFILL) return SEC_BSS;
     74   if (flags & S_ATTR_PURE_INSTRUCTIONS) return SEC_TEXT;
     75 
     76   if (seg_len == 7 && memcmp(segname, "__DWARF", 7) == 0) return SEC_DEBUG;
     77   if (seg_len == 6 && memcmp(segname, "__TEXT", 6) == 0) {
     78     if (sect_len == 6 && memcmp(sectname, "__text", 6) == 0) return SEC_TEXT;
     79     return SEC_RODATA; /* __const, __cstring, ... */
     80   }
     81   if (seg_len == 6 && memcmp(segname, "__DATA", 6) == 0) {
     82     if (sect_len == 5 && memcmp(sectname, "__bss", 5) == 0) return SEC_BSS;
     83     return SEC_DATA;
     84   }
     85   return SEC_OTHER;
     86 }
     87 
     88 static u16 sec_flags_from(u32 mflags, u16 sec_kind) {
     89   u16 f = 0;
     90   if (sec_kind == SEC_TEXT || (mflags & S_ATTR_PURE_INSTRUCTIONS)) {
     91     f |= SF_ALLOC | SF_EXEC;
     92   } else if (sec_kind == SEC_RODATA) {
     93     f |= SF_ALLOC;
     94   } else if (sec_kind == SEC_DATA || sec_kind == SEC_BSS) {
     95     f |= SF_ALLOC | SF_WRITE;
     96   }
     97   u32 stype = mflags & SECTION_TYPE;
     98   if (stype == S_THREAD_LOCAL_REGULAR || stype == S_THREAD_LOCAL_ZEROFILL ||
     99       stype == S_THREAD_LOCAL_VARIABLES) {
    100     f |= SF_TLS;
    101   }
    102   if (stype == S_CSTRING_LITERALS) {
    103     f |= SF_MERGE | SF_STRINGS;
    104   }
    105   return f;
    106 }
    107 
    108 static u16 sec_sem_from(u32 mflags, u16 sec_kind) {
    109   u32 stype = mflags & SECTION_TYPE;
    110   if (stype == S_ZEROFILL || stype == S_THREAD_LOCAL_ZEROFILL ||
    111       sec_kind == SEC_BSS) {
    112     return SSEM_NOBITS;
    113   }
    114   if (stype == S_MOD_INIT_FUNC_POINTERS) return SSEM_INIT_ARRAY;
    115   if (stype == S_MOD_TERM_FUNC_POINTERS) return SSEM_FINI_ARRAY;
    116   return SSEM_PROGBITS;
    117 }
    118 
    119 /* Intern a Mach-O lc_str (NUL-terminated string embedded inside a load
    120  * command at `cmd_pos + str_off`, bounded by the command's cmdsize).
    121  * Returns 0 if the offset/string is malformed. */
    122 static Sym macho_lc_str(Compiler* c, const u8* data, u64 cmd_pos, u32 cmdsize,
    123                         u32 str_off) {
    124   if (str_off < 8 || str_off >= cmdsize) return 0;
    125   const char* p = (const char*)(data + cmd_pos + str_off);
    126   u32 maxlen = cmdsize - str_off;
    127   u32 nlen = 0;
    128   while (nlen < maxlen && p[nlen]) ++nlen;
    129   if (!nlen) return 0;
    130   return pool_intern_slice(c->global, (Slice){.s = p, .len = nlen});
    131 }
    132 
    133 /* ---- read_macho_image ----
    134  *
    135  * Linked-image (MH_EXECUTE / MH_DYLIB) view, the Mach-O peer of
    136  * read_elf_image. Walks the load commands a second time to populate the
    137  * ObjImage: LC_SEGMENT_64 -> segments (+ __TEXT base), LC_LOAD_DYLINKER ->
    138  * interp, LC_ID_DYLIB -> soname, LC_LOAD_DYLIB/WEAK/REEXPORT -> deps,
    139  * LC_RPATH -> rpaths, LC_MAIN/LC_UNIXTHREAD -> entry, the LC_SYMTAB external
    140  * nlist entries -> dynamic symbols, and LC_DYLD_CHAINED_FIXUPS binds/rebases
    141  * -> dynamic relocations. The section / symbol / reloc views are parsed by
    142  * read_macho's normal passes; this adds the orthogonal image dimension.
    143  * Lenient: a malformed sub-table is skipped rather than panicked, so a
    144  * partially-damaged image still yields a useful inspection.
    145  *
    146  * `msecs`/`nmsecs` carry the section table read in read_macho's pass 1 so a
    147  * defined dynamic symbol's n_sect maps back to its ObjSecId. */
    148 static void read_macho_image(Compiler* c, ObjBuilder* ob, const u8* data,
    149                              size_t len, u32 filetype, u32 cputype,
    150                              const MSecRec* msecs, u32 nmsecs) {
    151   ObjImage* im =
    152       obj_image_ensure(ob, filetype == MH_DYLIB ? OBJ_KIND_DYN : OBJ_KIND_EXEC);
    153   if (!im) compiler_panic(c, SRCLOC_NONE, "read_macho: obj_image_ensure failed");
    154 
    155   u32 ncmds = rd_u32_le(data + 16);
    156   u32 sizeofcmds = rd_u32_le(data + 20);
    157 
    158   /* Per-segment (vmaddr, file_off) recorded for chained-fixup vaddr
    159    * resolution below; sized to ncmds (segments are a subset of commands). */
    160   u64* seg_vaddr = arena_array(c->scratch, u64, ncmds ? ncmds : 1);
    161   u64* seg_fileoff = arena_array(c->scratch, u64, ncmds ? ncmds : 1);
    162   u32 nseg = 0;
    163 
    164   int have_text = 0;
    165   u64 text_vmaddr = 0;
    166   int have_main = 0;
    167   u64 main_entryoff = 0;
    168   u32 symoff = 0, nsyms = 0, stroff = 0, strsize = 0;
    169   u32 cf_off = 0, cf_size = 0;
    170 
    171   u64 pos = MACHO_HDR64_SIZE;
    172   u64 end = pos + sizeofcmds;
    173   for (u32 ci = 0; ci < ncmds && pos + 8 <= end; ++ci) {
    174     u32 cmd = rd_u32_le(data + pos);
    175     u32 cmdsize = rd_u32_le(data + pos + 4);
    176     if (cmdsize < 8 || pos + cmdsize > end) break;
    177 
    178     /* Raw load-command view (escape hatch): one entry per LC_* command,
    179      * carrying its file offset and on-disk size. */
    180     {
    181       ObjImageRaw r;
    182       r.tag = cmd;
    183       r.value = pos;
    184       r.extra = cmdsize;
    185       obj_image_add_raw(im, &r);
    186     }
    187 
    188     if (cmd == LC_SEGMENT_64 && cmdsize >= MACHO_SEGCMD64_SIZE) {
    189       const char* segname = (const char*)(data + pos + 8);
    190       u32 seg_len = fixed16_len(segname);
    191       u64 vmaddr = rd_u64_le(data + pos + 24);
    192       u64 vmsize = rd_u64_le(data + pos + 32);
    193       u64 fileoff = rd_u64_le(data + pos + 40);
    194       u64 filesize = rd_u64_le(data + pos + 48);
    195       u32 initprot = rd_u32_le(data + pos + 60);
    196       ObjSegment seg;
    197       seg.name = seg_len ? pool_intern_slice(
    198                                c->global, (Slice){.s = segname, .len = seg_len})
    199                          : 0;
    200       seg.vaddr = vmaddr;
    201       seg.vsize = vmsize;
    202       seg.file_off = fileoff;
    203       seg.file_size = filesize;
    204       /* VM_PROT_* bits differ from OBJ_SEG_* — remap explicitly. */
    205       seg.perms = ((initprot & VM_PROT_READ) ? OBJ_SEG_R : 0) |
    206                   ((initprot & VM_PROT_WRITE) ? OBJ_SEG_W : 0) |
    207                   ((initprot & VM_PROT_EXECUTE) ? OBJ_SEG_X : 0);
    208       seg.align = 1; /* Mach-O segments don't carry an explicit p_align */
    209       obj_image_add_segment(im, &seg);
    210 
    211       seg_vaddr[nseg] = vmaddr;
    212       seg_fileoff[nseg] = fileoff;
    213       ++nseg;
    214       if (!have_text && seg_len == 6 && memcmp(segname, "__TEXT", 6) == 0) {
    215         have_text = 1;
    216         text_vmaddr = vmaddr;
    217       }
    218     } else if (cmd == LC_LOAD_DYLINKER) {
    219       Sym s = macho_lc_str(c, data, pos, cmdsize, rd_u32_le(data + pos + 8));
    220       if (s) obj_image_set_interp(im, s);
    221     } else if (cmd == LC_ID_DYLIB) {
    222       Sym s = macho_lc_str(c, data, pos, cmdsize, rd_u32_le(data + pos + 8));
    223       if (s) obj_image_set_soname(im, s);
    224     } else if (cmd == LC_LOAD_DYLIB || cmd == LC_LOAD_WEAK_DYLIB ||
    225                cmd == LC_REEXPORT_DYLIB) {
    226       Sym s = macho_lc_str(c, data, pos, cmdsize, rd_u32_le(data + pos + 8));
    227       if (s) {
    228         ObjImageDep d;
    229         d.name = s;
    230         d.imports = NULL;
    231         d.nimports = 0;
    232         obj_image_add_dep(im, &d);
    233       }
    234     } else if (cmd == LC_RPATH) {
    235       Sym s = macho_lc_str(c, data, pos, cmdsize, rd_u32_le(data + pos + 8));
    236       if (s) obj_image_add_rpath(im, s);
    237     } else if (cmd == LC_MAIN && cmdsize >= 16) {
    238       have_main = 1;
    239       main_entryoff = rd_u64_le(data + pos + 8);
    240     } else if (cmd == LC_UNIXTHREAD && cmdsize >= 16 && !have_main) {
    241       /* thread_command: flavor (u32) + count (u32) + register state. Pull
    242        * the program counter out of the arch's state. */
    243       u32 flavor = rd_u32_le(data + pos + 8);
    244       u64 pc_off = 0;
    245       int have_pc = 0;
    246       if (cputype == CPU_TYPE_ARM64 && flavor == 6 /* ARM_THREAD_STATE64 */) {
    247         pc_off = pos + 16 + 32u * 8u; /* x0..x28,fp,lr,sp,pc */
    248         have_pc = 1;
    249       } else if (cputype == CPU_TYPE_X86_64 &&
    250                  flavor == 4 /* x86_THREAD_STATE64 */) {
    251         pc_off = pos + 16 + 16u * 8u; /* rax..r15, then rip */
    252         have_pc = 1;
    253       }
    254       if (have_pc && pc_off + 8 <= pos + cmdsize)
    255         obj_image_set_entry(im, rd_u64_le(data + pc_off));
    256     } else if (cmd == LC_SYMTAB && cmdsize >= MACHO_SYMTAB_CMD_SIZE) {
    257       symoff = rd_u32_le(data + pos + 8);
    258       nsyms = rd_u32_le(data + pos + 12);
    259       stroff = rd_u32_le(data + pos + 16);
    260       strsize = rd_u32_le(data + pos + 20);
    261     } else if (cmd == LC_DYLD_CHAINED_FIXUPS && cmdsize >= 16) {
    262       cf_off = rd_u32_le(data + pos + 8);
    263       cf_size = rd_u32_le(data + pos + 12);
    264     }
    265     pos += cmdsize;
    266   }
    267 
    268   if (have_text) obj_image_set_base(im, text_vmaddr);
    269   /* LC_MAIN entryoff is a file offset within __TEXT (which maps file 0 to
    270    * its vmaddr); the entry vaddr is __TEXT base + entryoff. */
    271   if (have_main && have_text)
    272     obj_image_set_entry(im, text_vmaddr + main_entryoff);
    273 
    274   /* LC_SYMTAB external nlist entries -> dynamic symbols (Mach-O's analog of
    275    * .dynsym: the dynamically-visible exports and undefined imports). */
    276   if (nsyms && stroff + (u64)strsize <= len &&
    277       symoff + (u64)nsyms * MACHO_NLIST64_SIZE <= len) {
    278     const u8* strtab = data + stroff;
    279     const u8* sbase = data + symoff;
    280     for (u32 i = 0; i < nsyms; ++i) {
    281       const u8* p = sbase + (u64)i * MACHO_NLIST64_SIZE;
    282       u32 strx = rd_u32_le(p + 0);
    283       u8 n_type = p[4];
    284       u8 n_sect = p[5];
    285       u16 n_desc = rd_u16_le(p + 6);
    286       u64 n_value = rd_u64_le(p + 8);
    287       if (n_type & N_STAB) continue;   /* debug stab, not dynamic */
    288       if (!(n_type & N_EXT)) continue; /* locals aren't dynamic */
    289       if (strx >= strsize) continue;
    290       const char* nm = (const char*)(strtab + strx);
    291       u32 nlen = 0;
    292       while (strx + nlen < strsize && nm[nlen]) ++nlen;
    293       if (!nlen) continue;
    294 
    295       u8 type_field = (u8)(n_type & N_TYPE);
    296       ObjImageSym ds;
    297       ds.version = 0; /* Mach-O has no ELF-style symbol versioning */
    298       ds.name = pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen});
    299       ds.bind = (n_desc & (N_WEAK_DEF | N_WEAK_REF)) ? SB_WEAK : SB_GLOBAL;
    300       ds.value = (type_field == N_SECT || type_field == N_ABS) ? n_value : 0;
    301       ds.size = 0;
    302       if (type_field == N_SECT && n_sect >= 1 && n_sect <= nmsecs) {
    303         ds.section = msecs[n_sect - 1].obj_sec;
    304         ds.kind = (msecs[n_sect - 1].flags & S_ATTR_PURE_INSTRUCTIONS) ? SK_FUNC
    305                                                                        : SK_OBJ;
    306       } else {
    307         ds.section = OBJ_SEC_NONE; /* undefined import / absolute */
    308         ds.kind = SK_NOTYPE;
    309       }
    310       obj_image_add_dynsym(im, &ds);
    311     }
    312   }
    313 
    314   /* LC_DYLD_CHAINED_FIXUPS binds/rebases -> dynamic relocations. */
    315   if (cf_size >= 28 && (u64)cf_off + cf_size <= len) {
    316     const u8* cf = data + cf_off;
    317     u32 starts_offset = rd_u32_le(cf + 4);
    318     u32 imports_offset = rd_u32_le(cf + 8);
    319     u32 symbols_offset = rd_u32_le(cf + 12);
    320     u32 imports_count = rd_u32_le(cf + 16);
    321     u32 imports_format = rd_u32_le(cf + 20);
    322     u32 relative_kind =
    323         (cputype == CPU_TYPE_X86_64) ? R_X64_RELATIVE : R_AARCH64_RELATIVE;
    324 
    325     /* Import symbol names, indexed by 0-based bind ordinal. */
    326     Sym* imp_names =
    327         arena_zarray(c->scratch, Sym, imports_count ? imports_count : 1);
    328     if (imports_format == DYLD_CHAINED_IMPORT &&
    329         (u64)imports_offset + (u64)imports_count * 4u <= cf_size) {
    330       for (u32 i = 0; i < imports_count; ++i) {
    331         u32 packed = rd_u32_le(cf + imports_offset + i * 4u);
    332         u32 name_off = (packed >> 9) & 0x7fffffu;
    333         u64 so = (u64)symbols_offset + name_off;
    334         if (so >= cf_size) continue;
    335         const char* nm = (const char*)(cf + so);
    336         u32 maxn = (u32)(cf_size - so);
    337         u32 nlen = 0;
    338         while (nlen < maxn && nm[nlen]) ++nlen;
    339         if (nlen)
    340           imp_names[i] =
    341               pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen});
    342       }
    343     }
    344 
    345     if ((u64)starts_offset + 4u <= cf_size) {
    346       const u8* sib = cf + starts_offset;
    347       u32 seg_count = rd_u32_le(sib + 0);
    348       for (u32 si = 0; si < seg_count; ++si) {
    349         if ((u64)starts_offset + 4u + (u64)si * 4u + 4u > cf_size) break;
    350         u32 seg_info_offset = rd_u32_le(sib + 4 + si * 4u);
    351         if (!seg_info_offset) continue;
    352         if ((u64)starts_offset + seg_info_offset + 22u > cf_size) continue;
    353         const u8* sis = cf + starts_offset + seg_info_offset;
    354         u16 pointer_format = rd_u16_le(sis + 6);
    355         u64 segment_offset = rd_u64_le(sis + 8); /* file offset of segment */
    356         u16 page_count = rd_u16_le(sis + 20);
    357         /* Only the DYLD_CHAINED_PTR_64 family shares the bit layout below. */
    358         if (pointer_format != DYLD_CHAINED_PTR_64 && pointer_format != 6u)
    359           continue;
    360         u16 page_size = rd_u16_le(sis + 4);
    361         if (!page_size) continue;
    362         /* Resolve this segment's vmaddr from its file offset. */
    363         u64 seg_va = 0;
    364         int found_seg = 0;
    365         for (u32 k = 0; k < nseg; ++k) {
    366           if (seg_fileoff[k] == segment_offset) {
    367             seg_va = seg_vaddr[k];
    368             found_seg = 1;
    369             break;
    370           }
    371         }
    372         if (!found_seg) continue;
    373         for (u32 pg = 0; pg < page_count; ++pg) {
    374           u64 ps_pos = (u64)starts_offset + seg_info_offset + 22u + pg * 2u;
    375           if (ps_pos + 2u > cf_size) break;
    376           u16 ps = rd_u16_le(cf + ps_pos);
    377           if (ps == 0xFFFFu) continue;
    378           u32 cur = ps;
    379           for (;;) {
    380             u64 file_loc = segment_offset + (u64)pg * page_size + cur;
    381             if (file_loc + 8u > len) break;
    382             u64 v = rd_u64_le(data + file_loc);
    383             u64 vaddr = seg_va + (u64)pg * page_size + cur;
    384             int is_bind = (int)((v >> 63) & 1u);
    385             ObjImageReloc dr;
    386             dr.section = OBJ_SEC_NONE;
    387             dr.offset = vaddr;
    388             if (is_bind) {
    389               u32 ordinal = (u32)(v & 0xffffffu);
    390               dr.sym_name = (ordinal < imports_count) ? imp_names[ordinal] : 0;
    391               dr.addend = (i64)((v >> 24) & 0xffu);
    392               dr.kind = R_ABS64;
    393             } else {
    394               dr.sym_name = 0;
    395               dr.addend = (i64)(v & (((u64)1 << 36) - 1u));
    396               dr.kind = (RelocKind)relative_kind;
    397             }
    398             obj_image_add_dynreloc(im, &dr);
    399             u32 next = (u32)((v >> 51) & 0xfffu);
    400             if (!next) break;
    401             cur += next * 4u;
    402             if (cur >= page_size) break;
    403           }
    404         }
    405       }
    406     }
    407   }
    408 }
    409 
    410 ObjBuilder* read_macho(Compiler* c, const char* name, const u8* data,
    411                        size_t len) {
    412   (void)name;
    413   if (len < MACHO_HDR64_SIZE)
    414     compiler_panic(c, SRCLOC_NONE, "read_macho: input shorter than header");
    415 
    416   u32 magic = rd_u32_le(data + 0);
    417   if (magic != MH_MAGIC_64)
    418     compiler_panic(c, SRCLOC_NONE, "read_macho: bad magic 0x%x", magic);
    419 
    420   u32 cputype = rd_u32_le(data + 4);
    421   const ObjFormatImpl* fmt = obj_format_lookup(KIT_OBJ_MACHO);
    422   const ObjMachoArchOps* macho =
    423       fmt && fmt->macho_cputype ? fmt->macho_cputype(cputype) : NULL;
    424   u32 filetype = rd_u32_le(data + 12);
    425   u32 ncmds = rd_u32_le(data + 16);
    426   u32 sizeofcmds = rd_u32_le(data + 20);
    427   u32 mh_flags = rd_u32_le(data + 24);
    428 
    429   if (!macho || !macho->reloc_from)
    430     compiler_panic(c, SRCLOC_NONE, "read_macho: unsupported cputype 0x%x",
    431                    cputype);
    432   /* MH_OBJECT parses to the section/symbol/reloc view only. MH_EXECUTE /
    433    * MH_DYLIB additionally get the linked-image view (read_macho_image, at
    434    * the end); their sections still parse through the same passes. */
    435   if (filetype != MH_OBJECT && filetype != MH_EXECUTE && filetype != MH_DYLIB)
    436     compiler_panic(c, SRCLOC_NONE,
    437                    "read_macho: unsupported filetype %u (expected MH_OBJECT, "
    438                    "MH_EXECUTE, or MH_DYLIB)",
    439                    filetype);
    440 
    441   if ((u64)MACHO_HDR64_SIZE + sizeofcmds > len)
    442     compiler_panic(c, SRCLOC_NONE, "read_macho: load commands exceed file");
    443 
    444   /* ---- pass 1: walk load commands, collect sections, symtab cmd. */
    445   MSecRec* msecs = NULL;
    446   u32 nmsecs = 0;
    447   u32 symoff = 0, nsyms = 0, stroff = 0, strsize = 0;
    448 
    449   u64 pos = MACHO_HDR64_SIZE;
    450   u64 end = pos + sizeofcmds;
    451   for (u32 ci = 0; ci < ncmds && pos + 8 <= end; ++ci) {
    452     u32 cmd = rd_u32_le(data + pos);
    453     u32 cmdsize = rd_u32_le(data + pos + 4);
    454     if (cmdsize < 8 || pos + cmdsize > end)
    455       compiler_panic(c, SRCLOC_NONE, "read_macho: malformed load command");
    456 
    457     if (cmd == LC_SEGMENT_64) {
    458       u32 nsects = rd_u32_le(data + pos + 64);
    459       if (MACHO_SEGCMD64_SIZE + (u64)nsects * MACHO_SECT64_SIZE > cmdsize)
    460         compiler_panic(c, SRCLOC_NONE, "read_macho: segment cmd truncated");
    461       MSecRec* extra = arena_array(c->scratch, MSecRec, nmsecs + nsects);
    462       if (msecs && nmsecs) memcpy(extra, msecs, sizeof(MSecRec) * nmsecs);
    463       msecs = extra;
    464       const u8* sp = data + pos + MACHO_SEGCMD64_SIZE;
    465       for (u32 si = 0; si < nsects; ++si, sp += MACHO_SECT64_SIZE) {
    466         MSecRec* m = &msecs[nmsecs++];
    467         memset(m, 0, sizeof *m);
    468         memcpy(m->sectname, sp + 0, 16);
    469         memcpy(m->segname, sp + 16, 16);
    470         m->seg_len = fixed16_len(m->segname);
    471         m->sect_len = fixed16_len(m->sectname);
    472         m->addr = rd_u64_le(sp + 32);
    473         m->size = rd_u64_le(sp + 40);
    474         m->fileoff = rd_u32_le(sp + 48);
    475         m->align_log2 = rd_u32_le(sp + 52);
    476         m->reloff = rd_u32_le(sp + 56);
    477         m->nreloc = rd_u32_le(sp + 60);
    478         m->flags = rd_u32_le(sp + 64);
    479         m->reserved2 = rd_u32_le(sp + 72);
    480       }
    481     } else if (cmd == LC_SYMTAB) {
    482       symoff = rd_u32_le(data + pos + 8);
    483       nsyms = rd_u32_le(data + pos + 12);
    484       stroff = rd_u32_le(data + pos + 16);
    485       strsize = rd_u32_le(data + pos + 20);
    486     }
    487     pos += cmdsize;
    488   }
    489 
    490   if (stroff + (u64)strsize > len)
    491     compiler_panic(c, SRCLOC_NONE, "read_macho: string table out of range");
    492   if (symoff + (u64)nsyms * MACHO_NLIST64_SIZE > len)
    493     compiler_panic(c, SRCLOC_NONE, "read_macho: symbol table out of range");
    494   const u8* strtab = data + stroff;
    495 
    496   ObjBuilder* ob = obj_new(c);
    497   if (!ob) compiler_panic(c, SRCLOC_NONE, "read_macho: obj_new failed");
    498 
    499   /* ---- pass 2: create ObjSecs and copy bytes. */
    500   for (u32 i = 0; i < nmsecs; ++i) {
    501     MSecRec* m = &msecs[i];
    502     /* Build "__SEG,__sect"-form name; matches what emit_macho would
    503      * round-trip back out. */
    504     char nmbuf[34];
    505     u32 nlen = 0;
    506     memcpy(nmbuf + nlen, m->segname, m->seg_len);
    507     nlen += m->seg_len;
    508     nmbuf[nlen++] = ',';
    509     memcpy(nmbuf + nlen, m->sectname, m->sect_len);
    510     nlen += m->sect_len;
    511     Sym sn = pool_intern_slice(c->global, (Slice){.s = nmbuf, .len = nlen});
    512 
    513     u16 kind = sec_kind_from_seg_sect(m->segname, m->seg_len, m->sectname,
    514                                       m->sect_len, m->flags);
    515     u16 flags = sec_flags_from(m->flags, kind);
    516     u16 sem = sec_sem_from(m->flags, kind);
    517     u32 align = 1u << (m->align_log2 & 31);
    518 
    519     ObjSecId id = obj_section_ex(ob, sn, (SecKind)kind, (SecSem)sem, flags,
    520                                  align, m->reserved2, 0, 0);
    521     if (id == OBJ_SEC_NONE)
    522       compiler_panic(c, SRCLOC_NONE, "read_macho: obj_section_ex failed");
    523 
    524     /* Preserve the raw mach section.flags so emit_macho can write back
    525      * the same S_TYPE / S_ATTR_* bits. */
    526     obj_section_set_ext(ob, id, OBJ_EXT_MACHO, m->flags, 0);
    527 
    528     if (sem == SSEM_NOBITS) {
    529       obj_reserve_bss(ob, id, (u32)m->size, align);
    530     } else if (m->size) {
    531       if (m->fileoff + m->size > len)
    532         compiler_panic(c, SRCLOC_NONE, "read_macho: section bytes out of range");
    533       obj_write(ob, id, data + m->fileoff, (size_t)m->size);
    534     }
    535     m->obj_sec = id;
    536   }
    537 
    538   /* ---- pass 3: parse symbol table.  Two-pass strategy: first pass
    539    *              creates undefs (so relocations can refer to them), second
    540    *              pass creates defined locals/extdefs.  Both write into
    541    *              mach_idx -> ObjSymId so reloc resolution works. */
    542   ObjSymId* sym_macho_to_obj =
    543       arena_zarray(c->scratch, ObjSymId, nsyms ? nsyms : 1);
    544   MAtomCand* atom_cands =
    545       arena_zarray(c->scratch, MAtomCand, nsyms ? nsyms : 1);
    546   u32 natom_cands = 0;
    547 
    548   const u8* sbase = data + symoff;
    549   for (u32 i = 0; i < nsyms; ++i) {
    550     const u8* p = sbase + (u64)i * MACHO_NLIST64_SIZE;
    551     u32 strx = rd_u32_le(p + 0);
    552     u8 n_type = p[4];
    553     u8 n_sect = p[5];
    554     u16 n_desc = rd_u16_le(p + 6);
    555     u64 n_value = rd_u64_le(p + 8);
    556 
    557     const char* nm = "";
    558     u32 nlen = 0;
    559     if (strx < strsize) {
    560       nm = (const char*)(strtab + strx);
    561       while (strx + nlen < strsize && nm[nlen]) ++nlen;
    562     }
    563     /* Mach-O names round-trip verbatim — the leading `_` Apple
    564      * toolchains apply to C symbols is part of the on-disk name as
    565      * far as ObjBuilder is concerned.  Name-canonicalization (the
    566      * `test_main` ↔ `_test_main` mapping for API callers) happens
    567      * one layer up at the linker API boundary (link_c_name_intern
    568      * in link.c); the on-disk shape stays byte-for-byte stable. */
    569     Sym sn =
    570         nlen ? pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen}) : 0;
    571 
    572     u8 type_field = (u8)(n_type & N_TYPE);
    573     u8 ext = (u8)(n_type & N_EXT);
    574     u8 pext = (u8)(n_type & N_PEXT);
    575 
    576     u16 bind = ext ? SB_GLOBAL : SB_LOCAL;
    577     /* Weak DEFs (defined symbols) carry N_WEAK_DEF; weak REFs (undef
    578      * `__attribute__((weak))` references) carry N_WEAK_REF. Either
    579      * one collapses to SB_WEAK in the kit model. */
    580     if (ext && (n_desc & (N_WEAK_DEF | N_WEAK_REF))) bind = SB_WEAK;
    581     u8 vis = pext ? SV_HIDDEN : SV_DEFAULT;
    582 
    583     u16 kind;
    584     ObjSecId sec_id = OBJ_SEC_NONE;
    585     u64 value = 0;
    586     u64 size = 0;
    587     u64 cmnalign = 0;
    588 
    589     if (type_field == N_UNDF) {
    590       if (ext && n_value != 0) {
    591         /* Common: n_value is size, n_desc encodes log2(align) in
    592          * GET_COMM_ALIGN bits. */
    593         kind = SK_COMMON;
    594         value = 0;
    595         size = n_value;
    596         u32 la = (u32)((n_desc >> 8) & 0xf);
    597         cmnalign = 1u << la;
    598       } else {
    599         kind = SK_UNDEF;
    600       }
    601     } else if (type_field == N_ABS) {
    602       kind = SK_ABS;
    603       value = n_value;
    604     } else if (type_field == N_SECT) {
    605       if (n_sect == 0 || n_sect > nmsecs) {
    606         kind = SK_NOTYPE;
    607       } else {
    608         sec_id = msecs[n_sect - 1].obj_sec;
    609         /* MH_OBJECT: the obj model and the linker treat an input
    610          * symbol's value as a section-local offset, and a relocatable
    611          * .o's sections carry non-zero layout addrs, so subtract the
    612          * section base. Linked images (MH_EXECUTE/MH_DYLIB) keep the
    613          * absolute n_value so nm / objdump -t / size / addr2line report
    614          * real vaddrs — matching the ELF reader, whose st_value is
    615          * already absolute for images. */
    616         if (filetype == MH_OBJECT) {
    617           u64 base = msecs[n_sect - 1].addr;
    618           value = (n_value >= base) ? (n_value - base) : 0;
    619         } else {
    620           value = n_value;
    621         }
    622         kind = (msecs[n_sect - 1].flags & S_ATTR_PURE_INSTRUCTIONS) ? SK_FUNC
    623                                                                     : SK_OBJ;
    624       }
    625     } else {
    626       kind = SK_NOTYPE;
    627     }
    628 
    629     ObjSymId id = obj_symbol_ex(ob, sn, (SymBind)bind, (SymVis)vis,
    630                                 (SymKind)kind, sec_id, value, size, cmnalign);
    631     obj_sym_mark_referenced(ob, id);
    632     if ((mh_flags & MH_SUBSECTIONS_VIA_SYMBOLS) && type_field == N_SECT &&
    633         sec_id != OBJ_SEC_NONE) {
    634       MAtomCand* ac = &atom_cands[natom_cands++];
    635       ac->sec = sec_id;
    636       ac->sym = id;
    637       ac->offset = (u32)value;
    638       if ((n_desc & N_NO_DEAD_STRIP) ||
    639           (n_sect != 0 && n_sect <= nmsecs &&
    640            (msecs[n_sect - 1].flags & S_ATTR_NO_DEAD_STRIP))) {
    641         ac->flags |= OBJ_ATOM_RETAIN;
    642       }
    643     }
    644     /* n_desc carries Mach-O attribute bits beyond what bind/vis/kind
    645      * model — N_NO_DEAD_STRIP, N_REF_TO_WEAK, N_ARM_THUMB_DEF, etc.
    646      * Mask off the bits we already round-trip via bind (N_WEAK_DEF /
    647      * N_WEAK_REF) and the alignment field for commons (which lives
    648      * in cmnalign), then stash the remainder so emit_macho can OR it
    649      * back in. */
    650     u16 desc_pass = n_desc;
    651     desc_pass &= (u16) ~(N_WEAK_DEF | N_WEAK_REF);
    652     if (kind == SK_COMMON) desc_pass &= 0x00ff; /* drop align field */
    653     if (desc_pass) obj_symbol_set_flags(ob, id, desc_pass);
    654     sym_macho_to_obj[i] = id;
    655   }
    656 
    657   if (mh_flags & MH_SUBSECTIONS_VIA_SYMBOLS) {
    658     if (natom_cands > 1u)
    659       qsort(atom_cands, natom_cands, sizeof(*atom_cands), matom_cand_cmp);
    660     for (u32 i = 0; i < natom_cands; ++i) {
    661       MAtomCand* ac = &atom_cands[i];
    662       const Section* sec = obj_section_get(ob, ac->sec);
    663       u32 end = sec ? ((sec->sem == SSEM_NOBITS || sec->kind == SEC_BSS)
    664                            ? sec->bss_size
    665                            : sec->bytes.total)
    666                     : ac->offset;
    667       if (i + 1u < natom_cands && atom_cands[i + 1u].sec == ac->sec)
    668         end = atom_cands[i + 1u].offset;
    669       if (end >= ac->offset)
    670         obj_atom_define(ob, ac->sec, ac->offset, end - ac->offset, ac->sym,
    671                         ac->flags);
    672     }
    673   }
    674 
    675   /* ---- pass 4: parse per-section relocations into ObjBuilder relocs.
    676    *              Mach-O encodes addends out-of-band as a leading
    677    *              ARM64_RELOC_ADDEND followed by the real reloc; the
    678    *              reader collapses the pair on the way in. */
    679   /* Lazily-populated section-start local symbols, for clang-emitted
    680    * non-extern (section-relative) relocations.  See the r_extern==0
    681    * branch below for the encoding. */
    682   ObjSymId* sec_start_sym =
    683       arena_zarray(c->scratch, ObjSymId, nmsecs ? nmsecs : 1);
    684   for (u32 i = 0; i < nmsecs; ++i) sec_start_sym[i] = OBJ_SYM_NONE;
    685   for (u32 i = 0; i < nmsecs; ++i) {
    686     MSecRec* m = &msecs[i];
    687     if (!m->nreloc) continue;
    688     if (m->reloff + (u64)m->nreloc * MACHO_RELOC_SIZE > len)
    689       compiler_panic(c, SRCLOC_NONE, "read_macho: relocation table out of range");
    690     const u8* rp = data + m->reloff;
    691     i64 pending_addend = 0;
    692     int have_pending = 0;
    693     int pending_subtractor = 0;
    694     u32 pending_subtractor_offset = 0;
    695     u32 pending_subtractor_length = 0;
    696     for (u32 j = 0; j < m->nreloc; ++j) {
    697       u32 r_address = rd_u32_le(rp + j * MACHO_RELOC_SIZE);
    698       u32 packed = rd_u32_le(rp + j * MACHO_RELOC_SIZE + 4);
    699       u32 r_symbolnum = packed & 0x00ffffffu;
    700       u32 r_pcrel = (packed >> 24) & 1u;
    701       u32 r_length = (packed >> 25) & 3u;
    702       u32 r_extern = (packed >> 27) & 1u;
    703       u32 r_type = (packed >> 28) & 0xfu;
    704 
    705       if (r_type == ARM64_RELOC_ADDEND) {
    706         /* Sign-extend 24-bit addend. */
    707         i32 ad = (i32)(r_symbolnum & 0x00ffffffu);
    708         if (ad & 0x00800000) ad |= ~0x00ffffff;
    709         pending_addend = (i64)ad;
    710         have_pending = 1;
    711         continue;
    712       }
    713 
    714       u32 kind;
    715       if (r_type == ARM64_RELOC_SUBTRACTOR) {
    716         kind = (r_length == 3)   ? R_SUB64
    717                : (r_length == 2) ? R_SUB32
    718                : (r_length == 1) ? R_SUB16
    719                                  : R_SUB8;
    720       } else {
    721         kind = macho->reloc_from(r_type);
    722       }
    723       if (kind == (u32)-1)
    724         compiler_panic(c, SRCLOC_NONE, "read_macho: unsupported reloc type %u",
    725                        r_type);
    726 
    727       /* Refine kind by (r_pcrel, r_length) when the type field alone
    728        * is ambiguous.  ARM64_RELOC_UNSIGNED collapses R_ABS64/R_ABS32
    729        * and PC-relative variants. */
    730       if (r_type == ARM64_RELOC_UNSIGNED) {
    731         if (pending_subtractor && pending_subtractor_offset == r_address &&
    732             pending_subtractor_length == r_length) {
    733           kind = (r_length == 3)   ? R_ADD64
    734                  : (r_length == 2) ? R_ADD32
    735                  : (r_length == 1) ? R_ADD16
    736                                    : R_ADD8;
    737           pending_subtractor = 0;
    738         } else if (r_pcrel) {
    739           kind = (r_length == 3) ? R_PC64 : R_PC32;
    740         } else {
    741           kind = (r_length == 3) ? R_ABS64 : R_ABS32;
    742         }
    743       } else if (r_type == ARM64_RELOC_BRANCH26) {
    744         kind = R_AARCH64_CALL26;
    745       } else if (r_type == ARM64_RELOC_PAGEOFF12) {
    746         /* PAGEOFF12 is access-size-agnostic in Mach-O; the linker
    747          * applier needs to scale the immediate by the load/store size
    748          * (or apply it raw for ADD).  Inspect the patched instruction
    749          * at r_address to pick the right RelocKind so the applier in
    750          * link_reloc.c shifts the lo12 correctly. */
    751         if (m->fileoff + r_address + 4u > len)
    752           compiler_panic(c, SRCLOC_NONE,
    753                          "read_macho: PAGEOFF12 r_address %u out of range",
    754                          r_address);
    755         u32 ins = rd_u32_le(data + m->fileoff + r_address);
    756         /* ADD (immediate): bits 30:24 = 0010001 (W=10001 / X=10010001).
    757          * Mask 0x7f800000 isolates sf=0/1 + the 0010001 pattern; values
    758          * 0x11000000 (32-bit) and 0x91000000 (64-bit) — match the latter
    759          * via the same 0x7f mask leaving bit 31 free. */
    760         if ((ins & 0x7f800000u) == 0x11000000u) {
    761           kind = R_AARCH64_ADD_ABS_LO12_NC;
    762         } else if ((ins & 0x3b000000u) == 0x39000000u) {
    763           /* LDR/STR (immediate unsigned offset).  Bits 29:27=111, bit 26=V
    764            * (0=integer, 1=SIMD/FP), bits 25:24=01.  size in [31:30] plus
    765            * opc bit 23 for the SIMD 128-bit case (size=00, opc=11). */
    766           u32 sz = (ins >> 30) & 3u;
    767           u32 v_bit = (ins >> 26) & 1u;
    768           u32 opc1 = (ins >> 23) & 1u;
    769           if (v_bit && sz == 0 && opc1) {
    770             kind = R_AARCH64_LDST128_ABS_LO12_NC;
    771           } else {
    772             kind = (sz == 0)   ? R_AARCH64_LDST8_ABS_LO12_NC
    773                    : (sz == 1) ? R_AARCH64_LDST16_ABS_LO12_NC
    774                    : (sz == 2) ? R_AARCH64_LDST32_ABS_LO12_NC
    775                                : R_AARCH64_LDST64_ABS_LO12_NC;
    776           }
    777         }
    778         /* else: leave as the default R_AARCH64_ADD_ABS_LO12_NC. */
    779       }
    780 
    781       ObjSymId target = OBJ_SYM_NONE;
    782       i64 inplace_addend_override = 0;
    783       int use_inplace_addend = 0;
    784       if (r_extern) {
    785         if (r_symbolnum < nsyms) target = sym_macho_to_obj[r_symbolnum];
    786         if (!have_pending && r_type == ARM64_RELOC_UNSIGNED) {
    787           u32 rsz = 1u << r_length;
    788           if ((u64)m->fileoff + r_address + rsz > len)
    789             compiler_panic(c, SRCLOC_NONE,
    790                            "read_macho: extern unsigned reloc r_address out "
    791                            "of range");
    792           const u8* pv = data + m->fileoff + r_address;
    793           u64 inplace;
    794           if (r_length == 3)
    795             inplace = rd_u64_le(pv);
    796           else if (r_length == 2)
    797             inplace = (u64)rd_u32_le(pv);
    798           else if (r_length == 1)
    799             inplace = (u64)rd_u16_le(pv);
    800           else
    801             inplace = (u64)pv[0];
    802           inplace_addend_override = (i64)inplace;
    803           use_inplace_addend = 1;
    804         }
    805       } else {
    806         /* Section-relative reloc — clang emits these for compact unwind,
    807          * EH frame, and DWARF debug info.  r_symbolnum is the 1-based
    808          * section index; the in-place value at r_address is the absolute
    809          * .o virtual address of the referent.  Synthesize a local
    810          * symbol pointing to the target section's start (lazily, once
    811          * per section) and re-express the reloc as
    812          *   target = sec_start_sym,  addend = inplace - section.addr. */
    813         if (r_symbolnum == 0 || r_symbolnum > nmsecs)
    814           compiler_panic(c, SRCLOC_NONE,
    815                          "read_macho: section-relative reloc references "
    816                          "invalid section index %u",
    817                          r_symbolnum);
    818         u32 sec_idx = r_symbolnum - 1u;
    819         MSecRec* tm = &msecs[sec_idx];
    820         if (sec_start_sym[sec_idx] == OBJ_SYM_NONE) {
    821           /* Build ".Lkit.macho_secstart.<sec_idx>" without snprintf
    822            * (the freestanding build doesn't pull in stdio). */
    823           static const char prefix[] = ".Lkit.macho_secstart.";
    824           char nmbuf[sizeof(prefix) + 10];
    825           u32 nlen = (u32)(sizeof(prefix) - 1);
    826           memcpy(nmbuf, prefix, nlen);
    827           char dec[10];
    828           u32 dn = 0;
    829           u32 v = sec_idx;
    830           do {
    831             dec[dn++] = (char)('0' + (v % 10u));
    832             v /= 10u;
    833           } while (v);
    834           for (u32 k = 0; k < dn; ++k) nmbuf[nlen + k] = dec[dn - 1 - k];
    835           nlen += dn;
    836           Sym sn =
    837               pool_intern_slice(c->global, (Slice){.s = nmbuf, .len = nlen});
    838           u16 sk = (tm->flags & S_ATTR_PURE_INSTRUCTIONS) ? SK_FUNC : SK_OBJ;
    839           sec_start_sym[sec_idx] =
    840               obj_symbol(ob, sn, SB_LOCAL, (SymKind)sk, tm->obj_sec, 0, 0);
    841         }
    842         target = sec_start_sym[sec_idx];
    843         u32 rsz = 1u << r_length;
    844         if ((u64)m->fileoff + r_address + rsz > len)
    845           compiler_panic(c, SRCLOC_NONE,
    846                          "read_macho: non-extern reloc r_address out of range");
    847         u64 inplace;
    848         const u8* pv = data + m->fileoff + r_address;
    849         if (r_length == 3)
    850           inplace = rd_u64_le(pv);
    851         else if (r_length == 2)
    852           inplace = (u64)rd_u32_le(pv);
    853         else if (r_length == 1)
    854           inplace = (u64)rd_u16_le(pv);
    855         else
    856           inplace = (u64)pv[0];
    857         inplace_addend_override = (i64)inplace - (i64)tm->addr;
    858         use_inplace_addend = 1;
    859       }
    860 
    861       i64 addend = have_pending
    862                        ? pending_addend
    863                        : (use_inplace_addend ? inplace_addend_override : 0);
    864       int has_explicit = have_pending || use_inplace_addend || addend != 0;
    865       have_pending = 0;
    866       pending_addend = 0;
    867 
    868       obj_reloc_ex(ob, m->obj_sec, r_address, (RelocKind)kind, target, addend,
    869                    has_explicit, 0);
    870       if (r_type == ARM64_RELOC_SUBTRACTOR) {
    871         pending_subtractor = 1;
    872         pending_subtractor_offset = r_address;
    873         pending_subtractor_length = r_length;
    874       }
    875     }
    876   }
    877 
    878   /* MH_EXECUTE / MH_DYLIB: attach the linked-image view (segments, dylibs,
    879    * entry, dynamic symbols + relocations). */
    880   if (filetype != MH_OBJECT)
    881     read_macho_image(c, ob, data, len, filetype, cputype, msecs, nmsecs);
    882 
    883   obj_finalize(ob);
    884   return ob;
    885 }
    886 
    887 /* ---- read_macho_dso ----
    888  *
    889  * MH_DYLIB reader.  Walks load commands once to find LC_ID_DYLIB
    890  * (install-name) and LC_SYMTAB (symbol table + string table), then
    891  * emits one defined ObjSym per externally-visible nlist entry.
    892  *
    893  * Like read_elf_dso, the produced ObjBuilder carries no sections /
    894  * relocations / groups — only symbol definitions in OBJ_SEC_NONE.  The
    895  * consumer's resolve_undefs sees these as defined globals and marks the
    896  * matching consumer-side undef as `imported`.  The dylib's own undefs
    897  * (its imports of other dylibs) are filtered: they don't satisfy any
    898  * undef in the consumer. */
    899 
    900 ObjBuilder* read_macho_dso(Compiler* c, const char* name, const u8* data,
    901                            size_t len, Sym* install_name_out) {
    902   (void)name;
    903   if (install_name_out) *install_name_out = 0;
    904   if (len < MACHO_HDR64_SIZE)
    905     compiler_panic(c, SRCLOC_NONE, "read_macho_dso: input shorter than header");
    906 
    907   u32 magic = rd_u32_le(data + 0);
    908   if (magic != MH_MAGIC_64)
    909     compiler_panic(c, SRCLOC_NONE, "read_macho_dso: bad magic 0x%x", magic);
    910 
    911   u32 cputype = rd_u32_le(data + 4);
    912   u32 filetype = rd_u32_le(data + 12);
    913   u32 ncmds = rd_u32_le(data + 16);
    914   u32 sizeofcmds = rd_u32_le(data + 20);
    915 
    916   {
    917     const ObjFormatImpl* fmt = obj_format_lookup(KIT_OBJ_MACHO);
    918     const ObjMachoArchOps* macho =
    919         fmt && fmt->macho_cputype ? fmt->macho_cputype(cputype) : NULL;
    920     if (!macho)
    921       compiler_panic(c, SRCLOC_NONE, "read_macho_dso: unsupported cputype 0x%x",
    922                      cputype);
    923   }
    924   if (filetype != MH_DYLIB && filetype != MH_BUNDLE)
    925     compiler_panic(c, SRCLOC_NONE,
    926                    "read_macho_dso: not MH_DYLIB/MH_BUNDLE (filetype=%u)",
    927                    filetype);
    928   if ((u64)MACHO_HDR64_SIZE + sizeofcmds > len)
    929     compiler_panic(c, SRCLOC_NONE, "read_macho_dso: load commands exceed file");
    930 
    931   u32 symoff = 0, nsyms = 0, stroff = 0, strsize = 0;
    932   Sym install_name = 0;
    933 
    934   u64 pos = MACHO_HDR64_SIZE;
    935   u64 end = pos + sizeofcmds;
    936   for (u32 ci = 0; ci < ncmds && pos + 8 <= end; ++ci) {
    937     u32 cmd = rd_u32_le(data + pos);
    938     u32 cmdsize = rd_u32_le(data + pos + 4);
    939     if (cmdsize < 8 || pos + cmdsize > end)
    940       compiler_panic(c, SRCLOC_NONE, "read_macho_dso: malformed load command");
    941     if (cmd == LC_ID_DYLIB) {
    942       /* dylib_command: cmd, cmdsize, name(lc_str: 4-byte offset within
    943        * the cmd), timestamp, current_version, compat_version. */
    944       if (cmdsize < 24) goto next;
    945       u32 nm_off = rd_u32_le(data + pos + 8);
    946       if (nm_off >= cmdsize) goto next;
    947       const char* p = (const char*)(data + pos + nm_off);
    948       u32 maxlen = cmdsize - nm_off;
    949       u32 nlen = 0;
    950       while (nlen < maxlen && p[nlen]) ++nlen;
    951       if (nlen)
    952         install_name =
    953             pool_intern_slice(c->global, (Slice){.s = p, .len = nlen});
    954     } else if (cmd == LC_SYMTAB) {
    955       symoff = rd_u32_le(data + pos + 8);
    956       nsyms = rd_u32_le(data + pos + 12);
    957       stroff = rd_u32_le(data + pos + 16);
    958       strsize = rd_u32_le(data + pos + 20);
    959     }
    960   next:
    961     pos += cmdsize;
    962   }
    963   if (install_name_out) *install_name_out = install_name;
    964 
    965   if (stroff + (u64)strsize > len)
    966     compiler_panic(c, SRCLOC_NONE, "read_macho_dso: string table out of range");
    967   if (symoff + (u64)nsyms * MACHO_NLIST64_SIZE > len)
    968     compiler_panic(c, SRCLOC_NONE, "read_macho_dso: symbol table out of range");
    969 
    970   ObjBuilder* ob = obj_new(c);
    971   if (!ob) compiler_panic(c, SRCLOC_NONE, "read_macho_dso: obj_new failed");
    972 
    973   const u8* strtab = data + stroff;
    974   const u8* sbase = data + symoff;
    975   for (u32 i = 0; i < nsyms; ++i) {
    976     const u8* p = sbase + (u64)i * MACHO_NLIST64_SIZE;
    977     u32 strx = rd_u32_le(p + 0);
    978     u8 n_type = p[4];
    979     u16 n_desc = rd_u16_le(p + 6);
    980 
    981     u8 type_field = (u8)(n_type & N_TYPE);
    982     u8 ext = (u8)(n_type & N_EXT);
    983     /* Skip non-external (locals) and undef refs (the dylib's own imports). */
    984     if (!ext) continue;
    985     if (type_field == N_UNDF) continue;
    986     /* N_INDR / N_PBUD / N_STAB: skip — not interesting for static link. */
    987     if (n_type & N_STAB) continue;
    988 
    989     if (strx >= strsize) continue;
    990     const char* nm = (const char*)(strtab + strx);
    991     u32 nlen = 0;
    992     while (strx + nlen < strsize && nm[nlen]) ++nlen;
    993     if (!nlen) continue;
    994     Sym sn = pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen});
    995 
    996     SymBind bind = (n_desc & (N_WEAK_DEF | N_WEAK_REF)) ? SB_WEAK : SB_GLOBAL;
    997     SymKind kind = SK_NOTYPE;
    998     /* Mach-O dylib nlist doesn't carry STT_FUNC / STT_OBJECT cleanly —
    999      * default to NOTYPE.  The consuming linker uses dso_export_is_func
   1000      * to peek at this for ELF; for Mach-O the `imported` decision flows
   1001      * through synthetic __got / __stubs regardless of kind. */
   1002     {
   1003       ObjSymId did =
   1004           obj_symbol_ex(ob, sn, bind, SV_DEFAULT, kind, OBJ_SEC_NONE, 0, 0, 0);
   1005       obj_sym_mark_referenced(ob, did);
   1006     }
   1007   }
   1008 
   1009   obj_finalize(ob);
   1010   return ob;
   1011 }