read.c (40604B)
1 /* Mach-O MH_OBJECT reader. Parses a 64-bit little-endian relocatable 2 * object back into a fresh ObjBuilder. The post-finalize ObjBuilder 3 * shape is the canonical superset of the writer's input: 4 * read_macho of an emit_macho output produces an ObjBuilder 5 * shape-equivalent to the writer's input, modulo the synthesized 6 * "__SEG,__sect"-form section names. 7 * 8 * Scope: AArch64 little-endian. MH_OBJECT parses to the section/symbol/ 9 * reloc view; MH_EXECUTE / MH_DYLIB additionally get the linked-image view 10 * (read_macho_image: segments, dylibs, entry, dynamic symbols + relocs). 11 * read_macho_dso remains the linker's DSO-only input path. Other archs / 12 * endianness produce a compiler_panic with a diagnostic. */ 13 14 #include <stdlib.h> 15 #include <string.h> 16 17 #include "core/arena.h" 18 #include "core/bytes.h" 19 #include "core/heap.h" 20 #include "core/pool.h" 21 #include "core/slice.h" 22 #include "core/util.h" 23 #include "obj/format.h" 24 #include "obj/macho/macho.h" 25 26 /* ---- mach-section scratch struct ---- */ 27 28 typedef struct MSecRec { 29 char segname[16]; 30 char sectname[16]; 31 u32 seg_len; 32 u32 sect_len; 33 u64 addr; 34 u64 size; 35 u32 fileoff; 36 u32 align_log2; 37 u32 reloff; 38 u32 nreloc; 39 u32 flags; 40 u32 reserved2; 41 ObjSecId obj_sec; /* assigned in pass 1 */ 42 } MSecRec; 43 44 typedef struct MAtomCand { 45 ObjSecId sec; 46 ObjSymId sym; 47 u32 offset; 48 u32 flags; 49 } MAtomCand; 50 51 static int matom_cand_cmp(const void* av, const void* bv) { 52 const MAtomCand* a = (const MAtomCand*)av; 53 const MAtomCand* b = (const MAtomCand*)bv; 54 if (a->sec < b->sec) return -1; 55 if (a->sec > b->sec) return 1; 56 if (a->offset < b->offset) return -1; 57 if (a->offset > b->offset) return 1; 58 if (a->sym < b->sym) return -1; 59 if (a->sym > b->sym) return 1; 60 return 0; 61 } 62 63 static u32 fixed16_len(const char* s) { 64 u32 n = 0; 65 while (n < 16 && s[n] != 0) ++n; 66 return n; 67 } 68 69 static u16 sec_kind_from_seg_sect(const char* segname, u32 seg_len, 70 const char* sectname, u32 sect_len, 71 u32 flags) { 72 u32 stype = flags & SECTION_TYPE; 73 if (stype == S_ZEROFILL || stype == S_THREAD_LOCAL_ZEROFILL) return SEC_BSS; 74 if (flags & S_ATTR_PURE_INSTRUCTIONS) return SEC_TEXT; 75 76 if (seg_len == 7 && memcmp(segname, "__DWARF", 7) == 0) return SEC_DEBUG; 77 if (seg_len == 6 && memcmp(segname, "__TEXT", 6) == 0) { 78 if (sect_len == 6 && memcmp(sectname, "__text", 6) == 0) return SEC_TEXT; 79 return SEC_RODATA; /* __const, __cstring, ... */ 80 } 81 if (seg_len == 6 && memcmp(segname, "__DATA", 6) == 0) { 82 if (sect_len == 5 && memcmp(sectname, "__bss", 5) == 0) return SEC_BSS; 83 return SEC_DATA; 84 } 85 return SEC_OTHER; 86 } 87 88 static u16 sec_flags_from(u32 mflags, u16 sec_kind) { 89 u16 f = 0; 90 if (sec_kind == SEC_TEXT || (mflags & S_ATTR_PURE_INSTRUCTIONS)) { 91 f |= SF_ALLOC | SF_EXEC; 92 } else if (sec_kind == SEC_RODATA) { 93 f |= SF_ALLOC; 94 } else if (sec_kind == SEC_DATA || sec_kind == SEC_BSS) { 95 f |= SF_ALLOC | SF_WRITE; 96 } 97 u32 stype = mflags & SECTION_TYPE; 98 if (stype == S_THREAD_LOCAL_REGULAR || stype == S_THREAD_LOCAL_ZEROFILL || 99 stype == S_THREAD_LOCAL_VARIABLES) { 100 f |= SF_TLS; 101 } 102 if (stype == S_CSTRING_LITERALS) { 103 f |= SF_MERGE | SF_STRINGS; 104 } 105 return f; 106 } 107 108 static u16 sec_sem_from(u32 mflags, u16 sec_kind) { 109 u32 stype = mflags & SECTION_TYPE; 110 if (stype == S_ZEROFILL || stype == S_THREAD_LOCAL_ZEROFILL || 111 sec_kind == SEC_BSS) { 112 return SSEM_NOBITS; 113 } 114 if (stype == S_MOD_INIT_FUNC_POINTERS) return SSEM_INIT_ARRAY; 115 if (stype == S_MOD_TERM_FUNC_POINTERS) return SSEM_FINI_ARRAY; 116 return SSEM_PROGBITS; 117 } 118 119 /* Intern a Mach-O lc_str (NUL-terminated string embedded inside a load 120 * command at `cmd_pos + str_off`, bounded by the command's cmdsize). 121 * Returns 0 if the offset/string is malformed. */ 122 static Sym macho_lc_str(Compiler* c, const u8* data, u64 cmd_pos, u32 cmdsize, 123 u32 str_off) { 124 if (str_off < 8 || str_off >= cmdsize) return 0; 125 const char* p = (const char*)(data + cmd_pos + str_off); 126 u32 maxlen = cmdsize - str_off; 127 u32 nlen = 0; 128 while (nlen < maxlen && p[nlen]) ++nlen; 129 if (!nlen) return 0; 130 return pool_intern_slice(c->global, (Slice){.s = p, .len = nlen}); 131 } 132 133 /* ---- read_macho_image ---- 134 * 135 * Linked-image (MH_EXECUTE / MH_DYLIB) view, the Mach-O peer of 136 * read_elf_image. Walks the load commands a second time to populate the 137 * ObjImage: LC_SEGMENT_64 -> segments (+ __TEXT base), LC_LOAD_DYLINKER -> 138 * interp, LC_ID_DYLIB -> soname, LC_LOAD_DYLIB/WEAK/REEXPORT -> deps, 139 * LC_RPATH -> rpaths, LC_MAIN/LC_UNIXTHREAD -> entry, the LC_SYMTAB external 140 * nlist entries -> dynamic symbols, and LC_DYLD_CHAINED_FIXUPS binds/rebases 141 * -> dynamic relocations. The section / symbol / reloc views are parsed by 142 * read_macho's normal passes; this adds the orthogonal image dimension. 143 * Lenient: a malformed sub-table is skipped rather than panicked, so a 144 * partially-damaged image still yields a useful inspection. 145 * 146 * `msecs`/`nmsecs` carry the section table read in read_macho's pass 1 so a 147 * defined dynamic symbol's n_sect maps back to its ObjSecId. */ 148 static void read_macho_image(Compiler* c, ObjBuilder* ob, const u8* data, 149 size_t len, u32 filetype, u32 cputype, 150 const MSecRec* msecs, u32 nmsecs) { 151 ObjImage* im = 152 obj_image_ensure(ob, filetype == MH_DYLIB ? OBJ_KIND_DYN : OBJ_KIND_EXEC); 153 if (!im) compiler_panic(c, SRCLOC_NONE, "read_macho: obj_image_ensure failed"); 154 155 u32 ncmds = rd_u32_le(data + 16); 156 u32 sizeofcmds = rd_u32_le(data + 20); 157 158 /* Per-segment (vmaddr, file_off) recorded for chained-fixup vaddr 159 * resolution below; sized to ncmds (segments are a subset of commands). */ 160 u64* seg_vaddr = arena_array(c->scratch, u64, ncmds ? ncmds : 1); 161 u64* seg_fileoff = arena_array(c->scratch, u64, ncmds ? ncmds : 1); 162 u32 nseg = 0; 163 164 int have_text = 0; 165 u64 text_vmaddr = 0; 166 int have_main = 0; 167 u64 main_entryoff = 0; 168 u32 symoff = 0, nsyms = 0, stroff = 0, strsize = 0; 169 u32 cf_off = 0, cf_size = 0; 170 171 u64 pos = MACHO_HDR64_SIZE; 172 u64 end = pos + sizeofcmds; 173 for (u32 ci = 0; ci < ncmds && pos + 8 <= end; ++ci) { 174 u32 cmd = rd_u32_le(data + pos); 175 u32 cmdsize = rd_u32_le(data + pos + 4); 176 if (cmdsize < 8 || pos + cmdsize > end) break; 177 178 /* Raw load-command view (escape hatch): one entry per LC_* command, 179 * carrying its file offset and on-disk size. */ 180 { 181 ObjImageRaw r; 182 r.tag = cmd; 183 r.value = pos; 184 r.extra = cmdsize; 185 obj_image_add_raw(im, &r); 186 } 187 188 if (cmd == LC_SEGMENT_64 && cmdsize >= MACHO_SEGCMD64_SIZE) { 189 const char* segname = (const char*)(data + pos + 8); 190 u32 seg_len = fixed16_len(segname); 191 u64 vmaddr = rd_u64_le(data + pos + 24); 192 u64 vmsize = rd_u64_le(data + pos + 32); 193 u64 fileoff = rd_u64_le(data + pos + 40); 194 u64 filesize = rd_u64_le(data + pos + 48); 195 u32 initprot = rd_u32_le(data + pos + 60); 196 ObjSegment seg; 197 seg.name = seg_len ? pool_intern_slice( 198 c->global, (Slice){.s = segname, .len = seg_len}) 199 : 0; 200 seg.vaddr = vmaddr; 201 seg.vsize = vmsize; 202 seg.file_off = fileoff; 203 seg.file_size = filesize; 204 /* VM_PROT_* bits differ from OBJ_SEG_* — remap explicitly. */ 205 seg.perms = ((initprot & VM_PROT_READ) ? OBJ_SEG_R : 0) | 206 ((initprot & VM_PROT_WRITE) ? OBJ_SEG_W : 0) | 207 ((initprot & VM_PROT_EXECUTE) ? OBJ_SEG_X : 0); 208 seg.align = 1; /* Mach-O segments don't carry an explicit p_align */ 209 obj_image_add_segment(im, &seg); 210 211 seg_vaddr[nseg] = vmaddr; 212 seg_fileoff[nseg] = fileoff; 213 ++nseg; 214 if (!have_text && seg_len == 6 && memcmp(segname, "__TEXT", 6) == 0) { 215 have_text = 1; 216 text_vmaddr = vmaddr; 217 } 218 } else if (cmd == LC_LOAD_DYLINKER) { 219 Sym s = macho_lc_str(c, data, pos, cmdsize, rd_u32_le(data + pos + 8)); 220 if (s) obj_image_set_interp(im, s); 221 } else if (cmd == LC_ID_DYLIB) { 222 Sym s = macho_lc_str(c, data, pos, cmdsize, rd_u32_le(data + pos + 8)); 223 if (s) obj_image_set_soname(im, s); 224 } else if (cmd == LC_LOAD_DYLIB || cmd == LC_LOAD_WEAK_DYLIB || 225 cmd == LC_REEXPORT_DYLIB) { 226 Sym s = macho_lc_str(c, data, pos, cmdsize, rd_u32_le(data + pos + 8)); 227 if (s) { 228 ObjImageDep d; 229 d.name = s; 230 d.imports = NULL; 231 d.nimports = 0; 232 obj_image_add_dep(im, &d); 233 } 234 } else if (cmd == LC_RPATH) { 235 Sym s = macho_lc_str(c, data, pos, cmdsize, rd_u32_le(data + pos + 8)); 236 if (s) obj_image_add_rpath(im, s); 237 } else if (cmd == LC_MAIN && cmdsize >= 16) { 238 have_main = 1; 239 main_entryoff = rd_u64_le(data + pos + 8); 240 } else if (cmd == LC_UNIXTHREAD && cmdsize >= 16 && !have_main) { 241 /* thread_command: flavor (u32) + count (u32) + register state. Pull 242 * the program counter out of the arch's state. */ 243 u32 flavor = rd_u32_le(data + pos + 8); 244 u64 pc_off = 0; 245 int have_pc = 0; 246 if (cputype == CPU_TYPE_ARM64 && flavor == 6 /* ARM_THREAD_STATE64 */) { 247 pc_off = pos + 16 + 32u * 8u; /* x0..x28,fp,lr,sp,pc */ 248 have_pc = 1; 249 } else if (cputype == CPU_TYPE_X86_64 && 250 flavor == 4 /* x86_THREAD_STATE64 */) { 251 pc_off = pos + 16 + 16u * 8u; /* rax..r15, then rip */ 252 have_pc = 1; 253 } 254 if (have_pc && pc_off + 8 <= pos + cmdsize) 255 obj_image_set_entry(im, rd_u64_le(data + pc_off)); 256 } else if (cmd == LC_SYMTAB && cmdsize >= MACHO_SYMTAB_CMD_SIZE) { 257 symoff = rd_u32_le(data + pos + 8); 258 nsyms = rd_u32_le(data + pos + 12); 259 stroff = rd_u32_le(data + pos + 16); 260 strsize = rd_u32_le(data + pos + 20); 261 } else if (cmd == LC_DYLD_CHAINED_FIXUPS && cmdsize >= 16) { 262 cf_off = rd_u32_le(data + pos + 8); 263 cf_size = rd_u32_le(data + pos + 12); 264 } 265 pos += cmdsize; 266 } 267 268 if (have_text) obj_image_set_base(im, text_vmaddr); 269 /* LC_MAIN entryoff is a file offset within __TEXT (which maps file 0 to 270 * its vmaddr); the entry vaddr is __TEXT base + entryoff. */ 271 if (have_main && have_text) 272 obj_image_set_entry(im, text_vmaddr + main_entryoff); 273 274 /* LC_SYMTAB external nlist entries -> dynamic symbols (Mach-O's analog of 275 * .dynsym: the dynamically-visible exports and undefined imports). */ 276 if (nsyms && stroff + (u64)strsize <= len && 277 symoff + (u64)nsyms * MACHO_NLIST64_SIZE <= len) { 278 const u8* strtab = data + stroff; 279 const u8* sbase = data + symoff; 280 for (u32 i = 0; i < nsyms; ++i) { 281 const u8* p = sbase + (u64)i * MACHO_NLIST64_SIZE; 282 u32 strx = rd_u32_le(p + 0); 283 u8 n_type = p[4]; 284 u8 n_sect = p[5]; 285 u16 n_desc = rd_u16_le(p + 6); 286 u64 n_value = rd_u64_le(p + 8); 287 if (n_type & N_STAB) continue; /* debug stab, not dynamic */ 288 if (!(n_type & N_EXT)) continue; /* locals aren't dynamic */ 289 if (strx >= strsize) continue; 290 const char* nm = (const char*)(strtab + strx); 291 u32 nlen = 0; 292 while (strx + nlen < strsize && nm[nlen]) ++nlen; 293 if (!nlen) continue; 294 295 u8 type_field = (u8)(n_type & N_TYPE); 296 ObjImageSym ds; 297 ds.version = 0; /* Mach-O has no ELF-style symbol versioning */ 298 ds.name = pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen}); 299 ds.bind = (n_desc & (N_WEAK_DEF | N_WEAK_REF)) ? SB_WEAK : SB_GLOBAL; 300 ds.value = (type_field == N_SECT || type_field == N_ABS) ? n_value : 0; 301 ds.size = 0; 302 if (type_field == N_SECT && n_sect >= 1 && n_sect <= nmsecs) { 303 ds.section = msecs[n_sect - 1].obj_sec; 304 ds.kind = (msecs[n_sect - 1].flags & S_ATTR_PURE_INSTRUCTIONS) ? SK_FUNC 305 : SK_OBJ; 306 } else { 307 ds.section = OBJ_SEC_NONE; /* undefined import / absolute */ 308 ds.kind = SK_NOTYPE; 309 } 310 obj_image_add_dynsym(im, &ds); 311 } 312 } 313 314 /* LC_DYLD_CHAINED_FIXUPS binds/rebases -> dynamic relocations. */ 315 if (cf_size >= 28 && (u64)cf_off + cf_size <= len) { 316 const u8* cf = data + cf_off; 317 u32 starts_offset = rd_u32_le(cf + 4); 318 u32 imports_offset = rd_u32_le(cf + 8); 319 u32 symbols_offset = rd_u32_le(cf + 12); 320 u32 imports_count = rd_u32_le(cf + 16); 321 u32 imports_format = rd_u32_le(cf + 20); 322 u32 relative_kind = 323 (cputype == CPU_TYPE_X86_64) ? R_X64_RELATIVE : R_AARCH64_RELATIVE; 324 325 /* Import symbol names, indexed by 0-based bind ordinal. */ 326 Sym* imp_names = 327 arena_zarray(c->scratch, Sym, imports_count ? imports_count : 1); 328 if (imports_format == DYLD_CHAINED_IMPORT && 329 (u64)imports_offset + (u64)imports_count * 4u <= cf_size) { 330 for (u32 i = 0; i < imports_count; ++i) { 331 u32 packed = rd_u32_le(cf + imports_offset + i * 4u); 332 u32 name_off = (packed >> 9) & 0x7fffffu; 333 u64 so = (u64)symbols_offset + name_off; 334 if (so >= cf_size) continue; 335 const char* nm = (const char*)(cf + so); 336 u32 maxn = (u32)(cf_size - so); 337 u32 nlen = 0; 338 while (nlen < maxn && nm[nlen]) ++nlen; 339 if (nlen) 340 imp_names[i] = 341 pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen}); 342 } 343 } 344 345 if ((u64)starts_offset + 4u <= cf_size) { 346 const u8* sib = cf + starts_offset; 347 u32 seg_count = rd_u32_le(sib + 0); 348 for (u32 si = 0; si < seg_count; ++si) { 349 if ((u64)starts_offset + 4u + (u64)si * 4u + 4u > cf_size) break; 350 u32 seg_info_offset = rd_u32_le(sib + 4 + si * 4u); 351 if (!seg_info_offset) continue; 352 if ((u64)starts_offset + seg_info_offset + 22u > cf_size) continue; 353 const u8* sis = cf + starts_offset + seg_info_offset; 354 u16 pointer_format = rd_u16_le(sis + 6); 355 u64 segment_offset = rd_u64_le(sis + 8); /* file offset of segment */ 356 u16 page_count = rd_u16_le(sis + 20); 357 /* Only the DYLD_CHAINED_PTR_64 family shares the bit layout below. */ 358 if (pointer_format != DYLD_CHAINED_PTR_64 && pointer_format != 6u) 359 continue; 360 u16 page_size = rd_u16_le(sis + 4); 361 if (!page_size) continue; 362 /* Resolve this segment's vmaddr from its file offset. */ 363 u64 seg_va = 0; 364 int found_seg = 0; 365 for (u32 k = 0; k < nseg; ++k) { 366 if (seg_fileoff[k] == segment_offset) { 367 seg_va = seg_vaddr[k]; 368 found_seg = 1; 369 break; 370 } 371 } 372 if (!found_seg) continue; 373 for (u32 pg = 0; pg < page_count; ++pg) { 374 u64 ps_pos = (u64)starts_offset + seg_info_offset + 22u + pg * 2u; 375 if (ps_pos + 2u > cf_size) break; 376 u16 ps = rd_u16_le(cf + ps_pos); 377 if (ps == 0xFFFFu) continue; 378 u32 cur = ps; 379 for (;;) { 380 u64 file_loc = segment_offset + (u64)pg * page_size + cur; 381 if (file_loc + 8u > len) break; 382 u64 v = rd_u64_le(data + file_loc); 383 u64 vaddr = seg_va + (u64)pg * page_size + cur; 384 int is_bind = (int)((v >> 63) & 1u); 385 ObjImageReloc dr; 386 dr.section = OBJ_SEC_NONE; 387 dr.offset = vaddr; 388 if (is_bind) { 389 u32 ordinal = (u32)(v & 0xffffffu); 390 dr.sym_name = (ordinal < imports_count) ? imp_names[ordinal] : 0; 391 dr.addend = (i64)((v >> 24) & 0xffu); 392 dr.kind = R_ABS64; 393 } else { 394 dr.sym_name = 0; 395 dr.addend = (i64)(v & (((u64)1 << 36) - 1u)); 396 dr.kind = (RelocKind)relative_kind; 397 } 398 obj_image_add_dynreloc(im, &dr); 399 u32 next = (u32)((v >> 51) & 0xfffu); 400 if (!next) break; 401 cur += next * 4u; 402 if (cur >= page_size) break; 403 } 404 } 405 } 406 } 407 } 408 } 409 410 ObjBuilder* read_macho(Compiler* c, const char* name, const u8* data, 411 size_t len) { 412 (void)name; 413 if (len < MACHO_HDR64_SIZE) 414 compiler_panic(c, SRCLOC_NONE, "read_macho: input shorter than header"); 415 416 u32 magic = rd_u32_le(data + 0); 417 if (magic != MH_MAGIC_64) 418 compiler_panic(c, SRCLOC_NONE, "read_macho: bad magic 0x%x", magic); 419 420 u32 cputype = rd_u32_le(data + 4); 421 const ObjFormatImpl* fmt = obj_format_lookup(KIT_OBJ_MACHO); 422 const ObjMachoArchOps* macho = 423 fmt && fmt->macho_cputype ? fmt->macho_cputype(cputype) : NULL; 424 u32 filetype = rd_u32_le(data + 12); 425 u32 ncmds = rd_u32_le(data + 16); 426 u32 sizeofcmds = rd_u32_le(data + 20); 427 u32 mh_flags = rd_u32_le(data + 24); 428 429 if (!macho || !macho->reloc_from) 430 compiler_panic(c, SRCLOC_NONE, "read_macho: unsupported cputype 0x%x", 431 cputype); 432 /* MH_OBJECT parses to the section/symbol/reloc view only. MH_EXECUTE / 433 * MH_DYLIB additionally get the linked-image view (read_macho_image, at 434 * the end); their sections still parse through the same passes. */ 435 if (filetype != MH_OBJECT && filetype != MH_EXECUTE && filetype != MH_DYLIB) 436 compiler_panic(c, SRCLOC_NONE, 437 "read_macho: unsupported filetype %u (expected MH_OBJECT, " 438 "MH_EXECUTE, or MH_DYLIB)", 439 filetype); 440 441 if ((u64)MACHO_HDR64_SIZE + sizeofcmds > len) 442 compiler_panic(c, SRCLOC_NONE, "read_macho: load commands exceed file"); 443 444 /* ---- pass 1: walk load commands, collect sections, symtab cmd. */ 445 MSecRec* msecs = NULL; 446 u32 nmsecs = 0; 447 u32 symoff = 0, nsyms = 0, stroff = 0, strsize = 0; 448 449 u64 pos = MACHO_HDR64_SIZE; 450 u64 end = pos + sizeofcmds; 451 for (u32 ci = 0; ci < ncmds && pos + 8 <= end; ++ci) { 452 u32 cmd = rd_u32_le(data + pos); 453 u32 cmdsize = rd_u32_le(data + pos + 4); 454 if (cmdsize < 8 || pos + cmdsize > end) 455 compiler_panic(c, SRCLOC_NONE, "read_macho: malformed load command"); 456 457 if (cmd == LC_SEGMENT_64) { 458 u32 nsects = rd_u32_le(data + pos + 64); 459 if (MACHO_SEGCMD64_SIZE + (u64)nsects * MACHO_SECT64_SIZE > cmdsize) 460 compiler_panic(c, SRCLOC_NONE, "read_macho: segment cmd truncated"); 461 MSecRec* extra = arena_array(c->scratch, MSecRec, nmsecs + nsects); 462 if (msecs && nmsecs) memcpy(extra, msecs, sizeof(MSecRec) * nmsecs); 463 msecs = extra; 464 const u8* sp = data + pos + MACHO_SEGCMD64_SIZE; 465 for (u32 si = 0; si < nsects; ++si, sp += MACHO_SECT64_SIZE) { 466 MSecRec* m = &msecs[nmsecs++]; 467 memset(m, 0, sizeof *m); 468 memcpy(m->sectname, sp + 0, 16); 469 memcpy(m->segname, sp + 16, 16); 470 m->seg_len = fixed16_len(m->segname); 471 m->sect_len = fixed16_len(m->sectname); 472 m->addr = rd_u64_le(sp + 32); 473 m->size = rd_u64_le(sp + 40); 474 m->fileoff = rd_u32_le(sp + 48); 475 m->align_log2 = rd_u32_le(sp + 52); 476 m->reloff = rd_u32_le(sp + 56); 477 m->nreloc = rd_u32_le(sp + 60); 478 m->flags = rd_u32_le(sp + 64); 479 m->reserved2 = rd_u32_le(sp + 72); 480 } 481 } else if (cmd == LC_SYMTAB) { 482 symoff = rd_u32_le(data + pos + 8); 483 nsyms = rd_u32_le(data + pos + 12); 484 stroff = rd_u32_le(data + pos + 16); 485 strsize = rd_u32_le(data + pos + 20); 486 } 487 pos += cmdsize; 488 } 489 490 if (stroff + (u64)strsize > len) 491 compiler_panic(c, SRCLOC_NONE, "read_macho: string table out of range"); 492 if (symoff + (u64)nsyms * MACHO_NLIST64_SIZE > len) 493 compiler_panic(c, SRCLOC_NONE, "read_macho: symbol table out of range"); 494 const u8* strtab = data + stroff; 495 496 ObjBuilder* ob = obj_new(c); 497 if (!ob) compiler_panic(c, SRCLOC_NONE, "read_macho: obj_new failed"); 498 499 /* ---- pass 2: create ObjSecs and copy bytes. */ 500 for (u32 i = 0; i < nmsecs; ++i) { 501 MSecRec* m = &msecs[i]; 502 /* Build "__SEG,__sect"-form name; matches what emit_macho would 503 * round-trip back out. */ 504 char nmbuf[34]; 505 u32 nlen = 0; 506 memcpy(nmbuf + nlen, m->segname, m->seg_len); 507 nlen += m->seg_len; 508 nmbuf[nlen++] = ','; 509 memcpy(nmbuf + nlen, m->sectname, m->sect_len); 510 nlen += m->sect_len; 511 Sym sn = pool_intern_slice(c->global, (Slice){.s = nmbuf, .len = nlen}); 512 513 u16 kind = sec_kind_from_seg_sect(m->segname, m->seg_len, m->sectname, 514 m->sect_len, m->flags); 515 u16 flags = sec_flags_from(m->flags, kind); 516 u16 sem = sec_sem_from(m->flags, kind); 517 u32 align = 1u << (m->align_log2 & 31); 518 519 ObjSecId id = obj_section_ex(ob, sn, (SecKind)kind, (SecSem)sem, flags, 520 align, m->reserved2, 0, 0); 521 if (id == OBJ_SEC_NONE) 522 compiler_panic(c, SRCLOC_NONE, "read_macho: obj_section_ex failed"); 523 524 /* Preserve the raw mach section.flags so emit_macho can write back 525 * the same S_TYPE / S_ATTR_* bits. */ 526 obj_section_set_ext(ob, id, OBJ_EXT_MACHO, m->flags, 0); 527 528 if (sem == SSEM_NOBITS) { 529 obj_reserve_bss(ob, id, (u32)m->size, align); 530 } else if (m->size) { 531 if (m->fileoff + m->size > len) 532 compiler_panic(c, SRCLOC_NONE, "read_macho: section bytes out of range"); 533 obj_write(ob, id, data + m->fileoff, (size_t)m->size); 534 } 535 m->obj_sec = id; 536 } 537 538 /* ---- pass 3: parse symbol table. Two-pass strategy: first pass 539 * creates undefs (so relocations can refer to them), second 540 * pass creates defined locals/extdefs. Both write into 541 * mach_idx -> ObjSymId so reloc resolution works. */ 542 ObjSymId* sym_macho_to_obj = 543 arena_zarray(c->scratch, ObjSymId, nsyms ? nsyms : 1); 544 MAtomCand* atom_cands = 545 arena_zarray(c->scratch, MAtomCand, nsyms ? nsyms : 1); 546 u32 natom_cands = 0; 547 548 const u8* sbase = data + symoff; 549 for (u32 i = 0; i < nsyms; ++i) { 550 const u8* p = sbase + (u64)i * MACHO_NLIST64_SIZE; 551 u32 strx = rd_u32_le(p + 0); 552 u8 n_type = p[4]; 553 u8 n_sect = p[5]; 554 u16 n_desc = rd_u16_le(p + 6); 555 u64 n_value = rd_u64_le(p + 8); 556 557 const char* nm = ""; 558 u32 nlen = 0; 559 if (strx < strsize) { 560 nm = (const char*)(strtab + strx); 561 while (strx + nlen < strsize && nm[nlen]) ++nlen; 562 } 563 /* Mach-O names round-trip verbatim — the leading `_` Apple 564 * toolchains apply to C symbols is part of the on-disk name as 565 * far as ObjBuilder is concerned. Name-canonicalization (the 566 * `test_main` ↔ `_test_main` mapping for API callers) happens 567 * one layer up at the linker API boundary (link_c_name_intern 568 * in link.c); the on-disk shape stays byte-for-byte stable. */ 569 Sym sn = 570 nlen ? pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen}) : 0; 571 572 u8 type_field = (u8)(n_type & N_TYPE); 573 u8 ext = (u8)(n_type & N_EXT); 574 u8 pext = (u8)(n_type & N_PEXT); 575 576 u16 bind = ext ? SB_GLOBAL : SB_LOCAL; 577 /* Weak DEFs (defined symbols) carry N_WEAK_DEF; weak REFs (undef 578 * `__attribute__((weak))` references) carry N_WEAK_REF. Either 579 * one collapses to SB_WEAK in the kit model. */ 580 if (ext && (n_desc & (N_WEAK_DEF | N_WEAK_REF))) bind = SB_WEAK; 581 u8 vis = pext ? SV_HIDDEN : SV_DEFAULT; 582 583 u16 kind; 584 ObjSecId sec_id = OBJ_SEC_NONE; 585 u64 value = 0; 586 u64 size = 0; 587 u64 cmnalign = 0; 588 589 if (type_field == N_UNDF) { 590 if (ext && n_value != 0) { 591 /* Common: n_value is size, n_desc encodes log2(align) in 592 * GET_COMM_ALIGN bits. */ 593 kind = SK_COMMON; 594 value = 0; 595 size = n_value; 596 u32 la = (u32)((n_desc >> 8) & 0xf); 597 cmnalign = 1u << la; 598 } else { 599 kind = SK_UNDEF; 600 } 601 } else if (type_field == N_ABS) { 602 kind = SK_ABS; 603 value = n_value; 604 } else if (type_field == N_SECT) { 605 if (n_sect == 0 || n_sect > nmsecs) { 606 kind = SK_NOTYPE; 607 } else { 608 sec_id = msecs[n_sect - 1].obj_sec; 609 /* MH_OBJECT: the obj model and the linker treat an input 610 * symbol's value as a section-local offset, and a relocatable 611 * .o's sections carry non-zero layout addrs, so subtract the 612 * section base. Linked images (MH_EXECUTE/MH_DYLIB) keep the 613 * absolute n_value so nm / objdump -t / size / addr2line report 614 * real vaddrs — matching the ELF reader, whose st_value is 615 * already absolute for images. */ 616 if (filetype == MH_OBJECT) { 617 u64 base = msecs[n_sect - 1].addr; 618 value = (n_value >= base) ? (n_value - base) : 0; 619 } else { 620 value = n_value; 621 } 622 kind = (msecs[n_sect - 1].flags & S_ATTR_PURE_INSTRUCTIONS) ? SK_FUNC 623 : SK_OBJ; 624 } 625 } else { 626 kind = SK_NOTYPE; 627 } 628 629 ObjSymId id = obj_symbol_ex(ob, sn, (SymBind)bind, (SymVis)vis, 630 (SymKind)kind, sec_id, value, size, cmnalign); 631 obj_sym_mark_referenced(ob, id); 632 if ((mh_flags & MH_SUBSECTIONS_VIA_SYMBOLS) && type_field == N_SECT && 633 sec_id != OBJ_SEC_NONE) { 634 MAtomCand* ac = &atom_cands[natom_cands++]; 635 ac->sec = sec_id; 636 ac->sym = id; 637 ac->offset = (u32)value; 638 if ((n_desc & N_NO_DEAD_STRIP) || 639 (n_sect != 0 && n_sect <= nmsecs && 640 (msecs[n_sect - 1].flags & S_ATTR_NO_DEAD_STRIP))) { 641 ac->flags |= OBJ_ATOM_RETAIN; 642 } 643 } 644 /* n_desc carries Mach-O attribute bits beyond what bind/vis/kind 645 * model — N_NO_DEAD_STRIP, N_REF_TO_WEAK, N_ARM_THUMB_DEF, etc. 646 * Mask off the bits we already round-trip via bind (N_WEAK_DEF / 647 * N_WEAK_REF) and the alignment field for commons (which lives 648 * in cmnalign), then stash the remainder so emit_macho can OR it 649 * back in. */ 650 u16 desc_pass = n_desc; 651 desc_pass &= (u16) ~(N_WEAK_DEF | N_WEAK_REF); 652 if (kind == SK_COMMON) desc_pass &= 0x00ff; /* drop align field */ 653 if (desc_pass) obj_symbol_set_flags(ob, id, desc_pass); 654 sym_macho_to_obj[i] = id; 655 } 656 657 if (mh_flags & MH_SUBSECTIONS_VIA_SYMBOLS) { 658 if (natom_cands > 1u) 659 qsort(atom_cands, natom_cands, sizeof(*atom_cands), matom_cand_cmp); 660 for (u32 i = 0; i < natom_cands; ++i) { 661 MAtomCand* ac = &atom_cands[i]; 662 const Section* sec = obj_section_get(ob, ac->sec); 663 u32 end = sec ? ((sec->sem == SSEM_NOBITS || sec->kind == SEC_BSS) 664 ? sec->bss_size 665 : sec->bytes.total) 666 : ac->offset; 667 if (i + 1u < natom_cands && atom_cands[i + 1u].sec == ac->sec) 668 end = atom_cands[i + 1u].offset; 669 if (end >= ac->offset) 670 obj_atom_define(ob, ac->sec, ac->offset, end - ac->offset, ac->sym, 671 ac->flags); 672 } 673 } 674 675 /* ---- pass 4: parse per-section relocations into ObjBuilder relocs. 676 * Mach-O encodes addends out-of-band as a leading 677 * ARM64_RELOC_ADDEND followed by the real reloc; the 678 * reader collapses the pair on the way in. */ 679 /* Lazily-populated section-start local symbols, for clang-emitted 680 * non-extern (section-relative) relocations. See the r_extern==0 681 * branch below for the encoding. */ 682 ObjSymId* sec_start_sym = 683 arena_zarray(c->scratch, ObjSymId, nmsecs ? nmsecs : 1); 684 for (u32 i = 0; i < nmsecs; ++i) sec_start_sym[i] = OBJ_SYM_NONE; 685 for (u32 i = 0; i < nmsecs; ++i) { 686 MSecRec* m = &msecs[i]; 687 if (!m->nreloc) continue; 688 if (m->reloff + (u64)m->nreloc * MACHO_RELOC_SIZE > len) 689 compiler_panic(c, SRCLOC_NONE, "read_macho: relocation table out of range"); 690 const u8* rp = data + m->reloff; 691 i64 pending_addend = 0; 692 int have_pending = 0; 693 int pending_subtractor = 0; 694 u32 pending_subtractor_offset = 0; 695 u32 pending_subtractor_length = 0; 696 for (u32 j = 0; j < m->nreloc; ++j) { 697 u32 r_address = rd_u32_le(rp + j * MACHO_RELOC_SIZE); 698 u32 packed = rd_u32_le(rp + j * MACHO_RELOC_SIZE + 4); 699 u32 r_symbolnum = packed & 0x00ffffffu; 700 u32 r_pcrel = (packed >> 24) & 1u; 701 u32 r_length = (packed >> 25) & 3u; 702 u32 r_extern = (packed >> 27) & 1u; 703 u32 r_type = (packed >> 28) & 0xfu; 704 705 if (r_type == ARM64_RELOC_ADDEND) { 706 /* Sign-extend 24-bit addend. */ 707 i32 ad = (i32)(r_symbolnum & 0x00ffffffu); 708 if (ad & 0x00800000) ad |= ~0x00ffffff; 709 pending_addend = (i64)ad; 710 have_pending = 1; 711 continue; 712 } 713 714 u32 kind; 715 if (r_type == ARM64_RELOC_SUBTRACTOR) { 716 kind = (r_length == 3) ? R_SUB64 717 : (r_length == 2) ? R_SUB32 718 : (r_length == 1) ? R_SUB16 719 : R_SUB8; 720 } else { 721 kind = macho->reloc_from(r_type); 722 } 723 if (kind == (u32)-1) 724 compiler_panic(c, SRCLOC_NONE, "read_macho: unsupported reloc type %u", 725 r_type); 726 727 /* Refine kind by (r_pcrel, r_length) when the type field alone 728 * is ambiguous. ARM64_RELOC_UNSIGNED collapses R_ABS64/R_ABS32 729 * and PC-relative variants. */ 730 if (r_type == ARM64_RELOC_UNSIGNED) { 731 if (pending_subtractor && pending_subtractor_offset == r_address && 732 pending_subtractor_length == r_length) { 733 kind = (r_length == 3) ? R_ADD64 734 : (r_length == 2) ? R_ADD32 735 : (r_length == 1) ? R_ADD16 736 : R_ADD8; 737 pending_subtractor = 0; 738 } else if (r_pcrel) { 739 kind = (r_length == 3) ? R_PC64 : R_PC32; 740 } else { 741 kind = (r_length == 3) ? R_ABS64 : R_ABS32; 742 } 743 } else if (r_type == ARM64_RELOC_BRANCH26) { 744 kind = R_AARCH64_CALL26; 745 } else if (r_type == ARM64_RELOC_PAGEOFF12) { 746 /* PAGEOFF12 is access-size-agnostic in Mach-O; the linker 747 * applier needs to scale the immediate by the load/store size 748 * (or apply it raw for ADD). Inspect the patched instruction 749 * at r_address to pick the right RelocKind so the applier in 750 * link_reloc.c shifts the lo12 correctly. */ 751 if (m->fileoff + r_address + 4u > len) 752 compiler_panic(c, SRCLOC_NONE, 753 "read_macho: PAGEOFF12 r_address %u out of range", 754 r_address); 755 u32 ins = rd_u32_le(data + m->fileoff + r_address); 756 /* ADD (immediate): bits 30:24 = 0010001 (W=10001 / X=10010001). 757 * Mask 0x7f800000 isolates sf=0/1 + the 0010001 pattern; values 758 * 0x11000000 (32-bit) and 0x91000000 (64-bit) — match the latter 759 * via the same 0x7f mask leaving bit 31 free. */ 760 if ((ins & 0x7f800000u) == 0x11000000u) { 761 kind = R_AARCH64_ADD_ABS_LO12_NC; 762 } else if ((ins & 0x3b000000u) == 0x39000000u) { 763 /* LDR/STR (immediate unsigned offset). Bits 29:27=111, bit 26=V 764 * (0=integer, 1=SIMD/FP), bits 25:24=01. size in [31:30] plus 765 * opc bit 23 for the SIMD 128-bit case (size=00, opc=11). */ 766 u32 sz = (ins >> 30) & 3u; 767 u32 v_bit = (ins >> 26) & 1u; 768 u32 opc1 = (ins >> 23) & 1u; 769 if (v_bit && sz == 0 && opc1) { 770 kind = R_AARCH64_LDST128_ABS_LO12_NC; 771 } else { 772 kind = (sz == 0) ? R_AARCH64_LDST8_ABS_LO12_NC 773 : (sz == 1) ? R_AARCH64_LDST16_ABS_LO12_NC 774 : (sz == 2) ? R_AARCH64_LDST32_ABS_LO12_NC 775 : R_AARCH64_LDST64_ABS_LO12_NC; 776 } 777 } 778 /* else: leave as the default R_AARCH64_ADD_ABS_LO12_NC. */ 779 } 780 781 ObjSymId target = OBJ_SYM_NONE; 782 i64 inplace_addend_override = 0; 783 int use_inplace_addend = 0; 784 if (r_extern) { 785 if (r_symbolnum < nsyms) target = sym_macho_to_obj[r_symbolnum]; 786 if (!have_pending && r_type == ARM64_RELOC_UNSIGNED) { 787 u32 rsz = 1u << r_length; 788 if ((u64)m->fileoff + r_address + rsz > len) 789 compiler_panic(c, SRCLOC_NONE, 790 "read_macho: extern unsigned reloc r_address out " 791 "of range"); 792 const u8* pv = data + m->fileoff + r_address; 793 u64 inplace; 794 if (r_length == 3) 795 inplace = rd_u64_le(pv); 796 else if (r_length == 2) 797 inplace = (u64)rd_u32_le(pv); 798 else if (r_length == 1) 799 inplace = (u64)rd_u16_le(pv); 800 else 801 inplace = (u64)pv[0]; 802 inplace_addend_override = (i64)inplace; 803 use_inplace_addend = 1; 804 } 805 } else { 806 /* Section-relative reloc — clang emits these for compact unwind, 807 * EH frame, and DWARF debug info. r_symbolnum is the 1-based 808 * section index; the in-place value at r_address is the absolute 809 * .o virtual address of the referent. Synthesize a local 810 * symbol pointing to the target section's start (lazily, once 811 * per section) and re-express the reloc as 812 * target = sec_start_sym, addend = inplace - section.addr. */ 813 if (r_symbolnum == 0 || r_symbolnum > nmsecs) 814 compiler_panic(c, SRCLOC_NONE, 815 "read_macho: section-relative reloc references " 816 "invalid section index %u", 817 r_symbolnum); 818 u32 sec_idx = r_symbolnum - 1u; 819 MSecRec* tm = &msecs[sec_idx]; 820 if (sec_start_sym[sec_idx] == OBJ_SYM_NONE) { 821 /* Build ".Lkit.macho_secstart.<sec_idx>" without snprintf 822 * (the freestanding build doesn't pull in stdio). */ 823 static const char prefix[] = ".Lkit.macho_secstart."; 824 char nmbuf[sizeof(prefix) + 10]; 825 u32 nlen = (u32)(sizeof(prefix) - 1); 826 memcpy(nmbuf, prefix, nlen); 827 char dec[10]; 828 u32 dn = 0; 829 u32 v = sec_idx; 830 do { 831 dec[dn++] = (char)('0' + (v % 10u)); 832 v /= 10u; 833 } while (v); 834 for (u32 k = 0; k < dn; ++k) nmbuf[nlen + k] = dec[dn - 1 - k]; 835 nlen += dn; 836 Sym sn = 837 pool_intern_slice(c->global, (Slice){.s = nmbuf, .len = nlen}); 838 u16 sk = (tm->flags & S_ATTR_PURE_INSTRUCTIONS) ? SK_FUNC : SK_OBJ; 839 sec_start_sym[sec_idx] = 840 obj_symbol(ob, sn, SB_LOCAL, (SymKind)sk, tm->obj_sec, 0, 0); 841 } 842 target = sec_start_sym[sec_idx]; 843 u32 rsz = 1u << r_length; 844 if ((u64)m->fileoff + r_address + rsz > len) 845 compiler_panic(c, SRCLOC_NONE, 846 "read_macho: non-extern reloc r_address out of range"); 847 u64 inplace; 848 const u8* pv = data + m->fileoff + r_address; 849 if (r_length == 3) 850 inplace = rd_u64_le(pv); 851 else if (r_length == 2) 852 inplace = (u64)rd_u32_le(pv); 853 else if (r_length == 1) 854 inplace = (u64)rd_u16_le(pv); 855 else 856 inplace = (u64)pv[0]; 857 inplace_addend_override = (i64)inplace - (i64)tm->addr; 858 use_inplace_addend = 1; 859 } 860 861 i64 addend = have_pending 862 ? pending_addend 863 : (use_inplace_addend ? inplace_addend_override : 0); 864 int has_explicit = have_pending || use_inplace_addend || addend != 0; 865 have_pending = 0; 866 pending_addend = 0; 867 868 obj_reloc_ex(ob, m->obj_sec, r_address, (RelocKind)kind, target, addend, 869 has_explicit, 0); 870 if (r_type == ARM64_RELOC_SUBTRACTOR) { 871 pending_subtractor = 1; 872 pending_subtractor_offset = r_address; 873 pending_subtractor_length = r_length; 874 } 875 } 876 } 877 878 /* MH_EXECUTE / MH_DYLIB: attach the linked-image view (segments, dylibs, 879 * entry, dynamic symbols + relocations). */ 880 if (filetype != MH_OBJECT) 881 read_macho_image(c, ob, data, len, filetype, cputype, msecs, nmsecs); 882 883 obj_finalize(ob); 884 return ob; 885 } 886 887 /* ---- read_macho_dso ---- 888 * 889 * MH_DYLIB reader. Walks load commands once to find LC_ID_DYLIB 890 * (install-name) and LC_SYMTAB (symbol table + string table), then 891 * emits one defined ObjSym per externally-visible nlist entry. 892 * 893 * Like read_elf_dso, the produced ObjBuilder carries no sections / 894 * relocations / groups — only symbol definitions in OBJ_SEC_NONE. The 895 * consumer's resolve_undefs sees these as defined globals and marks the 896 * matching consumer-side undef as `imported`. The dylib's own undefs 897 * (its imports of other dylibs) are filtered: they don't satisfy any 898 * undef in the consumer. */ 899 900 ObjBuilder* read_macho_dso(Compiler* c, const char* name, const u8* data, 901 size_t len, Sym* install_name_out) { 902 (void)name; 903 if (install_name_out) *install_name_out = 0; 904 if (len < MACHO_HDR64_SIZE) 905 compiler_panic(c, SRCLOC_NONE, "read_macho_dso: input shorter than header"); 906 907 u32 magic = rd_u32_le(data + 0); 908 if (magic != MH_MAGIC_64) 909 compiler_panic(c, SRCLOC_NONE, "read_macho_dso: bad magic 0x%x", magic); 910 911 u32 cputype = rd_u32_le(data + 4); 912 u32 filetype = rd_u32_le(data + 12); 913 u32 ncmds = rd_u32_le(data + 16); 914 u32 sizeofcmds = rd_u32_le(data + 20); 915 916 { 917 const ObjFormatImpl* fmt = obj_format_lookup(KIT_OBJ_MACHO); 918 const ObjMachoArchOps* macho = 919 fmt && fmt->macho_cputype ? fmt->macho_cputype(cputype) : NULL; 920 if (!macho) 921 compiler_panic(c, SRCLOC_NONE, "read_macho_dso: unsupported cputype 0x%x", 922 cputype); 923 } 924 if (filetype != MH_DYLIB && filetype != MH_BUNDLE) 925 compiler_panic(c, SRCLOC_NONE, 926 "read_macho_dso: not MH_DYLIB/MH_BUNDLE (filetype=%u)", 927 filetype); 928 if ((u64)MACHO_HDR64_SIZE + sizeofcmds > len) 929 compiler_panic(c, SRCLOC_NONE, "read_macho_dso: load commands exceed file"); 930 931 u32 symoff = 0, nsyms = 0, stroff = 0, strsize = 0; 932 Sym install_name = 0; 933 934 u64 pos = MACHO_HDR64_SIZE; 935 u64 end = pos + sizeofcmds; 936 for (u32 ci = 0; ci < ncmds && pos + 8 <= end; ++ci) { 937 u32 cmd = rd_u32_le(data + pos); 938 u32 cmdsize = rd_u32_le(data + pos + 4); 939 if (cmdsize < 8 || pos + cmdsize > end) 940 compiler_panic(c, SRCLOC_NONE, "read_macho_dso: malformed load command"); 941 if (cmd == LC_ID_DYLIB) { 942 /* dylib_command: cmd, cmdsize, name(lc_str: 4-byte offset within 943 * the cmd), timestamp, current_version, compat_version. */ 944 if (cmdsize < 24) goto next; 945 u32 nm_off = rd_u32_le(data + pos + 8); 946 if (nm_off >= cmdsize) goto next; 947 const char* p = (const char*)(data + pos + nm_off); 948 u32 maxlen = cmdsize - nm_off; 949 u32 nlen = 0; 950 while (nlen < maxlen && p[nlen]) ++nlen; 951 if (nlen) 952 install_name = 953 pool_intern_slice(c->global, (Slice){.s = p, .len = nlen}); 954 } else if (cmd == LC_SYMTAB) { 955 symoff = rd_u32_le(data + pos + 8); 956 nsyms = rd_u32_le(data + pos + 12); 957 stroff = rd_u32_le(data + pos + 16); 958 strsize = rd_u32_le(data + pos + 20); 959 } 960 next: 961 pos += cmdsize; 962 } 963 if (install_name_out) *install_name_out = install_name; 964 965 if (stroff + (u64)strsize > len) 966 compiler_panic(c, SRCLOC_NONE, "read_macho_dso: string table out of range"); 967 if (symoff + (u64)nsyms * MACHO_NLIST64_SIZE > len) 968 compiler_panic(c, SRCLOC_NONE, "read_macho_dso: symbol table out of range"); 969 970 ObjBuilder* ob = obj_new(c); 971 if (!ob) compiler_panic(c, SRCLOC_NONE, "read_macho_dso: obj_new failed"); 972 973 const u8* strtab = data + stroff; 974 const u8* sbase = data + symoff; 975 for (u32 i = 0; i < nsyms; ++i) { 976 const u8* p = sbase + (u64)i * MACHO_NLIST64_SIZE; 977 u32 strx = rd_u32_le(p + 0); 978 u8 n_type = p[4]; 979 u16 n_desc = rd_u16_le(p + 6); 980 981 u8 type_field = (u8)(n_type & N_TYPE); 982 u8 ext = (u8)(n_type & N_EXT); 983 /* Skip non-external (locals) and undef refs (the dylib's own imports). */ 984 if (!ext) continue; 985 if (type_field == N_UNDF) continue; 986 /* N_INDR / N_PBUD / N_STAB: skip — not interesting for static link. */ 987 if (n_type & N_STAB) continue; 988 989 if (strx >= strsize) continue; 990 const char* nm = (const char*)(strtab + strx); 991 u32 nlen = 0; 992 while (strx + nlen < strsize && nm[nlen]) ++nlen; 993 if (!nlen) continue; 994 Sym sn = pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen}); 995 996 SymBind bind = (n_desc & (N_WEAK_DEF | N_WEAK_REF)) ? SB_WEAK : SB_GLOBAL; 997 SymKind kind = SK_NOTYPE; 998 /* Mach-O dylib nlist doesn't carry STT_FUNC / STT_OBJECT cleanly — 999 * default to NOTYPE. The consuming linker uses dso_export_is_func 1000 * to peek at this for ELF; for Mach-O the `imported` decision flows 1001 * through synthetic __got / __stubs regardless of kind. */ 1002 { 1003 ObjSymId did = 1004 obj_symbol_ex(ob, sn, bind, SV_DEFAULT, kind, OBJ_SEC_NONE, 0, 0, 0); 1005 obj_sym_mark_referenced(ob, did); 1006 } 1007 } 1008 1009 obj_finalize(ob); 1010 return ob; 1011 }