link.c (99433B)
/* link_emit_macho — write a dyld-loadable arm64 MH_EXECUTE.
 *
 * Mach-O peer of link_emit_elf. Produces a position-independent
 * MH_EXECUTE that links against libSystem.B.dylib (or any other
 * dylib/.tbd input) via LC_LOAD_DYLIB + LC_DYLD_CHAINED_FIXUPS. The
 * binary is ad-hoc codesigned at the tail so the kernel will exec it
 * on macOS 11+.
 *
 * Layout (Apple's stock arm64 layout):
 *
 *   __PAGEZERO     vmaddr 0, vmsize 0x100000000, no file bytes
 *   __TEXT         (R-X)
 *     mach_header_64
 *     load commands
 *     [SF_EXEC sections — .text]
 *     [SF_ALLOC R-only sections — .rodata, init/fini_array, etc.]
 *     __stubs        (12B per import-func)
 *   __DATA_CONST   (RW initially, dyld marks R-only after fixups)
 *     __got          (8B per import — both data and func imports)
 *   __DATA         (R-W)
 *     [SF_WRITE sections — .data, .bss]
 *     __thread_ptrs  (8B per referenced TLV descriptor)
 *   __DWARF        (R; file-only .debug_* sections, may be empty)
 *   __LINKEDIT     (R)
 *     dyld_chained_fixups blob
 *     dyld_exports_trie blob
 *     function starts (empty)
 *     data in code (empty)
 *     symtab
 *     indirect symbol table (one entry per __stubs and __got slot)
 *     strtab
 *     code signature
 *
 * Imports are routed:
 *   CALL26/JUMP26 against an imported function    -> __stubs entry
 *   GOT_LOAD_PAGE21/PAGEOFF12 against any import  -> __got slot
 *   ABS64 against an imported symbol              -> chained-bind at site
 *   ABS64 against a defined internal symbol       -> chained-rebase at site
 *
 * arm64-only. x86_64-macos arrives with x64 codegen.
*/ 39 40 #include "link/link.h" 41 42 #include <string.h> 43 44 #include "core/bytes.h" 45 #include "core/heap.h" 46 #include "core/pool.h" 47 #include "core/sha256.h" 48 #include "core/slice.h" 49 #include "core/util.h" 50 #include "core/vec.h" 51 #include "link/link_arch.h" 52 #include "link/link_internal.h" 53 #include "link/link_reloc_desc.h" 54 #include "obj/format.h" 55 #include "obj/macho/macho.h" 56 57 /* ---- constants ---- */ 58 #define MZ_PAGEZERO 0x100000000ULL 59 #define MZ_PAGE 0x4000ULL 60 #define MZ_GOT_SIZE 8u 61 /* __DATA,__thread_ptrs slot size — one pointer per unique TLV referenced 62 * via TLVP_LOAD_PAGE21/PAGEOFF12. Each slot holds the address of the 63 * matching TLV descriptor in __DATA,__thread_vars. */ 64 #define MZ_TLVP_SIZE 8u 65 66 #define DYLD_CHAINED_PTR_64 2u 67 #define DYLD_CHAINED_IMPORT 1u 68 69 #define VM_PROT_READ 0x1u 70 #define VM_PROT_WRITE 0x2u 71 #define VM_PROT_EXECUTE 0x4u 72 73 #define CS_MAGIC_EMBEDDED_SIGNATURE 0xfade0cc0u 74 #define CS_MAGIC_CODEDIRECTORY 0xfade0c02u 75 #define CSSLOT_CODEDIRECTORY 0u 76 #define CS_HASHTYPE_SHA256 2u 77 #define CS_SHA256_LEN SHA256_DIGEST_LEN 78 #define CS_PAGE_SIZE_LOG2 12u 79 #define CS_EXECSEG_MAIN_BINARY 1u 80 81 /* extra LC ids */ 82 #define LC_DYLD_INFO_ONLY (0x22u | 0x80000000u) 83 #define LC_FUNCTION_STARTS_C 0x26u 84 #define LC_DATA_IN_CODE_C 0x29u 85 #define LC_CODE_SIGNATURE_C 0x1du 86 87 /* ---- byte buffer ---- */ 88 89 typedef struct MByte { 90 Heap* heap; 91 u8* data; 92 u32 len; 93 u32 cap; 94 } MByte; 95 96 static void mbuf_init(MByte* b, Heap* h) { 97 b->heap = h; 98 b->data = NULL; 99 b->len = 0; 100 b->cap = 0; 101 } 102 static void mbuf_fini(MByte* b) { 103 if (b->data) b->heap->free(b->heap, b->data, b->cap); 104 b->data = NULL; 105 b->cap = b->len = 0; 106 } 107 static void mbuf_reserve(MByte* b, u32 need) { 108 if (need <= b->cap) return; 109 (void)VEC_GROW(b->heap, b->data, b->cap, need); 110 } 111 static u32 mbuf_align(MByte* b, u32 a) { 112 u32 n = 
(u32)ALIGN_UP((u64)b->len, (u64)a); 113 if (n > b->len) { 114 mbuf_reserve(b, n); 115 memset(b->data + b->len, 0, n - b->len); 116 b->len = n; 117 } 118 return b->len; 119 } 120 static u32 mbuf_append(MByte* b, const void* src, u32 n) { 121 u32 off = b->len; 122 mbuf_reserve(b, b->len + n); 123 if (n) memcpy(b->data + b->len, src, n); 124 b->len += n; 125 return off; 126 } 127 static u32 mbuf_u32(MByte* b, u32 v) { 128 u8 t[4]; 129 wr_u32_le(t, v); 130 return mbuf_append(b, t, 4); 131 } 132 static u32 mbuf_u16(MByte* b, u16 v) { 133 u8 t[2]; 134 wr_u16_le(t, v); 135 return mbuf_append(b, t, 2); 136 } 137 static u32 mbuf_u64(MByte* b, u64 v) { 138 u8 t[8]; 139 wr_u64_le(t, v); 140 return mbuf_append(b, t, 8); 141 } 142 static u32 mbuf_u8(MByte* b, u8 v) { return mbuf_append(b, &v, 1); } 143 static u32 mbuf_str(MByte* b, const char* s, u32 n) { 144 u32 off = b->len; 145 mbuf_reserve(b, b->len + n + 1u); 146 if (n) memcpy(b->data + b->len, s, n); 147 b->data[b->len + n] = 0; 148 b->len += n + 1u; 149 return off; 150 } 151 152 /* ---- imports + dylibs ---- */ 153 154 typedef struct MachImp { 155 LinkSymId sym; 156 Sym name; 157 u32 dylib_ord; /* 1-based ordinal into LC_LOAD_DYLIB list */ 158 u32 stub_idx; /* 1-based index into __stubs (0 if data import) */ 159 u32 got_idx; /* 1-based index into __got */ 160 u32 imports_strx; /* offset into chained-fixups symbol pool */ 161 u8 is_func; 162 u8 weak; 163 /* internal=1 means this entry is an in-image symbol that's referenced 164 * via GOT_LOAD_PAGE21 / LD64_GOT_LO12_NC (clang emits these for any 165 * extern global so a single static-link can later become PIC). The 166 * GOT slot stores the symbol's image-relative vaddr and gets a 167 * chained-fixup rebase entry (or no entry at all for a weak-undef 168 * resolving to NULL). No dylib_ord / stub_idx / chained-fixup bind. 
*/ 169 u8 internal; 170 u8 pad[1]; 171 u64 internal_vaddr; /* image-relative target vaddr; meaningful only when 172 internal=1 */ 173 } MachImp; 174 175 typedef struct MachDylib { 176 Sym install; 177 } MachDylib; 178 179 /* One slot in the synthetic __DATA,__thread_ptrs section per unique TLV 180 * descriptor referenced via TLVP_LOAD_PAGE21/PAGEOFF12. Modeled after 181 * MachImp's internal-GOT entries: the slot holds the descriptor address 182 * (REBASE for internal-to-image descriptors, BIND for dylib-imported 183 * ones). The descriptor itself is laid out in __DATA,__thread_vars by 184 * either the input objects (internal) or the providing dylib (imported). */ 185 typedef struct MachTlv { 186 LinkSymId sym; /* canonical descriptor LinkSymId */ 187 u32 tlv_idx; /* 1-based slot index in __thread_ptrs */ 188 u8 imported; /* 1 == descriptor lives in a dylib (BIND), 0 == internal 189 (REBASE) */ 190 u8 pad[3]; 191 u32 import_idx; /* 1-based MachImp index when imported (for chained-bind 192 ordinal) */ 193 } MachTlv; 194 195 /* ---- planned section ---- */ 196 197 typedef struct MSec { 198 /* Source: either a LinkSection (link_sec_id != 0) or a synthetic 199 * pre-built byte buffer (data + size). */ 200 LinkSectionId link_sec_id; 201 const u8* synth_data; 202 u32 synth_size; 203 /* Mach-O placement */ 204 const char* segname; 205 const char* sectname; 206 /* Inline storage for segname/sectname when split from a Mach-O 207 * `__SEG,__sect`-form LinkSection name. Names from string literals 208 * (synth sections, derived-from-flags defaults) point at .rodata 209 * and don't use these. 17 bytes: the on-disk field is a fixed 16 210 * (no NUL needed there), but these are read as C strings, so a full 211 * 16-char name (e.g. __debug_line_str) needs the extra NUL slot. 
*/ 212 char segname_buf[17]; 213 char sectname_buf[17]; 214 u64 vaddr; 215 u64 file_offset; 216 u64 size; 217 u32 align; 218 u32 flags; /* S_TYPE | S_ATTR_* */ 219 u32 reserved1; 220 u32 reserved2; 221 u8 segidx; /* 1=__TEXT, 2=__DATA_CONST, 3=__DATA */ 222 u8 is_zerofill; 223 u8 pad[6]; 224 } MSec; 225 226 static void msec_repair_name_ptrs(MSec* m) { 227 if (m->segname_buf[0]) m->segname = m->segname_buf; 228 if (m->sectname_buf[0]) m->sectname = m->sectname_buf; 229 } 230 231 /* Segment slot indices in MCtx.segs[]. __DWARF carries the file-only 232 * .debug_* sections (debug-info retention); it sits before __LINKEDIT so 233 * the ad-hoc code signature stays the last bytes of the file. */ 234 enum { 235 MSEG_PAGEZERO = 0, 236 MSEG_TEXT = 1, 237 MSEG_DATA_CONST = 2, 238 MSEG_DATA = 3, 239 MSEG_DWARF = 4, 240 MSEG_LINKEDIT = 5, 241 MSEG_COUNT = 6, 242 }; 243 244 typedef struct MSeg { 245 const char* name; 246 u32 maxprot; 247 u32 initprot; 248 u64 vmaddr; 249 u64 vmsize; 250 u64 fileoff; 251 u64 filesize; 252 u32 nsects; /* MSec count in segment — internal layout */ 253 u32 first_sec; /* first index into MSec[] */ 254 u32 nouts; /* OutSec count in segment — what hits the file */ 255 u32 first_out; /* first index into OutSec[] */ 256 } MSeg; 257 258 /* On-disk section view: one record per (segname, sectname) within a 259 * segment. Mach-O requires this — emitting one section_64 per input 260 * MSec yields sibling __TEXT,__text records that violate the spec. 261 * Built from MSec[] after vaddr placement; reloc-apply still uses 262 * MSec[] for byte-buffer addressing. 
*/ 263 typedef struct OutSec { 264 const char* segname; 265 const char* sectname; 266 u64 vaddr; 267 u64 file_offset; 268 u64 size; 269 u32 align; 270 u32 flags; 271 u32 reserved1; 272 u32 reserved2; 273 u8 segidx; 274 u8 is_zerofill; 275 } OutSec; 276 277 /* ---- main context ---- */ 278 279 typedef struct MCtx { 280 LinkImage* img; 281 Compiler* c; 282 Heap* h; 283 Writer* w; 284 Linker* linker; 285 const LinkArchDesc* link_arch; 286 const ObjMachoArchOps* macho; 287 288 /* imports */ 289 MachImp* imports; 290 u32 nimports; 291 u32 nimports_real; /* count of imports with internal=0 (== prefix length; 292 * collect_imports appends internal=1 entries last) */ 293 u32 nimport_funcs; 294 MachDylib* dylibs; 295 u32 ndylibs; 296 /* sym->import index, 1-based, 0 = not an import. Sized to LinkSymId space 297 * + 1. */ 298 u32* sym_to_imp; 299 u32 sym_to_imp_size; 300 301 /* sections + segments */ 302 MSec* secs; 303 u32 nsecs; 304 OutSec* outs; 305 u32 nouts; 306 MSeg segs[MSEG_COUNT]; /* PAGEZERO, TEXT, DATA_CONST, DATA, DWARF, LINKEDIT */ 307 u32 nsegs; 308 309 /* Synthetic byte buffers, owned. */ 310 u8* stubs_bytes; 311 u32 stubs_size; 312 u8* got_bytes; 313 u32 got_size; 314 /* TLV pointer slots — one entry in __DATA,__thread_ptrs per unique 315 * descriptor referenced via TLVP_LOAD_PAGE21/PAGEOFF12. sym_to_tlv 316 * maps LinkSymId → 1-based slot index (parallel to sym_to_imp). Slot 317 * bytes are populated at apply_relocs time once shift_sections has 318 * pinned descriptor vaddrs. */ 319 MachTlv* tlv_slots; 320 u32 ntlv; 321 u32* sym_to_tlv; 322 u32 sym_to_tlv_size; 323 u8* tlv_ptrs_bytes; 324 u32 tlv_ptrs_size; 325 u64 tlv_ptrs_vaddr; 326 /* Vaddr of the first thread-local-storage section 327 * (__thread_data / __thread_bss). Each TLV descriptor's word 2 328 * stores the symbol's offset within this image rather than an 329 * absolute address — see apply_relocs's S_THREAD_LOCAL_VARIABLES 330 * ABS64 special case. 
*/ 331 u64 tls_image_vaddr; 332 u8 has_tls_image; 333 334 /* Final layout (computed during plan) */ 335 u64 text_vaddr; 336 u64 text_filesz; 337 u64 stubs_vaddr; 338 u64 got_vaddr; 339 u64 data_const_vaddr; 340 u64 data_vaddr; 341 u64 data_const_filesz; 342 u64 data_filesz; 343 u64 data_memsz; 344 u64 linkedit_vaddr; 345 u64 linkedit_fileoff; 346 u32 entry_offset; /* offset of entry within __TEXT segment */ 347 348 u64 headers_size; /* header + loadcmds */ 349 350 /* LINKEDIT contents */ 351 MByte chained_fixups; 352 MByte exports_trie; 353 MByte symtab; /* binary nlist_64 array */ 354 MByte strtab; 355 MByte indirect; /* u32 array */ 356 MByte fn_starts; 357 MByte data_in_code; 358 MByte codesig; 359 360 u32 chained_fixups_off; 361 u32 exports_trie_off; 362 u32 fn_starts_off; 363 u32 data_in_code_off; 364 u32 symtab_off; 365 u32 indirect_off; 366 u32 strtab_off; 367 u32 codesig_off; 368 u32 codesig_size; 369 u32 nsyms; 370 371 u8 uuid[16]; 372 } MCtx; 373 374 /* ---- helpers for finding LinkSymbol vaddr ---- */ 375 376 static LinkSymbol* sym_at(LinkImage* img, LinkSymId id) { 377 if (id == LINK_SYM_NONE || id > LinkSyms_count(&img->syms)) return NULL; 378 return LinkSyms_at(&img->syms, id - 1); 379 } 380 381 /* ---- pass: collect imports ---- */ 382 383 static u32 dylib_ordinal_of(MCtx* x, Sym install) { 384 for (u32 j = 0; j < x->ndylibs; ++j) 385 if (x->dylibs[j].install == install) return j + 1u; 386 return 0; 387 } 388 389 static void collect_imports(MCtx* x) { 390 LinkImage* img = x->img; 391 Heap* h = x->h; 392 393 x->sym_to_imp_size = LinkSyms_count(&img->syms) + 1u; 394 x->sym_to_imp = 395 (u32*)h->alloc(h, sizeof(u32) * x->sym_to_imp_size, _Alignof(u32)); 396 if (!x->sym_to_imp) 397 compiler_panic(x->c, SRCLOC_NONE, "link_macho: oom on sym_to_imp"); 398 memset(x->sym_to_imp, 0, sizeof(u32) * x->sym_to_imp_size); 399 400 u32 cap = 0, cap_d = 0; 401 for (u32 i = 0; i < LinkSyms_count(&img->syms); ++i) { 402 LinkSymbol* s = LinkSyms_at(&img->syms, i); 403 if 
(!s->imported) continue; 404 if (s->name == 0) continue; 405 LinkSymId canon = symhash_get(&img->globals, s->name); 406 if (canon != LINK_SYM_NONE && canon != s->id) continue; 407 if (VEC_GROW(h, x->imports, cap, x->nimports + 1u)) 408 compiler_panic(x->c, SRCLOC_NONE, "link_macho: oom on imports"); 409 MachImp* mi = &x->imports[x->nimports++]; 410 memset(mi, 0, sizeof(*mi)); 411 mi->sym = s->id; 412 mi->name = s->name; 413 mi->is_func = (s->kind == SK_FUNC || s->kind == SK_IFUNC) ? 1 : 0; 414 mi->weak = (s->bind == SB_WEAK) ? 1 : 0; 415 x->sym_to_imp[s->id] = x->nimports; 416 } 417 418 /* Back-classify: any CALL26/JUMP26 reloc target -> function. */ 419 for (u32 i = 0; i < LinkRelocs_count(&img->relocs); ++i) { 420 LinkRelocApply* r = LinkRelocs_at(&img->relocs, i); 421 if (!reloc_kind_is_branch(x->c, r->kind)) continue; 422 if (r->target == LINK_SYM_NONE || r->target >= x->sym_to_imp_size) continue; 423 u32 idx = x->sym_to_imp[r->target]; 424 if (!idx) { 425 /* Resolve through canonical. */ 426 LinkSymbol* tgt = LinkSyms_at(&img->syms, r->target - 1); 427 if (tgt->name == 0) continue; 428 LinkSymId canon = symhash_get(&img->globals, tgt->name); 429 if (canon == LINK_SYM_NONE || canon >= x->sym_to_imp_size) continue; 430 idx = x->sym_to_imp[canon]; 431 if (!idx) continue; 432 /* Stash so future lookups skip this loop. */ 433 x->sym_to_imp[r->target] = idx; 434 } 435 x->imports[idx - 1].is_func = 1; 436 } 437 438 /* Build dylib ordinal table. Pull soname from the providing DSO. */ 439 for (u32 i = 0; i < x->nimports; ++i) { 440 MachImp* mi = &x->imports[i]; 441 LinkSymbol* s = sym_at(img, mi->sym); 442 LinkInputId dso_id = s ? 
s->dso_input_id : LINK_INPUT_NONE; 443 Sym install = 0; 444 if (dso_id != LINK_INPUT_NONE && x->linker && 445 dso_id - 1u < LinkInputs_count(&x->linker->inputs)) { 446 LinkInput* in = LinkInputs_at(&x->linker->inputs, dso_id - 1u); 447 if (in->kind == LINK_INPUT_DSO_BYTES) install = in->soname; 448 } 449 if (install == 0) 450 install = pool_intern_slice(x->c->global, 451 SLICE_LIT("/usr/lib/libSystem.B.dylib")); 452 u32 ord = dylib_ordinal_of(x, install); 453 if (!ord) { 454 if (VEC_GROW(h, x->dylibs, cap_d, x->ndylibs + 1u)) 455 compiler_panic(x->c, SRCLOC_NONE, "link_macho: oom on dylibs"); 456 x->dylibs[x->ndylibs].install = install; 457 ++x->ndylibs; 458 ord = x->ndylibs; 459 } 460 mi->dylib_ord = ord; 461 } 462 463 /* Always include every DSO input's install-name. */ 464 if (x->linker) { 465 for (u32 ii = 0; ii < LinkInputs_count(&x->linker->inputs); ++ii) { 466 LinkInput* in = LinkInputs_at(&x->linker->inputs, ii); 467 if (in->kind != LINK_INPUT_DSO_BYTES) continue; 468 if (in->soname == 0) continue; 469 if (dylib_ordinal_of(x, in->soname)) continue; 470 if (VEC_GROW(h, x->dylibs, cap_d, x->ndylibs + 1u)) 471 compiler_panic(x->c, SRCLOC_NONE, "link_macho: oom on dylibs"); 472 x->dylibs[x->ndylibs].install = in->soname; 473 ++x->ndylibs; 474 } 475 } 476 477 /* All entries so far are real imports; remember the partition point 478 * so import/symtab table emit loops can skip the appended internals. */ 479 x->nimports_real = x->nimports; 480 481 /* Internal GOT pass. clang on Mach-O routes every extern-global 482 * reference through the GOT (GOT_LOAD_PAGE21 / LD64_GOT_LO12_NC), so 483 * even a common symbol or weak-undef that ends up resolved within the 484 * image still needs a __got slot. For each such reloc whose target 485 * isn't an existing import, materialize a MachImp with internal=1. 486 * The slot's contents are filled at write time and a chained-fixup 487 * REBASE entry (or none, for weak undef → NULL) keeps it valid 488 * post-ASLR. 
*/ 489 for (u32 i = 0; i < LinkRelocs_count(&img->relocs); ++i) { 490 LinkRelocApply* r = LinkRelocs_at(&img->relocs, i); 491 if (!reloc_kind_is_got_load(x->c, r->kind)) continue; 492 if (r->target == LINK_SYM_NONE || r->target >= x->sym_to_imp_size) continue; 493 if (x->sym_to_imp[r->target]) continue; 494 LinkSymbol* t = sym_at(img, r->target); 495 if (!t) continue; 496 /* Resolve through canonical so we share a single slot per symbol. */ 497 LinkSymId canon = r->target; 498 if (t->name != 0) { 499 LinkSymId hit = symhash_get(&img->globals, t->name); 500 if (hit != LINK_SYM_NONE) { 501 canon = hit; 502 if (x->sym_to_imp[canon]) { 503 x->sym_to_imp[r->target] = x->sym_to_imp[canon]; 504 continue; 505 } 506 t = sym_at(img, canon); 507 if (!t) continue; 508 } 509 } 510 if (VEC_GROW(h, x->imports, cap, x->nimports + 1u)) 511 compiler_panic(x->c, SRCLOC_NONE, "link_macho: oom on internal got"); 512 MachImp* mi = &x->imports[x->nimports++]; 513 memset(mi, 0, sizeof(*mi)); 514 mi->sym = canon; 515 mi->name = t->name; 516 mi->is_func = (t->kind == SK_FUNC || t->kind == SK_IFUNC) ? 1 : 0; 517 mi->weak = (t->bind == SB_WEAK) ? 1 : 0; 518 mi->internal = 1; 519 /* internal_vaddr is read fresh from the LinkSymbol when the slot 520 * gets initialized — collect_imports runs before shift_sections 521 * rebases section vaddrs to Mach-O layout, so capturing here would 522 * be stale by the time __got bytes are written. */ 523 mi->internal_vaddr = 0; 524 x->sym_to_imp[canon] = x->nimports; 525 if (canon != r->target) x->sym_to_imp[r->target] = x->nimports; 526 } 527 528 /* Assign stub_idx + got_idx. Internal entries get a slot but no stub: 529 * the call site (CALL26) on internal funcs goes direct, not via stub. 
*/ 530 u32 stub_run = 0; 531 for (u32 i = 0; i < x->nimports; ++i) { 532 MachImp* mi = &x->imports[i]; 533 mi->got_idx = i + 1u; 534 if (mi->is_func && !mi->internal) mi->stub_idx = ++stub_run; 535 } 536 x->nimport_funcs = stub_run; 537 } 538 539 /* ---- pass: collect TLV pointer slots ---- 540 * 541 * Mirror of collect_imports' internal-GOT pass, but for TLV descriptors: 542 * each unique descriptor referenced via ARM64_RELOC_TLVP_LOAD_PAGE21 / 543 * PAGEOFF12 gets one slot in the synthetic __DATA,__thread_ptrs section. 544 * The slot's runtime value is the descriptor's address; we patch it at 545 * apply_relocs time (REBASE for in-image descriptors, BIND for ones in 546 * a dylib). 547 * 548 * Slots are deduplicated by canonical LinkSymId so a single descriptor 549 * referenced from N call sites shares one __thread_ptrs entry. */ 550 static void collect_tlv(MCtx* x) { 551 LinkImage* img = x->img; 552 Heap* h = x->h; 553 x->sym_to_tlv_size = LinkSyms_count(&img->syms) + 1u; 554 x->sym_to_tlv = 555 (u32*)h->alloc(h, sizeof(u32) * x->sym_to_tlv_size, _Alignof(u32)); 556 if (!x->sym_to_tlv) 557 compiler_panic(x->c, SRCLOC_NONE, "link_macho: oom on sym_to_tlv"); 558 memset(x->sym_to_tlv, 0, sizeof(u32) * x->sym_to_tlv_size); 559 560 u32 cap = 0; 561 for (u32 i = 0; i < LinkRelocs_count(&img->relocs); ++i) { 562 LinkRelocApply* r = LinkRelocs_at(&img->relocs, i); 563 if (!reloc_kind_is_tlvp(x->c, r->kind)) continue; 564 if (r->target == LINK_SYM_NONE || r->target >= x->sym_to_tlv_size) continue; 565 /* Resolve through canonical so multiple per-input duplicate undefs 566 * collapse onto one __thread_ptrs slot. 
*/ 567 LinkSymId canon = r->target; 568 LinkSymbol* t = sym_at(img, r->target); 569 if (!t) continue; 570 if (t->name != 0) { 571 LinkSymId hit = symhash_get(&img->globals, t->name); 572 if (hit != LINK_SYM_NONE) { 573 canon = hit; 574 t = sym_at(img, canon); 575 if (!t) continue; 576 } 577 } 578 if (x->sym_to_tlv[canon]) { 579 if (canon != r->target) x->sym_to_tlv[r->target] = x->sym_to_tlv[canon]; 580 continue; 581 } 582 if (VEC_GROW(h, x->tlv_slots, cap, x->ntlv + 1u)) 583 compiler_panic(x->c, SRCLOC_NONE, "link_macho: oom on tlv_slots"); 584 MachTlv* ts = &x->tlv_slots[x->ntlv++]; 585 memset(ts, 0, sizeof(*ts)); 586 ts->sym = canon; 587 ts->tlv_idx = x->ntlv; 588 ts->imported = t->imported ? 1u : 0u; 589 /* If the descriptor is imported we route the bind through the 590 * symbol's MachImp slot — that's where dyld's chained-import index 591 * comes from. When this loop fires the imp pass has already 592 * materialized the entry (real imports were processed first); the 593 * lookup may also have stashed an alias for non-canonical ids. */ 594 if (ts->imported) { 595 u32 idx = (canon < x->sym_to_imp_size) ? x->sym_to_imp[canon] : 0u; 596 if (!idx && t->name != 0) { 597 LinkSymId hit2 = symhash_get(&img->globals, t->name); 598 if (hit2 != LINK_SYM_NONE && hit2 < x->sym_to_imp_size) 599 idx = x->sym_to_imp[hit2]; 600 } 601 ts->import_idx = idx; 602 } 603 x->sym_to_tlv[canon] = x->ntlv; 604 if (canon != r->target) x->sym_to_tlv[r->target] = x->ntlv; 605 } 606 } 607 608 /* ---- pass: plan Mach-O sections ---- 609 * 610 * Walks LinkImage sections. Each non-zero-size LinkSection becomes one 611 * MSec. Synthetic __stubs and __got are appended at the right segment 612 * boundaries. Vaddr and file_offset are assigned in a single forward 613 * pass starting at __TEXT base; __PAGEZERO and __LINKEDIT are special. 
*/ 614 615 static void seg_init(MSeg* s, const char* name, u32 maxp, u32 initp) { 616 memset(s, 0, sizeof(*s)); 617 s->name = name; 618 s->maxprot = maxp; 619 s->initprot = initp; 620 } 621 622 static int sec_is_writable(const LinkSection* ls) { 623 return (ls->flags & SF_WRITE) != 0u; 624 } 625 static int sec_is_exec(const LinkSection* ls) { 626 return (ls->flags & SF_EXEC) != 0u; 627 } 628 static int sec_is_zerofill(const LinkSection* ls) { 629 return ls->sem == SSEM_NOBITS; 630 } 631 632 static int section_has_abs64_reloc(const LinkImage* img, LinkSectionId id) { 633 for (u32 i = 0; i < LinkRelocs_count(&img->relocs); ++i) { 634 const LinkRelocApply* r = LinkRelocs_at(&img->relocs, i); 635 if (r->link_section_id == id && r->kind == R_ABS64) return 1; 636 } 637 return 0; 638 } 639 640 static int sec_needs_data_const(const LinkImage* img, const LinkSection* ls) { 641 if (!ls || !ls->size || sec_is_exec(ls) || sec_is_writable(ls) || 642 sec_is_zerofill(ls)) { 643 return 0; 644 } 645 return section_has_abs64_reloc(img, ls->id); 646 } 647 648 /* Pick (segname, sectname) for a LinkSection. Comma-form Mach-O names 649 * round-trip into MSec's inline 16-byte buffers; literal defaults point 650 * at .rodata strings. Caller passes the MSec for per-section storage — 651 * a previous version used a shared static buffer which aliased all 652 * sections to whichever name was set last. */ 653 static void pick_macho_names(const LinkSection* ls, Compiler* c, MSec* m) { 654 Slice nm_s = pool_slice(c->global, ls->name); 655 const char* nm = nm_s.s; 656 size_t nlen = nm_s.len; 657 if (nm) { 658 /* Comma-form: "__SEG,__sect" round-tripped from a Mach-O input. */ 659 for (size_t i = 0; i < nlen; ++i) { 660 if (nm[i] == ',') { 661 u32 seg_n = (u32)(i > 16 ? 16 : i); 662 memcpy(m->segname_buf, nm, seg_n); 663 m->segname_buf[seg_n] = 0; 664 u32 sect_n = (u32)((nlen - i - 1) > 16 ? 
16 : (nlen - i - 1)); 665 memcpy(m->sectname_buf, nm + i + 1, sect_n); 666 m->sectname_buf[sect_n] = 0; 667 m->segname = m->segname_buf; 668 m->sectname = m->sectname_buf; 669 return; 670 } 671 } 672 } 673 /* Derive from flags. */ 674 if (sec_is_exec(ls)) { 675 m->segname = "__TEXT"; 676 m->sectname = "__text"; 677 } else if (sec_is_writable(ls)) { 678 m->segname = "__DATA"; 679 m->sectname = sec_is_zerofill(ls) ? "__bss" : "__data"; 680 } else { 681 m->segname = "__TEXT"; 682 m->sectname = "__const"; 683 } 684 } 685 686 static void plan_layout(MCtx* x) { 687 LinkImage* img = x->img; 688 Heap* h = x->h; 689 690 /* PAGEZERO */ 691 seg_init(&x->segs[0], "__PAGEZERO", 0, 0); 692 x->segs[0].vmaddr = 0; 693 x->segs[0].vmsize = MZ_PAGEZERO; 694 x->segs[0].fileoff = 0; 695 x->segs[0].filesize = 0; 696 x->segs[0].nsects = 0; 697 x->segs[0].first_sec = 0; 698 699 /* Segments 1..4 */ 700 seg_init(&x->segs[1], "__TEXT", VM_PROT_READ | VM_PROT_EXECUTE, 701 VM_PROT_READ | VM_PROT_EXECUTE); 702 seg_init(&x->segs[2], "__DATA_CONST", VM_PROT_READ | VM_PROT_WRITE, 703 VM_PROT_READ | VM_PROT_WRITE); 704 seg_init(&x->segs[3], "__DATA", VM_PROT_READ | VM_PROT_WRITE, 705 VM_PROT_READ | VM_PROT_WRITE); 706 /* __DWARF holds the file-only .debug_* sections; mapped R but never 707 * referenced at runtime. Empty (nsects 0) when there's no debug info. */ 708 seg_init(&x->segs[MSEG_DWARF], "__DWARF", VM_PROT_READ, VM_PROT_READ); 709 seg_init(&x->segs[MSEG_LINKEDIT], "__LINKEDIT", VM_PROT_READ, VM_PROT_READ); 710 x->nsegs = MSEG_COUNT; 711 712 /* Pre-allocate MSec capacity: every LinkSection + 2 synth (__stubs, 713 * __got). (LinkSections from the dynamic-link layer — .dynsym / .plt 714 * etc. — were synthesized by layout_dyn for ELF; we won't have them 715 * since pie wasn't set on this Linker. Still, oversize by a few.) 
*/ 716 u32 cap = LinkRelocs_count(&img->relocs) + img->nsections + 4u; 717 x->secs = (MSec*)h->alloc(h, sizeof(MSec) * cap, _Alignof(MSec)); 718 if (!x->secs) compiler_panic(x->c, SRCLOC_NONE, "link_macho: oom on MSec"); 719 memset(x->secs, 0, sizeof(MSec) * cap); 720 x->nsecs = 0; 721 722 /* Pass 1: __TEXT segment. Header + loadcmds reserve front. */ 723 /* We need the exact header_size to set first sec's file_offset. We'll 724 * compute it later, but reserve a placeholder; for now use 0 and patch 725 * in pass 4 (offsets get bumped). */ 726 727 u64 text_vaddr = MZ_PAGEZERO; 728 /* We'll compute headers_size after plan; stash starting vaddr only. */ 729 x->segs[1].vmaddr = text_vaddr; 730 x->segs[1].fileoff = 0; 731 x->text_vaddr = text_vaddr; 732 733 /* Collect: (a) exec sections, (b) read-only allocatable sections. */ 734 /* (cursor advances per-segment in pass 2; nothing to track here) */ 735 736 /* We don't know the header size yet; walk sections first to enumerate 737 * MSec entries, then back-fill file_offset/vaddr after we know the 738 * load-command count. */ 739 740 u32 first_text_sec = x->nsecs; 741 742 for (u32 i = 0; i < img->nsections; ++i) { 743 LinkSection* ls = &img->sections[i]; 744 if (!ls->size) continue; 745 if (ls->file_only) continue; /* .debug_* → __DWARF segment below */ 746 if (sec_is_writable(ls)) continue; 747 if (sec_is_zerofill(ls)) continue; /* placed in __DATA */ 748 if (sec_needs_data_const(img, ls)) continue; 749 MSec* m = &x->secs[x->nsecs++]; 750 memset(m, 0, sizeof(*m)); 751 m->link_sec_id = ls->id; 752 pick_macho_names(ls, x->c, m); 753 /* Force into __TEXT. */ 754 if (!slice_eq_cstr(slice_from_cstr(m->segname), "__TEXT")) 755 m->segname = "__TEXT"; 756 m->align = ls->align ? ls->align : 1u; 757 m->size = ls->size; 758 m->segidx = 1; 759 m->flags = sec_is_exec(ls) ? 
(0x80000000u /*S_ATTR_PURE_INSTRUCTIONS*/ | 760 0x00000400u /*S_ATTR_SOME_INSTRUCTIONS*/) 761 : 0u; 762 } 763 764 /* __stubs synthetic */ 765 if (x->nimport_funcs) { 766 x->stubs_size = x->nimport_funcs * x->macho->stub_size; 767 x->stubs_bytes = (u8*)h->alloc(h, x->stubs_size, 4); 768 if (!x->stubs_bytes) 769 compiler_panic(x->c, SRCLOC_NONE, "link_macho: oom on stubs"); 770 memset(x->stubs_bytes, 0, x->stubs_size); 771 MSec* m = &x->secs[x->nsecs++]; 772 memset(m, 0, sizeof(*m)); 773 m->synth_data = x->stubs_bytes; 774 m->synth_size = x->stubs_size; 775 m->segname = "__TEXT"; 776 m->sectname = "__stubs"; 777 m->align = 4u; 778 m->size = x->stubs_size; 779 m->segidx = 1; 780 m->flags = 0x80000000u | 0x00000400u | 0x00000008u /*S_SYMBOL_STUBS*/; 781 m->reserved1 = 0; /* fill in later: indirect-symtab base */ 782 m->reserved2 = x->macho->stub_size; 783 } 784 x->segs[1].nsects = x->nsecs - first_text_sec; 785 x->segs[1].first_sec = first_text_sec; 786 787 /* __DATA_CONST: __got synth */ 788 u32 first_dc = x->nsecs; 789 if (x->nimports) { 790 x->got_size = x->nimports * MZ_GOT_SIZE; 791 x->got_bytes = (u8*)h->alloc(h, x->got_size, 8); 792 if (!x->got_bytes) 793 compiler_panic(x->c, SRCLOC_NONE, "link_macho: oom on got"); 794 memset(x->got_bytes, 0, x->got_size); 795 MSec* m = &x->secs[x->nsecs++]; 796 memset(m, 0, sizeof(*m)); 797 m->synth_data = x->got_bytes; 798 m->synth_size = x->got_size; 799 m->segname = "__DATA_CONST"; 800 m->sectname = "__got"; 801 m->align = 8u; 802 m->size = x->got_size; 803 m->segidx = 2; 804 m->flags = 0x00000006u /*S_NON_LAZY_SYMBOL_POINTERS*/; 805 m->reserved1 = 0; /* indirect-symtab base */ 806 } 807 for (u32 i = 0; i < img->nsections; ++i) { 808 LinkSection* ls = &img->sections[i]; 809 if (ls->file_only) continue; /* .debug_* → __DWARF (has ABS64 relocs) */ 810 if (!sec_needs_data_const(img, ls)) continue; 811 MSec* m = &x->secs[x->nsecs++]; 812 memset(m, 0, sizeof(*m)); 813 m->link_sec_id = ls->id; 814 pick_macho_names(ls, x->c, m); 
815 m->segname = "__DATA_CONST"; 816 m->align = ls->align ? ls->align : 1u; 817 m->size = ls->size; 818 m->segidx = 2; 819 m->flags = 0; 820 } 821 x->segs[2].nsects = x->nsecs - first_dc; 822 x->segs[2].first_sec = first_dc; 823 824 /* __DATA segment: writable sections + zerofill. */ 825 u32 first_d = x->nsecs; 826 for (u32 i = 0; i < img->nsections; ++i) { 827 LinkSection* ls = &img->sections[i]; 828 if (ls->file_only) continue; /* .debug_* → __DWARF */ 829 if (!ls->size && !sec_is_zerofill(ls)) continue; 830 if (!sec_is_writable(ls)) continue; 831 MSec* m = &x->secs[x->nsecs++]; 832 memset(m, 0, sizeof(*m)); 833 m->link_sec_id = ls->id; 834 pick_macho_names(ls, x->c, m); 835 if (!slice_eq_cstr(slice_from_cstr(m->segname), "__DATA")) 836 m->segname = "__DATA"; 837 m->align = ls->align ? ls->align : 1u; 838 m->size = ls->size; 839 m->segidx = 3; 840 m->is_zerofill = sec_is_zerofill(ls) ? 1 : 0; 841 m->flags = m->is_zerofill ? 0x00000001u /*S_ZEROFILL*/ : 0; 842 /* dyld dispatches on the section type byte (low 8 bits of flags). 843 * __mod_init_func / __mod_term_func sections must carry the 844 * S_MOD_INIT_FUNC_POINTERS / S_MOD_TERM_FUNC_POINTERS type or dyld 845 * skips them entirely — leaving constructors unrun at startup. */ 846 if (slice_eq_cstr(slice_from_cstr(m->sectname), "__mod_init_func")) 847 m->flags = 0x00000009u /*S_MOD_INIT_FUNC_POINTERS*/; 848 else if (slice_eq_cstr(slice_from_cstr(m->sectname), "__mod_term_func")) 849 m->flags = 0x0000000au /*S_MOD_TERM_FUNC_POINTERS*/; 850 else if (ls->flags & SF_TLS) { 851 /* TLV sections: dyld dispatches by section type, not name. Map 852 * __thread_vars → S_THREAD_LOCAL_VARIABLES (descriptor records), 853 * __thread_data → S_THREAD_LOCAL_REGULAR (initial data), 854 * __thread_bss → S_THREAD_LOCAL_ZEROFILL (zero-init data). Done 855 * by sectname so per-TU inputs without a Mach-O ext_type still 856 * get the right section type. 
*/ 857 if (slice_eq_cstr(slice_from_cstr(m->sectname), "__thread_vars")) { 858 m->flags = S_THREAD_LOCAL_VARIABLES; 859 /* Each descriptor is three pointers (24B) whose first word is 860 * dyld's _tlv_bootstrap thunk pointer. Clang/llvm emit 861 * __thread_vars with on-disk alignment 1 (relying on layout to 862 * land it on 8); force 8-alignment here so the descriptor 863 * pointers fall on 8-byte boundaries — dyld's chained-fixup 864 * processing assumes that. */ 865 if (m->align < 8u) m->align = 8u; 866 } else if (m->is_zerofill) 867 m->flags = S_THREAD_LOCAL_ZEROFILL; 868 else 869 m->flags = S_THREAD_LOCAL_REGULAR; 870 } 871 } 872 /* __thread_ptrs synthetic (TLV pointer slots). Emitted into __DATA 873 * after the user's TLV input sections so descriptors and their 874 * pointers share the same segment. Each slot's runtime initial 875 * value (= TLV descriptor address) is patched during apply_relocs. */ 876 if (x->ntlv) { 877 x->tlv_ptrs_size = x->ntlv * MZ_TLVP_SIZE; 878 x->tlv_ptrs_bytes = (u8*)h->alloc(h, x->tlv_ptrs_size, 8); 879 if (!x->tlv_ptrs_bytes) 880 compiler_panic(x->c, SRCLOC_NONE, "link_macho: oom on tlv_ptrs"); 881 memset(x->tlv_ptrs_bytes, 0, x->tlv_ptrs_size); 882 MSec* m = &x->secs[x->nsecs++]; 883 memset(m, 0, sizeof(*m)); 884 m->synth_data = x->tlv_ptrs_bytes; 885 m->synth_size = x->tlv_ptrs_size; 886 m->segname = "__DATA"; 887 m->sectname = "__thread_ptrs"; 888 m->align = 8u; 889 m->size = x->tlv_ptrs_size; 890 m->segidx = 3; 891 m->flags = S_THREAD_LOCAL_VARIABLE_POINTERS; 892 } 893 x->segs[3].nsects = x->nsecs - first_d; 894 x->segs[3].first_sec = first_d; 895 896 /* __DWARF: file-only .debug_* sections (debug-info retention). Each 897 * contribution becomes a synth MSec whose bytes are the per-image 898 * debug registry buffer (relocs applied in place at apply_relocs). 
899 * Iterated in registry order (= input order) so same-name runs land 900 * adjacent and the per-input DWARF-relative bases line up with the 901 * coalesced section's byte layout. */ 902 u32 first_dw = x->nsecs; 903 for (u32 i = 0; i < img->nsections; ++i) { 904 LinkSection* ls = &img->sections[i]; 905 u8* dbg; 906 if (!ls->file_only || !ls->size) continue; 907 dbg = link_fileonly_bytes(img, ls->id); 908 if (!dbg) continue; 909 MSec* m = &x->secs[x->nsecs++]; 910 memset(m, 0, sizeof(*m)); 911 m->synth_data = dbg; 912 m->synth_size = (u32)ls->size; 913 /* Section name: a Mach-O input already carries "__DWARF,__debug_*"; 914 * an in-process .debug_* maps via obj_macho_debug_sectname. */ 915 { 916 Slice nm = pool_slice(x->c->global, ls->name); 917 const char* comma = nm.s ? memchr(nm.s, ',', nm.len) : NULL; 918 char sect[17]; 919 if (comma) { 920 u32 sgn = (u32)(comma - nm.s); 921 if (sgn > 16u) sgn = 16u; 922 memcpy(m->segname_buf, nm.s, sgn); 923 m->segname_buf[sgn] = 0; 924 u32 stn = (u32)(nm.len - (comma - nm.s) - 1); 925 if (stn > 16u) stn = 16u; 926 memcpy(m->sectname_buf, comma + 1, stn); 927 m->sectname_buf[stn] = 0; 928 } else if (obj_macho_debug_sectname(nm.s, nm.len, sect)) { 929 memcpy(m->segname_buf, "__DWARF", 8); 930 memcpy(m->sectname_buf, sect, slice_from_cstr(sect).len + 1); 931 } else { 932 memcpy(m->segname_buf, "__DWARF", 8); 933 u32 stn = nm.len > 16u ? 16u : (u32)nm.len; 934 memcpy(m->sectname_buf, nm.s, stn); 935 m->sectname_buf[stn] = 0; 936 } 937 m->segname = m->segname_buf; 938 m->sectname = m->sectname_buf; 939 } 940 /* align 1: contributions concatenate gap-free so the DWARF-relative 941 * bases (assigned without padding in link_layout_debug) stay valid. 
*/ 942 m->align = 1u; 943 m->size = ls->size; 944 m->segidx = MSEG_DWARF; 945 m->flags = 0; /* S_REGULAR */ 946 } 947 x->segs[MSEG_DWARF].nsects = x->nsecs - first_dw; 948 x->segs[MSEG_DWARF].first_sec = first_dw; 949 950 /* Group MSecs by (segname, sectname) within each segment so vaddr 951 * placement keeps same-named runs contiguous. Otherwise Phase B's 952 * adjacency-based coalescing splits a single Mach-O section into 953 * multiple OutSecs (e.g. `.text` from an in-memory ObjBuilder and 954 * `__TEXT,__text` from a Mach-O .o input both map to `__TEXT,__text` 955 * but arrive in separate link_layout groups, interleaved with other 956 * sections from each input). Stable insertion sort preserves input 957 * order within a name, which matters for synth __stubs/__thread_ptrs 958 * order relative to peers. */ 959 for (u32 i = 0; i < x->nsegs; ++i) { 960 MSeg* sg = &x->segs[i]; 961 if (sg->nsects < 2) continue; 962 u32 base = sg->first_sec; 963 u32 n = sg->nsects; 964 for (u32 a = 1; a < n; ++a) { 965 MSec key = x->secs[base + a]; 966 msec_repair_name_ptrs(&key); 967 u32 j = a; 968 while (j > 0) { 969 MSec* prev = &x->secs[base + j - 1]; 970 /* Ordering compare for stable sort: slices don't order, keep strcmp. */ 971 int cmp = strcmp(prev->segname, key.segname); /* ordering */ 972 if (cmp == 0) cmp = strcmp(prev->sectname, key.sectname); /* ordering */ 973 if (cmp <= 0) break; 974 x->secs[base + j] = x->secs[base + j - 1]; 975 msec_repair_name_ptrs(&x->secs[base + j]); 976 --j; 977 } 978 x->secs[base + j] = key; 979 msec_repair_name_ptrs(&x->secs[base + j]); 980 } 981 } 982 983 /* Phase A: count OutSecs per segment (distinct sectnames) so we can 984 * size the load commands before placing vaddrs. Phase B builds the 985 * actual OutSec[] after placement, when vaddrs are final. 
*/ 986 for (u32 i = 0; i < x->nsegs; ++i) { 987 MSeg* sg = &x->segs[i]; 988 u32 cnt = 0; 989 for (u32 a = sg->first_sec; a < sg->first_sec + sg->nsects; ++a) { 990 int seen = 0; 991 for (u32 b = sg->first_sec; b < a; ++b) { 992 if (slice_eq_cstr(slice_from_cstr(x->secs[a].sectname), 993 x->secs[b].sectname) && 994 slice_eq_cstr(slice_from_cstr(x->secs[a].segname), 995 x->secs[b].segname)) { 996 seen = 1; 997 break; 998 } 999 } 1000 if (!seen) ++cnt; 1001 } 1002 sg->nouts = cnt; 1003 sg->first_out = 0; /* assigned in Phase B */ 1004 } 1005 1006 /* Compute load-command count + sizeofcmds, then back-fill section 1007 * offsets. Layout pass 2. */ 1008 u32 nseg_real = 0; 1009 for (u32 i = 0; i < x->nsegs; ++i) { 1010 /* Skip __DATA_CONST or __DATA if no sections (edge case). */ 1011 if (i == 0) { 1012 ++nseg_real; 1013 continue; 1014 } /* PAGEZERO */ 1015 if (i == MSEG_LINKEDIT) { 1016 ++nseg_real; 1017 continue; 1018 } /* LINKEDIT always */ 1019 if (x->segs[i].nsects > 0) ++nseg_real; /* incl. __DWARF when present */ 1020 } 1021 /* Each LC_SEGMENT_64 carries 72 + 80*nouts bytes (one section_64 1022 * record per coalesced (segname,sectname), not per MSec). 
*/ 1023 u32 sizeofcmds = 0; 1024 for (u32 i = 0; i < x->nsegs; ++i) { 1025 if (i == 0 || i == MSEG_LINKEDIT) { 1026 sizeofcmds += MACHO_SEGCMD64_SIZE; /* no sections */ 1027 continue; 1028 } 1029 if (x->segs[i].nsects == 0) continue; 1030 sizeofcmds += MACHO_SEGCMD64_SIZE + x->segs[i].nouts * MACHO_SECT64_SIZE; 1031 } 1032 (void)nseg_real; 1033 /* LC_DYLD_CHAINED_FIXUPS / LC_DYLD_EXPORTS_TRIE */ 1034 sizeofcmds += 16u + 16u; 1035 /* LC_SYMTAB / LC_DYSYMTAB */ 1036 sizeofcmds += MACHO_SYMTAB_CMD_SIZE + MACHO_DYSYMTAB_CMD_SIZE; 1037 /* LC_LOAD_DYLINKER */ 1038 { 1039 u32 ld_size = 12u + (u32)(sizeof("/usr/lib/dyld") - 1u) + 1u; 1040 sizeofcmds += (u32)ALIGN_UP((u64)ld_size, 8u); 1041 } 1042 /* LC_UUID + LC_BUILD_VERSION + LC_MAIN */ 1043 sizeofcmds += 24u + 24u + 24u; 1044 /* LC_LOAD_DYLIB per dylib */ 1045 for (u32 i = 0; i < x->ndylibs; ++i) { 1046 size_t nl = pool_slice(x->c->global, x->dylibs[i].install).len; 1047 u32 sz = 24u + (u32)nl + 1u; 1048 sizeofcmds += (u32)ALIGN_UP((u64)sz, 8u); 1049 } 1050 /* LC_FUNCTION_STARTS / LC_DATA_IN_CODE / LC_CODE_SIGNATURE */ 1051 sizeofcmds += 16u + 16u + 16u; 1052 1053 x->headers_size = MACHO_HDR64_SIZE + sizeofcmds; 1054 1055 /* Now place sections in __TEXT, __DATA_CONST, __DATA. */ 1056 u64 vaddr = MZ_PAGEZERO + x->headers_size; 1057 u64 fileoff = x->headers_size; 1058 /* Pad __TEXT sections to natural alignment. */ 1059 for (u32 i = 0; i < x->nsegs; ++i) { 1060 if (i == 0 || i == MSEG_LINKEDIT) continue; /* DWARF placed here too */ 1061 MSeg* sg = &x->segs[i]; 1062 if (i > 1) { 1063 /* page-align the start of __DATA_CONST and __DATA */ 1064 vaddr = ALIGN_UP(vaddr, MZ_PAGE); 1065 fileoff = ALIGN_UP(fileoff, MZ_PAGE); 1066 } 1067 sg->vmaddr = (i == 1) ? MZ_PAGEZERO : vaddr; 1068 sg->fileoff = (i == 1) ? 0 : fileoff; 1069 /* __TEXT carries the headers_size + sections. */ 1070 u64 seg_start_v = sg->vmaddr; 1071 u64 seg_start_f = sg->fileoff; 1072 /* For __TEXT, sections begin after the header area. 
*/ 1073 u64 cur_v = (i == 1) ? (seg_start_v + x->headers_size) : seg_start_v; 1074 u64 cur_f = (i == 1) ? (seg_start_f + x->headers_size) : seg_start_f; 1075 u64 first_zerofill_v = 0; 1076 int seen_zerofill = 0; 1077 /* Non-zerofill first */ 1078 for (u32 j = 0; j < sg->nsects; ++j) { 1079 MSec* m = &x->secs[sg->first_sec + j]; 1080 if (m->is_zerofill) continue; 1081 cur_v = ALIGN_UP(cur_v, (u64)m->align); 1082 cur_f = ALIGN_UP(cur_f, (u64)m->align); 1083 m->vaddr = cur_v; 1084 m->file_offset = cur_f; 1085 cur_v += m->size; 1086 cur_f += m->size; 1087 } 1088 first_zerofill_v = cur_v; 1089 /* zerofill last (no file bytes) */ 1090 for (u32 j = 0; j < sg->nsects; ++j) { 1091 MSec* m = &x->secs[sg->first_sec + j]; 1092 if (!m->is_zerofill) continue; 1093 cur_v = ALIGN_UP(cur_v, (u64)m->align); 1094 m->vaddr = cur_v; 1095 m->file_offset = 0; 1096 cur_v += m->size; 1097 seen_zerofill = 1; 1098 } 1099 sg->filesize = (i == 1) 1100 ? (cur_f - seg_start_f) 1101 : (first_zerofill_v ? (first_zerofill_v - seg_start_v) 1102 : (cur_v - seg_start_v)); 1103 sg->vmsize = ALIGN_UP(cur_v - seg_start_v, MZ_PAGE); 1104 if (sg->vmsize == 0 && sg->nsects > 0) sg->vmsize = MZ_PAGE; 1105 if (i == 1) { 1106 x->stubs_vaddr = 0; 1107 for (u32 j = 0; j < sg->nsects; ++j) { 1108 MSec* m = &x->secs[sg->first_sec + j]; 1109 if (slice_eq_cstr(slice_from_cstr(m->sectname), "__stubs")) 1110 x->stubs_vaddr = m->vaddr; 1111 } 1112 x->text_filesz = sg->filesize; 1113 } 1114 if (i == 2) { 1115 for (u32 j = 0; j < sg->nsects; ++j) { 1116 MSec* m = &x->secs[sg->first_sec + j]; 1117 if (slice_eq_cstr(slice_from_cstr(m->sectname), "__got")) 1118 x->got_vaddr = m->vaddr; 1119 } 1120 x->data_const_vaddr = sg->vmaddr; 1121 x->data_const_filesz = sg->filesize; 1122 } 1123 if (i == 3) { 1124 for (u32 j = 0; j < sg->nsects; ++j) { 1125 MSec* m = &x->secs[sg->first_sec + j]; 1126 if (slice_eq_cstr(slice_from_cstr(m->sectname), "__thread_ptrs")) 1127 x->tlv_ptrs_vaddr = m->vaddr; 1128 /* TLS storage image base: min 
vaddr across __thread_data and 1129 * __thread_bss sections. __thread_vars is excluded — it holds 1130 * the descriptors, not the data that maps into the per-thread 1131 * block. */ 1132 if ((slice_eq_cstr(slice_from_cstr(m->sectname), "__thread_data") || 1133 slice_eq_cstr(slice_from_cstr(m->sectname), "__thread_bss")) && 1134 (!x->has_tls_image || m->vaddr < x->tls_image_vaddr)) { 1135 x->tls_image_vaddr = m->vaddr; 1136 x->has_tls_image = 1; 1137 } 1138 } 1139 x->data_vaddr = sg->vmaddr; 1140 x->data_filesz = sg->filesize; 1141 x->data_memsz = sg->vmsize; 1142 } 1143 vaddr = sg->vmaddr + sg->vmsize; 1144 /* Mach-O segments are mapped in page units. If a segment's memory 1145 * image extends past its initialized file bytes (for example 1146 * __DATA,__bss), the following segment's fileoff must not reuse those 1147 * pages or the kernel can map later file contents into the zero-fill 1148 * tail. */ 1149 fileoff = sg->fileoff + ((sg->vmsize > ALIGN_UP(sg->filesize, MZ_PAGE)) 1150 ? sg->vmsize 1151 : sg->filesize); 1152 (void)seen_zerofill; 1153 } 1154 /* LINKEDIT placeholder; size is filled after blob assembly. */ 1155 vaddr = ALIGN_UP(vaddr, MZ_PAGE); 1156 fileoff = ALIGN_UP(fileoff, MZ_PAGE); 1157 x->segs[MSEG_LINKEDIT].vmaddr = vaddr; 1158 x->segs[MSEG_LINKEDIT].fileoff = fileoff; 1159 x->linkedit_vaddr = vaddr; 1160 x->linkedit_fileoff = fileoff; 1161 1162 /* Encode __stubs bytes now that vaddrs are settled. Internal-GOT 1163 * entries have stub_idx=0 (direct CALL26, no stub) and must be 1164 * skipped so the (stub_idx - 1u) arithmetic doesn't wrap. 
*/ 1165 for (u32 i = 0; i < x->nimports; ++i) { 1166 MachImp* mi = &x->imports[i]; 1167 if (!mi->is_func || !mi->stub_idx) continue; 1168 u64 stub_v = x->stubs_vaddr + (mi->stub_idx - 1u) * x->macho->stub_size; 1169 u64 got_v = x->got_vaddr + (mi->got_idx - 1u) * MZ_GOT_SIZE; 1170 x->macho->emit_stub( 1171 x->stubs_bytes + (mi->stub_idx - 1u) * x->macho->stub_size, stub_v, 1172 got_v); 1173 } 1174 1175 /* Phase B: build OutSec[] now that all MSec vaddrs are final. Walk 1176 * MSecs sorted by (segidx, vaddr) and coalesce adjacent same-name 1177 * runs. Mirrors link_elf.c's OutShdr build at link_elf.c:879. */ 1178 { 1179 u32* order = 1180 (u32*)h->alloc(h, sizeof(u32) * (x->nsecs + 1u), _Alignof(u32)); 1181 if (!order && x->nsecs) 1182 compiler_panic(x->c, SRCLOC_NONE, "link_macho: oom on outsec sort"); 1183 for (u32 i = 0; i < x->nsecs; ++i) order[i] = i; 1184 /* Insertion sort — section count is small. */ 1185 for (u32 i = 1; i < x->nsecs; ++i) { 1186 u32 cur = order[i]; 1187 MSec* a = &x->secs[cur]; 1188 u32 j = i; 1189 while (j > 0) { 1190 MSec* b = &x->secs[order[j - 1]]; 1191 if ((b->segidx < a->segidx) || 1192 (b->segidx == a->segidx && b->vaddr <= a->vaddr)) 1193 break; 1194 order[j] = order[j - 1]; 1195 --j; 1196 } 1197 order[j] = cur; 1198 } 1199 u32 cap = x->nsecs + 1u; 1200 x->outs = (OutSec*)h->alloc(h, sizeof(OutSec) * cap, _Alignof(OutSec)); 1201 if (!x->outs) 1202 compiler_panic(x->c, SRCLOC_NONE, "link_macho: oom on OutSec"); 1203 memset(x->outs, 0, sizeof(OutSec) * cap); 1204 x->nouts = 0; 1205 for (u32 i = 0; i < x->nsecs; ++i) { 1206 MSec* m = &x->secs[order[i]]; 1207 OutSec* tail = x->nouts ? 
&x->outs[x->nouts - 1] : NULL; 1208 int merge = tail && tail->segidx == m->segidx && 1209 slice_eq_cstr(slice_from_cstr(tail->sectname), m->sectname) && 1210 slice_eq_cstr(slice_from_cstr(tail->segname), m->segname); 1211 if (merge) { 1212 if (tail->flags != m->flags || tail->is_zerofill != m->is_zerofill) 1213 compiler_panic( 1214 x->c, SRCLOC_NONE, 1215 "link_macho: coalesce mismatch on %.*s,%.*s (flags/zerofill)", 1216 SLICE_ARG(slice_from_cstr(m->segname)), 1217 SLICE_ARG(slice_from_cstr(m->sectname))); 1218 u64 end = m->vaddr + m->size; 1219 u64 prev_end = tail->vaddr + tail->size; 1220 if (end > prev_end) tail->size = end - tail->vaddr; 1221 if (m->align > tail->align) tail->align = m->align; 1222 } else { 1223 OutSec* o = &x->outs[x->nouts++]; 1224 o->segname = m->segname; 1225 o->sectname = m->sectname; 1226 o->vaddr = m->vaddr; 1227 o->file_offset = m->file_offset; 1228 o->size = m->size; 1229 o->align = m->align; 1230 o->flags = m->flags; 1231 o->reserved1 = m->reserved1; 1232 o->reserved2 = m->reserved2; 1233 o->segidx = m->segidx; 1234 o->is_zerofill = m->is_zerofill; 1235 } 1236 } 1237 h->free(h, order, sizeof(u32) * (x->nsecs + 1u)); 1238 /* Recompute per-segment OutSec span; Phase A's count was for 1239 * sizeofcmds sizing — recompute it here as the source of truth and 1240 * assert agreement. 
*/ 1241 for (u32 i = 0; i < x->nsegs; ++i) { 1242 x->segs[i].first_out = 0; 1243 } 1244 u32 prev_nouts[MSEG_COUNT]; 1245 for (u32 i = 0; i < x->nsegs; ++i) prev_nouts[i] = x->segs[i].nouts; 1246 for (u32 i = 0; i < x->nsegs; ++i) x->segs[i].nouts = 0; 1247 for (u32 i = 0; i < x->nouts; ++i) { 1248 u8 sx = x->outs[i].segidx; 1249 if (x->segs[sx].nouts == 0) x->segs[sx].first_out = i; 1250 ++x->segs[sx].nouts; 1251 } 1252 for (u32 i = 0; i < x->nsegs; ++i) { 1253 if (prev_nouts[i] != x->segs[i].nouts) 1254 compiler_panic(x->c, SRCLOC_NONE, 1255 "link_macho: OutSec count drift seg %u (%u vs %u)", 1256 (u32)i, prev_nouts[i], x->segs[i].nouts); 1257 } 1258 } 1259 } 1260 1261 /* ---- pass: shift LinkImage into final vaddrs/file_offsets ---- 1262 * 1263 * The sections in img->sections are still in their original 1264 * link_layout coordinates. Map each LinkSection -> its MSec and copy 1265 * the final vaddr/file_offset so reloc-apply walks correctly. */ 1266 1267 static void shift_sections(MCtx* x) { 1268 LinkImage* img = x->img; 1269 /* Build a quick lookup: link_sec_id -> MSec*. */ 1270 for (u32 i = 0; i < x->nsecs; ++i) { 1271 MSec* m = &x->secs[i]; 1272 if (!m->link_sec_id) continue; 1273 /* Walk link_section_id slot. */ 1274 LinkSection* ls = &img->sections[m->link_sec_id - 1u]; 1275 /* shift relocs whose write_vaddr/file_offset live within this 1276 * section's original [old_vaddr, old_vaddr+size). */ 1277 u64 old_v = ls->vaddr; 1278 u64 old_f = ls->file_offset; 1279 u64 new_v = m->vaddr; 1280 u64 new_f = m->file_offset; 1281 if (old_v == new_v && old_f == new_f) continue; 1282 /* Update the LinkSection itself. */ 1283 ls->vaddr = new_v; 1284 ls->file_offset = new_f; 1285 /* Update relocs that target this section. 
*/ 1286 for (u32 ri = 0; ri < LinkRelocs_count(&img->relocs); ++ri) { 1287 LinkRelocApply* r = LinkRelocs_at(&img->relocs, ri); 1288 if (r->link_section_id != ls->id) continue; 1289 r->write_vaddr = new_v + (r->write_vaddr - old_v); 1290 r->write_file_offset = new_f + (r->write_file_offset - old_f); 1291 } 1292 /* Update LinkSyms that belong to this LinkSection. Match by 1293 * section_id rather than vaddr range — multiple input sections 1294 * may share the same pre-shift vaddr (each bucket in 1295 * link_layout starts at offset 0). */ 1296 for (u32 si = 0; si < LinkSyms_count(&img->syms); ++si) { 1297 LinkSymbol* s = LinkSyms_at(&img->syms, si); 1298 if (!s->defined) continue; 1299 if (s->kind == SK_ABS) continue; 1300 if (s->section_id != ls->id) continue; 1301 s->vaddr = new_v + (s->vaddr - old_v); 1302 } 1303 } 1304 } 1305 1306 /* ---- pass: apply relocations + collect chained-fixup sites ---- 1307 * 1308 * Reloc dispatch: 1309 * target=imported func + CALL26/JUMP26 -> S = stub vaddr 1310 * target=import + GOT_LOAD_PAGE21/PAGEOFF12 -> S = got slot vaddr 1311 * target=import + ABS64 -> write 0; collect bind site 1312 * target=internal + ABS64 -> write target VA; collect rebase site 1313 * everything else -> standard apply 1314 * 1315 * Patch sites for chained fixups are 8-byte slots; for ABS32 we do not 1316 * support fixups (no chained-fixup format for 32-bit pointers in 1317 * standard arm64 — would need DYLD_CHAINED_PTR_32). Internal R_ABS32 1318 * still works (no slide adjustment is wrong technically, but for 1319 * compile-time-known offsets it suffices). 
1320 */ 1321 1322 typedef struct FixSite { 1323 u8 segidx; /* 2 = __DATA_CONST, 3 = __DATA */ 1324 u8 is_bind; /* 0 = rebase, 1 = bind */ 1325 u8 pad[2]; 1326 u32 import_idx; /* 1-based import index for binds, 0 for rebases */ 1327 u64 vaddr; /* absolute VA of the slot */ 1328 u64 rebase_target; /* unslid target VA; only used for rebases */ 1329 } FixSite; 1330 1331 typedef struct FixList { 1332 Heap* heap; 1333 FixSite* a; 1334 u32 n; 1335 u32 cap; 1336 } FixList; 1337 1338 static void fix_init(FixList* fl, Heap* h) { 1339 fl->heap = h; 1340 fl->a = NULL; 1341 fl->n = 0; 1342 fl->cap = 0; 1343 } 1344 static void fix_fini(FixList* fl) { 1345 if (fl->a) fl->heap->free(fl->heap, fl->a, sizeof(*fl->a) * fl->cap); 1346 fl->a = NULL; 1347 fl->n = fl->cap = 0; 1348 } 1349 static void fix_push(FixList* fl, const FixSite* s) { 1350 if (VEC_GROW(fl->heap, fl->a, fl->cap, fl->n + 1u)) return; 1351 fl->a[fl->n++] = *s; 1352 } 1353 1354 /* find MSec covering an absolute vaddr */ 1355 static MSec* msec_for_vaddr(MCtx* x, u64 v) { 1356 for (u32 i = 0; i < x->nsecs; ++i) { 1357 MSec* m = &x->secs[i]; 1358 if (v >= m->vaddr && v < m->vaddr + m->size) return m; 1359 } 1360 return NULL; 1361 } 1362 1363 static u8* bytes_for_section(MCtx* x, MSec* m, LinkImage* img) { 1364 if (m->synth_data) { 1365 /* Synthetic — caller reads/writes via x->stubs_bytes / x->got_bytes. */ 1366 if (m->synth_data == x->stubs_bytes) return x->stubs_bytes; 1367 if (m->synth_data == x->got_bytes) return x->got_bytes; 1368 return NULL; 1369 } 1370 /* Backed by a LinkSection: find the LinkSegment buffer that section 1371 * sits in (link_layout.c stored input section bytes there). */ 1372 LinkSection* ls = &img->sections[m->link_sec_id - 1u]; 1373 u32 segid = ls->segment_id; 1374 if (segid == LINK_SEG_NONE) return NULL; 1375 return img->segment_bytes[segid - 1u]; 1376 } 1377 1378 /* Map the LinkSection that backs a write_vaddr to an MSec, then to the 1379 * underlying byte buffer. 
*/ 1380 static u8* patch_ptr(MCtx* x, LinkImage* img, const LinkRelocApply* r, 1381 MSec** out_msec) { 1382 /* Look up via the LinkSection. After shift_sections the section 1383 * vaddr is the Mach-O vaddr; the corresponding MSec backs it. */ 1384 if (r->link_section_id == LINK_SEC_NONE) return NULL; 1385 LinkSection* ls = &img->sections[r->link_section_id - 1u]; 1386 /* Find the MSec by link_sec_id. */ 1387 MSec* m = NULL; 1388 for (u32 i = 0; i < x->nsecs; ++i) { 1389 if (x->secs[i].link_sec_id == ls->id) { 1390 m = &x->secs[i]; 1391 break; 1392 } 1393 } 1394 if (!m) return NULL; 1395 /* The LinkSegment's bytes are valid (not shifted), but the offset 1396 * within them is the original input_offset. Use input_offset for 1397 * the byte offset, since the LinkSegment buffer wasn't reshuffled. */ 1398 /* link_layout.c set ls->file_offset = seg.file_offset + input_offset 1399 * originally. ls->vaddr similarly. After our shift, they're new. 1400 * The byte offset within the segment buffer is still input_offset. */ 1401 u8* base = bytes_for_section(x, m, img); 1402 if (!base) return NULL; 1403 u32 within_section = (u32)(r->write_vaddr - m->vaddr); 1404 /* The segment buffer's first byte corresponds to ls->input_offset==0 1405 * for the FIRST section in the segment. But that's a complication. 1406 * For simplicity we recompute the segment-relative byte offset by 1407 * (file_offset - segment.file_offset) where segment.file_offset is 1408 * unchanged. Wait: the original layout produced `ls->file_offset = 1409 * seg.file_offset + input_offset`, and we may have changed 1410 * ls->file_offset. Let's just use input_offset stored on the 1411 * LinkSection. */ 1412 u32 in_off = (u32)(ls->input_offset + within_section); 1413 if (out_msec) *out_msec = m; 1414 return base + in_off; 1415 } 1416 1417 /* Symbol-relative resolved-address S, accounting for imports. 
*/ 1418 static int sym_S(MCtx* x, LinkImage* img, LinkSymId id, u64* out_S, 1419 int* out_imp_idx) { 1420 *out_S = 0; 1421 *out_imp_idx = 0; 1422 if (id == LINK_SYM_NONE) return 0; 1423 LinkSymbol* s = sym_at(img, id); 1424 if (!s) return 0; 1425 /* Look up the import index — real imports plus internal-GOT entries 1426 * the collect_imports pass materialized for GOT-routed internal refs. */ 1427 u32 idx = 0; 1428 if (id < x->sym_to_imp_size) idx = x->sym_to_imp[id]; 1429 if (!idx && s->name != 0) { 1430 LinkSymId canon = symhash_get(&img->globals, s->name); 1431 if (canon != LINK_SYM_NONE && canon < x->sym_to_imp_size) 1432 idx = x->sym_to_imp[canon]; 1433 } 1434 if (s->imported) { 1435 *out_imp_idx = (int)idx; 1436 return 1; 1437 } 1438 /* Internal symbol that has a GOT slot — surface the import index so 1439 * the GOT_LOAD reloc paths in apply_relocs find it, but also expose 1440 * S=vaddr so non-GOT relocs (CALL26 etc.) still apply directly. */ 1441 *out_imp_idx = (int)idx; 1442 *out_S = s->vaddr; 1443 return 0; 1444 } 1445 1446 static void apply_relocs(MCtx* x, FixList* fl) { 1447 LinkImage* img = x->img; 1448 for (u32 i = 0; i < LinkRelocs_count(&img->relocs); ++i) { 1449 LinkRelocApply* r = LinkRelocs_at(&img->relocs, i); 1450 if (r->target == LINK_SYM_NONE) continue; 1451 /* File-only .debug_* section: patch the registry buffer in place (no 1452 * __got/stub/chained-fixup — debug bytes aren't loaded or slid). A 1453 * SK_SECTION target resolves to its DWARF-section-relative base; a 1454 * code/data symbol to its final (absolute) vaddr for low_pc. Mach-O 1455 * vaddrs are already absolute, so there's no extra image base. 
*/ 1456 { 1457 const LinkSection* sec = &img->sections[r->link_section_id - 1u]; 1458 if (sec->file_only) { 1459 u8* dbg = link_fileonly_bytes(img, r->link_section_id); 1460 const LinkSymbol* tgt = sym_at(img, r->target); 1461 if (dbg && tgt) 1462 link_reloc_apply(x->c, r->kind, dbg + r->offset, tgt->vaddr, 1463 r->addend, 0); 1464 continue; 1465 } 1466 } 1467 MSec* msec = NULL; 1468 u8* P_bytes = patch_ptr(x, img, r, &msec); 1469 if (!P_bytes) continue; 1470 u64 P = r->write_vaddr; 1471 1472 u64 S; 1473 int imp_idx; 1474 int is_imp = sym_S(x, img, r->target, &S, &imp_idx); 1475 1476 /* TLVP relocs route through a __thread_ptrs slot regardless of 1477 * whether the descriptor target is in-image or imported. Resolved 1478 * before the import / internal split because an imported TLV 1479 * descriptor doesn't use the __got slot (its address lives in 1480 * __thread_ptrs with its own chained bind). */ 1481 if (reloc_kind_is_tlvp(x->c, r->kind)) { 1482 u32 tlv_idx = 1483 (r->target < x->sym_to_tlv_size) ? x->sym_to_tlv[r->target] : 0u; 1484 if (!tlv_idx) 1485 compiler_panic(x->c, SRCLOC_NONE, 1486 "link_macho: TLVP reloc has no __thread_ptrs slot"); 1487 u64 slot_v = x->tlv_ptrs_vaddr + (tlv_idx - 1u) * MZ_TLVP_SIZE; 1488 link_reloc_apply(x->c, r->kind, P_bytes, slot_v, r->addend, P); 1489 continue; 1490 } 1491 1492 if (is_imp) { 1493 MachImp* mi = (imp_idx > 0) ? 
&x->imports[imp_idx - 1] : NULL; 1494 if (reloc_kind_is_branch(x->c, r->kind)) { 1495 if (!mi || !mi->stub_idx) 1496 compiler_panic(x->c, SRCLOC_NONE, 1497 "link_macho: import has no stub for branch"); 1498 u64 stub_v = x->stubs_vaddr + (mi->stub_idx - 1u) * x->macho->stub_size; 1499 link_reloc_apply(x->c, r->kind, P_bytes, stub_v, r->addend, P); 1500 continue; 1501 } 1502 if (reloc_kind_is_got_load(x->c, r->kind)) { 1503 if (!mi) 1504 compiler_panic(x->c, SRCLOC_NONE, 1505 "link_macho: GOT reloc for unknown import"); 1506 u64 got_v = x->got_vaddr + (mi->got_idx - 1u) * MZ_GOT_SIZE; 1507 link_reloc_apply(x->c, r->kind, P_bytes, got_v, r->addend, P); 1508 continue; 1509 } 1510 if (reloc_kind_is_direct_page(x->c, r->kind)) { 1511 /* Direct page/lo12 against an import: route through __got. */ 1512 if (!mi) 1513 compiler_panic(x->c, SRCLOC_NONE, 1514 "link_macho: PAGE/LO12 against unknown import"); 1515 u64 got_v = x->got_vaddr + (mi->got_idx - 1u) * MZ_GOT_SIZE; 1516 link_reloc_apply(x->c, r->kind, P_bytes, got_v, r->addend, P); 1517 continue; 1518 } 1519 if (r->kind == R_ABS64) { 1520 /* Direct 8-byte absolute against an import: bind the slot. */ 1521 wr_u64_le(P_bytes, 0); 1522 FixSite fs = {(u8)msec->segidx, 1, {0}, (u32)imp_idx, P, 0}; 1523 fix_push(fl, &fs); 1524 continue; 1525 } 1526 compiler_panic(x->c, SRCLOC_NONE, 1527 "link_macho: unhandled reloc kind %u against imported " 1528 "symbol", 1529 (u32)r->kind); 1530 } 1531 1532 /* Internal relocs. */ 1533 if (r->kind == R_ABS64) { 1534 /* Special case: ABS64 reloc inside a TLV descriptor record 1535 * (__thread_vars section) targeting in-image TLS storage. This 1536 * is the descriptor's word-2 "offset" field — dyld interprets it 1537 * as the per-thread offset of the storage within the TLS image, 1538 * NOT as an absolute address. 
Apple's ld writes the literal 1539 * offset and emits no chained-fixup entry; replicate that so the 1540 * chain skips over this slot (chained_fixups already does the 1541 * right thing: no fixsite -> no chain link). */ 1542 if (msec && (msec->flags & SECTION_TYPE) == S_THREAD_LOCAL_VARIABLES && 1543 x->has_tls_image) { 1544 u64 offset = (S + (u64)r->addend) - x->tls_image_vaddr; 1545 wr_u64_le(P_bytes, offset); 1546 continue; 1547 } 1548 /* Rebase site. */ 1549 wr_u64_le(P_bytes, S + (u64)r->addend); 1550 FixSite fs = {(u8)msec->segidx, 0, {0}, 0, P, S + (u64)r->addend}; 1551 fix_push(fl, &fs); 1552 continue; 1553 } 1554 /* Internal symbol routed through __got (clang emits GOT_LOAD_PAGE21 1555 * for any extern global, even if the def is in-image). imp_idx 1556 * was populated by collect_imports' internal-GOT pass; redirect 1557 * the page/lo12 reloc to the GOT slot's vaddr. */ 1558 if (imp_idx > 0 && reloc_kind_is_got_load(x->c, r->kind)) { 1559 MachImp* mi = &x->imports[imp_idx - 1]; 1560 u64 got_v = x->got_vaddr + (mi->got_idx - 1u) * MZ_GOT_SIZE; 1561 link_reloc_apply(x->c, r->kind, P_bytes, got_v, r->addend, P); 1562 continue; 1563 } 1564 /* Generic apply. */ 1565 link_reloc_apply(x->c, r->kind, P_bytes, S, r->addend, P); 1566 } 1567 1568 /* Per-slot chained fixup. Real imports → bind (dyld resolves at 1569 * load). Internal GOT entries → rebase pointing at the symbol's 1570 * image-relative vaddr; a target vaddr of 0 (weak undef → NULL) gets 1571 * no fixup, just a literal zero slot — chained fixups treat 0 as a 1572 * gap and won't disturb it. */ 1573 for (u32 i = 0; i < x->nimports; ++i) { 1574 MachImp* mi = &x->imports[i]; 1575 u64 slot_v = x->got_vaddr + (mi->got_idx - 1u) * MZ_GOT_SIZE; 1576 if (mi->internal) { 1577 /* Re-read the symbol's final vaddr now that shift_sections has 1578 * rebased every defined symbol into the Mach-O image layout 1579 * (collect_imports snapshotted too early). 
*/ 1580 LinkSymbol* s = sym_at(img, mi->sym); 1581 u64 tgt_v = s ? s->vaddr : 0; 1582 u8* slot = x->got_bytes + (mi->got_idx - 1u) * MZ_GOT_SIZE; 1583 wr_u64_le(slot, tgt_v); 1584 if (tgt_v == 0) continue; /* weak-undef → NULL */ 1585 FixSite fs = {2u, 0, {0}, 0, slot_v, tgt_v}; 1586 fix_push(fl, &fs); 1587 } else { 1588 /* clear slot bytes (already zero) — dyld writes via chain */ 1589 FixSite fs = {2u, 1, {0}, i + 1u, slot_v, 0}; 1590 fix_push(fl, &fs); 1591 } 1592 } 1593 1594 /* Per-slot TLV pointer fixups. Mirror of the __got loop above: each 1595 * __thread_ptrs slot points at the descriptor record. When the 1596 * descriptor is in-image (internal) we REBASE to its final vaddr; when 1597 * it lives in a dylib we BIND through the descriptor's MachImp. The 1598 * slot itself lives in __DATA (segidx=3), distinct from __got's 1599 * __DATA_CONST (segidx=2). */ 1600 for (u32 i = 0; i < x->ntlv; ++i) { 1601 MachTlv* ts = &x->tlv_slots[i]; 1602 u64 slot_v = x->tlv_ptrs_vaddr + (ts->tlv_idx - 1u) * MZ_TLVP_SIZE; 1603 u8* slot = x->tlv_ptrs_bytes + (ts->tlv_idx - 1u) * MZ_TLVP_SIZE; 1604 if (ts->imported) { 1605 if (!ts->import_idx) 1606 compiler_panic(x->c, SRCLOC_NONE, 1607 "link_macho: imported TLV without matching import slot"); 1608 wr_u64_le(slot, 0); 1609 FixSite fs = {3u, 1, {0}, ts->import_idx, slot_v, 0}; 1610 fix_push(fl, &fs); 1611 } else { 1612 LinkSymbol* s = sym_at(img, ts->sym); 1613 u64 tgt_v = s ? s->vaddr : 0; 1614 wr_u64_le(slot, tgt_v); 1615 if (tgt_v == 0) continue; /* weak-undef descriptor → NULL */ 1616 FixSite fs = {3u, 0, {0}, 0, slot_v, tgt_v}; 1617 fix_push(fl, &fs); 1618 } 1619 } 1620 } 1621 1622 /* ---- chained fixups blob assembler ---- 1623 * 1624 * For each segment that has fixups, build a dyld_chained_starts_in_segment 1625 * with one chain per page (MZ_PAGE). Within a page, sort sites by 1626 * offset, encode each as DYLD_CHAINED_PTR_64, and link via the `next` 1627 * field (4-byte units, 0 = end of chain). 
1628 */ 1629 1630 typedef struct PageChain { 1631 u32 first_offset_in_page; /* relative to page start */ 1632 u32 nsites; 1633 u32 first_site_idx; /* into a per-segment site array */ 1634 } PageChain; 1635 1636 static int site_cmp_by_vaddr(const void* a, const void* b) { 1637 const FixSite* x = a; 1638 const FixSite* y = b; 1639 if (x->vaddr < y->vaddr) return -1; 1640 if (x->vaddr > y->vaddr) return 1; 1641 return 0; 1642 } 1643 1644 /* tiny insertion sort to avoid pulling qsort */ 1645 static void sort_sites(FixSite* a, u32 n) { 1646 for (u32 i = 1; i < n; ++i) { 1647 FixSite tmp = a[i]; 1648 u32 j = i; 1649 while (j > 0 && site_cmp_by_vaddr(&a[j - 1], &tmp) > 0) { 1650 a[j] = a[j - 1]; 1651 --j; 1652 } 1653 a[j] = tmp; 1654 } 1655 } 1656 1657 static void emit_pointer(u8* slot, int is_bind, u32 ord_or_target_lo, 1658 u32 high_or_target_hi, u32 next4) { 1659 /* DYLD_CHAINED_PTR_64: 1660 * bind : ordinal:24, addend:8, reserved:19, next:12, bind:1=1 1661 * rebase: target:36 (vmaddr), high8:8, reserved:7, next:12, bind:1=0 1662 */ 1663 u64 v = 0; 1664 if (is_bind) { 1665 u64 ordinal = (u64)ord_or_target_lo & 0xffffffull; /* 24 bits */ 1666 u64 addend = 0; 1667 u64 next = (u64)next4 & 0xfffull; 1668 v = ordinal | (addend << 24) | (0ull /* reserved */ << 32) | (next << 51) | 1669 ((u64)1 << 63); 1670 } else { 1671 /* rebase: target is full vmaddr; we get hi:lo split. 
*/ 1672 u64 target = ((u64)high_or_target_hi << 32) | (u64)ord_or_target_lo; 1673 target &= ((u64)1 << 36) - 1u; /* 36 bits */ 1674 u64 high8 = 0; 1675 u64 next = (u64)next4 & 0xfffull; 1676 v = target | (high8 << 36) | (0ull /* reserved */ << 44) | (next << 51) | 1677 ((u64)0 << 63); 1678 } 1679 wr_u64_le(slot, v); 1680 } 1681 1682 static void build_chained_fixups(MCtx* x, FixList* fl) { 1683 Heap* h = x->h; 1684 MByte* out = &x->chained_fixups; 1685 mbuf_init(out, h); 1686 1687 /* Header (32 B): 1688 * uint32 fixups_version (=0) 1689 * uint32 starts_offset 1690 * uint32 imports_offset 1691 * uint32 symbols_offset 1692 * uint32 imports_count 1693 * uint32 imports_format (=1) 1694 * uint32 symbols_format (=0) 1695 */ 1696 u32 hdr_pos = mbuf_u32(out, 0); /* fixups_version */ 1697 (void)hdr_pos; 1698 u32 starts_offset_pos = mbuf_u32(out, 0); 1699 u32 imports_offset_pos = mbuf_u32(out, 0); 1700 u32 symbols_offset_pos = mbuf_u32(out, 0); 1701 mbuf_u32(out, x->nimports_real); 1702 mbuf_u32(out, DYLD_CHAINED_IMPORT); 1703 mbuf_u32(out, 0); /* symbols uncompressed */ 1704 /* dyld expects 8-byte alignment of the starts table. */ 1705 mbuf_align(out, 4); 1706 1707 /* dyld_chained_starts_in_image: 1708 * uint32 seg_count 1709 * uint32 seg_info_offset[seg_count] 1710 * 1711 * seg_count must equal mach-O segment count (5). 1712 * seg_info_offset[i] = 0 means no fixups in that segment. 1713 */ 1714 u32 starts_off = out->len; 1715 wr_u32_le(out->data + starts_offset_pos, starts_off); 1716 mbuf_u32(out, x->nsegs); 1717 /* Reserve seg_info_offset[]. */ 1718 u32 seg_info_offsets_pos = out->len; 1719 for (u32 i = 0; i < x->nsegs; ++i) mbuf_u32(out, 0); 1720 1721 /* Sort fixsites by vaddr globally. */ 1722 sort_sites(fl->a, fl->n); 1723 1724 /* Per segment, emit dyld_chained_starts_in_segment when fixups present. 
*/ 1725 for (u32 si = 0; si < x->nsegs; ++si) { 1726 /* count sites in this segment */ 1727 u32 first = (u32)-1, count = 0; 1728 for (u32 k = 0; k < fl->n; ++k) { 1729 if (fl->a[k].segidx == si) { 1730 if (first == (u32)-1) first = k; 1731 ++count; 1732 } 1733 } 1734 if (!count) continue; 1735 /* Page-align this struct to 4. */ 1736 mbuf_align(out, 4); 1737 u32 sis_off = out->len; 1738 /* Patch seg_info_offset[si] to (sis_off - starts_off). */ 1739 wr_u32_le(out->data + seg_info_offsets_pos + si * 4u, sis_off - starts_off); 1740 1741 /* Compute page count for this segment. */ 1742 u64 seg_va = x->segs[si].vmaddr; 1743 u64 seg_size = x->segs[si].vmsize ? x->segs[si].vmsize : MZ_PAGE; 1744 u32 page_count = (u32)((seg_size + MZ_PAGE - 1u) / MZ_PAGE); 1745 1746 /* dyld_chained_starts_in_segment: 1747 * uint32 size 1748 * uint16 page_size 1749 * uint16 pointer_format 1750 * uint64 segment_offset (offset of segment's first byte from 1751 * mach_header) 1752 * uint32 max_valid_pointer (0 for 64-bit) 1753 * uint16 page_count 1754 * uint16 page_start[page_count] (0xFFFF = no fixups in page) 1755 */ 1756 u32 sis_size_pos = mbuf_u32(out, 0); /* fill below */ 1757 mbuf_u16(out, (u16)MZ_PAGE); 1758 mbuf_u16(out, (u16)DYLD_CHAINED_PTR_64); 1759 mbuf_u64(out, (u64)x->segs[si].fileoff); /* segment file offset */ 1760 mbuf_u32(out, 0); 1761 mbuf_u16(out, (u16)page_count); 1762 u32 page_starts_pos = out->len; 1763 for (u32 p = 0; p < page_count; ++p) mbuf_u16(out, 0xFFFFu); 1764 /* size includes the page_start array */ 1765 u32 sis_size = out->len - sis_size_pos + 4u; 1766 /* Hmm, the `size` field is the size of *this* struct. We measure 1767 * from sis_off through end of page_starts. */ 1768 sis_size = out->len - sis_off; 1769 wr_u32_le(out->data + sis_size_pos, sis_size); 1770 1771 /* Now: walk sites in this segment, group by page, write 1772 * page_start[i] = offset_in_page of first site, and chain via 1773 * next-field in the actual segment's bytes. 
*/ 1774 /* Sites are sorted globally; collect contiguous run for this seg. */ 1775 u32 cur = first; 1776 while (cur < first + count) { 1777 u32 page_idx = (u32)((fl->a[cur].vaddr - seg_va) / MZ_PAGE); 1778 u32 offset_in_page = (u32)((fl->a[cur].vaddr - seg_va) % MZ_PAGE); 1779 wr_u16_le(out->data + page_starts_pos + page_idx * 2u, 1780 (u16)offset_in_page); 1781 /* Walk this page's chain. */ 1782 u32 next_in_page = cur; 1783 while (next_in_page + 1 < first + count) { 1784 u64 nv = fl->a[next_in_page + 1].vaddr; 1785 if (nv >= seg_va + (u64)(page_idx + 1) * MZ_PAGE) break; 1786 ++next_in_page; 1787 } 1788 /* Encode chain pointers. */ 1789 for (u32 k = cur; k <= next_in_page; ++k) { 1790 FixSite* s = &fl->a[k]; 1791 u32 next4 = 0; 1792 if (k < next_in_page) { 1793 u64 dist = fl->a[k + 1].vaddr - s->vaddr; 1794 next4 = (u32)(dist / 4u); 1795 } 1796 /* Find segment bytes. Synthetic pointer sections have private 1797 * buffers; file-backed sections can live in any segment, including 1798 * pointer-bearing read-only constants in __TEXT. */ 1799 u8* slot = NULL; 1800 if (s->segidx == 2 && x->got_bytes && s->vaddr >= x->got_vaddr && 1801 s->vaddr < x->got_vaddr + x->got_size) { 1802 /* __DATA_CONST: __got slot. 
*/ 1803 slot = x->got_bytes + (s->vaddr - x->got_vaddr); 1804 } else if (x->tlv_ptrs_bytes && s->vaddr >= x->tlv_ptrs_vaddr && 1805 s->vaddr < x->tlv_ptrs_vaddr + x->tlv_ptrs_size) { 1806 slot = x->tlv_ptrs_bytes + (s->vaddr - x->tlv_ptrs_vaddr); 1807 } else { 1808 MSec* m = msec_for_vaddr(x, s->vaddr); 1809 if (m && m->link_sec_id) { 1810 u8* base = bytes_for_section(x, m, x->img); 1811 if (base) { 1812 LinkSection* ls = &x->img->sections[m->link_sec_id - 1u]; 1813 u32 in_off = (u32)(ls->input_offset + (s->vaddr - m->vaddr)); 1814 slot = base + in_off; 1815 } 1816 } 1817 } 1818 if (!slot) 1819 compiler_panic(x->c, SRCLOC_NONE, 1820 "link_macho: chained-fixup slot for vaddr 0x%llx not " 1821 "in any segment buffer", 1822 (unsigned long long)s->vaddr); 1823 if (s->is_bind) { 1824 /* ordinal is import index (1-based) - 1; chained-import format 1825 * uses 0-based. */ 1826 if (s->import_idx == 0 || s->import_idx > x->nimports_real) { 1827 compiler_panic( 1828 x->c, SRCLOC_NONE, 1829 "link_macho: chained bind for vaddr 0x%llx uses import index " 1830 "%u outside real import table size %u", 1831 (unsigned long long)s->vaddr, (unsigned)s->import_idx, 1832 (unsigned)x->nimports_real); 1833 } 1834 u32 ord = s->import_idx - 1u; 1835 emit_pointer(slot, 1, ord, 0, next4); 1836 } else { 1837 /* rebase target = unslid vmaddr */ 1838 u32 lo = (u32)(s->rebase_target & 0xffffffffu); 1839 u32 hi = (u32)(s->rebase_target >> 32); 1840 emit_pointer(slot, 0, lo, hi, next4); 1841 } 1842 } 1843 cur = next_in_page + 1u; 1844 } 1845 } 1846 1847 /* Imports table: one dyld_chained_import (4B) per real import. 1848 * Layout: lib_ordinal:8, weak:1, name_offset:23. Internal-GOT 1849 * entries are not bound by dyld so they're omitted here. */ 1850 mbuf_align(out, 4); 1851 u32 imports_off = out->len; 1852 wr_u32_le(out->data + imports_offset_pos, imports_off); 1853 /* We need to first build the symbol pool to know name offsets. 
*/ 1854 u32 symbols_off = imports_off + x->nimports_real * 4u; 1855 /* Reserve imports area. */ 1856 for (u32 i = 0; i < x->nimports_real; ++i) mbuf_u32(out, 0); 1857 /* Emit symbols (each NUL-terminated). Set name_offset on each import. */ 1858 wr_u32_le(out->data + symbols_offset_pos, out->len); 1859 /* Leading NUL for offset 0. */ 1860 mbuf_u8(out, 0); 1861 for (u32 i = 0; i < x->nimports_real; ++i) { 1862 MachImp* mi = &x->imports[i]; 1863 Slice nm_s = pool_slice(x->c->global, mi->name); 1864 const char* nm = nm_s.s; 1865 size_t nl = nm_s.len; 1866 if (!nm || !nl || mi->dylib_ord == 0 || mi->dylib_ord > x->ndylibs) { 1867 compiler_panic(x->c, SRCLOC_NONE, 1868 "link_macho: invalid chained import %u " 1869 "(name=%u dylib_ord=%u ndylibs=%u)", 1870 (unsigned)i, (unsigned)mi->name, (unsigned)mi->dylib_ord, 1871 (unsigned)x->ndylibs); 1872 } 1873 u32 off = out->len - symbols_off; 1874 mbuf_str(out, nm, (u32)nl); 1875 /* Patch the import slot. */ 1876 u32 packed = ((u32)mi->dylib_ord & 0xffu) | 1877 ((u32)(mi->weak ? 1u : 0u) << 8) | ((off & 0x7fffffu) << 9); 1878 wr_u32_le(out->data + imports_off + i * 4u, packed); 1879 } 1880 (void)symbols_off; 1881 } 1882 1883 /* ---- exports trie ---- * 1884 * 1885 * Minimal trie: one node carrying a single export "_main" with the 1886 * entry symbol's VA-relative offset. This is enough for dyld; binaries 1887 * with a real exports trie include more data but we don't need it. */ 1888 1889 static void uleb128(MByte* out, u64 v) { 1890 do { 1891 u8 byte = v & 0x7fu; 1892 v >>= 7; 1893 if (v) byte |= 0x80u; 1894 mbuf_u8(out, byte); 1895 } while (v); 1896 } 1897 1898 static u32 uleb128_size(u64 v) { 1899 u32 n = 0; 1900 do { 1901 ++n; 1902 v >>= 7; 1903 } while (v); 1904 return n; 1905 } 1906 1907 static void build_exports_trie(MCtx* x) { 1908 /* Format: 1909 * node = (terminal_size: uleb128) (export_data)? 
(children_count: u8) 1910 * (children: [(label NUL) (offset uleb128)]*) 1911 * 1912 * We emit a trie with a single leaf at "_main" with offset 1913 * entry_offset (from __TEXT base). 1914 * 1915 * Easiest: single root node with children_count=1, child label = "_main", 1916 * child offset points to a leaf node. 1917 */ 1918 MByte* out = &x->exports_trie; 1919 mbuf_init(out, x->h); 1920 1921 LinkImage* img = x->img; 1922 LinkSymbol* esym = sym_at(img, img->entry_sym); 1923 if (!esym || !esym->defined) { 1924 /* No entry — emit a single empty terminal trie. */ 1925 mbuf_u8(out, 0); /* terminal_size 0 */ 1926 mbuf_u8(out, 0); /* children 0 */ 1927 return; 1928 } 1929 Slice nm_s = pool_slice(x->c->global, esym->name); 1930 const char* nm = nm_s.s; 1931 size_t nl = nm_s.len; 1932 if (!nm || nl == 0) { 1933 mbuf_u8(out, 0); 1934 mbuf_u8(out, 0); 1935 return; 1936 } 1937 /* leaf node: terminal_size = sizeof(uleb(flags)+uleb(offset)) 1938 * flags = 0 (regular export); offset = vaddr - __TEXT.vmaddr */ 1939 u64 entry_off = esym->vaddr - x->text_vaddr; 1940 1941 /* Compute leaf-node bytes length: uleb(flags=0) + uleb(offset). */ 1942 u32 flags = 0; 1943 u32 leaf_payload_len = uleb128_size(flags) + uleb128_size(entry_off); 1944 /* Layout: root node first, then leaf. The root node's child entry 1945 * carries the absolute offset of the leaf within the trie. */ 1946 1947 /* root: terminal_size=0, children_count=1, "_main"\0, child_offset= 1948 * (leaf-position uleb). 1949 * 1950 * The child offset's own ULEB width contributes to the leaf position, so 1951 * solve for the fixed point before emitting. 
*/ 1952 u32 leaf_pos = 2u + (u32)nl + 1u + 1u; 1953 for (;;) { 1954 u32 n = uleb128_size(leaf_pos); 1955 u32 next = 2u + (u32)nl + 1u + n; 1956 if (next == leaf_pos) break; 1957 leaf_pos = next; 1958 } 1959 1960 mbuf_u8(out, 0); /* root terminal size */ 1961 mbuf_u8(out, 1); /* children_count */ 1962 mbuf_str(out, nm, (u32)nl); 1963 uleb128(out, leaf_pos); 1964 /* leaf node */ 1965 if (out->len != leaf_pos) 1966 compiler_panic(x->c, SRCLOC_NONE, 1967 "macho: exports trie leaf offset mismatch"); 1968 /* terminal_size byte then payload */ 1969 mbuf_u8(out, (u8)leaf_payload_len); 1970 uleb128(out, flags); 1971 uleb128(out, entry_off); 1972 mbuf_u8(out, 0); /* children_count */ 1973 /* Pad trie to 8 bytes. */ 1974 mbuf_align(out, 8); 1975 } 1976 1977 /* ---- symtab + strtab + indirect symtab ---- */ 1978 1979 typedef struct NlistRec { 1980 u32 strx; 1981 u8 type; 1982 u8 sect; /* 1-based section index (Mach-O) */ 1983 u16 desc; 1984 u64 value; 1985 } NlistRec; 1986 1987 static void build_symtab(MCtx* x) { 1988 Heap* h = x->h; 1989 LinkImage* img = x->img; 1990 mbuf_init(&x->symtab, h); 1991 mbuf_init(&x->strtab, h); 1992 mbuf_init(&x->indirect, h); 1993 1994 /* strtab leading NUL */ 1995 mbuf_u8(&x->strtab, 0); 1996 1997 /* Approach: 1998 * - Add one local nlist per defined LinkSymbol (locals + non-imported 1999 * externs) — but to keep things simple we only emit external defined 2000 * syms (mainly _main), plus all imports as N_UNDF|N_EXT. 2001 * 2002 * Mach-O dyld requires the symtab order: locals first, ext-defs next, 2003 * undef last (matched by LC_DYSYMTAB ranges). 2004 */ 2005 2006 /* Pass A: defined externals. */ 2007 u32 n_local = 0; 2008 u32 n_extdef = 0; 2009 u32 n_undef = 0; 2010 2011 /* For now we emit only externals + imports. No locals. 
*/ 2012 /* extdef pass */ 2013 for (u32 i = 0; i < LinkSyms_count(&img->syms); ++i) { 2014 LinkSymbol* s = LinkSyms_at(&img->syms, i); 2015 if (!s->defined) continue; 2016 if (s->bind != SB_GLOBAL && s->bind != SB_WEAK) continue; 2017 if (s->name == 0) continue; 2018 if (s->kind == SK_ABS) continue; /* skip abs externs */ 2019 /* Locate which OutSec contains this vaddr to figure out n_sect. 2020 * n_sect is the 1-based index into the flat section_64 table the 2021 * file actually contains (post-coalesce), matching what we emit 2022 * in emit_load_command_segment. */ 2023 u8 n_sect = 0; 2024 /* Prefer the section whose half-open [vaddr, vaddr+size) range contains 2025 * the symbol. This must win over the end-boundary fallback below: when 2026 * two sections abut (A ends exactly where B begins), a symbol at the 2027 * boundary is the *start* of B, not the end of A. */ 2028 for (u32 k = 0; k < x->nouts; ++k) { 2029 OutSec* o = &x->outs[k]; 2030 if (s->vaddr >= o->vaddr && s->vaddr < o->vaddr + o->size) { 2031 n_sect = (u8)(k + 1u); 2032 break; 2033 } 2034 } 2035 /* Fallback: a symbol sitting exactly one-past-the-end of a section with 2036 * no following section covering it (e.g. an end-of-section marker) is 2037 * attributed to the section that ends there. */ 2038 if (n_sect == 0) { 2039 for (u32 k = 0; k < x->nouts; ++k) { 2040 OutSec* o = &x->outs[k]; 2041 if (s->vaddr == o->vaddr + o->size) { 2042 n_sect = (u8)(k + 1u); 2043 break; 2044 } 2045 } 2046 } 2047 if (n_sect == 0) continue; 2048 Slice nm_s = pool_slice(x->c->global, s->name); 2049 const char* nm = nm_s.s; 2050 size_t nl = nm_s.len; 2051 u32 strx = x->strtab.len; 2052 if (nm && nl) mbuf_str(&x->strtab, nm, (u32)nl); 2053 2054 u8 t[16]; 2055 u8 nt = N_SECT | N_EXT; 2056 if (s->bind == SB_WEAK) { 2057 /* N_WEAK_DEF in n_desc (not a flag in n_type) */ 2058 } 2059 wr_u32_le(t + 0, strx); 2060 t[4] = nt; 2061 t[5] = n_sect; 2062 wr_u16_le(t + 6, s->bind == SB_WEAK ? 
N_WEAK_DEF : 0); 2063 wr_u64_le(t + 8, s->vaddr); 2064 mbuf_append(&x->symtab, t, 16); 2065 ++n_extdef; 2066 } 2067 2068 /* undef imports — real imports only. Internal-GOT entries don't get 2069 * N_UNDF nlist records since they're defined in the image. */ 2070 u32 imp_first_symtab_idx = n_extdef; 2071 for (u32 i = 0; i < x->nimports_real; ++i) { 2072 MachImp* mi = &x->imports[i]; 2073 Slice nm_s = pool_slice(x->c->global, mi->name); 2074 const char* nm = nm_s.s; 2075 size_t nl = nm_s.len; 2076 u32 strx = x->strtab.len; 2077 if (nm && nl) mbuf_str(&x->strtab, nm, (u32)nl); 2078 2079 u8 t[16]; 2080 wr_u32_le(t + 0, strx); 2081 t[4] = N_UNDF | N_EXT; 2082 t[5] = 0; 2083 /* n_desc carries dylib ordinal in high byte (REFERENCED_DYNAMICALLY etc.) 2084 */ 2085 u16 desc = (u16)(((u16)mi->dylib_ord & 0xff) << 8); 2086 if (mi->weak) desc |= N_WEAK_REF; 2087 wr_u16_le(t + 6, desc); 2088 wr_u64_le(t + 8, 0); 2089 mbuf_append(&x->symtab, t, 16); 2090 ++n_undef; 2091 } 2092 2093 /* indirect symtab: one entry per __stubs slot, then one per __got 2094 * slot. Internal-GOT slots use INDIRECT_SYMBOL_LOCAL (0x80000000) 2095 * since they have no nlist entry. */ 2096 u32 indirect_start = 0; 2097 /* Patch reserved1 of each synth OutSec. __stubs and __got are each 2098 * singleton OutSecs (synth sections never coalesce with user input), 2099 * so a sectname match identifies them unambiguously. 
*/ 2100 for (u32 i = 0; i < x->nouts; ++i) { 2101 OutSec* o = &x->outs[i]; 2102 if (slice_eq_cstr(slice_from_cstr(o->sectname), "__stubs") && o->size) { 2103 o->reserved1 = indirect_start; 2104 for (u32 k = 0; k < x->nimports; ++k) { 2105 MachImp* mi = &x->imports[k]; 2106 if (!mi->stub_idx) continue; 2107 u32 sym_idx = imp_first_symtab_idx + k; 2108 mbuf_u32(&x->indirect, sym_idx); 2109 ++indirect_start; 2110 } 2111 } 2112 } 2113 for (u32 i = 0; i < x->nouts; ++i) { 2114 OutSec* o = &x->outs[i]; 2115 if (slice_eq_cstr(slice_from_cstr(o->sectname), "__got") && o->size) { 2116 o->reserved1 = indirect_start; 2117 for (u32 k = 0; k < x->nimports; ++k) { 2118 MachImp* mi = &x->imports[k]; 2119 u32 sym_idx = mi->internal ? 0x80000000u /* INDIRECT_SYMBOL_LOCAL */ 2120 : (imp_first_symtab_idx + k); 2121 mbuf_u32(&x->indirect, sym_idx); 2122 ++indirect_start; 2123 } 2124 } 2125 } 2126 2127 x->nsyms = n_local + n_extdef + n_undef; 2128 (void)n_local; 2129 (void)imp_first_symtab_idx; 2130 } 2131 2132 /* ---- LINKEDIT layout assembly ---- 2133 * 2134 * Place blobs in the order Apple prefers: 2135 * chained_fixups, exports_trie, fn_starts, data_in_code, 2136 * symtab, indirect, strtab, codesig 2137 */ 2138 2139 static void layout_linkedit(MCtx* x) { 2140 /* LC_FUNCTION_STARTS is a ULEB128 stream terminated by a zero byte. Keep a 2141 * real empty table here so tools that rewrite LINKEDIT preserve the 2142 * canonical blob order between exports and the symbol table. */ 2143 mbuf_init(&x->fn_starts, x->h); 2144 mbuf_u8(&x->fn_starts, 0); 2145 mbuf_init(&x->data_in_code, x->h); 2146 mbuf_init(&x->codesig, x->h); 2147 2148 u64 cur = x->linkedit_fileoff; 2149 /* chained fixups */ 2150 cur = ALIGN_UP(cur, 8u); 2151 x->chained_fixups_off = (u32)cur; 2152 cur += x->chained_fixups.len; 2153 /* exports trie. Keep LINKEDIT data blobs contiguous; Apple strip rejects 2154 * padding between chained fixups and the exports trie. 
*/ 2155 x->exports_trie_off = (u32)cur; 2156 cur += x->exports_trie.len; 2157 /* function starts */ 2158 x->fn_starts_off = (u32)cur; 2159 cur += x->fn_starts.len; 2160 /* data in code */ 2161 cur = ALIGN_UP(cur, 8u); 2162 x->data_in_code_off = (u32)cur; 2163 /* symtab */ 2164 cur = ALIGN_UP(cur, 8u); 2165 x->symtab_off = (u32)cur; 2166 cur += x->symtab.len; 2167 /* indirect symtab */ 2168 cur = ALIGN_UP(cur, 4u); 2169 x->indirect_off = (u32)cur; 2170 cur += x->indirect.len; 2171 /* strtab */ 2172 cur = ALIGN_UP(cur, 8u); 2173 x->strtab_off = (u32)cur; 2174 cur += x->strtab.len; 2175 /* code signature: end-aligned to 16 */ 2176 cur = ALIGN_UP(cur, 16u); 2177 x->codesig_off = (u32)cur; 2178 2179 /* Linkedit segment file_size includes everything up to (but not yet 2180 * including) codesig. Codesig is computed below. */ 2181 u64 le_size = cur - x->linkedit_fileoff; 2182 /* Set linkedit segment size; will be increased after codesig. */ 2183 x->segs[MSEG_LINKEDIT].filesize = le_size; 2184 x->segs[MSEG_LINKEDIT].vmsize = ALIGN_UP(le_size, MZ_PAGE); 2185 if (!x->segs[MSEG_LINKEDIT].vmsize) x->segs[MSEG_LINKEDIT].vmsize = MZ_PAGE; 2186 } 2187 2188 /* ---- ad-hoc code signature (CodeDirectory + SuperBlob) ---- 2189 * 2190 * Produces a minimal embedded SuperBlob with a single CodeDirectory. 2191 * The CD is sha256-hashed over CS_PAGE_SIZE_LOG2 = 4096-byte pages of 2192 * the file (excluding the codesig itself). The kernel verifies the 2193 * CD's hash chain on exec. 
2194 * 2195 * Output format (in big-endian for SuperBlob/CodeDirectory headers): 2196 * [SuperBlob] 2197 * u32 magic (0xfade0cc0) 2198 * u32 length 2199 * u32 count (=1) 2200 * [Slot] 2201 * u32 type (=0 CSSLOT_CODEDIRECTORY) 2202 * u32 offset (=20) -- relative to start of SuperBlob 2203 * [CodeDirectory] 2204 * u32 magic (0xfade0c02) 2205 * u32 length (bytes including all hashes) 2206 * u32 version (>=0x20400 for execSeg fields) 2207 * u32 flags (=0 ad-hoc — actually flags must include 0x2 2208 * (kSecCodeSignatureAdhoc)) u32 hashOffset (offset of first slot hash) u32 2209 * identOffset (offset of identifier string) u32 nSpecialSlots (=0) u32 2210 * nCodeSlots u32 codeLimit (file bytes covered) u8 hashSize (=32) u8 2211 * hashType (=2 sha256) u8 platform (=0) u8 pageSize (=12 for 4096) u32 2212 * spare2 (=0) u32 scatterOffset (=0) u32 teamOffset (=0) u32 spare3 (=0) 2213 * u64 codeLimit64 (=0) 2214 * u64 execSegBase (=__TEXT.fileoff) 2215 * u64 execSegLimit (=__TEXT.filesize) 2216 * u64 execSegFlags (=1 main binary) 2217 * [identifier bytes "a.out\0"] 2218 * [codeslot hashes nCodeSlots * 32 B] 2219 * 2220 * Hashes computed AFTER everything else is final — including the codesig 2221 * blob's own offset in the file (the hash range stops just before 2222 * codeLimit). */ 2223 2224 static void wr_u64_be(u8* p, u64 v) { 2225 for (u32 i = 0; i < 8; ++i) p[7 - i] = (u8)(v >> (i * 8)); 2226 } 2227 2228 /* Build the codesig blob with placeholder hashes; size is precise so 2229 * file layout is final after this. 
*/ 2230 static void build_codesig_skeleton(MCtx* x, u32 code_limit, const char* ident) { 2231 u32 code_page = 1u << CS_PAGE_SIZE_LOG2; /* 4096 */ 2232 u32 nslots = (code_limit + code_page - 1u) / code_page; 2233 2234 /* CodeDirectory size: 2235 * header 88 bytes through execSegFlags 2236 * identifier (ident_len + 1) 2237 * hashes (nslots * 32) 2238 */ 2239 u32 ident_len = (u32)slice_from_cstr(ident).len + 1u; 2240 u32 cd_hdr = 88u; 2241 u32 cd_size = cd_hdr + ident_len + nslots * CS_SHA256_LEN; 2242 /* SuperBlob: 12 hdr + 8 slot + cd. */ 2243 u32 sb_size = 12u + 8u + cd_size; 2244 2245 MByte* out = &x->codesig; 2246 mbuf_init(out, x->h); 2247 mbuf_reserve(out, sb_size); 2248 memset(out->data, 0, sb_size); 2249 out->len = sb_size; 2250 2251 u8* sb = out->data; 2252 /* SuperBlob header */ 2253 wr_u32_be(sb + 0, CS_MAGIC_EMBEDDED_SIGNATURE); 2254 wr_u32_be(sb + 4, sb_size); 2255 wr_u32_be(sb + 8, 1); /* count */ 2256 /* slot 0: type=CSSLOT_CODEDIRECTORY, offset=20 */ 2257 wr_u32_be(sb + 12, CSSLOT_CODEDIRECTORY); 2258 wr_u32_be(sb + 16, 20u); 2259 2260 /* CodeDirectory */ 2261 u8* cd = sb + 20; 2262 wr_u32_be(cd + 0, CS_MAGIC_CODEDIRECTORY); 2263 wr_u32_be(cd + 4, cd_size); 2264 wr_u32_be(cd + 8, 0x20400u); /* version with execSeg */ 2265 wr_u32_be(cd + 12, 0x2u); /* flags = adhoc */ 2266 wr_u32_be(cd + 16, cd_hdr + ident_len); /* hashOffset */ 2267 wr_u32_be(cd + 20, cd_hdr); /* identOffset */ 2268 wr_u32_be(cd + 24, 0); /* nSpecialSlots */ 2269 wr_u32_be(cd + 28, nslots); 2270 wr_u32_be(cd + 32, code_limit); 2271 cd[36] = (u8)CS_SHA256_LEN; 2272 cd[37] = (u8)CS_HASHTYPE_SHA256; 2273 cd[38] = 0; /* platform */ 2274 cd[39] = (u8)CS_PAGE_SIZE_LOG2; 2275 wr_u32_be(cd + 40, 0); /* spare2 */ 2276 wr_u32_be(cd + 44, 0); /* scatterOffset */ 2277 wr_u32_be(cd + 48, 0); /* teamOffset */ 2278 wr_u32_be(cd + 52, 0); /* spare3 */ 2279 wr_u64_be(cd + 56, 0); /* codeLimit64 */ 2280 wr_u64_be(cd + 64, x->segs[1].fileoff); /* execSegBase */ 2281 wr_u64_be(cd + 72, 
x->segs[1].filesize); /* execSegLimit */ 2282 wr_u64_be(cd + 80, CS_EXECSEG_MAIN_BINARY); 2283 2284 /* identifier */ 2285 memcpy(cd + cd_hdr, ident, ident_len); 2286 2287 x->codesig_size = sb_size; 2288 } 2289 2290 static void compute_codesig(MCtx* x, const u8* full_file, u32 file_len_excl_cs, 2291 const char* ident) { 2292 u32 code_page = 1u << CS_PAGE_SIZE_LOG2; 2293 u32 nslots = (file_len_excl_cs + code_page - 1u) / code_page; 2294 u32 ident_len = (u32)slice_from_cstr(ident).len + 1u; 2295 u8* cd = x->codesig.data + 12 + 8; 2296 u8* hashes = cd + 88u + ident_len; 2297 2298 for (u32 i = 0; i < nslots; ++i) { 2299 u32 off = i * code_page; 2300 u32 take = (off + code_page <= file_len_excl_cs) ? code_page 2301 : (file_len_excl_cs - off); 2302 Sha256 s; 2303 sha256_init(&s); 2304 sha256_update(&s, full_file + off, take); 2305 /* Pages shorter than code_page get the standard SHA over the 2306 * partial bytes — Apple's tools do exactly this (no zero padding 2307 * on the tail). */ 2308 sha256_final(&s, hashes + i * CS_SHA256_LEN); 2309 } 2310 } 2311 2312 /* ---- final emission ---- */ 2313 2314 static void emit_load_command_segment(MByte* lc, MCtx* x, u32 segidx) { 2315 MSeg* sg = &x->segs[segidx]; 2316 u32 seg_cmd_size = MACHO_SEGCMD64_SIZE + sg->nouts * MACHO_SECT64_SIZE; 2317 u32 base = lc->len; 2318 mbuf_u32(lc, LC_SEGMENT_64); 2319 mbuf_u32(lc, seg_cmd_size); 2320 /* segname: 16 bytes zero-padded */ 2321 u8 nm[16]; 2322 memset(nm, 0, 16); 2323 size_t nlen = slice_from_cstr(sg->name).len; 2324 if (nlen > 16) nlen = 16; 2325 memcpy(nm, sg->name, nlen); 2326 mbuf_append(lc, nm, 16); 2327 mbuf_u64(lc, sg->vmaddr); 2328 mbuf_u64(lc, sg->vmsize); 2329 mbuf_u64(lc, sg->fileoff); 2330 mbuf_u64(lc, sg->filesize); 2331 mbuf_u32(lc, sg->maxprot); 2332 mbuf_u32(lc, sg->initprot); 2333 mbuf_u32(lc, sg->nouts); 2334 mbuf_u32(lc, 0); /* flags */ 2335 2336 for (u32 j = 0; j < sg->nouts; ++j) { 2337 OutSec* o = &x->outs[sg->first_out + j]; 2338 u8 sname[16], gname[16]; 2339 
memset(sname, 0, 16); 2340 memset(gname, 0, 16); 2341 size_t sl = o->sectname ? slice_from_cstr(o->sectname).len : 0; 2342 if (sl > 16) sl = 16; 2343 if (sl) memcpy(sname, o->sectname, sl); 2344 size_t gl = slice_from_cstr(sg->name).len; /* segname must match */ 2345 if (gl > 16) gl = 16; 2346 memcpy(gname, sg->name, gl); 2347 mbuf_append(lc, sname, 16); 2348 mbuf_append(lc, gname, 16); 2349 mbuf_u64(lc, o->vaddr); 2350 mbuf_u64(lc, o->size); 2351 mbuf_u32(lc, (u32)o->file_offset); 2352 /* align is power of 2; encode as log2. */ 2353 u32 a = o->align ? o->align : 1u; 2354 u32 al = 0; 2355 while ((1u << al) < a) ++al; 2356 mbuf_u32(lc, al); 2357 mbuf_u32(lc, 0); /* reloff */ 2358 mbuf_u32(lc, 0); /* nreloc */ 2359 mbuf_u32(lc, o->flags); 2360 mbuf_u32(lc, o->reserved1); 2361 mbuf_u32(lc, o->reserved2); 2362 mbuf_u32(lc, 0); /* reserved3 */ 2363 } 2364 (void)base; 2365 } 2366 2367 void link_emit_macho(LinkImage* img, Writer* w); 2368 2369 void link_emit_macho(LinkImage* img, Writer* w) { 2370 MCtx x; 2371 memset(&x, 0, sizeof(x)); 2372 x.img = img; 2373 x.c = img->c; 2374 x.h = img->heap; 2375 x.w = w; 2376 x.linker = img->linker; 2377 x.link_arch = link_arch_desc_for(img->c); 2378 { 2379 const ObjFormatImpl* fmt = obj_format_lookup(KIT_OBJ_MACHO); 2380 x.macho = 2381 fmt && fmt->macho_arch ? fmt->macho_arch(img->c->target.arch) : NULL; 2382 } 2383 2384 if (!x.link_arch || !x.macho || !x.macho->cputype || !x.macho->emit_stub || 2385 !x.macho->stub_size) 2386 compiler_panic(x.c, SRCLOC_NONE, 2387 "link_emit_macho: no Mach-O descriptor for target"); 2388 if (img->entry_sym == LINK_SYM_NONE) 2389 compiler_panic(x.c, SRCLOC_NONE, "link_emit_macho: no resolved entry"); 2390 2391 collect_imports(&x); 2392 collect_tlv(&x); 2393 plan_layout(&x); 2394 shift_sections(&x); 2395 2396 /* entry offset within __TEXT segment. 
*/ 2397 LinkSymbol* esym = sym_at(img, img->entry_sym); 2398 if (!esym || !esym->defined) 2399 compiler_panic(x.c, SRCLOC_NONE, "link_emit_macho: entry symbol undefined"); 2400 if (esym->vaddr < x.text_vaddr) 2401 compiler_panic(x.c, SRCLOC_NONE, 2402 "link_emit_macho: entry symbol below __TEXT base"); 2403 x.entry_offset = (u32)(esym->vaddr - x.text_vaddr); 2404 2405 /* image-id UUID. */ 2406 u8 image_id[LINK_IMAGE_ID_BYTES]; 2407 link_image_id_compute(img, image_id); 2408 memcpy(x.uuid, image_id, 16); 2409 2410 /* Reloc apply collects fixsites. */ 2411 FixList fl; 2412 fix_init(&fl, x.h); 2413 apply_relocs(&x, &fl); 2414 2415 /* Build LINKEDIT contents. */ 2416 build_chained_fixups(&x, &fl); 2417 build_exports_trie(&x); 2418 build_symtab(&x); 2419 layout_linkedit(&x); 2420 2421 /* Compute code-sig skeleton sized to file bytes excluding sig. */ 2422 u32 code_limit = x.codesig_off; 2423 build_codesig_skeleton(&x, code_limit, "a.out"); 2424 /* Now extend linkedit segment to include codesig. */ 2425 u64 le_size = (u64)x.codesig_off + (u64)x.codesig_size - x.linkedit_fileoff; 2426 x.segs[MSEG_LINKEDIT].filesize = le_size; 2427 x.segs[MSEG_LINKEDIT].vmsize = ALIGN_UP(le_size, MZ_PAGE); 2428 2429 /* Build load commands buffer. */ 2430 MByte lc; 2431 mbuf_init(&lc, x.h); 2432 2433 /* LC_SEGMENT_64 for each segment with sections (and PAGEZERO/LINKEDIT). 
*/ 2434 emit_load_command_segment(&lc, &x, 0); /* PAGEZERO */ 2435 emit_load_command_segment(&lc, &x, 1); /* TEXT */ 2436 if (x.segs[2].nsects > 0) 2437 emit_load_command_segment(&lc, &x, 2); /* DATA_CONST */ 2438 if (x.segs[3].nsects > 0) emit_load_command_segment(&lc, &x, 3); /* DATA */ 2439 if (x.segs[MSEG_DWARF].nsects > 0) 2440 emit_load_command_segment(&lc, &x, MSEG_DWARF); /* DWARF (debug info) */ 2441 emit_load_command_segment(&lc, &x, MSEG_LINKEDIT); /* LINKEDIT */ 2442 2443 /* LC_DYLD_CHAINED_FIXUPS (linkedit_data_command: 16B) */ 2444 mbuf_u32(&lc, LC_DYLD_CHAINED_FIXUPS); 2445 mbuf_u32(&lc, 16); 2446 mbuf_u32(&lc, x.chained_fixups_off); 2447 mbuf_u32(&lc, x.chained_fixups.len); 2448 2449 /* LC_DYLD_EXPORTS_TRIE */ 2450 mbuf_u32(&lc, LC_DYLD_EXPORTS_TRIE); 2451 mbuf_u32(&lc, 16); 2452 mbuf_u32(&lc, x.exports_trie_off); 2453 mbuf_u32(&lc, x.exports_trie.len); 2454 2455 /* LC_SYMTAB */ 2456 mbuf_u32(&lc, LC_SYMTAB); 2457 mbuf_u32(&lc, MACHO_SYMTAB_CMD_SIZE); 2458 mbuf_u32(&lc, x.symtab_off); 2459 mbuf_u32(&lc, x.nsyms); 2460 mbuf_u32(&lc, x.strtab_off); 2461 mbuf_u32(&lc, x.strtab.len); 2462 2463 /* LC_DYSYMTAB */ 2464 /* nlocal=0, nextdef=#defined-globals, nundef=#imports. We tracked 2465 * those during build_symtab; recompute by inspecting strtab... easier 2466 * to recount: defined globals are total - imports. */ 2467 u32 nlocal = 0; 2468 u32 nundef = x.nimports_real; 2469 u32 nextdef = (x.nsyms > nundef) ? 
x.nsyms - nundef - nlocal : 0; 2470 mbuf_u32(&lc, LC_DYSYMTAB); 2471 mbuf_u32(&lc, MACHO_DYSYMTAB_CMD_SIZE); 2472 mbuf_u32(&lc, 0); /* ilocalsym */ 2473 mbuf_u32(&lc, nlocal); 2474 mbuf_u32(&lc, nlocal); 2475 mbuf_u32(&lc, nextdef); 2476 mbuf_u32(&lc, nlocal + nextdef); 2477 mbuf_u32(&lc, nundef); 2478 mbuf_u32(&lc, 0); 2479 mbuf_u32(&lc, 0); /* tocoff, ntoc */ 2480 mbuf_u32(&lc, 0); 2481 mbuf_u32(&lc, 0); /* modtaboff, nmodtab */ 2482 mbuf_u32(&lc, 0); 2483 mbuf_u32(&lc, 0); /* extrefsymoff, nextrefsyms */ 2484 mbuf_u32(&lc, x.indirect_off); 2485 mbuf_u32(&lc, x.indirect.len / 4u); 2486 mbuf_u32(&lc, 0); 2487 mbuf_u32(&lc, 0); /* extreloff, nextrel */ 2488 mbuf_u32(&lc, 0); 2489 mbuf_u32(&lc, 0); /* locreloff, nlocrel */ 2490 2491 /* LC_LOAD_DYLINKER */ 2492 { 2493 const char* dyld = "/usr/lib/dyld"; 2494 u32 dyld_len = (u32)slice_from_cstr(dyld).len; 2495 u32 cmd_size = (u32)ALIGN_UP((u64)(12u + dyld_len + 1u), 8u); 2496 mbuf_u32(&lc, LC_LOAD_DYLINKER); 2497 mbuf_u32(&lc, cmd_size); 2498 mbuf_u32(&lc, 12u); /* name offset within cmd */ 2499 u32 wrote = mbuf_str(&lc, dyld, dyld_len); 2500 (void)wrote; 2501 /* Pad to cmd_size. */ 2502 while (lc.len < (u32)((u64)mbuf_align(&lc, 1) + 0)) { 2503 /* no-op */ 2504 break; 2505 } 2506 /* Re-align to cmd_size. */ 2507 u32 want = (u32)(lc.len); 2508 /* Walk back: lc grew by 12 + (strlen+1). Pad to cmd_size. 
*/ 2509 u32 cmd_start_back = lc.len - (12u + dyld_len + 1u); 2510 u32 pad_needed = cmd_size - (lc.len - cmd_start_back); 2511 while (pad_needed-- > 0) mbuf_u8(&lc, 0); 2512 (void)want; 2513 } 2514 2515 /* LC_UUID */ 2516 mbuf_u32(&lc, LC_UUID); 2517 mbuf_u32(&lc, 24); 2518 mbuf_append(&lc, x.uuid, 16); 2519 2520 /* LC_BUILD_VERSION */ 2521 mbuf_u32(&lc, LC_BUILD_VERSION); 2522 mbuf_u32(&lc, 24); 2523 mbuf_u32(&lc, 1); /* PLATFORM_MACOS */ 2524 mbuf_u32(&lc, (12u << 16) | 0); /* minos 12.0.0 */ 2525 mbuf_u32(&lc, (12u << 16) | 0); /* sdk 12.0.0 */ 2526 mbuf_u32(&lc, 0); /* ntools */ 2527 2528 /* LC_MAIN — entryoff is offset within __TEXT segment from its file 2529 * start (0). */ 2530 mbuf_u32(&lc, LC_MAIN); 2531 mbuf_u32(&lc, 24); 2532 mbuf_u64(&lc, (u64)x.entry_offset); /* entryoff = vaddr - __TEXT.vmaddr */ 2533 mbuf_u64(&lc, 0); /* stacksize */ 2534 2535 /* LC_LOAD_DYLIB per dylib. */ 2536 for (u32 i = 0; i < x.ndylibs; ++i) { 2537 Slice nm_s = pool_slice(x.c->global, x.dylibs[i].install); 2538 const char* nm = nm_s.s; 2539 size_t nl = nm_s.len; 2540 u32 cmd_size = (u32)ALIGN_UP((u64)(24u + (u32)nl + 1u), 8u); 2541 u32 cmd_start = lc.len; 2542 mbuf_u32(&lc, LC_LOAD_DYLIB); 2543 mbuf_u32(&lc, cmd_size); 2544 mbuf_u32(&lc, 24u); /* name offset */ 2545 mbuf_u32(&lc, 0); /* timestamp */ 2546 mbuf_u32(&lc, (1u << 16)); /* current_version 1.0 */ 2547 mbuf_u32(&lc, (1u << 16)); /* compat_version 1.0 */ 2548 mbuf_str(&lc, nm ? 
nm : "", (u32)nl); 2549 while (lc.len - cmd_start < cmd_size) mbuf_u8(&lc, 0); 2550 } 2551 2552 /* LC_FUNCTION_STARTS / LC_DATA_IN_CODE */ 2553 mbuf_u32(&lc, LC_FUNCTION_STARTS_C); 2554 mbuf_u32(&lc, 16); 2555 mbuf_u32(&lc, x.fn_starts_off); 2556 mbuf_u32(&lc, x.fn_starts.len); 2557 2558 mbuf_u32(&lc, LC_DATA_IN_CODE_C); 2559 mbuf_u32(&lc, 16); 2560 mbuf_u32(&lc, x.data_in_code_off); 2561 mbuf_u32(&lc, 0); 2562 2563 /* LC_CODE_SIGNATURE */ 2564 mbuf_u32(&lc, LC_CODE_SIGNATURE_C); 2565 mbuf_u32(&lc, 16); 2566 mbuf_u32(&lc, x.codesig_off); 2567 mbuf_u32(&lc, x.codesig_size); 2568 2569 /* Sanity: lc.len + MACHO_HDR64_SIZE must equal headers_size we 2570 * predicted in plan_layout. If not, we mis-sized — panic. */ 2571 if ((u64)lc.len + MACHO_HDR64_SIZE != x.headers_size) { 2572 compiler_panic(x.c, SRCLOC_NONE, 2573 "link_macho: load-cmd size mismatch: predicted %llu got %u", 2574 (unsigned long long)(x.headers_size - MACHO_HDR64_SIZE), 2575 lc.len); 2576 } 2577 2578 /* ---- now stream the file ---- */ 2579 /* The Writer in kit allows seek; we'll write a flat buffer first 2580 * (so we can hash it for codesig) and flush at the end. */ 2581 MByte file; 2582 mbuf_init(&file, x.h); 2583 2584 /* mach_header_64 */ 2585 u32 ncmds = 0; 2586 /* Recount: PAGEZERO + TEXT + maybe DATA_CONST + maybe DATA + LINKEDIT 2587 * + chained + exports_trie + symtab + dysymtab + dyld + uuid + 2588 * build_version + main + nDylibs + fn_starts + data_in_code + 2589 * codesig. 
*/ 2590 ncmds += 2; /* PAGEZERO + TEXT */ 2591 if (x.segs[2].nsects > 0) ncmds++; 2592 if (x.segs[3].nsects > 0) ncmds++; 2593 if (x.segs[MSEG_DWARF].nsects > 0) ncmds++; /* __DWARF (debug info) */ 2594 ncmds++; /* LINKEDIT */ 2595 ncmds += 11 + x.ndylibs; 2596 /* (chained, exports_trie, symtab, dysymtab, dyld, uuid, build_version, 2597 * main, fn_starts, data_in_code, codesig) = 11 */ 2598 2599 mbuf_u32(&file, MH_MAGIC_64); 2600 mbuf_u32(&file, x.macho->cputype); 2601 mbuf_u32(&file, x.macho->cpusubtype); 2602 mbuf_u32(&file, MH_EXECUTE); 2603 mbuf_u32(&file, ncmds); 2604 mbuf_u32(&file, lc.len); 2605 { 2606 u32 mh_flags = MH_DYLDLINK | MH_TWOLEVEL | MH_NOUNDEFS | MH_PIE; 2607 /* dyld scans __thread_vars and allocates a pthread_key for each 2608 * descriptor only when this flag is set; without it the descriptor's 2609 * thunk pointer is silently patched to _tlv_bootstrap_error. Apple's 2610 * ld sets it whenever the image contains S_THREAD_LOCAL_* sections. */ 2611 if (x.ntlv) mh_flags |= MH_HAS_TLV_DESCRIPTORS; 2612 mbuf_u32(&file, mh_flags); 2613 } 2614 mbuf_u32(&file, 0); /* reserved */ 2615 mbuf_append(&file, lc.data, lc.len); 2616 2617 /* Pad to first section's file offset. */ 2618 /* __TEXT first section begins at headers_size; we wrote header+lc = 2619 * headers_size, so no pad needed. Then each MSec's file_offset 2620 * tells us where to write its bytes. */ 2621 2622 /* Now emit segment payload bytes per MSec. */ 2623 for (u32 i = 0; i < x.nsecs; ++i) { 2624 MSec* m = &x.secs[i]; 2625 if (m->is_zerofill || m->size == 0) continue; 2626 /* Pad up to m->file_offset. */ 2627 while (file.len < m->file_offset) mbuf_u8(&file, 0); 2628 if (m->synth_data) { 2629 mbuf_append(&file, m->synth_data, m->synth_size); 2630 } else { 2631 LinkSection* ls = &img->sections[m->link_sec_id - 1u]; 2632 u32 segid = ls->segment_id; 2633 u8* base = 2634 (segid != LINK_SEG_NONE) ? 
img->segment_bytes[segid - 1u] : NULL; 2635 if (base && ls->size) { 2636 mbuf_append(&file, base + ls->input_offset, (u32)ls->size); 2637 } else if (ls->size) { 2638 for (u64 k = 0; k < ls->size; ++k) mbuf_u8(&file, 0); 2639 } 2640 } 2641 } 2642 2643 /* Pad to LINKEDIT start. */ 2644 while (file.len < x.linkedit_fileoff) mbuf_u8(&file, 0); 2645 2646 /* LINKEDIT contents in declared order. */ 2647 while (file.len < x.chained_fixups_off) mbuf_u8(&file, 0); 2648 mbuf_append(&file, x.chained_fixups.data, x.chained_fixups.len); 2649 while (file.len < x.exports_trie_off) mbuf_u8(&file, 0); 2650 mbuf_append(&file, x.exports_trie.data, x.exports_trie.len); 2651 while (file.len < x.fn_starts_off) mbuf_u8(&file, 0); 2652 mbuf_append(&file, x.fn_starts.data, x.fn_starts.len); 2653 while (file.len < x.data_in_code_off) mbuf_u8(&file, 0); 2654 /* empty */ 2655 while (file.len < x.symtab_off) mbuf_u8(&file, 0); 2656 mbuf_append(&file, x.symtab.data, x.symtab.len); 2657 while (file.len < x.indirect_off) mbuf_u8(&file, 0); 2658 mbuf_append(&file, x.indirect.data, x.indirect.len); 2659 while (file.len < x.strtab_off) mbuf_u8(&file, 0); 2660 mbuf_append(&file, x.strtab.data, x.strtab.len); 2661 while (file.len < x.codesig_off) mbuf_u8(&file, 0); 2662 2663 /* Compute codesig hashes over file bytes [0, codesig_off). */ 2664 /* The codesig blob currently has zero hashes; hash now. */ 2665 compute_codesig(&x, file.data, x.codesig_off, "a.out"); 2666 /* Append codesig. */ 2667 mbuf_append(&file, x.codesig.data, x.codesig.len); 2668 2669 /* Stream out. */ 2670 kit_writer_seek(w, 0); 2671 kit_writer_write(w, file.data, file.len); 2672 2673 /* Cleanup. 
*/ 2674 fix_fini(&fl); 2675 mbuf_fini(&lc); 2676 mbuf_fini(&file); 2677 mbuf_fini(&x.chained_fixups); 2678 mbuf_fini(&x.exports_trie); 2679 mbuf_fini(&x.symtab); 2680 mbuf_fini(&x.strtab); 2681 mbuf_fini(&x.indirect); 2682 mbuf_fini(&x.fn_starts); 2683 mbuf_fini(&x.data_in_code); 2684 mbuf_fini(&x.codesig); 2685 if (x.imports) x.h->free(x.h, x.imports, 0); /* VEC_GROW: cap unknown */ 2686 if (x.dylibs) x.h->free(x.h, x.dylibs, 0); 2687 if (x.sym_to_imp) 2688 x.h->free(x.h, x.sym_to_imp, sizeof(u32) * x.sym_to_imp_size); 2689 if (x.secs) x.h->free(x.h, x.secs, 0); 2690 if (x.stubs_bytes) x.h->free(x.h, x.stubs_bytes, x.stubs_size); 2691 if (x.got_bytes) x.h->free(x.h, x.got_bytes, x.got_size); 2692 if (x.tlv_ptrs_bytes) x.h->free(x.h, x.tlv_ptrs_bytes, x.tlv_ptrs_size); 2693 if (x.tlv_slots) x.h->free(x.h, x.tlv_slots, 0); 2694 if (x.sym_to_tlv) 2695 x.h->free(x.h, x.sym_to_tlv, sizeof(u32) * x.sym_to_tlv_size); 2696 }