kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

emit.c (29096B)


      1 /* Mach-O MH_OBJECT writer.  Walks a finalized ObjBuilder and emits a
      2  * 64-bit little-endian relocatable object via the supplied Writer.
      3  *
      4  * Layout strategy (MH_OBJECT — everything in one anonymous segment):
      5  *   1. plan Mach-O sections (one per non-symtab/strtab/rela ObjSection),
      6  *      mapping kit section names to (segname, sectname) pairs;
      7  *   2. partition ObjSyms into local / extdef / undef and assign final
      8  *      indices for LC_DYSYMTAB;
      9  *   3. build per-section relocation tables via the per-arch translator
     10  *      (only aarch64 is wired today);
     11  *   4. assign file offsets sequentially: header, load commands, section
     12  *      bytes, relocation tables, symbol table, string table;
     13  *   5. write header → load commands → section bytes → relocs → symtab
     14  *      → strtab.
     15  *
     16  * 64-bit little-endian only.  Big-endian / 32-bit panics at entry.
     17  *
     18  * Round-trip invariant: read_macho of
     19  * this output must produce an ObjBuilder shape-equivalent to the input,
     20  * modulo (a) Mach-O's mandatory (segname, sectname) pairing and (b)
     21  * any synthesized N_SECT symbols.  The (segname,sectname) form chosen
     22  * here is the canonical post-roundtrip shape — read_macho stores the
     23  * comma-joined "__SEG,__sect" form in Section.name so a re-emit
     24  * produces the same bytes. */
     25 
     26 #include <string.h>
     27 
     28 #include "core/arena.h"
     29 #include "core/buf.h"
     30 #include "core/bytes.h"
     31 #include "core/heap.h"
     32 #include "core/pool.h"
     33 #include "core/slice.h"
     34 #include "core/util.h"
     35 #include "obj/format.h"
     36 #include "obj/macho/macho.h"
     37 
     /* ---- LE writer helpers (Writer-based) ----
      * Thin aliases onto the shared writer_u*_le helpers (core/bytes.h).
      * These emit through a Writer stream.  They are distinct from the
      * wr_u16_le / wr_u32_le / wr_u64_le calls further down, which store
      * into a raw byte buffer rather than a Writer. */

     #define wr_u32 writer_u32_le
     #define wr_u64 writer_u64_le
     43 
     44 static void wr_name16(Writer* w, const char* s, u32 len) {
     45   /* Mach-O section/segment names are 16-byte zero-padded fields.  Names
     46    * longer than 16 are truncated; the on-disk format leaves no room for
     47    * a longer encoding. */
     48   u8 buf[16];
     49   u32 n = len > 16 ? 16 : len;
     50   memcpy(buf, s, n);
     51   if (n < 16) memset(buf + n, 0, 16 - n);
     52   kit_writer_write(w, buf, 16);
     53 }
     54 
     55 /* ---- (segname,sectname) derivation ---- */
     56 
     /* A kit section name split into Mach-O's (segname, sectname) pair.
      *
      * name_to_seg_sect fills this in.  A name containing a comma is taken to
      * be in "__SEG,__sect" form already and is split at the first comma;
      * any other name (an ELF-shaped ".text", ".rodata", ...) is ignored and
      * the pair is derived from the section's SecKind instead. */
     typedef struct MSegSect MSegSect;
     struct MSegSect {
       char segname[16];  /* NUL-padded; not NUL-terminated when all 16 used */
       char sectname[16]; /* NUL-padded; not NUL-terminated when all 16 used */
       u32 seg_len;       /* bytes of segname in use (at most 16) */
       u32 sect_len;      /* bytes of sectname in use (at most 16) */
     };
     68 
     /* Fill the 16-byte field `dst` from `src`: keep at most 16 bytes of the
      * source, NUL-fill whatever remains, and report the number of bytes kept
      * through `len_out`. */
     static void copy_fixed16(char* dst, u32* len_out, const char* src,
                              u32 src_len) {
       enum { FIELD_BYTES = 16 };
       u32 kept = src_len;
       u32 pos;

       if (kept > FIELD_BYTES) {
         kept = FIELD_BYTES;
       }
       memcpy(dst, src, kept);
       for (pos = kept; pos < FIELD_BYTES; ++pos) {
         dst[pos] = 0;
       }
       *len_out = kept;
     }
     76 
     77 static void name_to_seg_sect(const char* name, u32 nlen, u16 sec_kind,
     78                              MSegSect* out) {
     79   /* Comma-separated form: take prefix as segname, suffix as sectname. */
     80   for (u32 i = 0; i < nlen; ++i) {
     81     if (name[i] == ',') {
     82       copy_fixed16(out->segname, &out->seg_len, name, i);
     83       copy_fixed16(out->sectname, &out->sect_len, name + i + 1, nlen - i - 1);
     84       return;
     85     }
     86   }
     87 
     88   /* Not comma-separated.  Derive from SecKind; ignore `name`. */
     89   const char* seg;
     90   const char* sect;
     91   switch (sec_kind) {
     92     case SEC_TEXT:
     93       seg = "__TEXT";
     94       sect = "__text";
     95       break;
     96     case SEC_RODATA:
     97       seg = "__TEXT";
     98       sect = "__const";
     99       break;
    100     case SEC_DATA:
    101       seg = "__DATA";
    102       sect = "__data";
    103       break;
    104     case SEC_BSS:
    105       seg = "__DATA";
    106       sect = "__bss";
    107       break;
    108     case SEC_DEBUG: {
    109       /* ".debug_*" → "__DWARF,__debug_*" (truncated to Mach-O's 16-byte
    110        * sectname, matching Apple's spelling). Shared with the DWARF
    111        * reader so the names round-trip. Any non-".debug_*" SEC_DEBUG
    112        * name falls back to the leading-dot strip. */
    113       char ds[17];
    114       seg = "__DWARF";
    115       copy_fixed16(out->segname, &out->seg_len, seg,
    116                    (u32)slice_from_cstr(seg).len);
    117       if (obj_macho_debug_sectname(name, nlen, ds)) {
    118         copy_fixed16(out->sectname, &out->sect_len, ds,
    119                      (u32)slice_from_cstr(ds).len);
    120       } else {
    121         sect = (nlen && name[0] == '.') ? name + 1 : name;
    122         copy_fixed16(out->sectname, &out->sect_len, sect,
    123                      (u32)((nlen && name[0] == '.') ? nlen - 1 : nlen));
    124       }
    125       return;
    126     }
    127     default:
    128       seg = "__DATA";
    129       sect = "__data";
    130       break;
    131   }
    132   copy_fixed16(out->segname, &out->seg_len, seg, (u32)slice_from_cstr(seg).len);
    133   copy_fixed16(out->sectname, &out->sect_len, sect,
    134                (u32)slice_from_cstr(sect).len);
    135 }
    136 
    137 /* ---- per-section plan ---- */
    138 
    139 typedef struct MSec {
    140   MSegSect ns;
    141   u64 addr;    /* assigned vmaddr within the segment */
    142   u64 size;    /* bytes (or bss size) */
    143   u32 fileoff; /* 0 for zerofill */
    144   u32 align;   /* power-of-two; stored as log2 in section_64.align */
    145   u32 reloff;  /* 0 if no relocs */
    146   u32 nreloc;
    147   u32 flags; /* S_TYPE | S_ATTR_* */
    148   u32 entsize;
    149   u32 obj_sec; /* originating ObjSecId */
    150   int is_zerofill;
    151   const Buf* obj_bytes; /* NULL when zerofill */
    152   u8* relocs;           /* arena-allocated; nreloc * 8 bytes */
    153 } MSec;
    154 
    /* Return the smallest r such that 2^r >= a, i.e. ceil(log2(a)).
     * Both a == 0 and a == 1 yield 0.
     *
     * The shift count is capped below 32: for a > 2^31 no 32-bit power of two
     * is large enough, and evaluating `1u << 32` would be undefined behaviour
     * (in practice an endless loop).  Such inputs return 32. */
    static u32 log2_align(u32 a) {
      u32 r = 0;
      while (r < 32u && ((u32)1 << r) < a) {
        ++r;
      }
      return r;
    }
    160 
    161 static u32 section_flags_for(u16 sec_kind, u16 sec_flags, const char* sectname,
    162                              u32 sect_len) {
    163   u32 f = 0;
    164   if (sec_kind == SEC_TEXT || (sec_flags & SF_EXEC)) {
    165     f |= S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS;
    166   }
    167   if (sec_flags & SF_TLS) {
    168     /* Mach-O distinguishes three TLV section types by sectname:
    169      *   __thread_data  → S_THREAD_LOCAL_REGULAR (initial data)
    170      *   __thread_bss   → S_THREAD_LOCAL_ZEROFILL (zero-init data)
    171      *   __thread_vars  → S_THREAD_LOCAL_VARIABLES (descriptor records)
    172      * dyld dispatches its TLV-bootstrap pass off the S_TYPE; the
    173      * S_ATTR_* bits don't carry TLV semantics so we just emit the type. */
    174     if (sect_len >= 13 && memcmp(sectname, "__thread_vars", 13) == 0)
    175       return S_THREAD_LOCAL_VARIABLES;
    176     if (sec_kind == SEC_BSS) return S_THREAD_LOCAL_ZEROFILL;
    177     return S_THREAD_LOCAL_REGULAR;
    178   }
    179   if (sec_kind == SEC_BSS ||
    180       (sect_len >= 5 && memcmp(sectname, "__bss", 5) == 0)) {
    181     f |= S_ZEROFILL;
    182   }
    183   if (sec_flags & SF_STRINGS) {
    184     f = (f & ~SECTION_TYPE) | S_CSTRING_LITERALS;
    185   }
    186   if (sec_flags & SF_RETAIN) {
    187     f |= S_ATTR_NO_DEAD_STRIP;
    188   }
    189   /* Default S_REGULAR (0) for all others. */
    190   return f;
    191 }
    192 
    193 /* ---- symbol partition ---- */
    194 
    195 typedef struct MSym {
    196   ObjSymId obj_id;
    197   u32 strx; /* offset in string table */
    198   u8 n_type;
    199   u8 n_sect;
    200   u16 n_desc;
    201   u64 n_value;
    202 } MSym;
    203 
    204 static int sym_is_undef(const ObjSym* s) {
    205   return s->section_id == OBJ_SEC_NONE && s->kind != SK_ABS &&
    206          s->kind != SK_COMMON;
    207 }
    208 
    209 static int sym_is_extdef(const ObjSym* s) {
    210   if (sym_is_undef(s)) return 0;
    211   return s->bind == SB_GLOBAL || s->bind == SB_WEAK;
    212 }
    213 
    /* ---- string table ----
     *
     * Mach-O strtab: a leading zero byte at offset 0 represents the empty
     * string.  Entries are NUL-terminated; we don't dedupe (small symbol
     * counts in v1; matches the simplest llvm output).  Symbol names are
     * copied verbatim: any leading "_" is already part of the ObjSym name,
     * and the writer below does not add one. */
    220 
    221 void emit_macho(Compiler* c, ObjBuilder* ob, Writer* w) {
    222   Heap* h = (Heap*)c->ctx->heap;
    223 
    224   /* Tombstone sweep first — strip/objcopy mutations and the historical
    225    * UNDEF prune are both expressed via Section.removed / ObjSym.removed
    226    * post-sweep. See obj_sweep_dead. */
    227   obj_sweep_dead(ob);
    228 
    229   /* ---- target validation ---------------------------------------- */
    230   const ObjFormatImpl* fmt = obj_format_lookup(KIT_OBJ_MACHO);
    231   const ObjMachoArchOps* macho =
    232       fmt && fmt->macho_arch ? fmt->macho_arch(c->target.arch) : NULL;
    233   u32 cputype, cpusubtype;
    234   u32 (*reloc_to)(u32);
    235   u32 (*reloc_pcrel)(u32);
    236   u32 (*reloc_length)(u32);
    237   if (!macho || !macho->reloc_to || !macho->reloc_pcrel ||
    238       !macho->reloc_length) {
    239     compiler_panic(c, SRCLOC_NONE, "emit_macho: unsupported target arch %u",
    240                    (u32)c->target.arch);
    241   }
    242   cputype = macho->cputype;
    243   cpusubtype = macho->cpusubtype;
    244   reloc_to = macho->reloc_to;
    245   reloc_pcrel = macho->reloc_pcrel;
    246   reloc_length = macho->reloc_length;
    247   if (c->target.big_endian) {
    248     compiler_panic(c, SRCLOC_NONE, "emit_macho: big-endian not supported");
    249   }
    250   if (c->target.ptr_size != 8) {
    251     compiler_panic(c, SRCLOC_NONE, "emit_macho: ptr_size %u (expected 8)",
    252                    (u32)c->target.ptr_size);
    253   }
    254 
    255   /* ---- pass 1: plan Mach-O sections ----------------------------- */
    256   u32 nobjsec = obj_section_count(ob);
    257   MSec* secs = arena_zarray(c->scratch, MSec, nobjsec ? nobjsec : 1);
    258   u32* obj_to_msec = arena_zarray(c->scratch, u32, nobjsec ? nobjsec : 1);
    259   u32 nsecs = 0;
    260   int has_explicit_atoms = obj_atom_count(ob) > 1u;
    261 
    262   for (u32 i = 1; i < nobjsec; ++i) {
    263     const Section* s = obj_section_get(ob, i);
    264     if (s->removed) continue; /* see obj_sweep_dead */
    265     /* Skip ELF-style synthetic sections that read_elf would have
    266      * filtered: SYMTAB / STRTAB / RELA / GROUP have no Mach-O
    267      * representation as data sections. */
    268     if (s->sem == SSEM_SYMTAB || s->sem == SSEM_STRTAB || s->sem == SSEM_RELA ||
    269         s->sem == SSEM_REL || s->sem == SSEM_GROUP) {
    270       continue;
    271     }
    272     Slice nm_s = pool_slice(c->global, s->name);
    273     const char* nm = nm_s.s;
    274     size_t nlen = nm_s.len;
    275     MSec* m = &secs[nsecs];
    276     name_to_seg_sect(nm ? nm : "", (u32)nlen, s->kind, &m->ns);
    277     m->obj_sec = i;
    278     m->align = s->align ? s->align : 1;
    279     m->entsize = s->entsize;
    280     /* Mach-O reader stashes the raw section.flags (S_TYPE | S_ATTR_*)
    281      * in Section.ext_type when reading a Mach-O input.  Use it
    282      * verbatim so attribute bits like S_ATTR_NO_DEAD_STRIP /
    283      * S_ATTR_LIVE_SUPPORT round-trip.  Fall back to the kind-derived
    284      * default for sections originating from non-Mach-O readers (e.g.
    285      * kit codegen). */
    286     if (s->ext_kind == OBJ_EXT_MACHO && s->ext_type) {
    287       m->flags = s->ext_type;
    288     } else {
    289       m->flags =
    290           section_flags_for(s->kind, s->flags, m->ns.sectname, m->ns.sect_len);
    291     }
    292     if (s->sem == SSEM_NOBITS || s->kind == SEC_BSS) {
    293       m->is_zerofill = 1;
    294       m->size = s->bss_size;
    295       m->obj_bytes = NULL;
    296       /* Preserve S_THREAD_LOCAL_ZEROFILL when SF_TLS routed us there;
    297        * a regular BSS section gets the plain S_ZEROFILL type. */
    298       u32 stype = m->flags & SECTION_TYPE;
    299       if (stype != S_THREAD_LOCAL_ZEROFILL)
    300         m->flags = (m->flags & ~SECTION_TYPE) | S_ZEROFILL;
    301     } else {
    302       m->is_zerofill = 0;
    303       m->size = s->bytes.total;
    304       m->obj_bytes = &s->bytes;
    305     }
    306     obj_to_msec[i] = nsecs + 1; /* 1-based: matches Mach-O n_sect. */
    307     nsecs++;
    308   }
    309   if (nsecs > 255u) {
    310     compiler_panic(c, SRCLOC_NONE,
    311                    "emit_macho: too many physical sections for Mach-O "
    312                    "symbol n_sect ordinals (%u > 255); use atom splitting "
    313                    "instead of physical split sections",
    314                    nsecs);
    315   }
    316 
    317   /* ---- pass 2: assign vmaddrs (segment-relative) and per-section
    318    *              flat-layout addresses.  MH_OBJECT keeps everything in
    319    *              one segment with vmaddr=0; section addr fields are
    320    *              relative offsets within the segment.
    321    *
    322    * Two-pass to match the conventional Mach-O `MH_OBJECT` layout:
    323    * non-zerofill sections come first in vmaddr order, then zerofill
    324    * sections at the tail.  Apple `as` and clang `-c` both lay out
    325    * this way, and roundtripping must reproduce it so symbol n_values
    326    * (which are segment-relative addresses) compare equal. */
    327   u64 cur_addr = 0;
    328   for (u32 i = 0; i < nsecs; ++i) {
    329     MSec* m = &secs[i];
    330     if (m->is_zerofill) continue;
    331     cur_addr = ALIGN_UP(cur_addr, (u64)m->align);
    332     m->addr = cur_addr;
    333     cur_addr += m->size;
    334   }
    335   for (u32 i = 0; i < nsecs; ++i) {
    336     MSec* m = &secs[i];
    337     if (!m->is_zerofill) continue;
    338     cur_addr = ALIGN_UP(cur_addr, (u64)m->align);
    339     m->addr = cur_addr;
    340     cur_addr += m->size;
    341   }
    342   u64 segment_vmsize = cur_addr;
    343 
    344   /* ---- pass 3: partition symbols (locals, extdefs, undefs) ------ */
    345   u32 nobjsym = 0;
    346   {
    347     ObjSymIter* it = obj_symiter_new(ob);
    348     ObjSymEntry e;
    349     while (obj_symiter_next(it, &e)) ++nobjsym;
    350     obj_symiter_free(it);
    351   }
    352 
    353   MSym* msyms = arena_zarray(c->scratch, MSym, nobjsym + 1);
    354   u32 nmsyms = 0;
    355   u32* sym_obj_to_macho =
    356       arena_zarray(c->scratch, u32, nobjsym + 2); /* obj_id -> mach idx */
    357 
    358   Buf strtab;
    359   buf_init(&strtab, h);
    360   /* Mach-O strtab convention: the first byte is " " (space) or NUL —
    361    * llvm/Apple emit a single NUL.  We start with NUL for offset 0. */
    362   {
    363     u8 z = 0;
    364     buf_write(&strtab, &z, 1);
    365   }
    366 
    367   /* Emit in three passes so n_type/sect ordering matches LC_DYSYMTAB
    368    * (locals, then extdefs, then undefs). */
    369   for (int pass = 0; pass < 3; ++pass) {
    370     ObjSymIter* it = obj_symiter_new(ob);
    371     ObjSymEntry e;
    372     while (obj_symiter_next(it, &e)) {
    373       const ObjSym* s = e.sym;
    374       if (s->removed) continue; /* spurious-UNDEF prune + explicit removal */
    375       int undef = sym_is_undef(s);
    376       int extdef = sym_is_extdef(s);
    377       int local = !undef && !extdef;
    378       int want =
    379           (pass == 0 && local) || (pass == 1 && extdef) || (pass == 2 && undef);
    380       if (!want) continue;
    381       MSym* ms = &msyms[nmsyms];
    382       ms->obj_id = e.id;
    383 
    384       Slice nm_s = pool_slice(c->global, s->name);
    385       const char* nm = nm_s.s;
    386       size_t nlen = nm_s.len;
    387       /* Mach-O symbol names are stored on disk verbatim — including
    388        * the leading `_` Apple toolchains use for C-source-level
    389        * symbols ("_main" for `int main()`).  kit treats the prefix
    390        * as part of the on-disk name, not a transform applied at emit.
    391        * Name-canonicalization for API callers (kit_jit_lookup,
    392        * link_set_entry) lives one layer up at the linker boundary
    393        * (link.c), so emit/read stay byte-for-byte stable. */
    394       if (nlen && nm) {
    395         u32 off = buf_pos(&strtab);
    396         buf_write(&strtab, nm, nlen);
    397         u8 z = 0;
    398         buf_write(&strtab, &z, 1);
    399         ms->strx = off;
    400       } else {
    401         ms->strx = 0;
    402       }
    403 
    404       u8 type = 0;
    405       if (extdef) type |= N_EXT;
    406       if (s->vis == SV_HIDDEN || s->vis == SV_INTERNAL) {
    407         /* Mach-O encodes hidden externals as N_PEXT|N_EXT. */
    408         type |= N_PEXT;
    409       }
    410       u8 n_sect = NO_SECT;
    411       u16 n_desc = 0;
    412       u64 value = s->value;
    413 
    414       if (undef) {
    415         type |= N_UNDF;
    416         /* Undefined symbols with non-LOCAL bind are external references
    417          * (the common case — every `extern int x;`).  Setting N_EXT
    418          * matches what clang emits and what Apple `ld` expects. */
    419         if (s->bind == SB_GLOBAL || s->bind == SB_WEAK) type |= N_EXT;
    420         if (s->bind == SB_WEAK) n_desc |= N_WEAK_REF;
    421         value = 0;
    422       } else if (s->kind == SK_ABS) {
    423         type |= N_ABS;
    424       } else if (s->kind == SK_COMMON) {
    425         /* Mach-O common symbols are N_UNDF|N_EXT with n_value=size and
    426          * n_desc carrying log2(align) in the GET_COMM_ALIGN bits. */
    427         type = N_UNDF | N_EXT;
    428         value = s->size;
    429         u32 a = s->common_align ? (u32)s->common_align : 1;
    430         n_desc = (u16)(log2_align(a) << 8); /* GET_COMM_ALIGN field */
    431       } else {
    432         type |= N_SECT;
    433         u32 ms_idx = (s->section_id < nobjsec) ? obj_to_msec[s->section_id] : 0;
    434         if (ms_idx > 255u) {
    435           compiler_panic(c, SRCLOC_NONE,
    436                          "emit_macho: symbol section ordinal %u exceeds "
    437                          "Mach-O n_sect range",
    438                          ms_idx);
    439         }
    440         n_sect = (u8)ms_idx;
    441         if (n_sect && ms_idx <= nsecs) {
    442           value = secs[ms_idx - 1].addr + s->value;
    443         }
    444         if (s->bind == SB_WEAK) n_desc |= N_WEAK_DEF;
    445       }
    446 
    447       /* OR in any pass-through n_desc bits the reader stashed in
    448        * sym->flags (N_NO_DEAD_STRIP, etc.). The bits we already
    449        * compute (N_WEAK_DEF / N_WEAK_REF and the common-alignment
    450        * field) are already excluded by read_macho before stashing,
    451        * so a plain OR can't double-count. */
    452       n_desc |= s->flags;
    453       {
    454         ObjAtomId aid = obj_atom_find_symbol(ob, e.id);
    455         const ObjAtom* atom = obj_atom_get(ob, aid);
    456         if (atom && atom->signature == e.id && (atom->flags & OBJ_ATOM_RETAIN))
    457           n_desc |= N_NO_DEAD_STRIP;
    458       }
    459 
    460       ms->n_type = type;
    461       ms->n_sect = n_sect;
    462       ms->n_desc = n_desc;
    463       ms->n_value = value;
    464 
    465       sym_obj_to_macho[e.id] = nmsyms + 1; /* 1-based index, 0 = none. */
    466       nmsyms++;
    467     }
    468     obj_symiter_free(it);
    469   }
    470 
    471   u32 nlocals = 0, nextdefs = 0, nundefs = 0;
    472   for (u32 i = 0; i < nmsyms; ++i) {
    473     u8 t = msyms[i].n_type;
    474     u8 ext = (t & N_EXT) != 0;
    475     u8 typ = (u8)(t & N_TYPE);
    476     if (typ == N_UNDF && ext) {
    477       /* Could be undef or common — common has nonzero n_value. */
    478       if (msyms[i].n_value != 0)
    479         ++nextdefs; /* common is conventionally extdef-shaped */
    480       else
    481         ++nundefs;
    482     } else if (ext) {
    483       ++nextdefs;
    484     } else {
    485       ++nlocals;
    486     }
    487   }
    488   /* Re-derive without the common fudge by counting partition pass: we
    489    * already wrote them in (locals,extdefs,undefs) order, so the prefix
    490    * counts are just the per-pass counts. Mirror the spurious-UNDEF
    491    * prune from the emit loop above so the LC_DYSYMTAB index counts
    492    * line up with the symbols we actually wrote. */
    493   nlocals = 0;
    494   nextdefs = 0;
    495   nundefs = 0;
    496   {
    497     ObjSymIter* it = obj_symiter_new(ob);
    498     ObjSymEntry e;
    499     while (obj_symiter_next(it, &e)) {
    500       const ObjSym* s = e.sym;
    501       if (s->removed) continue;
    502       int undef = sym_is_undef(s);
    503       if (undef)
    504         ++nundefs;
    505       else if (sym_is_extdef(s))
    506         ++nextdefs;
    507       else
    508         ++nlocals;
    509     }
    510     obj_symiter_free(it);
    511   }
    512 
    513   /* ---- pass 4: build per-section relocation tables -------------- */
    514   u32 total_relocs = obj_reloc_total(ob);
    515   for (u32 i = 0; i < nsecs; ++i) {
    516     MSec* m = &secs[i];
    517     u32 nr = obj_reloc_count(ob, m->obj_sec);
    518     if (!nr) continue;
    519     /* Worst case: each reloc may be preceded by an ARM64_RELOC_ADDEND
    520      * pair entry.  We size the buffer for that upper bound. */
    521     u8* buf = (u8*)arena_alloc(c->scratch, (size_t)MACHO_RELOC_SIZE * nr * 2,
    522                                _Alignof(u32));
    523     u32 j = 0;
    524     for (u32 ri = 0; ri < total_relocs; ++ri) {
    525       const Reloc* r = obj_reloc_at(ob, ri);
    526       if (r->removed) continue;
    527       if (r->section_id != m->obj_sec) continue;
    528       if ((r->kind == R_ADD8 || r->kind == R_ADD16 ||
    529            r->kind == R_ADD32 || r->kind == R_ADD64) &&
    530           ri + 1u < total_relocs) {
    531         const Reloc* sub = obj_reloc_at(ob, ri + 1u);
    532         int paired = sub && sub->section_id == r->section_id &&
    533                      sub->offset == r->offset &&
    534                      ((r->kind == R_ADD8 && sub->kind == R_SUB8) ||
    535                       (r->kind == R_ADD16 && sub->kind == R_SUB16) ||
    536                       (r->kind == R_ADD32 && sub->kind == R_SUB32) ||
    537                       (r->kind == R_ADD64 && sub->kind == R_SUB64));
    538         if (paired) {
    539           u32 length = (r->kind == R_ADD64)   ? 3u
    540                        : (r->kind == R_ADD32) ? 2u
    541                        : (r->kind == R_ADD16) ? 1u
    542                                               : 0u;
    543           u32 add_idx;
    544           u32 sub_idx;
    545           u32 sub_type = c->target.arch == KIT_ARCH_ARM_64
    546                              ? ARM64_RELOC_SUBTRACTOR
    547                              : X86_64_RELOC_SUBTRACTOR;
    548           u32 unsigned_type = c->target.arch == KIT_ARCH_ARM_64
    549                                   ? ARM64_RELOC_UNSIGNED
    550                                   : X86_64_RELOC_UNSIGNED;
    551           if (r->sym == OBJ_SYM_NONE || sub->sym == OBJ_SYM_NONE) {
    552             compiler_panic(c, SRCLOC_NONE,
    553                            "emit_macho: symdiff reloc without symbol");
    554           }
    555           add_idx = sym_obj_to_macho[r->sym];
    556           sub_idx = sym_obj_to_macho[sub->sym];
    557           if (add_idx == 0 || sub_idx == 0) {
    558             compiler_panic(c, SRCLOC_NONE,
    559                            "emit_macho: symdiff reloc target not in symtab");
    560           }
    561           {
    562             u8* slot = buf + (size_t)j * MACHO_RELOC_SIZE;
    563             wr_u32_le(slot + 0, (u32)r->offset);
    564             wr_u32_le(slot + 4, ((sub_idx - 1u) & 0x00ffffffu) |
    565                                     (length << 25) | (1u << 27) |
    566                                     ((sub_type & 0xfu) << 28));
    567             ++j;
    568           }
    569           {
    570             u8* slot = buf + (size_t)j * MACHO_RELOC_SIZE;
    571             wr_u32_le(slot + 0, (u32)r->offset);
    572             wr_u32_le(slot + 4, ((add_idx - 1u) & 0x00ffffffu) |
    573                                     (length << 25) | (1u << 27) |
    574                                     ((unsigned_type & 0xfu) << 28));
    575             ++j;
    576           }
    577           ++ri;
    578           continue;
    579         }
    580       }
    581       u32 mtype = reloc_to(r->kind);
    582       if (mtype == (u32)-1) {
    583         compiler_panic(c, SRCLOC_NONE,
    584                        "emit_macho: unsupported reloc kind %u for arch %u",
    585                        (u32)r->kind, (u32)c->target.arch);
    586       }
    587       u32 pcrel = reloc_pcrel(r->kind);
    588       u32 length = reloc_length(r->kind);
    589 
    590       /* Resolve target — extern always 1 in our model (every Reloc has
    591        * an ObjSymId).  Skip relocs without a symbol — they would map to
    592        * a section-relative reloc which the v1 cgtarget never emits. */
    593       if (r->sym == OBJ_SYM_NONE) {
    594         compiler_panic(c, SRCLOC_NONE,
    595                        "emit_macho: reloc without symbol not supported "
    596                        "(sec=%u offset=%u kind=%u)",
    597                        (u32)r->section_id, (u32)r->offset, (u32)r->kind);
    598       }
    599       u32 mach_sym_idx = sym_obj_to_macho[r->sym];
    600       if (mach_sym_idx == 0) {
    601         compiler_panic(c, SRCLOC_NONE,
    602                        "emit_macho: reloc target sym %u not in symtab",
    603                        (u32)r->sym);
    604       }
    605       u32 r_symbolnum = mach_sym_idx - 1; /* Mach-O uses 0-based. */
    606 
    607       /* Non-zero addend: emit a leading ARM64_RELOC_ADDEND pair (only
    608        * meaningful for non-UNSIGNED types — UNSIGNED carries the addend
    609        * inline in the patched bytes). */
    610       if (r->addend != 0 && mtype != ARM64_RELOC_UNSIGNED) {
    611         u8* slot = buf + (size_t)j * MACHO_RELOC_SIZE;
    612         wr_u32_le(slot + 0, (u32)r->offset);
    613         /* ARM64_RELOC_ADDEND stores a signed 24-bit immediate in
    614          * r_symbolnum.  It is not a symbol-table reference; setting
    615          * r_extern would make readers interpret the addend as a symbol
    616          * index. */
    617         u32 packed = ((u32)(i64)r->addend & 0x00ffffffu) | (0u << 24) |
    618                      (length << 25) | (ARM64_RELOC_ADDEND << 28);
    619         wr_u32_le(slot + 4, packed);
    620         ++j;
    621       }
    622 
    623       u8* slot = buf + (size_t)j * MACHO_RELOC_SIZE;
    624       wr_u32_le(slot + 0, (u32)r->offset);
    625       u32 packed = (r_symbolnum & 0x00ffffffu) | ((pcrel & 1u) << 24) |
    626                    ((length & 3u) << 25) | (1u << 27) /*extern*/ |
    627                    ((mtype & 0xfu) << 28);
    628       wr_u32_le(slot + 4, packed);
    629       ++j;
    630     }
    631     m->relocs = buf;
    632     m->nreloc = j;
    633   }
    634 
    635   /* ---- pass 5: assign file offsets ------------------------------ */
    636   /* Layout after the load-command block:
    637    *   section bytes (in order, respecting align)
    638    *   relocation tables (per section, 4-aligned)
    639    *   symbol table (8-aligned)
    640    *   string table */
    641   u32 nload_cmds =
    642       4; /* LC_SEGMENT_64 + LC_BUILD_VERSION + LC_SYMTAB + LC_DYSYMTAB */
    643   u32 segcmd_size = MACHO_SEGCMD64_SIZE + nsecs * MACHO_SECT64_SIZE;
    644   u32 build_version_size =
    645       24; /* fixed: cmd+cmdsize+platform+minos+sdk+ntools(0) */
    646   u32 sizeofcmds = segcmd_size + build_version_size + MACHO_SYMTAB_CMD_SIZE +
    647                    MACHO_DYSYMTAB_CMD_SIZE;
    648 
    649   u64 cur = MACHO_HDR64_SIZE + sizeofcmds;
    650   u32 fileoff_first = (u32)cur;
    651   for (u32 i = 0; i < nsecs; ++i) {
    652     MSec* m = &secs[i];
    653     if (m->is_zerofill) {
    654       m->fileoff = 0;
    655       continue;
    656     }
    657     cur = ALIGN_UP(cur, (u64)m->align);
    658     m->fileoff = (u32)cur;
    659     cur += m->size;
    660   }
    661 
    662   /* Reloc tables. */
    663   for (u32 i = 0; i < nsecs; ++i) {
    664     MSec* m = &secs[i];
    665     if (!m->nreloc) {
    666       m->reloff = 0;
    667       continue;
    668     }
    669     cur = ALIGN_UP(cur, (u64)4);
    670     m->reloff = (u32)cur;
    671     cur += (u64)m->nreloc * MACHO_RELOC_SIZE;
    672   }
    673 
    674   cur = ALIGN_UP(cur, (u64)8);
    675   u64 symoff = cur;
    676   cur += (u64)nmsyms * MACHO_NLIST64_SIZE;
    677   u64 stroff = cur;
    678   u32 strtab_size = buf_pos(&strtab);
    679   cur += strtab_size;
    680 
    681   /* ---- pass 6: write the file ------------------------------------ */
    682   kit_writer_seek(w, 0);
    683 
    684   /* mach_header_64 */
    685   wr_u32(w, MH_MAGIC_64);
    686   wr_u32(w, cputype);
    687   wr_u32(w, cpusubtype);
    688   wr_u32(w, MH_OBJECT);
    689   wr_u32(w, nload_cmds);
    690   wr_u32(w, sizeofcmds);
    691   wr_u32(w, has_explicit_atoms ? MH_SUBSECTIONS_VIA_SYMBOLS : 0);
    692   wr_u32(w, 0); /* reserved */
    693 
    694   /* LC_SEGMENT_64 (anonymous, contains everything) */
    695   wr_u32(w, LC_SEGMENT_64);
    696   wr_u32(w, segcmd_size);
    697   wr_name16(w, "", 0);       /* segname: empty for MH_OBJECT */
    698   wr_u64(w, 0);              /* vmaddr */
    699   wr_u64(w, segment_vmsize); /* vmsize */
    700   wr_u64(w, fileoff_first);  /* fileoff */
    701   /* filesize = bytes covered by non-zerofill sections (post-section
    702    * file offset minus the start). */
    703   u64 filesize = 0;
    704   for (u32 i = 0; i < nsecs; ++i) {
    705     MSec* m = &secs[i];
    706     if (m->is_zerofill) continue;
    707     u64 end = (u64)m->fileoff + m->size;
    708     u64 begin = m->fileoff;
    709     if (end > filesize + fileoff_first) filesize = end - fileoff_first;
    710     (void)begin;
    711   }
    712   wr_u64(w, filesize);
    713   /* maxprot/initprot — VM_PROT_READ|WRITE|EXECUTE = 7 for object segs. */
    714   wr_u32(w, 7);
    715   wr_u32(w, 7);
    716   wr_u32(w, nsecs);
    717   wr_u32(w, 0); /* flags */
    718 
    719   /* sections inline within the segment command */
    720   for (u32 i = 0; i < nsecs; ++i) {
    721     MSec* m = &secs[i];
    722     wr_name16(w, m->ns.sectname, m->ns.sect_len);
    723     wr_name16(w, m->ns.segname, m->ns.seg_len);
    724     wr_u64(w, m->addr);
    725     wr_u64(w, m->size);
    726     wr_u32(w, m->fileoff);
    727     wr_u32(w, log2_align(m->align));
    728     wr_u32(w, m->reloff);
    729     wr_u32(w, m->nreloc);
    730     wr_u32(w, m->flags);
    731     wr_u32(w, 0);          /* reserved1 */
    732     wr_u32(w, m->entsize); /* reserved2 */
    733     wr_u32(w, 0);          /* reserved3 */
    734   }
    735 
    736   /* LC_BUILD_VERSION — platform=PLATFORM_MACOS(1), minos/sdk=14.0.0,
    737    * ntools=0.  The exact min-version isn't load-bearing for MH_OBJECT,
    738    * but Apple's `ld` warns when it's missing. */
    739   wr_u32(w, LC_BUILD_VERSION);
    740   wr_u32(w, build_version_size);
    741   wr_u32(w, 1);               /* platform: PLATFORM_MACOS */
    742   wr_u32(w, (14u << 16) | 0); /* minos: 14.0.0 */
    743   wr_u32(w, (14u << 16) | 0); /* sdk:   14.0.0 */
    744   wr_u32(w, 0);               /* ntools */
    745 
    746   /* LC_SYMTAB */
    747   wr_u32(w, LC_SYMTAB);
    748   wr_u32(w, MACHO_SYMTAB_CMD_SIZE);
    749   wr_u32(w, (u32)symoff);
    750   wr_u32(w, nmsyms);
    751   wr_u32(w, (u32)stroff);
    752   wr_u32(w, strtab_size);
    753 
    754   /* LC_DYSYMTAB */
    755   wr_u32(w, LC_DYSYMTAB);
    756   wr_u32(w, MACHO_DYSYMTAB_CMD_SIZE);
    757   wr_u32(w, 0); /* ilocalsym */
    758   wr_u32(w, nlocals);
    759   wr_u32(w, nlocals);
    760   wr_u32(w, nextdefs);
    761   wr_u32(w, nlocals + nextdefs);
    762   wr_u32(w, nundefs);
    763   wr_u32(w, 0);
    764   wr_u32(w, 0); /* tocoff, ntoc */
    765   wr_u32(w, 0);
    766   wr_u32(w, 0); /* modtaboff, nmodtab */
    767   wr_u32(w, 0);
    768   wr_u32(w, 0); /* extrefsymoff, nextrefsyms */
    769   wr_u32(w, 0);
    770   wr_u32(w, 0); /* indirectsymoff, nindirectsyms */
    771   wr_u32(w, 0);
    772   wr_u32(w, 0); /* extreloff, nextrel */
    773   wr_u32(w, 0);
    774   wr_u32(w, 0); /* locreloff, nlocrel */
    775 
    776   /* section bytes */
    777   for (u32 i = 0; i < nsecs; ++i) {
    778     MSec* m = &secs[i];
    779     if (m->is_zerofill || !m->size) continue;
    780     kit_writer_seek(w, m->fileoff);
    781     if (m->obj_bytes) {
    782       u32 sz = m->obj_bytes->total;
    783       u8* tmp = (u8*)h->alloc(h, sz ? sz : 1, 1);
    784       if (sz) buf_flatten(m->obj_bytes, tmp);
    785       kit_writer_write(w, tmp, sz);
    786       h->free(h, tmp, sz ? sz : 1);
    787     }
    788   }
    789 
    790   /* reloc tables */
    791   for (u32 i = 0; i < nsecs; ++i) {
    792     MSec* m = &secs[i];
    793     if (!m->nreloc) continue;
    794     kit_writer_seek(w, m->reloff);
    795     kit_writer_write(w, m->relocs, (size_t)m->nreloc * MACHO_RELOC_SIZE);
    796   }
    797 
    798   /* symtab */
    799   kit_writer_seek(w, symoff);
    800   for (u32 i = 0; i < nmsyms; ++i) {
    801     const MSym* ms = &msyms[i];
    802     u8 entry[MACHO_NLIST64_SIZE];
    803     wr_u32_le(entry + 0, ms->strx);
    804     entry[4] = ms->n_type;
    805     entry[5] = ms->n_sect;
    806     wr_u16_le(entry + 6, ms->n_desc);
    807     wr_u64_le(entry + 8, ms->n_value);
    808     kit_writer_write(w, entry, MACHO_NLIST64_SIZE);
    809   }
    810 
    811   /* strtab */
    812   {
    813     u8* flat = (u8*)arena_alloc(c->scratch, strtab_size ? strtab_size : 1, 1);
    814     if (strtab_size) buf_flatten(&strtab, flat);
    815     kit_writer_seek(w, stroff);
    816     kit_writer_write(w, flat, strtab_size);
    817   }
    818   buf_fini(&strtab);
    819 }