kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

read.c (33577B)


      1 /* PE/COFF .obj (IMAGE_FILE_HEADER + sections) reader.  Parses a 64-bit
      2  * little-endian relocatable object back into a fresh ObjBuilder.  Peer
      3  * of read_elf / read_macho; the post-finalize ObjBuilder shape is the
      4  * canonical superset doc/DESIGN.md §5.5 promises: read_coff of an
      5  * emit_coff output produces an ObjBuilder shape-equivalent to the
      6  * writer's input, modulo synthesized SECTION symbols and the COMDAT
      7  * section-definition aux records.
      8  *
      9  * Scope: IMAGE_FILE_MACHINE_AMD64 / ARM64 (objects may also be ARM64EC).  PE
     10  * *images* (executables / DLLs, beginning with the DOS 'MZ' stub) are
     11  * detected at entry and dispatched to read_coff_image (read_image.c).
     12  * Microsoft "short import" records (Sig1=0, Sig2=0xFFFF) found inside
     13  * .lib archive members are likewise detected at entry and dispatched to
     14  * read_coff_short_import, which synthesizes a DSO-shaped ObjBuilder
     15  * annotated with the providing DLL name via obj_set_coff_import_dll. */
     16 
     17 #include <string.h>
     18 
     19 #include "core/arena.h"
     20 #include "core/heap.h"
     21 #include "core/pool.h"
     22 #include "core/slice.h"
     23 #include "obj/coff/coff.h"
     24 #include "obj/coff/read_util.h"
     25 #include "obj/format.h"
     26 
     27 /* ---- section-header scratch ---- */
     28 
     29 typedef struct CSecRec {
     30   char raw_name[8];
     31   u32 virtual_size;
     32   u32 size_of_raw_data;
     33   u32 pointer_to_raw_data;
     34   u32 pointer_to_relocations;
     35   u16 number_of_relocations;
     36   u32 characteristics;
     37   ObjSecId obj_sec; /* OBJ_SEC_NONE if skipped */
     38 } CSecRec;
     39 
     40 static void parse_shdr(const u8* p, CSecRec* out) {
     41   memcpy(out->raw_name, p, 8);
     42   out->virtual_size = coff_rd_u32(p + 8);
     43   out->size_of_raw_data = coff_rd_u32(p + 16);
     44   out->pointer_to_raw_data = coff_rd_u32(p + 20);
     45   out->pointer_to_relocations = coff_rd_u32(p + 24);
     46   out->number_of_relocations = coff_rd_u16(p + 32);
     47   out->characteristics = coff_rd_u32(p + 36);
     48   out->obj_sec = OBJ_SEC_NONE;
     49 }
     50 
     51 /* ---- string-table lookup (4-byte size prefix, NUL-terminated entries) ---- */
     52 
     53 static const char* strtab_lookup(const u8* tab, u32 tab_size, u32 off,
     54                                  u32* len_out) {
     55   if (off >= tab_size) {
     56     *len_out = 0;
     57     return "";
     58   }
     59   const char* s = (const char*)(tab + off);
     60   u32 max = tab_size - off;
     61   u32 n = 0;
     62   while (n < max && s[n] != '\0') ++n;
     63   *len_out = n;
     64   return s;
     65 }
     66 
     67 /* Resolve a section/symbol short-or-long name into (ptr, len).  COFF
     68  * section names use the "/<decimal>" convention for >8-byte names; COFF
     69  * symbol names use the (Zeroes==0, Offset) form instead.  This helper
     70  * handles the section form (8 raw bytes; leading '/' triggers strtab
     71  * lookup). */
     72 static void resolve_section_name(const char raw[8], const u8* strtab,
     73                                  u32 strtab_size, const char** name_out,
     74                                  u32* len_out) {
     75   if (raw[0] == '/') {
     76     /* Parse decimal offset.  Up to 7 ASCII digits. */
     77     u32 off = 0;
     78     for (u32 i = 1; i < 8 && raw[i] >= '0' && raw[i] <= '9'; ++i) {
     79       off = off * 10u + (u32)(raw[i] - '0');
     80     }
     81     *name_out = strtab_lookup(strtab, strtab_size, off, len_out);
     82     return;
     83   }
     84   /* Inline: up to 8 bytes, NUL-padded (not necessarily NUL-terminated). */
     85   u32 n = 0;
     86   while (n < 8 && raw[n] != '\0') ++n;
     87   *name_out = raw;
     88   *len_out = n;
     89 }
     90 
     91 /* characteristics -> SecKind / SecFlag / alignment live in read_util.c
     92  * (coff_sec_kind / coff_sec_flags / coff_sec_align), shared with the
     93  * image reader. */
     94 
     95 /* ---- symbol-name resolution ---- */
     96 
     97 static void resolve_sym_name(const u8* rec, const u8* strtab, u32 strtab_size,
     98                              const char** name_out, u32* len_out) {
     99   /* ShortName: 8 bytes.  If first 4 bytes are zero, second 4 bytes is
    100    * the strtab offset (LongName form). */
    101   u32 z = coff_rd_u32(rec + 0);
    102   if (z == 0) {
    103     u32 off = coff_rd_u32(rec + 4);
    104     *name_out = strtab_lookup(strtab, strtab_size, off, len_out);
    105     return;
    106   }
    107   u32 n = 0;
    108   while (n < 8 && rec[n] != '\0') ++n;
    109   *name_out = (const char*)rec;
    110   *len_out = n;
    111 }
    112 
    113 static int coff_reloc_inline_addend(const u8* data, size_t len,
    114                                     const CSecRec* s, u32 off, u32 width,
    115                                     i64* out) {
    116   if (!s || !s->size_of_raw_data) return 0;
    117   if ((u64)off + (u64)width > (u64)s->size_of_raw_data) return 0;
    118   if ((u64)s->pointer_to_raw_data + (u64)off + (u64)width > (u64)len)
    119     return 0;
    120   const u8* p = data + s->pointer_to_raw_data + off;
    121   switch (width) {
    122     case 4:
    123       *out = (i64)(i32)coff_rd_u32(p);
    124       return 1;
    125     case 8:
    126       *out = (i64)coff_rd_u64(p);
    127       return 1;
    128     default:
    129       return 0;
    130   }
    131 }
    132 
    133 /* ---- short-import record handler ----
    134  * Microsoft "short import" format: a 20-byte ImportObjectHeader
    135  * followed by SizeOfData bytes containing two NUL-terminated strings —
    136  * the imported symbol name then the DLL name.  These live as members
    137  * of .lib archives (mingw's libkernel32.dll.a etc.) and stand in for
    138  * a full long-form COFF import object.
    139  *
    140  * kit-side model: synthesize a DSO-shaped ObjBuilder with the
    141  * imported symbol defined at section_id = OBJ_SEC_NONE (the same
    142  * shape read_coff_dso / read_elf_dso produce for an exported name),
    143  * and stash the providing DLL name on the builder via
    144  * obj_set_coff_import_dll so the archive-ingestion layer can route
    145  * the resulting LinkInput as a DSO with this name as the soname.
    146  *
    147  * We also synthesize the `__imp_<name>` alias mingw codegen uses to
    148  * spell explicit IAT-slot access; both names ultimately resolve to
    149  * the same DLL export at link time. */
    150 static ObjBuilder* read_coff_short_import(Compiler* c, const char* name,
    151                                           const u8* data, size_t len) {
    152   if (len < COFF_IMPORT_OBJECT_HEADER_SIZE)
    153     compiler_panic(c, SRCLOC_NONE,
    154                    "read_coff: short-import record shorter than header");
    155 
    156   /* Sig1 / Sig2 already checked by the caller. */
    157   /* data + 4: Version (2 bytes, ignored). */
    158   u16 machine = coff_rd_u16(data + 6);
    159   /* data + 8: TimeDateStamp (4 bytes, ignored). */
    160   u32 size_of_data = coff_rd_u32(data + 12);
    161   u16 ordinal_or_hint = coff_rd_u16(data + 16);
    162   u16 type_flags = coff_rd_u16(data + 18);
    163 
    164   if ((u64)COFF_IMPORT_OBJECT_HEADER_SIZE + (u64)size_of_data > (u64)len)
    165     compiler_panic(c, SRCLOC_NONE,
    166                    "read_coff: short-import SizeOfData=%u extends past input "
    167                    "(len=%zu)",
    168                    size_of_data, len);
    169 
    170   if (machine != IMAGE_FILE_MACHINE_AMD64 &&
    171       machine != IMAGE_FILE_MACHINE_ARM64)
    172     compiler_panic(c, SRCLOC_NONE,
    173                    "read_coff: short-import unsupported machine %#x",
    174                    (u32)machine);
    175 
    176   /* Decode TypeFlags bitfield (Type:2, NameType:3, Reserved:11). */
    177   u32 import_type = (u32)(type_flags & 0x3u);
    178   u32 name_type = (u32)((type_flags >> 2) & 0x7u);
    179 
    180   /* Ordinal-only imports (NameType=IMPORT_OBJECT_ORDINAL) are not yet
    181    * implemented in kit.  None of the mingw / llvm-mingw system import
    182    * archives use this shape — every libfoo.a member in the supported
    183    * sysroots imports by name — so refusing here is a clean diagnostic,
    184    * not an internal panic.  When a real consumer surfaces, the work is
    185    * to thread the ordinal through link_resolve and into the PE import
    186    * directory hint/name tables. */
    187   if (name_type == IMPORT_OBJECT_ORDINAL)
    188     compiler_panic(
    189         c, SRCLOC_NONE,
    190         "read_coff: short-import by ordinal not implemented "
    191         "(archive member \"%.*s\", ordinal %u). kit links "
    192         "imports by name only; rebuild the consumer to import "
    193         "by name, or omit this archive from the link.",
    194         SLICE_ARG(name ? slice_from_cstr(name) : SLICE_LIT("<unnamed>")),
    195         (unsigned)ordinal_or_hint);
    196 
    197   /* Symbol name: NUL-terminated starting at data + 20. */
    198   const u8* body = data + COFF_IMPORT_OBJECT_HEADER_SIZE;
    199   u32 sym_name_max = size_of_data;
    200   u32 sym_name_len = 0;
    201   while (sym_name_len < sym_name_max && body[sym_name_len] != '\0')
    202     ++sym_name_len;
    203   if (sym_name_len == sym_name_max)
    204     compiler_panic(c, SRCLOC_NONE,
    205                    "read_coff: short-import symbol name not NUL-terminated");
    206 
    207   /* DLL name: NUL-terminated starting after the symbol name's NUL. */
    208   u32 dll_name_off = sym_name_len + 1u;
    209   if (dll_name_off >= size_of_data)
    210     compiler_panic(c, SRCLOC_NONE, "read_coff: short-import missing DLL name");
    211   const u8* dll_p = body + dll_name_off;
    212   u32 dll_name_max = size_of_data - dll_name_off;
    213   u32 dll_name_len = 0;
    214   while (dll_name_len < dll_name_max && dll_p[dll_name_len] != '\0')
    215     ++dll_name_len;
    216   if (dll_name_len == dll_name_max)
    217     compiler_panic(c, SRCLOC_NONE,
    218                    "read_coff: short-import DLL name not NUL-terminated");
    219 
    220   ObjBuilder* ob = obj_new(c);
    221   if (!ob) compiler_panic(c, SRCLOC_NONE, "read_coff: obj_new failed");
    222 
    223   /* Pick SymKind by import type: CODE -> function, DATA/CONST -> object.
    224    * Both are defined at section_id=OBJ_SEC_NONE, value=0, size=0 — the
    225    * shape read_coff_dso would produce for a DLL export. */
    226   SymKind k = (import_type == IMPORT_OBJECT_CODE) ? SK_FUNC : SK_OBJ;
    227 
    228   Sym sn = pool_intern_slice(
    229       c->global, (Slice){.s = (const char*)body, .len = sym_name_len});
    230   ObjSymId id =
    231       obj_symbol_ex(ob, sn, SB_GLOBAL, SV_DEFAULT, k, OBJ_SEC_NONE, 0, 0, 0);
    232   obj_sym_mark_referenced(ob, id);
    233 
    234   /* `__imp_<name>` alias for codegen that refers to the IAT slot
    235    * directly (mingw convention).  Even code imports use an object-like
    236    * `__imp_` symbol because references to it want the IAT data slot, not
    237    * the callable import stub. */
    238   static const char kImpPrefix[] = "__imp_";
    239   u32 imp_len = (u32)(sizeof kImpPrefix - 1u) + sym_name_len;
    240   char* imp_buf = arena_array(c->scratch, char, imp_len);
    241   memcpy(imp_buf, kImpPrefix, sizeof kImpPrefix - 1u);
    242   memcpy(imp_buf + (sizeof kImpPrefix - 1u), body, sym_name_len);
    243   Sym imp_sn =
    244       pool_intern_slice(c->global, (Slice){.s = imp_buf, .len = imp_len});
    245   ObjSymId imp_id = obj_symbol_ex(ob, imp_sn, SB_GLOBAL, SV_DEFAULT, SK_OBJ,
    246                                   OBJ_SEC_NONE, 0, 0, 0);
    247   obj_sym_mark_referenced(ob, imp_id);
    248 
    249   /* Stash the DLL name so the archive-ingestion layer (Phase 4.3) can
    250    * route this builder as a DSO with the DLL as soname. */
    251   Sym dll_sn = pool_intern_slice(
    252       c->global, (Slice){.s = (const char*)dll_p, .len = dll_name_len});
    253   obj_set_coff_import_dll(ob, dll_sn);
    254 
    255   /* NameType decides what the loader resolves IN THE DLL, which can differ
    256    * from the local symbol name. The local symbol keeps its own name (so kit's
    257    * references resolve); the PE hint/name-table entry must use the real
    258    * export name. Record an override whenever they differ. */
    259   Slice imp_name = {.s = (const char*)body, .len = sym_name_len};
    260   if (name_type == IMPORT_OBJECT_NAME_NOPREFIX ||
    261       name_type == IMPORT_OBJECT_NAME_UNDECORATE) {
    262     /* Strip one leading decoration char (?, @, or _). UNDECORATE also
    263      * truncates at the first '@' (MS @argbytes stdcall/fastcall suffix). */
    264     if (imp_name.len > 0 && (imp_name.s[0] == '?' || imp_name.s[0] == '@' ||
    265                              imp_name.s[0] == '_')) {
    266       ++imp_name.s;
    267       --imp_name.len;
    268     }
    269     if (name_type == IMPORT_OBJECT_NAME_UNDECORATE) {
    270       u32 at = 0;
    271       while (at < imp_name.len && imp_name.s[at] != '@') ++at;
    272       imp_name.len = at;
    273     }
    274   } else if (name_type == IMPORT_OBJECT_NAME_EXPORTAS) {
    275     /* The real export name is a third NUL-terminated string after the DLL. */
    276     u32 exp_off = dll_name_off + dll_name_len + 1u;
    277     if (exp_off >= size_of_data)
    278       compiler_panic(c, SRCLOC_NONE,
    279                      "read_coff: short-import EXPORTAS missing export name");
    280     const u8* exp_p = body + exp_off;
    281     u32 exp_max = size_of_data - exp_off;
    282     u32 exp_len = 0;
    283     while (exp_len < exp_max && exp_p[exp_len] != '\0') ++exp_len;
    284     if (exp_len == exp_max)
    285       compiler_panic(c, SRCLOC_NONE,
    286                      "read_coff: short-import EXPORTAS name not NUL-terminated");
    287     imp_name.s = (const char*)exp_p;
    288     imp_name.len = exp_len;
    289   }
    290   if (imp_name.len != sym_name_len ||
    291       memcmp(imp_name.s, body, sym_name_len) != 0) {
    292     obj_set_coff_import_name(ob, pool_intern_slice(c->global, imp_name));
    293   }
    294 
    295   obj_finalize(ob);
    296   return ob;
    297 }
    298 
    299 ObjBuilder* read_coff(Compiler* c, const char* name, const u8* data,
    300                       size_t len) {
    301   (void)name;
    302 
    303   /* ---- Step 0: header validation ---- */
    304   if (len < COFF_FILE_HEADER_SIZE)
    305     compiler_panic(c, SRCLOC_NONE, "read_coff: input shorter than COFF header");
    306 
    307   /* Microsoft short-import record? (Sig1=0, Sig2=0xFFFF.) These live
    308    * as members of .lib archives and stand in for a long-form import
    309    * object.  Detect at entry; the rest of read_coff assumes the
    310    * input is a real IMAGE_FILE_HEADER. */
    311   if (len >= 4 && coff_rd_u16(data + 0) == IMPORT_OBJECT_HDR_SIG1 &&
    312       coff_rd_u16(data + 2) == IMPORT_OBJECT_HDR_SIG2) {
    313     return read_coff_short_import(c, name, data, len);
    314   }
    315 
    316   /* PE image? A linked .exe/.dll begins with the DOS 'MZ' stub, not a bare
    317    * IMAGE_FILE_HEADER — dispatch to the image reader, which walks the
    318    * DOS -> PE-sig -> file/optional headers.  (Placed before the offset-0
    319    * machine read below, which assumes a bare header, and before the
    320    * optional-header rejection.) */
    321   if (len >= 2 && coff_rd_u16(data + 0) == IMAGE_DOS_SIGNATURE)
    322     return read_coff_image(c, name, data, len);
    323 
    324   u16 machine = coff_rd_u16(data + 0);
    325   u16 nsections = coff_rd_u16(data + 2);
    326   /* data + 4: TimeDateStamp (4 bytes, ignored). */
    327   u32 ptr_to_symtab = coff_rd_u32(data + 8);
    328   u32 nsymbols = coff_rd_u32(data + 12);
    329   u16 size_opt_hdr = coff_rd_u16(data + 16);
    330   /* data + 18: Characteristics (2 bytes, currently ignored). */
    331 
    332   if (size_opt_hdr != 0)
    333     compiler_panic(c, SRCLOC_NONE,
    334                    "read_coff: input has optional header (size=%u); "
    335                    "use read_coff_pe for executables",
    336                    (u32)size_opt_hdr);
    337 
    338   if (machine != IMAGE_FILE_MACHINE_AMD64 &&
    339       machine != IMAGE_FILE_MACHINE_ARM64 &&
    340       machine != IMAGE_FILE_MACHINE_ARM64EC)
    341     compiler_panic(c, SRCLOC_NONE, "read_coff: unsupported machine %#x",
    342                    (u32)machine);
    343 
    344   const ObjFormatImpl* fmt = obj_format_lookup(KIT_OBJ_COFF);
    345   const ObjCoffArchOps* coff =
    346       fmt && fmt->coff_machine ? fmt->coff_machine(machine) : NULL;
    347   if (!coff || !coff->reloc_from)
    348     compiler_panic(c, SRCLOC_NONE, "read_coff: no arch impl for machine %#x",
    349                    (u32)machine);
    350   u32 (*reloc_from)(u32) = coff->reloc_from;
    351 
    352   if ((u64)COFF_FILE_HEADER_SIZE +
    353           (u64)nsections * (u64)COFF_SECTION_HEADER_SIZE >
    354       (u64)len)
    355     compiler_panic(c, SRCLOC_NONE, "read_coff: section header table out of range");
    356 
    357   /* ---- Step 1: bootstrap, locate strtab ---- */
    358   /* Strtab is at PointerToSymbolTable + NumberOfSymbols * 18.  When the
    359    * file has no symbol table (ptr=0, n=0) we treat strtab as empty. */
    360   const u8* strtab = NULL;
    361   u32 strtab_size = 0;
    362   if (ptr_to_symtab && nsymbols) {
    363     u64 symtab_end = (u64)ptr_to_symtab + (u64)nsymbols * (u64)COFF_SYMBOL_SIZE;
    364     if (symtab_end + COFF_STRTAB_SIZE_FIELD_BYTES > (u64)len)
    365       compiler_panic(c, SRCLOC_NONE,
    366                      "read_coff: symbol table / strtab header out of range");
    367     u32 declared = coff_rd_u32(data + symtab_end);
    368     /* The size field is inclusive of the 4-byte prefix; treat <4 as
    369      * "empty" (some tools write 0). */
    370     if (declared < COFF_STRTAB_SIZE_FIELD_BYTES) declared = 0;
    371     if (declared) {
    372       if (symtab_end + (u64)declared > (u64)len)
    373         compiler_panic(c, SRCLOC_NONE, "read_coff: strtab body out of range");
    374       strtab = data + symtab_end;
    375       strtab_size = declared;
    376     } else {
    377       strtab = data + symtab_end;
    378       strtab_size = COFF_STRTAB_SIZE_FIELD_BYTES;
    379     }
    380   }
    381 
    382   ObjBuilder* ob = obj_new(c);
    383   if (!ob) compiler_panic(c, SRCLOC_NONE, "read_coff: obj_new failed");
    384 
    385   /* ---- Step 2: ingest sections ---- */
    386   CSecRec* secs = arena_array(c->scratch, CSecRec, nsections ? nsections : 1);
    387   const u8* shdr_base = data + COFF_FILE_HEADER_SIZE;
    388   for (u32 i = 0; i < nsections; ++i) {
    389     CSecRec* s = &secs[i];
    390     parse_shdr(shdr_base + (u64)i * COFF_SECTION_HEADER_SIZE, s);
    391 
    392     const char* nm;
    393     u32 nlen;
    394     resolve_section_name(s->raw_name, strtab, strtab_size, &nm, &nlen);
    395     Sym sn = pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen});
    396 
    397     u16 kind = coff_sec_kind(nm, nlen, s->characteristics);
    398     u16 flags = coff_sec_flags(nm, nlen, s->characteristics);
    399     u32 align = coff_sec_align(s->characteristics);
    400 
    401     int is_bss = (s->characteristics & IMAGE_SCN_CNT_UNINITIALIZED_DATA) != 0;
    402     u16 sem = is_bss ? SSEM_NOBITS : SSEM_PROGBITS;
    403 
    404     ObjSecId id = obj_section_ex(ob, sn, (SecKind)kind, (SecSem)sem, flags,
    405                                  align, 0u, 0u, 0u);
    406     if (id == OBJ_SEC_NONE)
    407       compiler_panic(c, SRCLOC_NONE,
    408                      "read_coff: obj_section_ex failed for section %u", i);
    409     s->obj_sec = id;
    410 
    411     /* Preserve raw Characteristics so emit_coff can write back any bits
    412      * the canonical SecFlag/SecSem mapping doesn't model (LNK_INFO,
    413      * LNK_REMOVE, MEM_DISCARDABLE, MEM_SHARED, GPREL, alignment nibble). */
    414     obj_section_set_ext(ob, id, OBJ_EXT_COFF, s->characteristics, 0);
    415 
    416     if (is_bss) {
    417       u32 bss_size = s->virtual_size ? s->virtual_size : s->size_of_raw_data;
    418       obj_reserve_bss(ob, id, bss_size, align);
    419     } else if (s->size_of_raw_data) {
    420       u64 end = (u64)s->pointer_to_raw_data + (u64)s->size_of_raw_data;
    421       if (end > (u64)len)
    422         compiler_panic(c, SRCLOC_NONE, "read_coff: section %u bytes out of range",
    423                        i);
    424       u8* dst = obj_reserve(ob, id, s->size_of_raw_data);
    425       memcpy(dst, data + s->pointer_to_raw_data, s->size_of_raw_data);
    426     }
    427   }
    428 
    429   /* ---- Step 3: ingest symbols (with aux-record awareness) ----
    430    * sym_to_obj is indexed by RAW symbol-table index (including aux
    431    * slots), so reloc.SymbolTableIndex resolves directly without
    432    * adjusting for skipped aux records.  Aux slots map to OBJ_SYM_NONE. */
    433   ObjSymId* sym_to_obj =
    434       arena_zarray(c->scratch, ObjSymId, nsymbols ? nsymbols : 1);
    435 
    436   /* Track section-symbol primary symtab index per section, stored as
    437    * (raw_index + 1) so 0 can mean "not seen yet" without colliding
    438    * with the (legitimate) first symbol-table slot — emit_coff always
    439    * lays the first section's section-symbol at index 0. */
    440   u32* sec_sym_primary = arena_zarray(c->scratch, u32, nsections + 1u);
    441 
    442   const u8* sym_base = data + ptr_to_symtab;
    443   if (nsymbols) {
    444     if ((u64)ptr_to_symtab + (u64)nsymbols * (u64)COFF_SYMBOL_SIZE > (u64)len)
    445       compiler_panic(c, SRCLOC_NONE, "read_coff: symbol table body out of range");
    446   }
    447 
    448   for (u32 i = 0; i < nsymbols;) {
    449     const u8* p = sym_base + (u64)i * COFF_SYMBOL_SIZE;
    450     const char* nm;
    451     u32 nlen;
    452     resolve_sym_name(p, strtab, strtab_size, &nm, &nlen);
    453 
    454     u32 value = coff_rd_u32(p + 8);
    455     i16 sec_num = (i16)coff_rd_u16(p + 12);
    456     u16 type = coff_rd_u16(p + 14);
    457     u8 sclass = p[16];
    458     u8 naux = p[17];
    459 
    460     /* FILE storage class: concatenate aux records' raw bytes (each
    461      * 18 bytes, NUL-padded) for the source-file name. */
    462     if (sclass == IMAGE_SYM_CLASS_FILE) {
    463       /* Build name from aux records (up to naux*18 bytes); fall back
    464        * to the primary record's name if naux==0. */
    465       const char* fnm = nm;
    466       u32 fnlen = nlen;
    467       if (naux) {
    468         /* Each aux record's 18 bytes are interpreted as raw file-name
    469          * bytes; concatenate then trim trailing NULs. */
    470         u32 total = (u32)naux * COFF_SYMBOL_SIZE;
    471         if ((u64)i + 1u + (u64)naux > (u64)nsymbols)
    472           compiler_panic(c, SRCLOC_NONE,
    473                          "read_coff: FILE aux records extend past symbol "
    474                          "table");
    475         const u8* aux = p + COFF_SYMBOL_SIZE;
    476         u32 n = 0;
    477         while (n < total && aux[n] != '\0') ++n;
    478         fnm = (const char*)aux;
    479         fnlen = n;
    480       }
    481       Sym fsn =
    482           fnlen ? pool_intern_slice(c->global, (Slice){.s = fnm, .len = fnlen})
    483                 : 0;
    484       ObjSymId id = obj_symbol_ex(ob, fsn, SB_LOCAL, SV_DEFAULT, SK_FILE,
    485                                   OBJ_SEC_NONE, 0, 0, 0);
    486       obj_sym_mark_referenced(ob, id);
    487       sym_to_obj[i] = id;
    488       i += 1u + naux;
    489       continue;
    490     }
    491 
    492     /* Skip .bf/.ef debug pair primaries (FUNCTION storage class) and
    493      * the END_OF_FUNCTION marker: they carry no symbol kit models. */
    494     if (sclass == IMAGE_SYM_CLASS_FUNCTION ||
    495         sclass == IMAGE_SYM_CLASS_END_OF_FUNCTION) {
    496       sym_to_obj[i] = OBJ_SYM_NONE;
    497       i += 1u + naux;
    498       continue;
    499     }
    500 
    501     /* Resolve (bind, vis, kind, section_id, value, size, cmnalign). */
    502     SymBind bind = SB_LOCAL;
    503     SymVis vis = SV_DEFAULT;
    504     SymKind kind = SK_NOTYPE;
    505     ObjSecId target_sec = OBJ_SEC_NONE;
    506     u64 sym_value = 0;
    507     u64 sym_size = 0;
    508     u64 cmnalign = 0;
    509 
    510     if (sec_num == IMAGE_SYM_UNDEFINED) {
    511       /* Undef or common.  EXTERNAL with Value > 0 is a common. */
    512       if (sclass == IMAGE_SYM_CLASS_EXTERNAL && value > 0) {
    513         bind = SB_GLOBAL;
    514         kind = SK_COMMON;
    515         sym_size = value;
    516         cmnalign = 1; /* COFF doesn't carry per-common alignment */
    517       } else {
    518         bind = (sclass == IMAGE_SYM_CLASS_WEAK_EXTERNAL) ? SB_WEAK
    519                : (sclass == IMAGE_SYM_CLASS_EXTERNAL)    ? SB_GLOBAL
    520                                                          : SB_LOCAL;
    521         kind = SK_UNDEF;
    522       }
    523     } else if (sec_num == IMAGE_SYM_ABSOLUTE) {
    524       kind = SK_ABS;
    525       sym_value = value;
    526       bind = (sclass == IMAGE_SYM_CLASS_EXTERNAL) ? SB_GLOBAL : SB_LOCAL;
    527     } else if (sec_num == IMAGE_SYM_DEBUG) {
    528       /* Defined-in-debug — kit has no model for it.  Skip with an
    529        * OBJ_SYM_NONE entry; relocations against this slot will resolve
    530        * to OBJ_SYM_NONE, which obj_reloc_ex tolerates. */
    531       sym_to_obj[i] = OBJ_SYM_NONE;
    532       i += 1u + naux;
    533       continue;
    534     } else if (sec_num >= 1 && (u32)sec_num <= nsections) {
    535       target_sec = secs[sec_num - 1].obj_sec;
    536       sym_value = value;
    537       switch (sclass) {
    538         case IMAGE_SYM_CLASS_EXTERNAL:
    539           bind = SB_GLOBAL;
    540           break;
    541         case IMAGE_SYM_CLASS_WEAK_EXTERNAL:
    542           bind = SB_WEAK;
    543           break;
    544         case IMAGE_SYM_CLASS_STATIC:
    545         case IMAGE_SYM_CLASS_LABEL:
    546         default:
    547           bind = SB_LOCAL;
    548           break;
    549       }
    550 
    551       /* Detect SECTION symbols: STATIC, Value==0, name matches the
    552        * section's own name, and the section has at least one aux
    553        * record (the section-definition aux).  Mark as SK_SECTION so
    554        * emit_coff regenerates the synthetic entry. */
    555       int is_section_sym = 0;
    556       if (sclass == IMAGE_SYM_CLASS_STATIC && value == 0 && naux >= 1) {
    557         const CSecRec* cs = &secs[sec_num - 1];
    558         u32 raw_nlen = 0;
    559         while (raw_nlen < 8 && cs->raw_name[raw_nlen] != '\0') ++raw_nlen;
    560         if (raw_nlen == nlen && memcmp(cs->raw_name, nm, nlen) == 0) {
    561           is_section_sym = 1;
    562         } else if (cs->raw_name[0] == '/') {
    563           /* Long-named section: compare the resolved name. */
    564           const char* rn;
    565           u32 rnlen;
    566           resolve_section_name(cs->raw_name, strtab, strtab_size, &rn, &rnlen);
    567           if (rnlen == nlen && memcmp(rn, nm, nlen) == 0) is_section_sym = 1;
    568         }
    569       }
    570 
    571       if (is_section_sym) {
    572         kind = SK_SECTION;
    573         sec_sym_primary[sec_num] = i + 1u;
    574       } else if (sclass == IMAGE_SYM_CLASS_SECTION) {
    575         kind = SK_SECTION;
    576       } else if (sclass == IMAGE_SYM_CLASS_LABEL) {
    577         kind = SK_NOTYPE;
    578       } else if ((type >> 8) == IMAGE_SYM_DTYPE_FUNCTION) {
    579         kind = SK_FUNC;
    580       } else if (type == IMAGE_SYM_TYPE_NULL) {
    581         kind = (bind == SB_LOCAL) ? SK_NOTYPE : SK_OBJ;
    582       } else {
    583         kind = SK_OBJ;
    584       }
    585     } else {
    586       compiler_panic(c, SRCLOC_NONE,
    587                      "read_coff: symbol section number %d out of range",
    588                      (int)sec_num);
    589     }
    590 
    591     /* WEAK_EXTERNAL primary: aux record carries TagIndex + Characteristics. */
    592     if (sclass == IMAGE_SYM_CLASS_WEAK_EXTERNAL) bind = SB_WEAK;
    593 
    594     Sym sn =
    595         nlen ? pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen}) : 0;
    596     ObjSymId id = obj_symbol_ex(ob, sn, bind, vis, kind, target_sec, sym_value,
    597                                 sym_size, cmnalign);
    598     obj_sym_mark_referenced(ob, id);
    599     sym_to_obj[i] = id;
    600 
    601     /* Genuine WEAK_EXTERNAL alias declaration (IMAGE_WEAK_EXTERN_SEARCH_ALIAS):
    602      * record the fall-back symbol (aux TagIndex) by name so the linker can
    603      * resolve this weak symbol to its target directly. mingw x86_64 spells
    604      * `_setjmp` this way, aliasing `__intrinsic_setjmp` — a redirection the
    605      * link-time single-underscore heuristic can't derive. Other weak-external
    606      * search policies (kit's own SB_WEAK emit uses SEARCH_LIBRARY with a
    607      * self/zero TagIndex, i.e. "weak, no fallback") are left to that heuristic
    608      * and the plain SB_WEAK-undef path. */
    609     if (sclass == IMAGE_SYM_CLASS_WEAK_EXTERNAL && naux >= 1 && sn != 0) {
    610       const u8* aux = p + COFF_SYMBOL_SIZE;
    611       u32 tag_index = coff_rd_u32(aux + 0);
    612       u32 characteristics = coff_rd_u32(aux + 4);
    613       if (characteristics == IMAGE_WEAK_EXTERN_SEARCH_ALIAS &&
    614           tag_index < nsymbols && tag_index != i) {
    615         const u8* tp = sym_base + (u64)tag_index * COFF_SYMBOL_SIZE;
    616         const char* tnm;
    617         u32 tnlen;
    618         resolve_sym_name(tp, strtab, strtab_size, &tnm, &tnlen);
    619         if (tnlen != 0 && (tnlen != nlen || memcmp(tnm, nm, nlen) != 0)) {
    620           Sym target =
    621               pool_intern_slice(c->global, (Slice){.s = tnm, .len = tnlen});
    622           obj_set_weak_alias(ob, id, target);
    623         }
    624       }
    625     }
    626     i += 1u + naux;
    627   }
    628 
    629   /* ---- Step 4: stitch COMDAT groups from section-definition aux ----
    630    * Each COMDAT section has a STATIC primary symbol (the section
    631    * symbol) followed by one section-definition aux record.  Selection
    632    * != 0 marks the section as a COMDAT member; the signature symbol
    633    * is the section symbol itself (Number field's selection variant
    634    * controls dedup policy at link time). */
    635   for (u32 s = 1; s <= nsections; ++s) {
    636     u32 prim_plus1 = sec_sym_primary[s];
    637     if (!prim_plus1) continue;
    638     u32 prim = prim_plus1 - 1u;
    639     const CSecRec* cs = &secs[s - 1];
    640     if (!(cs->characteristics & IMAGE_SCN_LNK_COMDAT)) continue;
    641     const u8* p = sym_base + (u64)prim * COFF_SYMBOL_SIZE;
    642     u8 naux = p[17];
    643     if (!naux) continue;
    644     const u8* aux = p + COFF_SYMBOL_SIZE;
    645     /* Aux layout: Length(4), NumberOfRelocations(2), NumberOfLinenumbers(2),
    646      * CheckSum(4), Number(2), Selection(1), Unused(3). */
    647     u16 assoc_number = coff_rd_u16(aux + 12);
    648     u8 selection = aux[14];
    649     if (selection == 0) continue;
    650 
    651     ObjSymId sig = sym_to_obj[prim];
    652     const ObjSym* sigsym = obj_symbol_get(ob, sig);
    653     Sym gname = sigsym ? sigsym->name : 0;
    654     ObjGroupId gid = obj_group(ob, gname, sig, (u32)selection);
    655     obj_group_add_section(ob, gid, cs->obj_sec);
    656     obj_section_set_group(ob, cs->obj_sec, gid);
    657 
    658     /* ASSOCIATIVE: the COMDAT member is associated with another
    659      * section's group.  Add this section to that group's list too so
    660      * dead-strip keeps them paired. */
    661     if (selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE && assoc_number >= 1 &&
    662         (u32)assoc_number <= nsections) {
    663       u32 other_prim_plus1 = sec_sym_primary[assoc_number];
    664       if (other_prim_plus1) {
    665         u32 other_prim = other_prim_plus1 - 1u;
    666         const u8* op = sym_base + (u64)other_prim * COFF_SYMBOL_SIZE;
    667         if (op[17]) {
    668           const u8* oaux = op + COFF_SYMBOL_SIZE;
    669           u8 osel = oaux[14];
    670           if (osel != 0) {
    671             ObjSymId osig = sym_to_obj[other_prim];
    672             const ObjSym* osigsym = obj_symbol_get(ob, osig);
    673             Sym ogname = osigsym ? osigsym->name : 0;
    674             ObjGroupId ogid = obj_group(ob, ogname, osig, (u32)osel);
    675             obj_group_add_section(ob, ogid, cs->obj_sec);
    676           }
    677         }
    678       }
    679     }
    680   }
    681 
    682   /* ---- Step 5: per-section relocations ---- */
    683   for (u32 i = 0; i < nsections; ++i) {
    684     const CSecRec* s = &secs[i];
    685     if (!s->number_of_relocations) continue;
    686     u64 reloc_end = (u64)s->pointer_to_relocations +
    687                     (u64)s->number_of_relocations * (u64)COFF_RELOC_SIZE;
    688     if (reloc_end > (u64)len)
    689       compiler_panic(c, SRCLOC_NONE,
    690                      "read_coff: relocation table for section %u out of range",
    691                      i);
    692     const u8* rbase = data + s->pointer_to_relocations;
    693     for (u32 j = 0; j < s->number_of_relocations; ++j) {
    694       const u8* rp = rbase + (u64)j * COFF_RELOC_SIZE;
    695       u32 r_va = coff_rd_u32(rp + 0);
    696       u32 r_sym = coff_rd_u32(rp + 4);
    697       u16 r_type = coff_rd_u16(rp + 8);
    698 
    699       u32 kind = reloc_from(r_type);
    700       if (kind == (u32)-1)
    701         compiler_panic(c, SRCLOC_NONE,
    702                        "read_coff: unsupported reloc type %u for machine %#x",
    703                        (u32)r_type, (u32)machine);
    704 
    705       ObjSymId target = OBJ_SYM_NONE;
    706       if (r_sym < nsymbols) target = sym_to_obj[r_sym];
    707 
    708       /* COFF stores addends inline in the relocated field.  Fold those
    709        * bytes into Reloc.addend for the reloc kinds whose apply path
    710        * overwrites the field.  AMD64 REL32 also subtracts from a PC after
    711        * the relocated field: plain REL32 is relative to P+4, and REL32_N is
    712        * relative to P+N.  Record that convention as an implicit negative
    713        * addend so link_reloc_apply can stay format neutral. */
    714       /* ARM64 PAGEOFFSET_12L is one wire code for LDST{8,16,32,64,128}.
    715        * The per-arch translator returns R_AARCH64_LDST64_ABS_LO12_NC by
    716        * default; recover the actual access width from the patched LDR/
    717        * STR instruction's size field at bits [31:30] (and a SIMD/FP
    718        * extension via bit 26 + opc[23]) so the linker applies the right
    719        * scale.  Otherwise a width mismatch panics at apply-time with
    720        * "misaligned address" — see link_reloc.c. */
    721       if ((machine == IMAGE_FILE_MACHINE_ARM64 ||
    722            machine == IMAGE_FILE_MACHINE_ARM64EC) &&
    723           r_type == IMAGE_REL_ARM64_PAGEOFFSET_12L && s->size_of_raw_data &&
    724           (u64)r_va + 4u <= (u64)s->size_of_raw_data) {
    725         const u8* ibytes = data + s->pointer_to_raw_data + r_va;
    726         u32 instr = (u32)ibytes[0] | ((u32)ibytes[1] << 8) |
    727                     ((u32)ibytes[2] << 16) | ((u32)ibytes[3] << 24);
    728         u32 sz = (instr >> 30) & 0x3u;
    729         int is_simd = (instr >> 26) & 0x1u;
    730         if (is_simd && ((instr >> 23) & 0x1u)) {
    731           kind = R_AARCH64_LDST128_ABS_LO12_NC;
    732         } else {
    733           switch (sz) {
    734             case 0:
    735               kind = R_AARCH64_LDST8_ABS_LO12_NC;
    736               break;
    737             case 1:
    738               kind = R_AARCH64_LDST16_ABS_LO12_NC;
    739               break;
    740             case 2:
    741               kind = R_AARCH64_LDST32_ABS_LO12_NC;
    742               break;
    743             default:
    744               kind = R_AARCH64_LDST64_ABS_LO12_NC;
    745               break;
    746           }
    747         }
    748       }
    749 
    750       i64 addend = 0;
    751       int has_explicit = 0;
    752       if (machine == IMAGE_FILE_MACHINE_AMD64) {
    753         i64 inline_addend = 0;
    754         switch (r_type) {
    755           case IMAGE_REL_AMD64_ADDR64:
    756             if (coff_reloc_inline_addend(data, len, s, r_va, 8,
    757                                          &inline_addend))
    758               addend = inline_addend;
    759             break;
    760           case IMAGE_REL_AMD64_ADDR32:
    761             if (coff_reloc_inline_addend(data, len, s, r_va, 4,
    762                                          &inline_addend))
    763               addend = inline_addend;
    764             break;
    765           case IMAGE_REL_AMD64_REL32:
    766             if (coff_reloc_inline_addend(data, len, s, r_va, 4,
    767                                          &inline_addend))
    768               addend = inline_addend;
    769             addend -= 4;
    770             break;
    771           case IMAGE_REL_AMD64_REL32_1:
    772             if (coff_reloc_inline_addend(data, len, s, r_va, 4,
    773                                          &inline_addend))
    774               addend = inline_addend;
    775             addend -= 1;
    776             break;
    777           case IMAGE_REL_AMD64_REL32_2:
    778             if (coff_reloc_inline_addend(data, len, s, r_va, 4,
    779                                          &inline_addend))
    780               addend = inline_addend;
    781             addend -= 2;
    782             break;
    783           case IMAGE_REL_AMD64_REL32_3:
    784             if (coff_reloc_inline_addend(data, len, s, r_va, 4,
    785                                          &inline_addend))
    786               addend = inline_addend;
    787             addend -= 3;
    788             break;
    789           case IMAGE_REL_AMD64_REL32_4:
    790             if (coff_reloc_inline_addend(data, len, s, r_va, 4,
    791                                          &inline_addend))
    792               addend = inline_addend;
    793             addend -= 4;
    794             break;
    795           case IMAGE_REL_AMD64_REL32_5:
    796             if (coff_reloc_inline_addend(data, len, s, r_va, 4,
    797                                          &inline_addend))
    798               addend = inline_addend;
    799             addend -= 5;
    800             break;
    801           default:
    802             break;
    803         }
    804       }
    805 
    806       obj_reloc_ex(ob, s->obj_sec, r_va, (RelocKind)kind, target, addend,
    807                    has_explicit, 0);
    808     }
    809   }
    810 
    811   /* ---- Step 6: finalize and return ---- */
    812   obj_finalize(ob);
    813   return ob;
    814 }