kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

read.c (49008B)


      1 /* ELF reader. Parses a 32- or 64-bit little-endian ELF object into a fresh
      2  * ObjBuilder. ET_REL produces the section/symbol/reloc view; the
      3  * post-finalize shape is the canonical superset doc/DESIGN.md §5.5
      4  * promises: read_elf of an emit_elf output produces an ObjBuilder
      5  * equivalent to the writer's input, modulo (a) section ordering and
      6  * (b) STT_SECTION symbols synthesized by the writer.
      7  *
      8  * ET_EXEC / ET_DYN additionally attach the linked-image view via
      9  * read_elf_image (program-header segments, .dynamic dependencies,
     10  * .dynsym dynamic symbols, and allocatable dynamic relocations) — see
     11  * doc/OBJ.md. Their section tables still parse through the same
     12  * passes. The standalone read_elf_dso (below) remains the linker's
     13  * exports-only DSO-input path.
     14  *
     15  * Scope: little-endian ELFCLASS32/ELFCLASS64. Unsupported classes,
     16  * endianness, or machines produce a compiler_panic with a diagnostic. */
     17 
     18 #include <string.h>
     19 
     20 #include "core/heap.h"
     21 #include "core/pool.h"
     22 #include "core/slice.h"
     23 #include "obj/elf/elf.h"
     24 #include "obj/format.h"
     25 
     26 /* ---- shdr scratch struct ---- */
     27 
     28 typedef struct ShdrRec {
     29   u32 sh_name;
     30   u32 sh_type;
     31   u64 sh_flags;
     32   u64 sh_addr;
     33   u64 sh_offset;
     34   u64 sh_size;
     35   u32 sh_link;
     36   u32 sh_info;
     37   u64 sh_addralign;
     38   u64 sh_entsize;
     39 } ShdrRec;
     40 
     41 static void parse_shdr(const u8* p, int is32, ShdrRec* out) {
     42   /* Elf32_Shdr (40B) shares field order with Elf64_Shdr (64B); only the
     43    * flags/addr/offset/size/addralign/entsize fields narrow to u32 and
     44    * shift the following offsets. The ShdrRec stays u64-wide. */
     45   if (is32) {
     46     out->sh_name = rd_u32_le(p + 0);
     47     out->sh_type = rd_u32_le(p + 4);
     48     out->sh_flags = rd_u32_le(p + 8);
     49     out->sh_addr = rd_u32_le(p + 12);
     50     out->sh_offset = rd_u32_le(p + 16);
     51     out->sh_size = rd_u32_le(p + 20);
     52     out->sh_link = rd_u32_le(p + 24);
     53     out->sh_info = rd_u32_le(p + 28);
     54     out->sh_addralign = rd_u32_le(p + 32);
     55     out->sh_entsize = rd_u32_le(p + 36);
     56   } else {
     57     out->sh_name = rd_u32_le(p + 0);
     58     out->sh_type = rd_u32_le(p + 4);
     59     out->sh_flags = rd_u64_le(p + 8);
     60     out->sh_addr = rd_u64_le(p + 16);
     61     out->sh_offset = rd_u64_le(p + 24);
     62     out->sh_size = rd_u64_le(p + 32);
     63     out->sh_link = rd_u32_le(p + 40);
     64     out->sh_info = rd_u32_le(p + 44);
     65     out->sh_addralign = rd_u64_le(p + 48);
     66     out->sh_entsize = rd_u64_le(p + 56);
     67   }
     68 }
     69 
     70 /* ---- mappers ---- */
     71 
     72 /* The bits this function maps to SecFlag — anything outside this mask is
     73  * treated as opaque and stashed in Section.ext_flags by the caller so the
     74  * emitter can write it back unchanged.  Examples of bits left over:
     75  * SHF_EXCLUDE (0x80000000) on .llvm_addrsig, SHF_COMPRESSED (0x800) on
     76  * compressed .debug_*, SHF_INFO_LINK (0x40) on .rela.* sections. */
     77 #define ELF_KNOWN_FLAGS_MASK                                           \
     78   ((u64)(SHF_ALLOC | SHF_EXECINSTR | SHF_WRITE | SHF_TLS | SHF_MERGE | \
     79          SHF_STRINGS | SHF_GROUP | SHF_LINK_ORDER | SHF_GNU_RETAIN))
     80 
     81 static u16 elf_flags_to_obj(u64 f) {
     82   u16 r = 0;
     83   if (f & SHF_ALLOC) r |= SF_ALLOC;
     84   if (f & SHF_EXECINSTR) r |= SF_EXEC;
     85   if (f & SHF_WRITE) r |= SF_WRITE;
     86   if (f & SHF_TLS) r |= SF_TLS;
     87   if (f & SHF_MERGE) r |= SF_MERGE;
     88   if (f & SHF_STRINGS) r |= SF_STRINGS;
     89   if (f & SHF_GROUP) r |= SF_GROUP;
     90   if (f & SHF_LINK_ORDER) r |= SF_LINK_ORDER;
     91   if (f & SHF_GNU_RETAIN) r |= SF_RETAIN;
     92   return r;
     93 }
     94 
     95 /* Map ELF sh_type -> SecSem.  Sets *known to 1 if the value is one of
     96  * the canonical types the kit model knows about; 0 means the caller
     97  * fell through to the SSEM_PROGBITS fallback and should preserve the
     98  * raw sh_type via Section.ext_type so emit_elf can write it back. */
     99 static u16 elf_type_to_sem(u32 t, int* known) {
    100   *known = 1;
    101   switch (t) {
    102     case SHT_PROGBITS:
    103       return SSEM_PROGBITS;
    104     case SHT_NOBITS:
    105       return SSEM_NOBITS;
    106     case SHT_SYMTAB:
    107       return SSEM_SYMTAB;
    108     case SHT_STRTAB:
    109       return SSEM_STRTAB;
    110     case SHT_RELA:
    111       return SSEM_RELA;
    112     case SHT_REL:
    113       return SSEM_REL;
    114     case SHT_NOTE:
    115       return SSEM_NOTE;
    116     case SHT_INIT_ARRAY:
    117       return SSEM_INIT_ARRAY;
    118     case SHT_FINI_ARRAY:
    119       return SSEM_FINI_ARRAY;
    120     case SHT_PREINIT_ARRAY:
    121       return SSEM_PREINIT_ARRAY;
    122     case SHT_GROUP:
    123       return SSEM_GROUP;
    124     default:
    125       *known = 0;
    126       return SSEM_PROGBITS;
    127   }
    128 }
    129 
    130 static u16 elf_kind_from_name(const char* name, u32 nlen, u64 sh_flags,
    131                               u32 sh_type) {
    132   if (sh_type == SHT_NOBITS) return SEC_BSS;
    133   if (nlen >= 5 && memcmp(name, ".text", 5) == 0) return SEC_TEXT;
    134   if (nlen >= 7 && memcmp(name, ".rodata", 7) == 0) return SEC_RODATA;
    135   if (nlen >= 5 && memcmp(name, ".data", 5) == 0) return SEC_DATA;
    136   if (nlen >= 4 && memcmp(name, ".bss", 4) == 0) return SEC_BSS;
    137   if (nlen >= 7 && memcmp(name, ".debug_", 7) == 0) return SEC_DEBUG;
    138   /* Fallback: classify by flags. */
    139   if (sh_flags & SHF_EXECINSTR) return SEC_TEXT;
    140   if (sh_flags & SHF_WRITE) return SEC_DATA;
    141   if (sh_flags & SHF_ALLOC) return SEC_RODATA;
    142   return SEC_OTHER;
    143 }
    144 
    145 static u16 elf_bind_to_obj(u32 b) {
    146   switch (b) {
    147     case STB_GLOBAL:
    148     case STB_GNU_UNIQUE:
    149       /* GNU-unique is a global with extra runtime uniqueness semantics; for
    150        * link-time resolution it is an ordinary global definition. FreeBSD's
    151        * crt1.o brands the binary with a GNU-unique `.freebsd.note*` symbol. */
    152       return SB_GLOBAL;
    153     case STB_WEAK:
    154       return SB_WEAK;
    155     default:
    156       return SB_LOCAL;
    157   }
    158 }
    159 
    160 static u16 elf_type_to_kind(u32 t, u16 shndx) {
    161   if (shndx == SHN_UNDEF) return SK_UNDEF;
    162   if (shndx == SHN_COMMON) return SK_COMMON;
    163   /* SHN_ABS is the convention for STT_FILE and a few other defined
    164    * symbols whose value is not an address. Don't smother the type
    165    * with SK_ABS when the type field carries real information — only
    166    * fall through to SK_ABS for STT_NOTYPE-at-SHN_ABS. */
    167   if (shndx == SHN_ABS && t == STT_NOTYPE) return SK_ABS;
    168   switch (t) {
    169     case STT_FUNC:
    170       return SK_FUNC;
    171     case STT_OBJECT:
    172       return SK_OBJ;
    173     case STT_SECTION:
    174       return SK_SECTION;
    175     case STT_FILE:
    176       return SK_FILE;
    177     case STT_TLS:
    178       return SK_TLS;
    179     case STT_COMMON:
    180       return SK_COMMON;
    181     case STT_GNU_IFUNC:
    182       return SK_IFUNC;
    183     default:
    184       /* STT_NOTYPE on a defined symbol (e.g. AArch64 mapping symbols
    185        * `$x` / `$d`, or assembly labels) round-trips as SK_NOTYPE.
    186        * The linker keeps definedness keyed on SK_UNDEF; SK_NOTYPE is
    187        * "defined but typeless". */
    188       return SK_NOTYPE;
    189   }
    190 }
    191 
    192 static u8 elf_other_to_vis(u32 other) {
    193   switch (other & 3) {
    194     case STV_HIDDEN:
    195       return SV_HIDDEN;
    196     case STV_PROTECTED:
    197       return SV_PROTECTED;
    198     case STV_INTERNAL:
    199       return SV_INTERNAL;
    200     default:
    201       return SV_DEFAULT;
    202   }
    203 }
    204 
    205 /* Bounds-checked C-string slice from a strtab section. Returns "" on
    206  * out-of-range so callers don't have to special-case it. `len_out` is
    207  * set to the result's byte length. */
    208 static const char* strtab_lookup(const u8* tab, u64 tab_size, u32 off,
    209                                  u32* len_out) {
    210   if (off >= tab_size) {
    211     *len_out = 0;
    212     return "";
    213   }
    214   const char* s = (const char*)(tab + off);
    215   u32 max = (u32)(tab_size - off);
    216   u32 n = 0;
    217   while (n < max && s[n] != '\0') ++n;
    218   *len_out = n;
    219   return s;
    220 }
    221 
    222 static const char* pt_type_name(u32 t) {
    223   switch (t) {
    224     case PT_NULL:
    225       return "NULL";
    226     case PT_LOAD:
    227       return "LOAD";
    228     case PT_DYNAMIC:
    229       return "DYNAMIC";
    230     case PT_INTERP:
    231       return "INTERP";
    232     case PT_NOTE:
    233       return "NOTE";
    234     case PT_PHDR:
    235       return "PHDR";
    236     case PT_TLS:
    237       return "TLS";
    238     case PT_GNU_EH_FRAME:
    239       return "GNU_EH_FRAME";
    240     case PT_GNU_STACK:
    241       return "GNU_STACK";
    242     case PT_GNU_RELRO:
    243       return "GNU_RELRO";
    244     default:
    245       return "UNKNOWN";
    246   }
    247 }
    248 
    249 static Sym intern_cstr(Compiler* c, const char* s) {
    250   return pool_intern_slice(c->global, (Slice){.s = s, .len = (u32)strlen(s)});
    251 }
    252 
    253 /* ELF default-version normalization. A symbol "base@@VERSION" is the *default*
    254  * version of `base`: an unversioned reference binds to it. GNU as emits the
    255  * literal "@@" into a relocatable object's .symtab string (e.g. FreeBSD
    256  * libc.a's openat@@FBSD_1.2 / setcontext / swapcontext). Trim to the base so
    257  * kit's name-based resolution matches plain references. A single-'@'
    258  * (non-default) version is left intact -- those are inert compatibility
    259  * aliases (e.g. fstat@FBSD_1.0) that must NOT shadow the modern base symbol.
    260  * Shared-library exports keep their version in .gnu.version_d rather than the
    261  * name string, so this only fires for relocatable .symtab reads. Returns the
    262  * length of the base name (== nlen when there is no "@@"). */
    263 static u32 elf_default_version_namelen(const char* nm, u32 nlen) {
    264   u32 i;
    265   if (!nm) return nlen;
    266   for (i = 1; i + 1 < nlen; ++i)
    267     if (nm[i] == '@' && nm[i + 1] == '@') return i;
    268   return nlen;
    269 }
    270 
    271 /* Parse a DSO's .gnu.version_d (SHT_GNU_VERDEF) into an index->version-name
    272  * table so .dynsym entries (whose version lives in the parallel .gnu.version)
    273  * can be labelled. Returns an arena table indexed by version index (0/1 unused,
    274  * matching VER_NDX_LOCAL/GLOBAL) and sets *out_max to the highest index seen;
    275  * NULL when the input has no verdef. The Verdef/Verdaux wire layout is identical
    276  * on ELFCLASS32/64 (all Half/Word fields), so this is width-agnostic. */
    277 static Sym* read_elf_verdefs(Compiler* c, const u8* data, size_t len,
    278                              const ShdrRec* shdrs, u16 e_shnum, u32* out_max) {
    279   u32 i, verdef_idx = 0, max_ndx = 0;
    280   const ShdrRec* sh;
    281   const ShdrRec* str_sh;
    282   const u8* strtab;
    283   const u8* base;
    284   u64 strtab_sz, size, off;
    285   Sym* tbl;
    286   *out_max = 0;
    287   for (i = 1; i < e_shnum; ++i)
    288     if (shdrs[i].sh_type == SHT_GNU_VERDEF) {
    289       verdef_idx = i;
    290       break;
    291     }
    292   if (!verdef_idx) return NULL;
    293   sh = &shdrs[verdef_idx];
    294   if (sh->sh_link >= e_shnum) return NULL;
    295   str_sh = &shdrs[sh->sh_link];
    296   if (sh->sh_offset + sh->sh_size > len ||
    297       str_sh->sh_offset + str_sh->sh_size > len)
    298     return NULL;
    299   strtab = data + str_sh->sh_offset;
    300   strtab_sz = str_sh->sh_size;
    301   base = data + sh->sh_offset;
    302   size = sh->sh_size;
    303 
    304   /* Pass 1: highest version index, to size the table. */
    305   off = 0;
    306   while (off + ELF_VERDEF_SIZE <= size) {
    307     u32 ndx = (u32)(rd_u16_le(base + off + 4) & VERSYM_VERSION);
    308     u32 vd_next = rd_u32_le(base + off + 16);
    309     if (ndx > max_ndx) max_ndx = ndx;
    310     if (!vd_next) break;
    311     off += vd_next;
    312   }
    313   tbl = arena_zarray(c->scratch, Sym, (size_t)max_ndx + 1u);
    314 
    315   /* Pass 2: record each non-base version's name (its first Verdaux). */
    316   off = 0;
    317   while (off + ELF_VERDEF_SIZE <= size) {
    318     u16 vd_flags = rd_u16_le(base + off + 2);
    319     u32 ndx = (u32)(rd_u16_le(base + off + 4) & VERSYM_VERSION);
    320     u32 vd_aux = rd_u32_le(base + off + 12);
    321     u32 vd_next = rd_u32_le(base + off + 16);
    322     if (!(vd_flags & VER_FLG_BASE) && ndx <= max_ndx &&
    323         off + vd_aux + ELF_VERDAUX_SIZE <= size) {
    324       u32 nlen;
    325       const char* nm =
    326           strtab_lookup(strtab, strtab_sz, rd_u32_le(base + off + vd_aux), &nlen);
    327       if (nlen)
    328         tbl[ndx] = pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen});
    329     }
    330     if (!vd_next) break;
    331     off += vd_next;
    332   }
    333   *out_max = max_ndx;
    334   return tbl;
    335 }
    336 
    337 /* Populate the builder's ObjImage from an ET_EXEC / ET_DYN input: the
    338  * program-header segment table (+ interp + image base), the .dynamic
    339  * dependency view (DT_NEEDED / DT_SONAME / DT_RPATH / DT_RUNPATH), the
    340  * .dynsym dynamic symbols, and the allocatable .rela.* / .rel.* dynamic
    341  * relocations. The section / symbol tables are parsed by read_elf's normal
    342  * passes; this adds the orthogonal image dimension. Lenient where a
    343  * malformed sub-table would otherwise abort a useful inspection: a bad
    344  * .dynamic / .dynsym / dyn-reloc table is skipped rather than panicked. */
    345 static void read_elf_image(Compiler* c, ObjBuilder* ob, const u8* data,
    346                            size_t len, u16 e_type, int is32,
    347                            const ShdrRec* shdrs, u16 e_shnum,
    348                            const u32* elf_to_obj, u32 (*reloc_from)(u32)) {
    349   u32 phdr_size = is32 ? ELF32_PHDR_SIZE : ELF64_PHDR_SIZE;
    350   u32 sym_size = is32 ? ELF32_SYM_SIZE : ELF64_SYM_SIZE;
    351   u32 rela_size = is32 ? ELF32_RELA_SIZE : ELF64_RELA_SIZE;
    352   u32 rel_size = is32 ? 8u : 16u;
    353   u32 dyn_size = is32 ? ELF32_DYN_SIZE : ELF64_DYN_SIZE;
    354   ObjImage* im =
    355       obj_image_ensure(ob, e_type == ET_DYN ? OBJ_KIND_DYN : OBJ_KIND_EXEC);
    356   if (!im) compiler_panic(c, SRCLOC_NONE, "read_elf: obj_image_ensure failed");
    357 
    358   /* e_entry is at offset 24 in both Ehdr32/Ehdr64, native width. */
    359   obj_image_set_entry(im, elf_rd_addr(data + 24, is32));
    360 
    361   /* Program headers -> segments (+ PT_INTERP string, image base). */
    362   {
    363     /* e_phoff: 4B@28 on ELF32, 8B@32 on ELF64. e_phentsize/e_phnum
    364      * shift accordingly (42/44 vs 54/56). */
    365     u64 e_phoff = is32 ? (u64)rd_u32_le(data + 28) : rd_u64_le(data + 32);
    366     u16 e_phentsize = rd_u16_le(data + (is32 ? 42 : 54));
    367     u16 e_phnum = rd_u16_le(data + (is32 ? 44 : 56));
    368     int have_base = 0;
    369     u64 image_base = 0;
    370     if (e_phnum) {
    371       if (e_phentsize != phdr_size)
    372         compiler_panic(c, SRCLOC_NONE, "read_elf: unexpected e_phentsize %u",
    373                        (u32)e_phentsize);
    374       if (e_phoff + (u64)e_phnum * phdr_size > len)
    375         compiler_panic(c, SRCLOC_NONE,
    376                        "read_elf: program header table out of range");
    377       for (u16 i = 0; i < e_phnum; ++i) {
    378         const u8* p = data + e_phoff + (u64)i * phdr_size;
    379         /* Elf32_Phdr REORDERS p_flags AFTER the sizes:
    380          *   p_type@0,p_offset@4,p_vaddr@8,p_paddr@12,p_filesz@16,
    381          *   p_memsz@20,p_flags@24,p_align@28 (all u32).
    382          * Elf64_Phdr: p_type@0,p_flags@4,p_offset@8,p_vaddr@16,
    383          *   p_filesz@32,p_memsz@40,p_align@48. */
    384         u32 p_type = rd_u32_le(p + 0);
    385         u32 p_flags = is32 ? rd_u32_le(p + 24) : rd_u32_le(p + 4);
    386         u64 p_offset = is32 ? (u64)rd_u32_le(p + 4) : rd_u64_le(p + 8);
    387         u64 p_vaddr = is32 ? (u64)rd_u32_le(p + 8) : rd_u64_le(p + 16);
    388         u64 p_filesz = is32 ? (u64)rd_u32_le(p + 16) : rd_u64_le(p + 32);
    389         u64 p_memsz = is32 ? (u64)rd_u32_le(p + 20) : rd_u64_le(p + 40);
    390         u64 p_align = is32 ? (u64)rd_u32_le(p + 28) : rd_u64_le(p + 48);
    391         ObjSegment seg;
    392         seg.name = intern_cstr(c, pt_type_name(p_type));
    393         seg.vaddr = p_vaddr;
    394         seg.vsize = p_memsz;
    395         seg.file_off = p_offset;
    396         seg.file_size = p_filesz;
    397         /* PF_R/W/X share bit values with OBJ_SEG_R/W/X. */
    398         seg.perms = p_flags & (PF_R | PF_W | PF_X);
    399         seg.align = (u32)(p_align ? p_align : 1);
    400         obj_image_add_segment(im, &seg);
    401 
    402         if (p_type == PT_LOAD && (!have_base || p_vaddr < image_base)) {
    403           image_base = p_vaddr;
    404           have_base = 1;
    405         }
    406         if (p_type == PT_INTERP && p_filesz && p_offset + p_filesz <= len) {
    407           u32 ilen = (u32)p_filesz;
    408           while (ilen && data[p_offset + ilen - 1] == '\0') --ilen;
    409           if (ilen)
    410             obj_image_set_interp(
    411                 im, pool_intern_slice(
    412                         c->global, (Slice){.s = (const char*)(data + p_offset),
    413                                            .len = ilen}));
    414         }
    415       }
    416     }
    417     if (have_base) obj_image_set_base(im, image_base);
    418   }
    419 
    420   /* Locate .dynamic and .dynsym. */
    421   u32 dynamic_idx = 0, dynsym_idx = 0;
    422   for (u16 i = 1; i < e_shnum; ++i) {
    423     if (shdrs[i].sh_type == SHT_DYNAMIC && !dynamic_idx) dynamic_idx = i;
    424     if (shdrs[i].sh_type == SHT_DYNSYM && !dynsym_idx) dynsym_idx = i;
    425   }
    426 
    427   /* .dynamic -> dependency view. */
    428   if (dynamic_idx) {
    429     const ShdrRec* dsh = &shdrs[dynamic_idx];
    430     if (dsh->sh_link < e_shnum) {
    431       const ShdrRec* str_sh = &shdrs[dsh->sh_link];
    432       if (str_sh->sh_offset + str_sh->sh_size <= len &&
    433           dsh->sh_offset + dsh->sh_size <= len) {
    434         const u8* dynstr = data + str_sh->sh_offset;
    435         u64 dynstr_sz = str_sh->sh_size;
    436         const u8* dynp = data + dsh->sh_offset;
    437         u64 dynsz = dsh->sh_size;
    438         /* ELF32 DT entries are 8B (d_tag:u32, d_un:u32); ELF64 16B. */
    439         for (u64 off = 0; off + dyn_size <= dynsz; off += dyn_size) {
    440           u64 tag = elf_rd_addr(dynp + off, is32);
    441           u64 val = elf_rd_addr(dynp + off + (is32 ? 4 : 8), is32);
    442           /* Raw .dynamic view (escape hatch): one entry per DT_* tag, the
    443            * terminating DT_NULL included, before the NEEDED/SONAME/RPATH
    444            * filtering below. */
    445           {
    446             ObjImageRaw r;
    447             r.tag = (u32)tag;
    448             r.value = val;
    449             r.extra = 0;
    450             obj_image_add_raw(im, &r);
    451           }
    452           if (tag == DT_NULL) break;
    453           if (tag != DT_NEEDED && tag != DT_SONAME && tag != DT_RPATH &&
    454               tag != DT_RUNPATH)
    455             continue;
    456           {
    457             u32 nlen;
    458             const char* nm = strtab_lookup(dynstr, dynstr_sz, (u32)val, &nlen);
    459             Sym s = nlen ? pool_intern_slice(c->global,
    460                                              (Slice){.s = nm, .len = nlen})
    461                          : 0;
    462             if (!s) continue;
    463             if (tag == DT_NEEDED) {
    464               ObjImageDep d;
    465               d.name = s;
    466               d.imports = NULL;
    467               d.nimports = 0;
    468               obj_image_add_dep(im, &d);
    469             } else if (tag == DT_SONAME) {
    470               obj_image_set_soname(im, s);
    471             } else {
    472               obj_image_add_rpath(im, s);
    473             }
    474           }
    475         }
    476       }
    477     }
    478   }
    479 
    480   /* .dynsym -> dynamic symbols, plus an index->name table for dyn relocs. */
    481   Sym* dynsym_names = NULL;
    482   u32 ndynsym = 0;
    483   if (dynsym_idx) {
    484     const ShdrRec* sh = &shdrs[dynsym_idx];
    485     if (sh->sh_entsize == sym_size && (sh->sh_size % sym_size) == 0 &&
    486         sh->sh_link < e_shnum && sh->sh_offset + sh->sh_size <= len) {
    487       const ShdrRec* str_sh = &shdrs[sh->sh_link];
    488       if (str_sh->sh_offset + str_sh->sh_size <= len) {
    489         const u8* strtab = data + str_sh->sh_offset;
    490         u64 strtab_sz = str_sh->sh_size;
    491         const u8* base = data + sh->sh_offset;
    492         ndynsym = (u32)(sh->sh_size / sym_size);
    493         dynsym_names = arena_zarray(c->scratch, Sym, ndynsym ? ndynsym : 1);
    494         /* Parallel symbol-version tables: .gnu.version_d names indexed by
    495          * version index, and .gnu.version (one u16 per dynsym entry). A
    496          * defined entry whose versym lacks VERSYM_HIDDEN is the *default*
    497          * version of its name — the version a plain reference should bind. */
    498         u32 verdef_max = 0;
    499         Sym* verdef_tbl = read_elf_verdefs(c, data, len, shdrs, e_shnum,
    500                                            &verdef_max);
    501         const u8* versym = NULL;
    502         u32 nversym = 0;
    503         for (u16 vi = 1; vi < e_shnum; ++vi) {
    504           if (shdrs[vi].sh_type != SHT_GNU_VERSYM) continue;
    505           if (shdrs[vi].sh_offset + shdrs[vi].sh_size <= len &&
    506               shdrs[vi].sh_entsize == 2)
    507             versym = data + shdrs[vi].sh_offset,
    508             nversym = (u32)(shdrs[vi].sh_size / 2u);
    509           break;
    510         }
    511         for (u32 i = 1; i < ndynsym; ++i) {
    512           const u8* p = base + (u64)i * sym_size;
    513           /* Elf32_Sym REORDERS: st_name@0, st_value@4, st_size@8,
    514            * st_info@12, st_other@13, st_shndx@14. Elf64_Sym:
    515            * st_name@0, st_info@4, st_other@5, st_shndx@6,
    516            * st_value@8, st_size@16. */
    517           u32 st_name = rd_u32_le(p + 0);
    518           u8 st_info = is32 ? p[12] : p[4];
    519           u16 st_shndx = is32 ? rd_u16_le(p + 14) : rd_u16_le(p + 6);
    520           u64 st_value = is32 ? (u64)rd_u32_le(p + 4) : rd_u64_le(p + 8);
    521           u64 st_size = is32 ? (u64)rd_u32_le(p + 8) : rd_u64_le(p + 16);
    522           u32 nlen;
    523           const char* nm = strtab_lookup(strtab, strtab_sz, st_name, &nlen);
    524           Sym sn =
    525               nlen ? pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen})
    526                    : 0;
    527           ObjImageSym ds;
    528           dynsym_names[i] = sn;
    529           ds.name = sn;
    530           ds.bind = (SymBind)elf_bind_to_obj(ELF64_ST_BIND(st_info));
    531           ds.kind = (SymKind)elf_type_to_kind(ELF64_ST_TYPE(st_info), st_shndx);
    532           ds.section = (st_shndx == SHN_UNDEF || st_shndx == SHN_ABS ||
    533                         st_shndx == SHN_COMMON || st_shndx >= e_shnum)
    534                            ? OBJ_SEC_NONE
    535                            : elf_to_obj[st_shndx];
    536           ds.value = st_value;
    537           ds.size = st_size;
    538           ds.version = 0;
    539           if (versym && verdef_tbl && i < nversym && st_shndx != SHN_UNDEF) {
    540             u16 v = rd_u16_le(versym + (u64)i * 2u);
    541             u32 ndx = (u32)(v & VERSYM_VERSION);
    542             if (!(v & VERSYM_HIDDEN) && ndx >= 2u && ndx <= verdef_max)
    543               ds.version = verdef_tbl[ndx];
    544           }
    545           obj_image_add_dynsym(im, &ds);
    546         }
    547       }
    548     }
    549   }
    550 
    551   /* Allocatable .rela.* / .rel.* -> dynamic relocations. */
    552   for (u16 i = 1; i < e_shnum; ++i) {
    553     const ShdrRec* sh = &shdrs[i];
    554     int is_rela = (sh->sh_type == SHT_RELA);
    555     int is_rel = (sh->sh_type == SHT_REL);
    556     u32 entsize, nrec, j;
    557     const u8* base;
    558     if (!is_rela && !is_rel) continue;
    559     if (!(sh->sh_flags & SHF_ALLOC))
    560       continue; /* link-time relocs: not dynamic */
    561     entsize = is_rela ? rela_size : rel_size;
    562     if (sh->sh_entsize != entsize || (sh->sh_size % entsize) != 0) continue;
    563     if (sh->sh_offset + sh->sh_size > len) continue;
    564     nrec = (u32)(sh->sh_size / entsize);
    565     base = data + sh->sh_offset;
    566     for (j = 0; j < nrec; ++j) {
    567       /* Elf32_Rela (12B): r_offset@0, r_info@4 (ELF32 packing),
    568        * r_addend@8. Elf64_Rela (24B): r_offset@0, r_info@8, r_addend@16. */
    569       const u8* p = base + (u64)j * entsize;
    570       u64 r_offset = elf_rd_addr(p + 0, is32);
    571       u64 r_info = is32 ? (u64)rd_u32_le(p + 4) : rd_u64_le(p + 8);
    572       i64 r_addend =
    573           is_rela ? (is32 ? (i64)(i32)rd_u32_le(p + 8) : (i64)rd_u64_le(p + 16))
    574                   : 0;
    575       u32 esym = is32 ? ELF32_R_SYM(r_info) : ELF64_R_SYM(r_info);
    576       u32 kind = reloc_from(is32 ? ELF32_R_TYPE(r_info) : ELF64_R_TYPE(r_info));
    577       ObjImageReloc dr;
    578       if (kind == (u32)-1) continue; /* unmodeled dyn reloc type: skip */
    579       dr.section = OBJ_SEC_NONE; /* offset is a vaddr, not section-relative */
    580       dr.offset = r_offset;
    581       dr.sym_name = (dynsym_names && esym < ndynsym) ? dynsym_names[esym] : 0;
    582       dr.addend = r_addend;
    583       dr.kind = (RelocKind)kind;
    584       obj_image_add_dynreloc(im, &dr);
    585     }
    586   }
    587 }
    588 
    589 ObjBuilder* read_elf(Compiler* c, const char* name, const u8* data,
    590                      size_t len) {
    591   (void)name;
    592 
    593   /* Need at least the e_ident to read EI_CLASS; the full min-length
    594    * check below uses the class-selected ehdr size. */
    595   if (len < EI_NIDENT)
    596     compiler_panic(c, SRCLOC_NONE, "read_elf: input shorter than ELF header");
    597 
    598   if (data[EI_MAG0] != ELFMAG0 || data[EI_MAG1] != ELFMAG1 ||
    599       data[EI_MAG2] != ELFMAG2 || data[EI_MAG3] != ELFMAG3)
    600     compiler_panic(c, SRCLOC_NONE, "read_elf: bad ELF magic");
    601 
    602   /* Accept both classes; is32 (EI_CLASS==ELFCLASS32) drives every
    603    * stride/offset/field-order decision below. RV32 and RV64 share
    604    * EM_RISCV — the reader cannot tell them apart by e_machine, only by
    605    * EI_CLASS, so is32 is the single source of truth here. */
    606   if (data[EI_CLASS] != ELFCLASS64 && data[EI_CLASS] != ELFCLASS32)
    607     compiler_panic(c, SRCLOC_NONE, "read_elf: not ELFCLASS32/64 (got %u)",
    608                    data[EI_CLASS]);
    609   if (data[EI_DATA] != ELFDATA2LSB)
    610     compiler_panic(c, SRCLOC_NONE, "read_elf: not ELFDATA2LSB (got %u)",
    611                    data[EI_DATA]);
    612 
    613   int is32 = (data[EI_CLASS] == ELFCLASS32);
    614   u32 ehdr_size = is32 ? ELF32_EHDR_SIZE : ELF64_EHDR_SIZE;
    615   u32 shdr_size = is32 ? ELF32_SHDR_SIZE : ELF64_SHDR_SIZE;
    616   u32 sym_size = is32 ? ELF32_SYM_SIZE : ELF64_SYM_SIZE;
    617   u32 rela_size = is32 ? ELF32_RELA_SIZE : ELF64_RELA_SIZE;
    618   u32 rel_size = is32 ? 8u : 16u;
    619   if (len < ehdr_size)
    620     compiler_panic(c, SRCLOC_NONE, "read_elf: input shorter than ELF header");
    621 
    622   u16 e_type = rd_u16_le(data + 16);
    623   /* ET_REL parses to the section/symbol/reloc view only. ET_EXEC/ET_DYN
    624    * additionally get the linked-image view (read_elf_image, below); their
    625    * section tables still parse through the same passes. ET_CORE and other
    626    * types are out of scope (see doc/plan/IMAGE_INSPECT.md). */
    627   if (e_type != ET_REL && e_type != ET_EXEC && e_type != ET_DYN)
    628     compiler_panic(c, SRCLOC_NONE,
    629                    "read_elf: unsupported e_type=%u (expected ET_REL, "
    630                    "ET_EXEC, or ET_DYN)",
    631                    (u32)e_type);
    632 
    633   u16 e_machine = rd_u16_le(data + 18);
    634   /* EM_RISCV is shared by RV32/RV64; disambiguate by EI_CLASS via
    635    * obj_elf_machine_class (obj_elf_machine keys on e_machine alone). */
    636   const ObjElfArchOps* arch = obj_elf_machine_class(e_machine, data[EI_CLASS]);
    637   u32 (*reloc_from)(u32);
    638   if (!arch || !arch->reloc_from) {
    639     compiler_panic(c, SRCLOC_NONE, "read_elf: unsupported e_machine 0x%x",
    640                    (u32)e_machine);
    641   }
    642   reloc_from = arch->reloc_from;
    643 
    644   /* Post-e_version Ehdr fields narrow + shift under ELF32: e_entry/
    645    * e_phoff/e_shoff are 4B (vs 8B), so e_flags@36, e_phentsize@42,
    646    * e_phnum@44, e_shentsize@46, e_shnum@48, e_shstrndx@50 (vs 48/54/
    647    * 56/58/60/62 on ELF64). */
    648   u64 e_shoff = is32 ? (u64)rd_u32_le(data + 32) : rd_u64_le(data + 40);
    649   u32 e_flags = rd_u32_le(data + (is32 ? 36 : 48));
    650   u16 e_shentsize = rd_u16_le(data + (is32 ? 46 : 58));
    651   u16 e_shnum = rd_u16_le(data + (is32 ? 48 : 60));
    652   u16 e_shstrndx = rd_u16_le(data + (is32 ? 50 : 62));
    653 
    654   /* A fully section-stripped image (objcopy --strip-sections, packers,
    655    * some release binaries) sets e_shoff/e_shnum to zero: the section
    656    * header table is gone, but the load segments still describe the file.
    657    * That's valid for ET_EXEC/ET_DYN — parse the image view (segments +
    658    * dynamic) and present an empty section view, matching GNU/LLVM. An
    659    * ET_REL with no sections carries no model state, so still reject it. */
    660   int has_sht = (e_shoff != 0 && e_shnum != 0);
    661   if (has_sht) {
    662     if (e_shentsize != shdr_size)
    663       compiler_panic(c, SRCLOC_NONE, "read_elf: unexpected e_shentsize %u",
    664                      (u32)e_shentsize);
    665     if (e_shoff + (u64)e_shnum * shdr_size > len)
    666       compiler_panic(c, SRCLOC_NONE,
    667                      "read_elf: section header table out of range");
    668     if (e_shstrndx >= e_shnum)
    669       compiler_panic(c, SRCLOC_NONE, "read_elf: e_shstrndx %u >= e_shnum %u",
    670                      (u32)e_shstrndx, (u32)e_shnum);
    671   } else {
    672     if (e_type == ET_REL)
    673       compiler_panic(c, SRCLOC_NONE,
    674                      "read_elf: ET_REL with no section header table");
    675     e_shnum = 0; /* normalize so the section/symbol/reloc passes are no-ops */
    676   }
    677 
    678   /* Parse all shdrs into scratch. NULL when the table is absent. */
    679   ShdrRec* shdrs = NULL;
    680   const u8* shstrtab = NULL;
    681   u64 shstrtab_sz = 0;
    682   if (has_sht) {
    683     shdrs = arena_array(c->scratch, ShdrRec, e_shnum);
    684     for (u32 i = 0; i < e_shnum; ++i)
    685       parse_shdr(data + e_shoff + (u64)i * shdr_size, is32, &shdrs[i]);
    686 
    687     const ShdrRec* shstr_sh = &shdrs[e_shstrndx];
    688     if (shstr_sh->sh_offset + shstr_sh->sh_size > len)
    689       compiler_panic(c, SRCLOC_NONE, "read_elf: .shstrtab out of range");
    690     shstrtab = data + shstr_sh->sh_offset;
    691     shstrtab_sz = shstr_sh->sh_size;
    692   }
    693 
    694   /* Build the ObjBuilder. */
    695   ObjBuilder* ob = obj_new(c);
    696   if (!ob) compiler_panic(c, SRCLOC_NONE, "read_elf: obj_new failed");
    697   obj_set_elf_e_flags(ob, e_flags);
    698 
    699   /* elf_to_obj[shndx] -> ObjSecId, OBJ_SEC_NONE for skipped sections. */
    700   u32* elf_to_obj = arena_zarray(c->scratch, u32, e_shnum ? e_shnum : 1);
    701 
    702   /* Pass 1: create obj sections for every non-NULL shdr that carries
    703    * load-bearing model state. SYMTAB / STRTAB / RELA / REL are
    704    * consumed below for symbols and relocations and do NOT round-trip
    705    * as obj sections — emit_elf re-synthesizes them from the
    706    * ObjBuilder's symbols / strtab / relocs. The shstrtab is a STRTAB
    707    * too, so it falls out the same way. */
    708   for (u32 i = 1; i < e_shnum; ++i) {
    709     const ShdrRec* sh = &shdrs[i];
    710     if (sh->sh_type == SHT_NULL) continue;
    711     if (sh->sh_type == SHT_SYMTAB) continue;
    712     if (sh->sh_type == SHT_STRTAB) continue;
    713     if (sh->sh_type == SHT_RELA) continue;
    714     if (sh->sh_type == SHT_REL) continue;
    715     /* SHT_GROUP is consumed below into an ObjGroup record (signature
    716      * symbol + member ObjSecIds). emit_elf re-synthesizes the group
    717      * section bytes from the ObjGroup, using current section indices
    718      * — so the original section's raw body would be stale anyway. */
    719     if (sh->sh_type == SHT_GROUP) continue;
    720 
    721     u32 nlen;
    722     const char* nm = strtab_lookup(shstrtab, shstrtab_sz, sh->sh_name, &nlen);
    723     Sym sym = pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen});
    724 
    725     u16 sec_kind = elf_kind_from_name(nm, nlen, sh->sh_flags, sh->sh_type);
    726     int type_known;
    727     u16 sec_sem = elf_type_to_sem(sh->sh_type, &type_known);
    728     u16 flags = elf_flags_to_obj(sh->sh_flags);
    729     u32 align = sh->sh_addralign ? (u32)sh->sh_addralign : 1;
    730 
    731     ObjSecId id =
    732         obj_section_ex(ob, sym, (SecKind)sec_kind, (SecSem)sec_sem, flags,
    733                        align, (u32)sh->sh_entsize, sh->sh_link, sh->sh_info);
    734     if (id == OBJ_SEC_NONE)
    735       compiler_panic(c, SRCLOC_NONE,
    736                      "read_elf: obj_section_ex failed for '%.*s'",
    737                      SLICE_ARG(((Slice){.s = nm, .len = nlen})));
    738     elf_to_obj[i] = id;
    739 
    740     /* Load address: 0 for ET_REL, the assigned vaddr for linked images.
    741      * Lets the section view carry the load picture for execs/DSOs. */
    742     if (sh->sh_addr) obj_section_set_addr(ob, id, sh->sh_addr);
    743 
    744     /* Preserve format-specific bits the canonical SecSem/SecFlag
    745      * mapping can't represent so emit_elf can write them back
    746      * verbatim.  ext_type only set when the sh_type fell through
    747      * to the "unknown" path. */
    748     u32 leftover = (u32)(sh->sh_flags & ~ELF_KNOWN_FLAGS_MASK);
    749     if (!type_known || leftover) {
    750       obj_section_set_ext(ob, id, OBJ_EXT_ELF, type_known ? 0 : sh->sh_type,
    751                           leftover);
    752     }
    753 
    754     /* Body bytes. */
    755     if (sh->sh_type == SHT_NOBITS) {
    756       obj_reserve_bss(ob, id, (u32)sh->sh_size, align);
    757     } else if (sh->sh_size) {
    758       if (sh->sh_offset + sh->sh_size > len)
    759         compiler_panic(c, SRCLOC_NONE,
    760                        "read_elf: section '%.*s' bytes out of range",
    761                        SLICE_ARG(((Slice){.s = nm, .len = nlen})));
    762       /* For SYMTAB/STRTAB/RELA we still copy the raw bytes — the
    763        * post-finalize shape contract says these sections are
    764        * present; emit_elf will regenerate them on re-emit, so the
    765        * preserved bytes are informational rather than load-bearing.
    766        */
    767       obj_write(ob, id, data + sh->sh_offset, (size_t)sh->sh_size);
    768     }
    769   }
    770 
    771   /* Pass 2: parse the .symtab into ObjSyms, building an
    772    * elf_sym_idx -> ObjSymId table. There may be zero or one SYMTAB in
    773    * an ET_REL; pick the first. */
    774   u32 symtab_shndx = 0;
    775   for (u32 i = 1; i < e_shnum; ++i) {
    776     if (shdrs[i].sh_type == SHT_SYMTAB) {
    777       symtab_shndx = i;
    778       break;
    779     }
    780   }
    781 
    782   u32 nsyms = 0;
    783   u32* sym_elf_to_obj = NULL;
    784 
    785   if (symtab_shndx) {
    786     const ShdrRec* sh = &shdrs[symtab_shndx];
    787     if (sh->sh_entsize != sym_size)
    788       compiler_panic(c, SRCLOC_NONE, "read_elf: .symtab entsize %llu != %u",
    789                      (unsigned long long)sh->sh_entsize, sym_size);
    790     if (sh->sh_size % sym_size)
    791       compiler_panic(c, SRCLOC_NONE,
    792                      "read_elf: .symtab size %llu not a multiple of %u",
    793                      (unsigned long long)sh->sh_size, sym_size);
    794     if (sh->sh_link >= e_shnum)
    795       compiler_panic(c, SRCLOC_NONE,
    796                      "read_elf: .symtab sh_link %u out of range", sh->sh_link);
    797     const ShdrRec* str_sh = &shdrs[sh->sh_link];
    798     if (str_sh->sh_offset + str_sh->sh_size > len)
    799       compiler_panic(c, SRCLOC_NONE, "read_elf: .strtab out of range");
    800     const u8* strtab = data + str_sh->sh_offset;
    801     u64 strtab_sz = str_sh->sh_size;
    802 
    803     nsyms = (u32)(sh->sh_size / sym_size);
    804     sym_elf_to_obj = arena_zarray(c->scratch, u32, nsyms ? nsyms : 1);
    805 
    806     const u8* base = data + sh->sh_offset;
    807     for (u32 i = 1; i < nsyms; ++i) { /* skip index 0 */
    808       const u8* p = base + (u64)i * sym_size;
    809       /* Elf32_Sym REORDERS: st_name@0, st_value@4, st_size@8, st_info@12,
    810        * st_other@13, st_shndx@14. Elf64_Sym: st_name@0, st_info@4,
    811        * st_other@5, st_shndx@6, st_value@8, st_size@16. */
    812       u32 st_name = rd_u32_le(p + 0);
    813       u8 st_info = is32 ? p[12] : p[4];
    814       u8 st_other = is32 ? p[13] : p[5];
    815       u16 st_shndx = is32 ? rd_u16_le(p + 14) : rd_u16_le(p + 6);
    816       u64 st_value = is32 ? (u64)rd_u32_le(p + 4) : rd_u64_le(p + 8);
    817       u64 st_size = is32 ? (u64)rd_u32_le(p + 8) : rd_u64_le(p + 16);
    818 
    819       u32 nlen;
    820       const char* nm = strtab_lookup(strtab, strtab_sz, st_name, &nlen);
    821       nlen = elf_default_version_namelen(nm, nlen);
    822       Sym sn = nlen
    823                    ? pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen})
    824                    : 0;
    825 
    826       u32 e_bind = ELF64_ST_BIND(st_info);
    827       u32 e_type = ELF64_ST_TYPE(st_info);
    828       u16 bind = elf_bind_to_obj(e_bind);
    829       u16 kind = elf_type_to_kind(e_type, st_shndx);
    830       u8 vis = elf_other_to_vis(st_other);
    831 
    832       ObjSecId sec_id;
    833       u64 value;
    834       u64 cmnalign = 0;
    835       if (st_shndx == SHN_UNDEF) {
    836         sec_id = OBJ_SEC_NONE;
    837         value = st_value;
    838       } else if (st_shndx == SHN_ABS || st_shndx == SHN_COMMON) {
    839         sec_id = OBJ_SEC_NONE;
    840         value = st_value;
    841         if (st_shndx == SHN_COMMON) cmnalign = st_value;
    842       } else if (st_shndx < e_shnum && shdrs[st_shndx].sh_type == SHT_GROUP) {
    843         /* A COMDAT group's signature symbol is defined in its SHT_GROUP
    844          * section, which we consume into an ObjGroup and never keep as an
    845          * obj section (so elf_to_obj is OBJ_SEC_NONE for it). The symbol just
    846          * names the group; it is not a data location and is never a reloc
    847          * target. Record it as an absolute defined symbol so it doesn't look
    848          * like a phantom undefined reference -- FreeBSD's crt1.o brands the
    849          * binary with such a symbol (.freebsd.note*). */
    850         sec_id = OBJ_SEC_NONE;
    851         value = st_value;
    852         kind = SK_ABS;
    853       } else if (st_shndx < e_shnum) {
    854         sec_id = elf_to_obj[st_shndx];
    855         value = st_value;
    856       } else {
    857         compiler_panic(c, SRCLOC_NONE, "read_elf: symbol shndx %u out of range",
    858                        (u32)st_shndx);
    859         sec_id = OBJ_SEC_NONE;
    860         value = 0; /* unreachable */
    861       }
    862 
    863       ObjSymId id =
    864           obj_symbol_ex(ob, sn, (SymBind)bind, (SymVis)vis, (SymKind)kind,
    865                         sec_id, value, st_size, cmnalign);
    866       obj_sym_mark_referenced(ob, id);
    867       sym_elf_to_obj[i] = id;
    868     }
    869   }
    870 
    871   /* Pass 3: parse each SHT_RELA / SHT_REL into ObjBuilder relocations
    872    * targeting the section the rela header's sh_info points at. */
    873   for (u32 i = 1; i < e_shnum; ++i) {
    874     const ShdrRec* sh = &shdrs[i];
    875     int is_rela = (sh->sh_type == SHT_RELA);
    876     int is_rel = (sh->sh_type == SHT_REL);
    877     if (!is_rela && !is_rel) continue;
    878     /* Allocatable rela/rel in ET_EXEC/ET_DYN are loader (dynamic)
    879      * relocations — sh_info is 0 or a .got index, not a target section.
    880      * They belong to the image's dynamic-reloc view (read_elf_image), not
    881      * the section-relocation table. ET_REL link-time relocs are never
    882      * SHF_ALLOC, so this is a no-op for relocatable objects. */
    883     if (sh->sh_flags & SHF_ALLOC) continue;
    884 
    885     u32 entsize = is_rela ? rela_size : rel_size;
    886     if (sh->sh_entsize != entsize)
    887       compiler_panic(c, SRCLOC_NONE, "read_elf: rela entsize %llu != %u",
    888                      (unsigned long long)sh->sh_entsize, entsize);
    889     if (sh->sh_info == 0 || sh->sh_info >= e_shnum)
    890       compiler_panic(c, SRCLOC_NONE, "read_elf: rela sh_info %u out of range",
    891                      sh->sh_info);
    892     ObjSecId target = elf_to_obj[sh->sh_info];
    893     if (target == OBJ_SEC_NONE) continue;
    894 
    895     u32 nrec = (u32)(sh->sh_size / entsize);
    896     const u8* base = data + sh->sh_offset;
    897     for (u32 j = 0; j < nrec; ++j) {
    898       /* Elf32_Rela (12B): r_offset@0, r_info@4 (ELF32 packing, 8-bit
    899        * type), r_addend@8. Elf64_Rela (24B): r_offset@0, r_info@8,
    900        * r_addend@16. */
    901       const u8* p = base + (u64)j * entsize;
    902       u64 r_offset = elf_rd_addr(p + 0, is32);
    903       u64 r_info = is32 ? (u64)rd_u32_le(p + 4) : rd_u64_le(p + 8);
    904       i64 r_addend =
    905           is_rela ? (is32 ? (i64)(i32)rd_u32_le(p + 8) : (i64)rd_u64_le(p + 16))
    906                   : 0;
    907       u32 esym = is32 ? ELF32_R_SYM(r_info) : ELF64_R_SYM(r_info);
    908       u32 etype = is32 ? ELF32_R_TYPE(r_info) : ELF64_R_TYPE(r_info);
    909 
    910       u32 kind = reloc_from(etype);
    911       if (kind == (u32)-1)
    912         compiler_panic(c, SRCLOC_NONE,
    913                        "read_elf: unsupported reloc type %u for e_machine 0x%x",
    914                        etype, (u32)e_machine);
    915 
    916       ObjSymId target_sym = OBJ_SYM_NONE;
    917       if (esym && sym_elf_to_obj && esym < nsyms)
    918         target_sym = sym_elf_to_obj[esym];
    919 
    920       obj_reloc_ex(ob, target, (u32)r_offset, (RelocKind)kind, target_sym,
    921                    r_addend, is_rela ? 1 : 0, 0);
    922     }
    923   }
    924 
    925   /* Pass 4: SHT_GROUP. Each GROUP section's body is a sequence of
    926    * 4-byte LE indices: [flags, shndx, shndx, ...]. The signature is
    927    * the symbol named by sh_link/sh_info convention (sh_link=symtab,
    928    * sh_info=symbol index in that symtab). */
    929   for (u32 i = 1; i < e_shnum; ++i) {
    930     const ShdrRec* sh = &shdrs[i];
    931     if (sh->sh_type != SHT_GROUP) continue;
    932 
    933     if (sh->sh_size < 4 || (sh->sh_size % 4)) continue;
    934     const u8* p = data + sh->sh_offset;
    935     u32 flags = rd_u32_le(p);
    936     u32 nm_len;
    937     const char* gnm =
    938         strtab_lookup(shstrtab, shstrtab_sz, sh->sh_name, &nm_len);
    939     Sym gname = pool_intern_slice(c->global, (Slice){.s = gnm, .len = nm_len});
    940 
    941     ObjSymId signature = OBJ_SYM_NONE;
    942     if (sym_elf_to_obj && sh->sh_info < nsyms)
    943       signature = sym_elf_to_obj[sh->sh_info];
    944 
    945     ObjGroupId gid = obj_group(ob, gname, signature, flags);
    946     u32 n = (u32)(sh->sh_size / 4) - 1;
    947     for (u32 j = 0; j < n; ++j) {
    948       u32 shndx = rd_u32_le(p + 4 + j * 4);
    949       if (shndx < e_shnum && elf_to_obj[shndx] != OBJ_SEC_NONE)
    950         obj_group_add_section(ob, gid, elf_to_obj[shndx]);
    951     }
    952   }
    953 
    954   /* ET_EXEC / ET_DYN: attach the linked-image view (segments + dynamic). */
    955   if (e_type != ET_REL)
    956     read_elf_image(c, ob, data, len, e_type, is32, shdrs, e_shnum, elf_to_obj,
    957                    reloc_from);
    958 
    959   obj_finalize(ob);
    960   return ob;
    961 }
    962 
    963 /* ---- ET_DYN (shared object) reader ----
    964  *
    965  * Produces an ObjBuilder containing only the DSO's exported symbols
    966  * (parsed from .dynsym, not .symtab). The DSO's sections, relocations,
    967  * and groups are skipped — DSOs contribute no bytes to the output
    968  * image. The DT_SONAME (if any) is interned and returned via
    969  * `*soname_out` so the caller can record DT_NEEDED at link time.
    970  *
    971  * Symbol shape: each defined dynsym entry produces an ObjSym whose
    972  * (bind, kind, vis) match the source. `section_id` is OBJ_SEC_NONE —
    973  * the symbol's value is its DSO-internal vaddr, not meaningful to the
    974  * consuming linker, so we record `value=0`. The linker layer
    975  * (resolve_undefs) only consults the name and the defined-ness flag.
    976  *
    977  * Undefined dynsym entries (st_shndx==SHN_UNDEF) are imports the DSO
    978  * itself has against other libraries; they produce no ObjSym, but their
    979  * names are recorded on the ObjImage via obj_image_add_undef. */
    980 
    981 static int parse_phdr(const u8* data, size_t len, u64 e_phoff, u16 e_phentsize,
    982                       u16 e_phnum, u32 want_type, u64* out_offset,
    983                       u64* out_filesz) {
    984   u32 i;
    985   if (e_phentsize != ELF64_PHDR_SIZE) return 0;
    986   if (e_phoff + (u64)e_phnum * ELF64_PHDR_SIZE > len) return 0;
    987   for (i = 0; i < e_phnum; ++i) {
    988     const u8* p = data + e_phoff + (u64)i * ELF64_PHDR_SIZE;
    989     u32 p_type = rd_u32_le(p + 0);
    990     if (p_type != want_type) continue;
    991     *out_offset = rd_u64_le(p + 8);
    992     *out_filesz = rd_u64_le(p + 32);
    993     return 1;
    994   }
    995   return 0;
    996 }
    997 
    998 ObjBuilder* read_elf_dso(Compiler* c, const char* name, const u8* data,
    999                          size_t len, Sym* soname_out) {
   1000   (void)name;
   1001   if (soname_out) *soname_out = 0;
   1002 
   1003   if (len < ELF64_EHDR_SIZE)
   1004     compiler_panic(c, SRCLOC_NONE,
   1005                    "read_elf_dso: input shorter than ELF header");
   1006   if (data[EI_MAG0] != ELFMAG0 || data[EI_MAG1] != ELFMAG1 ||
   1007       data[EI_MAG2] != ELFMAG2 || data[EI_MAG3] != ELFMAG3)
   1008     compiler_panic(c, SRCLOC_NONE, "read_elf_dso: bad ELF magic");
   1009   if (data[EI_CLASS] != ELFCLASS64)
   1010     compiler_panic(c, SRCLOC_NONE, "read_elf_dso: not ELFCLASS64");
   1011   if (data[EI_DATA] != ELFDATA2LSB)
   1012     compiler_panic(c, SRCLOC_NONE, "read_elf_dso: not ELFDATA2LSB");
   1013 
   1014   u16 e_type = rd_u16_le(data + 16);
   1015   if (e_type != ET_DYN)
   1016     compiler_panic(c, SRCLOC_NONE,
   1017                    "read_elf_dso: expected ET_DYN, got e_type=%u", (u32)e_type);
   1018 
   1019   u16 e_machine = rd_u16_le(data + 18);
   1020   {
   1021     const ObjFormatImpl* fmt = obj_format_lookup(KIT_OBJ_ELF);
   1022     const ObjElfArchOps* arch =
   1023         fmt && fmt->elf_machine ? fmt->elf_machine(e_machine) : NULL;
   1024     if (!arch)
   1025       compiler_panic(c, SRCLOC_NONE, "read_elf_dso: unsupported e_machine 0x%x",
   1026                      (u32)e_machine);
   1027   }
   1028 
   1029   u64 e_phoff = rd_u64_le(data + 32);
   1030   u64 e_shoff = rd_u64_le(data + 40);
   1031   u16 e_phentsize = rd_u16_le(data + 54);
   1032   u16 e_phnum = rd_u16_le(data + 56);
   1033   u16 e_shentsize = rd_u16_le(data + 58);
   1034   u16 e_shnum = rd_u16_le(data + 60);
   1035   u16 e_shstrndx = rd_u16_le(data + 62);
   1036 
   1037   if (e_shentsize != ELF64_SHDR_SIZE)
   1038     compiler_panic(c, SRCLOC_NONE, "read_elf_dso: unexpected e_shentsize %u",
   1039                    (u32)e_shentsize);
   1040   if (e_shoff + (u64)e_shnum * ELF64_SHDR_SIZE > len)
   1041     compiler_panic(c, SRCLOC_NONE,
   1042                    "read_elf_dso: section header table out of range");
   1043   if (e_shstrndx >= e_shnum)
   1044     compiler_panic(c, SRCLOC_NONE, "read_elf_dso: e_shstrndx out of range");
   1045 
   1046   /* read_elf_dso is ELFCLASS64-only (panics above on other classes), so
   1047    * parse with the ELF64 layout (is32 = 0). */
   1048   ShdrRec* shdrs = arena_array(c->scratch, ShdrRec, e_shnum);
   1049   for (u32 i = 0; i < e_shnum; ++i)
   1050     parse_shdr(data + e_shoff + (u64)i * ELF64_SHDR_SIZE, 0, &shdrs[i]);
   1051 
   1052   /* Locate .dynsym (preferred over .symtab — a stripped DSO carries
   1053    * only .dynsym) and its associated strtab via sh_link. */
   1054   u32 dynsym_idx = 0, dynamic_idx = 0;
   1055   for (u32 i = 1; i < e_shnum; ++i) {
   1056     if (shdrs[i].sh_type == SHT_DYNSYM && !dynsym_idx) dynsym_idx = i;
   1057     if (shdrs[i].sh_type == SHT_DYNAMIC && !dynamic_idx) dynamic_idx = i;
   1058   }
   1059 
   1060   if (!dynsym_idx)
   1061     compiler_panic(c, SRCLOC_NONE,
   1062                    "read_elf_dso: no SHT_DYNSYM in shared object");
   1063 
   1064   /* Parse PT_DYNAMIC for DT_SONAME. The .dynamic section gives us the
   1065    * dynstr to resolve the SONAME's offset; if there's no .dynamic
   1066    * section we fall back to scanning the PT_DYNAMIC segment. */
   1067   Sym soname = 0;
   1068   if (dynamic_idx) {
   1069     const ShdrRec* dsh = &shdrs[dynamic_idx];
   1070     if (dsh->sh_link >= e_shnum)
   1071       compiler_panic(c, SRCLOC_NONE,
   1072                      "read_elf_dso: .dynamic sh_link %u out of range",
   1073                      dsh->sh_link);
   1074     const ShdrRec* str_sh = &shdrs[dsh->sh_link];
   1075     if (str_sh->sh_offset + str_sh->sh_size > len)
   1076       compiler_panic(c, SRCLOC_NONE,
   1077                      "read_elf_dso: .dynamic strtab out of range");
   1078     const u8* dynstr = data + str_sh->sh_offset;
   1079     u64 dynstr_sz = str_sh->sh_size;
   1080 
   1081     if (dsh->sh_offset + dsh->sh_size > len)
   1082       compiler_panic(c, SRCLOC_NONE,
   1083                      "read_elf_dso: .dynamic body out of range");
   1084     const u8* dynp = data + dsh->sh_offset;
   1085     u64 dynsz = dsh->sh_size;
   1086     /* DT entries are 16 bytes: (d_tag: u64, d_un: u64). */
   1087     for (u64 off = 0; off + 16 <= dynsz; off += 16) {
   1088       u64 tag = rd_u64_le(dynp + off);
   1089       u64 val = rd_u64_le(dynp + off + 8);
   1090       if (tag == DT_NULL) break;
   1091       if (tag == DT_SONAME) {
   1092         u32 nlen;
   1093         const char* nm = strtab_lookup(dynstr, dynstr_sz, (u32)val, &nlen);
   1094         if (nlen)
   1095           soname = pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen});
   1096         break;
   1097       }
   1098     }
   1099   } else if (e_phnum) {
   1100     /* Fallback: walk PT_DYNAMIC straight from program headers. We
   1101      * only need DT_SONAME, so skip if we can't find a strtab pointer
   1102      * inline (DT_STRTAB carries a vaddr, not a file offset — stripped
   1103      * DSOs without SHT_DYNAMIC are exceedingly rare in practice). */
   1104     u64 dyn_off, dyn_sz;
   1105     (void)parse_phdr(data, len, e_phoff, e_phentsize, e_phnum, PT_DYNAMIC,
   1106                      &dyn_off, &dyn_sz);
   1107   }
   1108   if (soname_out) *soname_out = soname;
   1109 
   1110   /* Now parse .dynsym. */
   1111   const ShdrRec* sh = &shdrs[dynsym_idx];
   1112   if (sh->sh_entsize != ELF64_SYM_SIZE)
   1113     compiler_panic(c, SRCLOC_NONE, "read_elf_dso: .dynsym entsize %llu != %u",
   1114                    (unsigned long long)sh->sh_entsize, (u32)ELF64_SYM_SIZE);
   1115   if (sh->sh_size % ELF64_SYM_SIZE)
   1116     compiler_panic(c, SRCLOC_NONE,
   1117                    "read_elf_dso: .dynsym size not multiple of entry size");
   1118   if (sh->sh_link >= e_shnum)
   1119     compiler_panic(c, SRCLOC_NONE,
   1120                    "read_elf_dso: .dynsym sh_link out of range");
   1121   const ShdrRec* str_sh = &shdrs[sh->sh_link];
   1122   if (str_sh->sh_offset + str_sh->sh_size > len)
   1123     compiler_panic(c, SRCLOC_NONE, "read_elf_dso: .dynstr out of range");
   1124   const u8* strtab = data + str_sh->sh_offset;
   1125   u64 strtab_sz = str_sh->sh_size;
   1126 
   1127   ObjBuilder* ob = obj_new(c);
   1128   if (!ob) compiler_panic(c, SRCLOC_NONE, "read_elf_dso: obj_new failed");
   1129 
   1130   /* The DSO always gets an ObjImage: its dynsyms record each export's default
   1131    * version (so the linker can emit a matching .gnu.version_r — see
   1132    * build_versions in link_dyn.c, harmless/empty for unversioned DSOs like
   1133    * musl), and its undef list records the symbols this DSO references so
   1134    * --gc-sections keeps the executable's definitions of them alive. */
   1135   u32 verdef_max = 0;
   1136   Sym* verdef_tbl = read_elf_verdefs(c, data, len, shdrs, e_shnum, &verdef_max);
   1137   const u8* versym = NULL;
   1138   u32 nversym = 0;
   1139   for (u32 i = 1; i < e_shnum; ++i) {
   1140     if (shdrs[i].sh_type != SHT_GNU_VERSYM) continue;
   1141     if (shdrs[i].sh_offset + shdrs[i].sh_size <= len && shdrs[i].sh_entsize == 2)
   1142       versym = data + shdrs[i].sh_offset,
   1143       nversym = (u32)(shdrs[i].sh_size / 2u);
   1144     break;
   1145   }
   1146   ObjImage* im = obj_image_ensure(ob, OBJ_KIND_DYN);
   1147   if (im && soname) obj_image_set_soname(im, soname);
   1148 
   1149   u32 nsyms = (u32)(sh->sh_size / ELF64_SYM_SIZE);
   1150   const u8* base = data + sh->sh_offset;
   1151   for (u32 i = 1; i < nsyms; ++i) { /* skip index 0 */
   1152     const u8* p = base + (u64)i * ELF64_SYM_SIZE;
   1153     u32 st_name = rd_u32_le(p + 0);
   1154     u8 st_info = p[4];
   1155     u8 st_other = p[5];
   1156     u16 st_shndx = rd_u16_le(p + 6);
   1157     u32 e_bind = ELF64_ST_BIND(st_info);
   1158     u32 nlen;
   1159     const char* nm;
   1160     Sym sn;
   1161 
   1162     /* Locals are neither exports nor reference dependencies we track. */
   1163     if (e_bind == STB_LOCAL) continue;
   1164     nm = strtab_lookup(strtab, strtab_sz, st_name, &nlen);
   1165     if (!nlen) continue;
   1166     sn = pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen});
   1167 
   1168     /* The DSO's own undefined references: not exports, but if the executable
   1169      * defines one (e.g. libc.so.7's `environ` / `__progname`, defined by the
   1170      * crt) the static linker must keep that definition under --gc-sections. */
   1171     if (st_shndx == SHN_UNDEF) {
   1172       obj_image_add_undef(im, sn);
   1173       continue;
   1174     }
   1175 
   1176     u32 e_type_field = ELF64_ST_TYPE(st_info);
   1177     u16 bind = elf_bind_to_obj(e_bind);
   1178     u16 kind = elf_type_to_kind(e_type_field, st_shndx);
   1179     u8 vis = elf_other_to_vis(st_other);
   1180 
   1181     /* DSO exports land as defined symbols in OBJ_SEC_NONE with
   1182      * value=0. The consumer treats them as imports — see
   1183      * resolve_undefs in src/link/link_layout.c. */
   1184     {
   1185       ObjSymId did = obj_symbol_ex(ob, sn, (SymBind)bind, (SymVis)vis,
   1186                                    (SymKind)kind, OBJ_SEC_NONE, 0, 0, 0);
   1187       obj_sym_mark_referenced(ob, did);
   1188     }
   1189     if (im) {
   1190       ObjImageSym ds;
   1191       ds.name = sn;
   1192       ds.bind = (SymBind)bind;
   1193       ds.kind = (SymKind)kind;
   1194       ds.section = OBJ_SEC_NONE;
   1195       ds.value = 0;
   1196       ds.size = 0;
   1197       ds.version = 0;
   1198       if (i < nversym) {
   1199         u16 v = rd_u16_le(versym + (u64)i * 2u);
   1200         u32 ndx = (u32)(v & VERSYM_VERSION);
   1201         if (!(v & VERSYM_HIDDEN) && ndx >= 2u && ndx <= verdef_max)
   1202           ds.version = verdef_tbl[ndx];
   1203       }
   1204       obj_image_add_dynsym(im, &ds);
   1205     }
   1206   }
   1207 
   1208   obj_finalize(ob);
   1209   return ob;
   1210 }