read.c (49008B)
1 /* ELF reader. Parses a 64-bit little-endian ELF object back into a fresh 2 * ObjBuilder. ET_REL produces the section/symbol/reloc view; the 3 * post-finalize shape is the canonical superset doc/DESIGN.md §5.5 4 * promises: read_elf of an emit_elf output produces an ObjBuilder 5 * equivalent to the writer's input, modulo (a) section ordering and 6 * (b) STT_SECTION symbols synthesized by the writer. 7 * 8 * ET_EXEC / ET_DYN additionally attach the linked-image view via 9 * read_elf_image (program-header segments, .dynamic dependencies, 10 * .dynsym dynamic symbols, and allocatable dynamic relocations) — see 11 * doc/OBJ.md. Their section tables still parse through the same 12 * passes. The standalone read_elf_dso (below) remains the linker's 13 * exports-only DSO-input path. 14 * 15 * Scope: AArch64 little-endian. Other archs / endianness produce a 16 * compiler_panic with a diagnostic. */ 17 18 #include <string.h> 19 20 #include "core/heap.h" 21 #include "core/pool.h" 22 #include "core/slice.h" 23 #include "obj/elf/elf.h" 24 #include "obj/format.h" 25 26 /* ---- shdr scratch struct ---- */ 27 28 typedef struct ShdrRec { 29 u32 sh_name; 30 u32 sh_type; 31 u64 sh_flags; 32 u64 sh_addr; 33 u64 sh_offset; 34 u64 sh_size; 35 u32 sh_link; 36 u32 sh_info; 37 u64 sh_addralign; 38 u64 sh_entsize; 39 } ShdrRec; 40 41 static void parse_shdr(const u8* p, int is32, ShdrRec* out) { 42 /* Elf32_Shdr (40B) shares field order with Elf64_Shdr (64B); only the 43 * flags/addr/offset/size/addralign/entsize fields narrow to u32 and 44 * shift the following offsets. The ShdrRec stays u64-wide. */ 45 if (is32) { 46 out->sh_name = rd_u32_le(p + 0); 47 out->sh_type = rd_u32_le(p + 4); 48 out->sh_flags = rd_u32_le(p + 8); 49 out->sh_addr = rd_u32_le(p + 12); 50 out->sh_offset = rd_u32_le(p + 16); 51 out->sh_size = rd_u32_le(p + 20); 52 out->sh_link = rd_u32_le(p + 24); 53 out->sh_info = rd_u32_le(p + 28); 54 out->sh_addralign = rd_u32_le(p + 32); 55 out->sh_entsize = rd_u32_le(p + 36); 56 } else { 57 out->sh_name = rd_u32_le(p + 0); 58 out->sh_type = rd_u32_le(p + 4); 59 out->sh_flags = rd_u64_le(p + 8); 60 out->sh_addr = rd_u64_le(p + 16); 61 out->sh_offset = rd_u64_le(p + 24); 62 out->sh_size = rd_u64_le(p + 32); 63 out->sh_link = rd_u32_le(p + 40); 64 out->sh_info = rd_u32_le(p + 44); 65 out->sh_addralign = rd_u64_le(p + 48); 66 out->sh_entsize = rd_u64_le(p + 56); 67 } 68 } 69 70 /* ---- mappers ---- */ 71 72 /* The bits this function maps to SecFlag — anything outside this mask is 73 * treated as opaque and stashed in Section.ext_flags by the caller so the 74 * emitter can write it back unchanged. Examples of bits left over: 75 * SHF_EXCLUDE (0x80000000) on .llvm_addrsig, SHF_COMPRESSED (0x800) on 76 * compressed .debug_*, SHF_INFO_LINK (0x40) on .rela.* sections. */ 77 #define ELF_KNOWN_FLAGS_MASK \ 78 ((u64)(SHF_ALLOC | SHF_EXECINSTR | SHF_WRITE | SHF_TLS | SHF_MERGE | \ 79 SHF_STRINGS | SHF_GROUP | SHF_LINK_ORDER | SHF_GNU_RETAIN)) 80 81 static u16 elf_flags_to_obj(u64 f) { 82 u16 r = 0; 83 if (f & SHF_ALLOC) r |= SF_ALLOC; 84 if (f & SHF_EXECINSTR) r |= SF_EXEC; 85 if (f & SHF_WRITE) r |= SF_WRITE; 86 if (f & SHF_TLS) r |= SF_TLS; 87 if (f & SHF_MERGE) r |= SF_MERGE; 88 if (f & SHF_STRINGS) r |= SF_STRINGS; 89 if (f & SHF_GROUP) r |= SF_GROUP; 90 if (f & SHF_LINK_ORDER) r |= SF_LINK_ORDER; 91 if (f & SHF_GNU_RETAIN) r |= SF_RETAIN; 92 return r; 93 } 94 95 /* Map ELF sh_type -> SecSem. Sets *known to 1 if the value is one of 96 * the canonical types the kit model knows about; 0 means the caller 97 * fell through to the SSEM_PROGBITS fallback and should preserve the 98 * raw sh_type via Section.ext_type so emit_elf can write it back. */ 99 static u16 elf_type_to_sem(u32 t, int* known) { 100 *known = 1; 101 switch (t) { 102 case SHT_PROGBITS: 103 return SSEM_PROGBITS; 104 case SHT_NOBITS: 105 return SSEM_NOBITS; 106 case SHT_SYMTAB: 107 return SSEM_SYMTAB; 108 case SHT_STRTAB: 109 return SSEM_STRTAB; 110 case SHT_RELA: 111 return SSEM_RELA; 112 case SHT_REL: 113 return SSEM_REL; 114 case SHT_NOTE: 115 return SSEM_NOTE; 116 case SHT_INIT_ARRAY: 117 return SSEM_INIT_ARRAY; 118 case SHT_FINI_ARRAY: 119 return SSEM_FINI_ARRAY; 120 case SHT_PREINIT_ARRAY: 121 return SSEM_PREINIT_ARRAY; 122 case SHT_GROUP: 123 return SSEM_GROUP; 124 default: 125 *known = 0; 126 return SSEM_PROGBITS; 127 } 128 } 129 130 static u16 elf_kind_from_name(const char* name, u32 nlen, u64 sh_flags, 131 u32 sh_type) { 132 if (sh_type == SHT_NOBITS) return SEC_BSS; 133 if (nlen >= 5 && memcmp(name, ".text", 5) == 0) return SEC_TEXT; 134 if (nlen >= 7 && memcmp(name, ".rodata", 7) == 0) return SEC_RODATA; 135 if (nlen >= 5 && memcmp(name, ".data", 5) == 0) return SEC_DATA; 136 if (nlen >= 4 && memcmp(name, ".bss", 4) == 0) return SEC_BSS; 137 if (nlen >= 7 && memcmp(name, ".debug_", 7) == 0) return SEC_DEBUG; 138 /* Fallback: classify by flags. */ 139 if (sh_flags & SHF_EXECINSTR) return SEC_TEXT; 140 if (sh_flags & SHF_WRITE) return SEC_DATA; 141 if (sh_flags & SHF_ALLOC) return SEC_RODATA; 142 return SEC_OTHER; 143 } 144 145 static u16 elf_bind_to_obj(u32 b) { 146 switch (b) { 147 case STB_GLOBAL: 148 case STB_GNU_UNIQUE: 149 /* GNU-unique is a global with extra runtime uniqueness semantics; for 150 * link-time resolution it is an ordinary global definition. FreeBSD's 151 * crt1.o brands the binary with a GNU-unique `.freebsd.note*` symbol. */ 152 return SB_GLOBAL; 153 case STB_WEAK: 154 return SB_WEAK; 155 default: 156 return SB_LOCAL; 157 } 158 } 159 160 static u16 elf_type_to_kind(u32 t, u16 shndx) { 161 if (shndx == SHN_UNDEF) return SK_UNDEF; 162 if (shndx == SHN_COMMON) return SK_COMMON; 163 /* SHN_ABS is the convention for STT_FILE and a few other defined 164 * symbols whose value is not an address. Don't smother the type 165 * with SK_ABS when the type field carries real information — only 166 * fall through to SK_ABS for STT_NOTYPE-at-SHN_ABS. */ 167 if (shndx == SHN_ABS && t == STT_NOTYPE) return SK_ABS; 168 switch (t) { 169 case STT_FUNC: 170 return SK_FUNC; 171 case STT_OBJECT: 172 return SK_OBJ; 173 case STT_SECTION: 174 return SK_SECTION; 175 case STT_FILE: 176 return SK_FILE; 177 case STT_TLS: 178 return SK_TLS; 179 case STT_COMMON: 180 return SK_COMMON; 181 case STT_GNU_IFUNC: 182 return SK_IFUNC; 183 default: 184 /* STT_NOTYPE on a defined symbol (e.g. AArch64 mapping symbols 185 * `$x` / `$d`, or assembly labels) round-trips as SK_NOTYPE. 186 * The linker keeps definedness keyed on SK_UNDEF; SK_NOTYPE is 187 * "defined but typeless". */ 188 return SK_NOTYPE; 189 } 190 } 191 192 static u8 elf_other_to_vis(u32 other) { 193 switch (other & 3) { 194 case STV_HIDDEN: 195 return SV_HIDDEN; 196 case STV_PROTECTED: 197 return SV_PROTECTED; 198 case STV_INTERNAL: 199 return SV_INTERNAL; 200 default: 201 return SV_DEFAULT; 202 } 203 } 204 205 /* Bounds-checked C-string slice from a strtab section. Returns "" on 206 * out-of-range so callers don't have to special-case it. `len_out` is 207 * set to the result's byte length. */ 208 static const char* strtab_lookup(const u8* tab, u64 tab_size, u32 off, 209 u32* len_out) { 210 if (off >= tab_size) { 211 *len_out = 0; 212 return ""; 213 } 214 const char* s = (const char*)(tab + off); 215 u32 max = (u32)(tab_size - off); 216 u32 n = 0; 217 while (n < max && s[n] != '\0') ++n; 218 *len_out = n; 219 return s; 220 } 221 222 static const char* pt_type_name(u32 t) { 223 switch (t) { 224 case PT_NULL: 225 return "NULL"; 226 case PT_LOAD: 227 return "LOAD"; 228 case PT_DYNAMIC: 229 return "DYNAMIC"; 230 case PT_INTERP: 231 return "INTERP"; 232 case PT_NOTE: 233 return "NOTE"; 234 case PT_PHDR: 235 return "PHDR"; 236 case PT_TLS: 237 return "TLS"; 238 case PT_GNU_EH_FRAME: 239 return "GNU_EH_FRAME"; 240 case PT_GNU_STACK: 241 return "GNU_STACK"; 242 case PT_GNU_RELRO: 243 return "GNU_RELRO"; 244 default: 245 return "UNKNOWN"; 246 } 247 } 248 249 static Sym intern_cstr(Compiler* c, const char* s) { 250 return pool_intern_slice(c->global, (Slice){.s = s, .len = (u32)strlen(s)}); 251 } 252 253 /* ELF default-version normalization. A symbol "base@@VERSION" is the *default* 254 * version of `base`: an unversioned reference binds to it. GNU as emits the 255 * literal "@@" into a relocatable object's .symtab string (e.g. FreeBSD 256 * libc.a's openat@@FBSD_1.2 / setcontext / swapcontext). Trim to the base so 257 * kit's name-based resolution matches plain references. A single-'@' 258 * (non-default) version is left intact -- those are inert compatibility 259 * aliases (e.g. fstat@FBSD_1.0) that must NOT shadow the modern base symbol. 260 * Shared-library exports keep their version in .gnu.version_d rather than the 261 * name string, so this only fires for relocatable .symtab reads. Returns the 262 * length of the base name (== nlen when there is no "@@"). */ 263 static u32 elf_default_version_namelen(const char* nm, u32 nlen) { 264 u32 i; 265 if (!nm) return nlen; 266 for (i = 1; i + 1 < nlen; ++i) 267 if (nm[i] == '@' && nm[i + 1] == '@') return i; 268 return nlen; 269 } 270 271 /* Parse a DSO's .gnu.version_d (SHT_GNU_VERDEF) into an index->version-name 272 * table so .dynsym entries (whose version lives in the parallel .gnu.version) 273 * can be labelled. Returns an arena table indexed by version index (0/1 unused, 274 * matching VER_NDX_LOCAL/GLOBAL) and sets *out_max to the highest index seen; 275 * NULL when the input has no verdef. The Verdef/Verdaux wire layout is identical 276 * on ELFCLASS32/64 (all Half/Word fields), so this is width-agnostic. */ 277 static Sym* read_elf_verdefs(Compiler* c, const u8* data, size_t len, 278 const ShdrRec* shdrs, u16 e_shnum, u32* out_max) { 279 u32 i, verdef_idx = 0, max_ndx = 0; 280 const ShdrRec* sh; 281 const ShdrRec* str_sh; 282 const u8* strtab; 283 const u8* base; 284 u64 strtab_sz, size, off; 285 Sym* tbl; 286 *out_max = 0; 287 for (i = 1; i < e_shnum; ++i) 288 if (shdrs[i].sh_type == SHT_GNU_VERDEF) { 289 verdef_idx = i; 290 break; 291 } 292 if (!verdef_idx) return NULL; 293 sh = &shdrs[verdef_idx]; 294 if (sh->sh_link >= e_shnum) return NULL; 295 str_sh = &shdrs[sh->sh_link]; 296 if (sh->sh_offset + sh->sh_size > len || 297 str_sh->sh_offset + str_sh->sh_size > len) 298 return NULL; 299 strtab = data + str_sh->sh_offset; 300 strtab_sz = str_sh->sh_size; 301 base = data + sh->sh_offset; 302 size = sh->sh_size; 303 304 /* Pass 1: highest version index, to size the table. */ 305 off = 0; 306 while (off + ELF_VERDEF_SIZE <= size) { 307 u32 ndx = (u32)(rd_u16_le(base + off + 4) & VERSYM_VERSION); 308 u32 vd_next = rd_u32_le(base + off + 16); 309 if (ndx > max_ndx) max_ndx = ndx; 310 if (!vd_next) break; 311 off += vd_next; 312 } 313 tbl = arena_zarray(c->scratch, Sym, (size_t)max_ndx + 1u); 314 315 /* Pass 2: record each non-base version's name (its first Verdaux). */ 316 off = 0; 317 while (off + ELF_VERDEF_SIZE <= size) { 318 u16 vd_flags = rd_u16_le(base + off + 2); 319 u32 ndx = (u32)(rd_u16_le(base + off + 4) & VERSYM_VERSION); 320 u32 vd_aux = rd_u32_le(base + off + 12); 321 u32 vd_next = rd_u32_le(base + off + 16); 322 if (!(vd_flags & VER_FLG_BASE) && ndx <= max_ndx && 323 off + vd_aux + ELF_VERDAUX_SIZE <= size) { 324 u32 nlen; 325 const char* nm = 326 strtab_lookup(strtab, strtab_sz, rd_u32_le(base + off + vd_aux), &nlen); 327 if (nlen) 328 tbl[ndx] = pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen}); 329 } 330 if (!vd_next) break; 331 off += vd_next; 332 } 333 *out_max = max_ndx; 334 return tbl; 335 } 336 337 /* Populate the builder's ObjImage from an ET_EXEC / ET_DYN input: the 338 * program-header segment table (+ interp + image base), the .dynamic 339 * dependency view (DT_NEEDED / DT_SONAME / DT_RPATH / DT_RUNPATH), the 340 * .dynsym dynamic symbols, and the allocatable .rela.* / .rel.* dynamic 341 * relocations. The section / symbol tables are parsed by read_elf's normal 342 * passes; this adds the orthogonal image dimension. Lenient where a 343 * malformed sub-table would otherwise abort a useful inspection: a bad 344 * .dynamic / .dynsym / dyn-reloc table is skipped rather than panicked. */ 345 static void read_elf_image(Compiler* c, ObjBuilder* ob, const u8* data, 346 size_t len, u16 e_type, int is32, 347 const ShdrRec* shdrs, u16 e_shnum, 348 const u32* elf_to_obj, u32 (*reloc_from)(u32)) { 349 u32 phdr_size = is32 ? ELF32_PHDR_SIZE : ELF64_PHDR_SIZE; 350 u32 sym_size = is32 ? ELF32_SYM_SIZE : ELF64_SYM_SIZE; 351 u32 rela_size = is32 ? ELF32_RELA_SIZE : ELF64_RELA_SIZE; 352 u32 rel_size = is32 ? 8u : 16u; 353 u32 dyn_size = is32 ? ELF32_DYN_SIZE : ELF64_DYN_SIZE; 354 ObjImage* im = 355 obj_image_ensure(ob, e_type == ET_DYN ? OBJ_KIND_DYN : OBJ_KIND_EXEC); 356 if (!im) compiler_panic(c, SRCLOC_NONE, "read_elf: obj_image_ensure failed"); 357 358 /* e_entry is at offset 24 in both Ehdr32/Ehdr64, native width. */ 359 obj_image_set_entry(im, elf_rd_addr(data + 24, is32)); 360 361 /* Program headers -> segments (+ PT_INTERP string, image base). */ 362 { 363 /* e_phoff: 4B@28 on ELF32, 8B@32 on ELF64. e_phentsize/e_phnum 364 * shift accordingly (42/44 vs 54/56). */ 365 u64 e_phoff = is32 ? (u64)rd_u32_le(data + 28) : rd_u64_le(data + 32); 366 u16 e_phentsize = rd_u16_le(data + (is32 ? 42 : 54)); 367 u16 e_phnum = rd_u16_le(data + (is32 ? 44 : 56)); 368 int have_base = 0; 369 u64 image_base = 0; 370 if (e_phnum) { 371 if (e_phentsize != phdr_size) 372 compiler_panic(c, SRCLOC_NONE, "read_elf: unexpected e_phentsize %u", 373 (u32)e_phentsize); 374 if (e_phoff + (u64)e_phnum * phdr_size > len) 375 compiler_panic(c, SRCLOC_NONE, 376 "read_elf: program header table out of range"); 377 for (u16 i = 0; i < e_phnum; ++i) { 378 const u8* p = data + e_phoff + (u64)i * phdr_size; 379 /* Elf32_Phdr REORDERS p_flags AFTER the sizes: 380 * p_type@0,p_offset@4,p_vaddr@8,p_paddr@12,p_filesz@16, 381 * p_memsz@20,p_flags@24,p_align@28 (all u32). 382 * Elf64_Phdr: p_type@0,p_flags@4,p_offset@8,p_vaddr@16, 383 * p_filesz@32,p_memsz@40,p_align@48. */ 384 u32 p_type = rd_u32_le(p + 0); 385 u32 p_flags = is32 ? rd_u32_le(p + 24) : rd_u32_le(p + 4); 386 u64 p_offset = is32 ? (u64)rd_u32_le(p + 4) : rd_u64_le(p + 8); 387 u64 p_vaddr = is32 ? (u64)rd_u32_le(p + 8) : rd_u64_le(p + 16); 388 u64 p_filesz = is32 ? (u64)rd_u32_le(p + 16) : rd_u64_le(p + 32); 389 u64 p_memsz = is32 ? (u64)rd_u32_le(p + 20) : rd_u64_le(p + 40); 390 u64 p_align = is32 ? (u64)rd_u32_le(p + 28) : rd_u64_le(p + 48); 391 ObjSegment seg; 392 seg.name = intern_cstr(c, pt_type_name(p_type)); 393 seg.vaddr = p_vaddr; 394 seg.vsize = p_memsz; 395 seg.file_off = p_offset; 396 seg.file_size = p_filesz; 397 /* PF_R/W/X share bit values with OBJ_SEG_R/W/X. */ 398 seg.perms = p_flags & (PF_R | PF_W | PF_X); 399 seg.align = (u32)(p_align ? p_align : 1); 400 obj_image_add_segment(im, &seg); 401 402 if (p_type == PT_LOAD && (!have_base || p_vaddr < image_base)) { 403 image_base = p_vaddr; 404 have_base = 1; 405 } 406 if (p_type == PT_INTERP && p_filesz && p_offset + p_filesz <= len) { 407 u32 ilen = (u32)p_filesz; 408 while (ilen && data[p_offset + ilen - 1] == '\0') --ilen; 409 if (ilen) 410 obj_image_set_interp( 411 im, pool_intern_slice( 412 c->global, (Slice){.s = (const char*)(data + p_offset), 413 .len = ilen})); 414 } 415 } 416 } 417 if (have_base) obj_image_set_base(im, image_base); 418 } 419 420 /* Locate .dynamic and .dynsym. */ 421 u32 dynamic_idx = 0, dynsym_idx = 0; 422 for (u16 i = 1; i < e_shnum; ++i) { 423 if (shdrs[i].sh_type == SHT_DYNAMIC && !dynamic_idx) dynamic_idx = i; 424 if (shdrs[i].sh_type == SHT_DYNSYM && !dynsym_idx) dynsym_idx = i; 425 } 426 427 /* .dynamic -> dependency view. */ 428 if (dynamic_idx) { 429 const ShdrRec* dsh = &shdrs[dynamic_idx]; 430 if (dsh->sh_link < e_shnum) { 431 const ShdrRec* str_sh = &shdrs[dsh->sh_link]; 432 if (str_sh->sh_offset + str_sh->sh_size <= len && 433 dsh->sh_offset + dsh->sh_size <= len) { 434 const u8* dynstr = data + str_sh->sh_offset; 435 u64 dynstr_sz = str_sh->sh_size; 436 const u8* dynp = data + dsh->sh_offset; 437 u64 dynsz = dsh->sh_size; 438 /* ELF32 DT entries are 8B (d_tag:u32, d_un:u32); ELF64 16B. */ 439 for (u64 off = 0; off + dyn_size <= dynsz; off += dyn_size) { 440 u64 tag = elf_rd_addr(dynp + off, is32); 441 u64 val = elf_rd_addr(dynp + off + (is32 ? 4 : 8), is32); 442 /* Raw .dynamic view (escape hatch): one entry per DT_* tag, the 443 * terminating DT_NULL included, before the NEEDED/SONAME/RPATH 444 * filtering below. */ 445 { 446 ObjImageRaw r; 447 r.tag = (u32)tag; 448 r.value = val; 449 r.extra = 0; 450 obj_image_add_raw(im, &r); 451 } 452 if (tag == DT_NULL) break; 453 if (tag != DT_NEEDED && tag != DT_SONAME && tag != DT_RPATH && 454 tag != DT_RUNPATH) 455 continue; 456 { 457 u32 nlen; 458 const char* nm = strtab_lookup(dynstr, dynstr_sz, (u32)val, &nlen); 459 Sym s = nlen ? pool_intern_slice(c->global, 460 (Slice){.s = nm, .len = nlen}) 461 : 0; 462 if (!s) continue; 463 if (tag == DT_NEEDED) { 464 ObjImageDep d; 465 d.name = s; 466 d.imports = NULL; 467 d.nimports = 0; 468 obj_image_add_dep(im, &d); 469 } else if (tag == DT_SONAME) { 470 obj_image_set_soname(im, s); 471 } else { 472 obj_image_add_rpath(im, s); 473 } 474 } 475 } 476 } 477 } 478 } 479 480 /* .dynsym -> dynamic symbols, plus an index->name table for dyn relocs. */ 481 Sym* dynsym_names = NULL; 482 u32 ndynsym = 0; 483 if (dynsym_idx) { 484 const ShdrRec* sh = &shdrs[dynsym_idx]; 485 if (sh->sh_entsize == sym_size && (sh->sh_size % sym_size) == 0 && 486 sh->sh_link < e_shnum && sh->sh_offset + sh->sh_size <= len) { 487 const ShdrRec* str_sh = &shdrs[sh->sh_link]; 488 if (str_sh->sh_offset + str_sh->sh_size <= len) { 489 const u8* strtab = data + str_sh->sh_offset; 490 u64 strtab_sz = str_sh->sh_size; 491 const u8* base = data + sh->sh_offset; 492 ndynsym = (u32)(sh->sh_size / sym_size); 493 dynsym_names = arena_zarray(c->scratch, Sym, ndynsym ? ndynsym : 1); 494 /* Parallel symbol-version tables: .gnu.version_d names indexed by 495 * version index, and .gnu.version (one u16 per dynsym entry). A 496 * defined entry whose versym lacks VERSYM_HIDDEN is the *default* 497 * version of its name — the version a plain reference should bind. */ 498 u32 verdef_max = 0; 499 Sym* verdef_tbl = read_elf_verdefs(c, data, len, shdrs, e_shnum, 500 &verdef_max); 501 const u8* versym = NULL; 502 u32 nversym = 0; 503 for (u16 vi = 1; vi < e_shnum; ++vi) { 504 if (shdrs[vi].sh_type != SHT_GNU_VERSYM) continue; 505 if (shdrs[vi].sh_offset + shdrs[vi].sh_size <= len && 506 shdrs[vi].sh_entsize == 2) 507 versym = data + shdrs[vi].sh_offset, 508 nversym = (u32)(shdrs[vi].sh_size / 2u); 509 break; 510 } 511 for (u32 i = 1; i < ndynsym; ++i) { 512 const u8* p = base + (u64)i * sym_size; 513 /* Elf32_Sym REORDERS: st_name@0, st_value@4, st_size@8, 514 * st_info@12, st_other@13, st_shndx@14. Elf64_Sym: 515 * st_name@0, st_info@4, st_other@5, st_shndx@6, 516 * st_value@8, st_size@16. */ 517 u32 st_name = rd_u32_le(p + 0); 518 u8 st_info = is32 ? p[12] : p[4]; 519 u16 st_shndx = is32 ? rd_u16_le(p + 14) : rd_u16_le(p + 6); 520 u64 st_value = is32 ? (u64)rd_u32_le(p + 4) : rd_u64_le(p + 8); 521 u64 st_size = is32 ? (u64)rd_u32_le(p + 8) : rd_u64_le(p + 16); 522 u32 nlen; 523 const char* nm = strtab_lookup(strtab, strtab_sz, st_name, &nlen); 524 Sym sn = 525 nlen ? pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen}) 526 : 0; 527 ObjImageSym ds; 528 dynsym_names[i] = sn; 529 ds.name = sn; 530 ds.bind = (SymBind)elf_bind_to_obj(ELF64_ST_BIND(st_info)); 531 ds.kind = (SymKind)elf_type_to_kind(ELF64_ST_TYPE(st_info), st_shndx); 532 ds.section = (st_shndx == SHN_UNDEF || st_shndx == SHN_ABS || 533 st_shndx == SHN_COMMON || st_shndx >= e_shnum) 534 ? OBJ_SEC_NONE 535 : elf_to_obj[st_shndx]; 536 ds.value = st_value; 537 ds.size = st_size; 538 ds.version = 0; 539 if (versym && verdef_tbl && i < nversym && st_shndx != SHN_UNDEF) { 540 u16 v = rd_u16_le(versym + (u64)i * 2u); 541 u32 ndx = (u32)(v & VERSYM_VERSION); 542 if (!(v & VERSYM_HIDDEN) && ndx >= 2u && ndx <= verdef_max) 543 ds.version = verdef_tbl[ndx]; 544 } 545 obj_image_add_dynsym(im, &ds); 546 } 547 } 548 } 549 } 550 551 /* Allocatable .rela.* / .rel.* -> dynamic relocations. */ 552 for (u16 i = 1; i < e_shnum; ++i) { 553 const ShdrRec* sh = &shdrs[i]; 554 int is_rela = (sh->sh_type == SHT_RELA); 555 int is_rel = (sh->sh_type == SHT_REL); 556 u32 entsize, nrec, j; 557 const u8* base; 558 if (!is_rela && !is_rel) continue; 559 if (!(sh->sh_flags & SHF_ALLOC)) 560 continue; /* link-time relocs: not dynamic */ 561 entsize = is_rela ? rela_size : rel_size; 562 if (sh->sh_entsize != entsize || (sh->sh_size % entsize) != 0) continue; 563 if (sh->sh_offset + sh->sh_size > len) continue; 564 nrec = (u32)(sh->sh_size / entsize); 565 base = data + sh->sh_offset; 566 for (j = 0; j < nrec; ++j) { 567 /* Elf32_Rela (12B): r_offset@0, r_info@4 (ELF32 packing), 568 * r_addend@8. Elf64_Rela (24B): r_offset@0, r_info@8, r_addend@16. */ 569 const u8* p = base + (u64)j * entsize; 570 u64 r_offset = elf_rd_addr(p + 0, is32); 571 u64 r_info = is32 ? (u64)rd_u32_le(p + 4) : rd_u64_le(p + 8); 572 i64 r_addend = 573 is_rela ? (is32 ? (i64)(i32)rd_u32_le(p + 8) : (i64)rd_u64_le(p + 16)) 574 : 0; 575 u32 esym = is32 ? ELF32_R_SYM(r_info) : ELF64_R_SYM(r_info); 576 u32 kind = reloc_from(is32 ? ELF32_R_TYPE(r_info) : ELF64_R_TYPE(r_info)); 577 ObjImageReloc dr; 578 if (kind == (u32)-1) continue; /* unmodeled dyn reloc type: skip */ 579 dr.section = OBJ_SEC_NONE; /* offset is a vaddr, not section-relative */ 580 dr.offset = r_offset; 581 dr.sym_name = (dynsym_names && esym < ndynsym) ? dynsym_names[esym] : 0; 582 dr.addend = r_addend; 583 dr.kind = (RelocKind)kind; 584 obj_image_add_dynreloc(im, &dr); 585 } 586 } 587 } 588 589 ObjBuilder* read_elf(Compiler* c, const char* name, const u8* data, 590 size_t len) { 591 (void)name; 592 593 /* Need at least the e_ident to read EI_CLASS; the full min-length 594 * check below uses the class-selected ehdr size. */ 595 if (len < EI_NIDENT) 596 compiler_panic(c, SRCLOC_NONE, "read_elf: input shorter than ELF header"); 597 598 if (data[EI_MAG0] != ELFMAG0 || data[EI_MAG1] != ELFMAG1 || 599 data[EI_MAG2] != ELFMAG2 || data[EI_MAG3] != ELFMAG3) 600 compiler_panic(c, SRCLOC_NONE, "read_elf: bad ELF magic"); 601 602 /* Accept both classes; is32 (EI_CLASS==ELFCLASS32) drives every 603 * stride/offset/field-order decision below. RV32 and RV64 share 604 * EM_RISCV — the reader cannot tell them apart by e_machine, only by 605 * EI_CLASS, so is32 is the single source of truth here. */ 606 if (data[EI_CLASS] != ELFCLASS64 && data[EI_CLASS] != ELFCLASS32) 607 compiler_panic(c, SRCLOC_NONE, "read_elf: not ELFCLASS32/64 (got %u)", 608 data[EI_CLASS]); 609 if (data[EI_DATA] != ELFDATA2LSB) 610 compiler_panic(c, SRCLOC_NONE, "read_elf: not ELFDATA2LSB (got %u)", 611 data[EI_DATA]); 612 613 int is32 = (data[EI_CLASS] == ELFCLASS32); 614 u32 ehdr_size = is32 ? ELF32_EHDR_SIZE : ELF64_EHDR_SIZE; 615 u32 shdr_size = is32 ? ELF32_SHDR_SIZE : ELF64_SHDR_SIZE; 616 u32 sym_size = is32 ? ELF32_SYM_SIZE : ELF64_SYM_SIZE; 617 u32 rela_size = is32 ? ELF32_RELA_SIZE : ELF64_RELA_SIZE; 618 u32 rel_size = is32 ? 8u : 16u; 619 if (len < ehdr_size) 620 compiler_panic(c, SRCLOC_NONE, "read_elf: input shorter than ELF header"); 621 622 u16 e_type = rd_u16_le(data + 16); 623 /* ET_REL parses to the section/symbol/reloc view only. ET_EXEC/ET_DYN 624 * additionally get the linked-image view (read_elf_image, below); their 625 * section tables still parse through the same passes. ET_CORE and other 626 * types are out of scope (see doc/plan/IMAGE_INSPECT.md). */ 627 if (e_type != ET_REL && e_type != ET_EXEC && e_type != ET_DYN) 628 compiler_panic(c, SRCLOC_NONE, 629 "read_elf: unsupported e_type=%u (expected ET_REL, " 630 "ET_EXEC, or ET_DYN)", 631 (u32)e_type); 632 633 u16 e_machine = rd_u16_le(data + 18); 634 /* EM_RISCV is shared by RV32/RV64; disambiguate by EI_CLASS via 635 * obj_elf_machine_class (obj_elf_machine keys on e_machine alone). */ 636 const ObjElfArchOps* arch = obj_elf_machine_class(e_machine, data[EI_CLASS]); 637 u32 (*reloc_from)(u32); 638 if (!arch || !arch->reloc_from) { 639 compiler_panic(c, SRCLOC_NONE, "read_elf: unsupported e_machine 0x%x", 640 (u32)e_machine); 641 } 642 reloc_from = arch->reloc_from; 643 644 /* Post-e_version Ehdr fields narrow + shift under ELF32: e_entry/ 645 * e_phoff/e_shoff are 4B (vs 8B), so e_flags@36, e_phentsize@42, 646 * e_phnum@44, e_shentsize@46, e_shnum@48, e_shstrndx@50 (vs 48/54/ 647 * 56/58/60/62 on ELF64). */ 648 u64 e_shoff = is32 ? (u64)rd_u32_le(data + 32) : rd_u64_le(data + 40); 649 u32 e_flags = rd_u32_le(data + (is32 ? 36 : 48)); 650 u16 e_shentsize = rd_u16_le(data + (is32 ? 46 : 58)); 651 u16 e_shnum = rd_u16_le(data + (is32 ? 48 : 60)); 652 u16 e_shstrndx = rd_u16_le(data + (is32 ? 50 : 62)); 653 654 /* A fully section-stripped image (objcopy --strip-sections, packers, 655 * some release binaries) sets e_shoff/e_shnum to zero: the section 656 * header table is gone, but the load segments still describe the file. 657 * That's valid for ET_EXEC/ET_DYN — parse the image view (segments + 658 * dynamic) and present an empty section view, matching GNU/LLVM. An 659 * ET_REL with no sections carries no model state, so still reject it. */ 660 int has_sht = (e_shoff != 0 && e_shnum != 0); 661 if (has_sht) { 662 if (e_shentsize != shdr_size) 663 compiler_panic(c, SRCLOC_NONE, "read_elf: unexpected e_shentsize %u", 664 (u32)e_shentsize); 665 if (e_shoff + (u64)e_shnum * shdr_size > len) 666 compiler_panic(c, SRCLOC_NONE, 667 "read_elf: section header table out of range"); 668 if (e_shstrndx >= e_shnum) 669 compiler_panic(c, SRCLOC_NONE, "read_elf: e_shstrndx %u >= e_shnum %u", 670 (u32)e_shstrndx, (u32)e_shnum); 671 } else { 672 if (e_type == ET_REL) 673 compiler_panic(c, SRCLOC_NONE, 674 "read_elf: ET_REL with no section header table"); 675 e_shnum = 0; /* normalize so the section/symbol/reloc passes are no-ops */ 676 } 677 678 /* Parse all shdrs into scratch. NULL when the table is absent. */ 679 ShdrRec* shdrs = NULL; 680 const u8* shstrtab = NULL; 681 u64 shstrtab_sz = 0; 682 if (has_sht) { 683 shdrs = arena_array(c->scratch, ShdrRec, e_shnum); 684 for (u32 i = 0; i < e_shnum; ++i) 685 parse_shdr(data + e_shoff + (u64)i * shdr_size, is32, &shdrs[i]); 686 687 const ShdrRec* shstr_sh = &shdrs[e_shstrndx]; 688 if (shstr_sh->sh_offset + shstr_sh->sh_size > len) 689 compiler_panic(c, SRCLOC_NONE, "read_elf: .shstrtab out of range"); 690 shstrtab = data + shstr_sh->sh_offset; 691 shstrtab_sz = shstr_sh->sh_size; 692 } 693 694 /* Build the ObjBuilder. */ 695 ObjBuilder* ob = obj_new(c); 696 if (!ob) compiler_panic(c, SRCLOC_NONE, "read_elf: obj_new failed"); 697 obj_set_elf_e_flags(ob, e_flags); 698 699 /* elf_to_obj[shndx] -> ObjSecId, OBJ_SEC_NONE for skipped sections. */ 700 u32* elf_to_obj = arena_zarray(c->scratch, u32, e_shnum ? e_shnum : 1); 701 702 /* Pass 1: create obj sections for every non-NULL shdr that carries 703 * load-bearing model state. SYMTAB / STRTAB / RELA / REL are 704 * consumed below for symbols and relocations and do NOT round-trip 705 * as obj sections — emit_elf re-synthesizes them from the 706 * ObjBuilder's symbols / strtab / relocs. The shstrtab is a STRTAB 707 * too, so it falls out the same way. */ 708 for (u32 i = 1; i < e_shnum; ++i) { 709 const ShdrRec* sh = &shdrs[i]; 710 if (sh->sh_type == SHT_NULL) continue; 711 if (sh->sh_type == SHT_SYMTAB) continue; 712 if (sh->sh_type == SHT_STRTAB) continue; 713 if (sh->sh_type == SHT_RELA) continue; 714 if (sh->sh_type == SHT_REL) continue; 715 /* SHT_GROUP is consumed below into an ObjGroup record (signature 716 * symbol + member ObjSecIds). emit_elf re-synthesizes the group 717 * section bytes from the ObjGroup, using current section indices 718 * — so the original section's raw body would be stale anyway. */ 719 if (sh->sh_type == SHT_GROUP) continue; 720 721 u32 nlen; 722 const char* nm = strtab_lookup(shstrtab, shstrtab_sz, sh->sh_name, &nlen); 723 Sym sym = pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen}); 724 725 u16 sec_kind = elf_kind_from_name(nm, nlen, sh->sh_flags, sh->sh_type); 726 int type_known; 727 u16 sec_sem = elf_type_to_sem(sh->sh_type, &type_known); 728 u16 flags = elf_flags_to_obj(sh->sh_flags); 729 u32 align = sh->sh_addralign ? (u32)sh->sh_addralign : 1; 730 731 ObjSecId id = 732 obj_section_ex(ob, sym, (SecKind)sec_kind, (SecSem)sec_sem, flags, 733 align, (u32)sh->sh_entsize, sh->sh_link, sh->sh_info); 734 if (id == OBJ_SEC_NONE) 735 compiler_panic(c, SRCLOC_NONE, 736 "read_elf: obj_section_ex failed for '%.*s'", 737 SLICE_ARG(((Slice){.s = nm, .len = nlen}))); 738 elf_to_obj[i] = id; 739 740 /* Load address: 0 for ET_REL, the assigned vaddr for linked images. 741 * Lets the section view carry the load picture for execs/DSOs. */ 742 if (sh->sh_addr) obj_section_set_addr(ob, id, sh->sh_addr); 743 744 /* Preserve format-specific bits the canonical SecSem/SecFlag 745 * mapping can't represent so emit_elf can write them back 746 * verbatim. ext_type only set when the sh_type fell through 747 * to the "unknown" path. */ 748 u32 leftover = (u32)(sh->sh_flags & ~ELF_KNOWN_FLAGS_MASK); 749 if (!type_known || leftover) { 750 obj_section_set_ext(ob, id, OBJ_EXT_ELF, type_known ? 0 : sh->sh_type, 751 leftover); 752 } 753 754 /* Body bytes. */ 755 if (sh->sh_type == SHT_NOBITS) { 756 obj_reserve_bss(ob, id, (u32)sh->sh_size, align); 757 } else if (sh->sh_size) { 758 if (sh->sh_offset + sh->sh_size > len) 759 compiler_panic(c, SRCLOC_NONE, 760 "read_elf: section '%.*s' bytes out of range", 761 SLICE_ARG(((Slice){.s = nm, .len = nlen}))); 762 /* For SYMTAB/STRTAB/RELA we still copy the raw bytes — the 763 * post-finalize shape contract says these sections are 764 * present; emit_elf will regenerate them on re-emit, so the 765 * preserved bytes are informational rather than load-bearing. 766 */ 767 obj_write(ob, id, data + sh->sh_offset, (size_t)sh->sh_size); 768 } 769 } 770 771 /* Pass 2: parse the .symtab into ObjSyms, building an 772 * elf_sym_idx -> ObjSymId table. There may be zero or one SYMTAB in 773 * an ET_REL; pick the first. */ 774 u32 symtab_shndx = 0; 775 for (u32 i = 1; i < e_shnum; ++i) { 776 if (shdrs[i].sh_type == SHT_SYMTAB) { 777 symtab_shndx = i; 778 break; 779 } 780 } 781 782 u32 nsyms = 0; 783 u32* sym_elf_to_obj = NULL; 784 785 if (symtab_shndx) { 786 const ShdrRec* sh = &shdrs[symtab_shndx]; 787 if (sh->sh_entsize != sym_size) 788 compiler_panic(c, SRCLOC_NONE, "read_elf: .symtab entsize %llu != %u", 789 (unsigned long long)sh->sh_entsize, sym_size); 790 if (sh->sh_size % sym_size) 791 compiler_panic(c, SRCLOC_NONE, 792 "read_elf: .symtab size %llu not a multiple of %u", 793 (unsigned long long)sh->sh_size, sym_size); 794 if (sh->sh_link >= e_shnum) 795 compiler_panic(c, SRCLOC_NONE, 796 "read_elf: .symtab sh_link %u out of range", sh->sh_link); 797 const ShdrRec* str_sh = &shdrs[sh->sh_link]; 798 if (str_sh->sh_offset + str_sh->sh_size > len) 799 compiler_panic(c, SRCLOC_NONE, "read_elf: .strtab out of range"); 800 const u8* strtab = data + str_sh->sh_offset; 801 u64 strtab_sz = str_sh->sh_size; 802 803 nsyms = (u32)(sh->sh_size / sym_size); 804 sym_elf_to_obj = arena_zarray(c->scratch, u32, nsyms ? nsyms : 1); 805 806 const u8* base = data + sh->sh_offset; 807 for (u32 i = 1; i < nsyms; ++i) { /* skip index 0 */ 808 const u8* p = base + (u64)i * sym_size; 809 /* Elf32_Sym REORDERS: st_name@0, st_value@4, st_size@8, st_info@12, 810 * st_other@13, st_shndx@14. Elf64_Sym: st_name@0, st_info@4, 811 * st_other@5, st_shndx@6, st_value@8, st_size@16. */ 812 u32 st_name = rd_u32_le(p + 0); 813 u8 st_info = is32 ? p[12] : p[4]; 814 u8 st_other = is32 ? p[13] : p[5]; 815 u16 st_shndx = is32 ? rd_u16_le(p + 14) : rd_u16_le(p + 6); 816 u64 st_value = is32 ? (u64)rd_u32_le(p + 4) : rd_u64_le(p + 8); 817 u64 st_size = is32 ? (u64)rd_u32_le(p + 8) : rd_u64_le(p + 16); 818 819 u32 nlen; 820 const char* nm = strtab_lookup(strtab, strtab_sz, st_name, &nlen); 821 nlen = elf_default_version_namelen(nm, nlen); 822 Sym sn = nlen 823 ? pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen}) 824 : 0; 825 826 u32 e_bind = ELF64_ST_BIND(st_info); 827 u32 e_type = ELF64_ST_TYPE(st_info); 828 u16 bind = elf_bind_to_obj(e_bind); 829 u16 kind = elf_type_to_kind(e_type, st_shndx); 830 u8 vis = elf_other_to_vis(st_other); 831 832 ObjSecId sec_id; 833 u64 value; 834 u64 cmnalign = 0; 835 if (st_shndx == SHN_UNDEF) { 836 sec_id = OBJ_SEC_NONE; 837 value = st_value; 838 } else if (st_shndx == SHN_ABS || st_shndx == SHN_COMMON) { 839 sec_id = OBJ_SEC_NONE; 840 value = st_value; 841 if (st_shndx == SHN_COMMON) cmnalign = st_value; 842 } else if (st_shndx < e_shnum && shdrs[st_shndx].sh_type == SHT_GROUP) { 843 /* A COMDAT group's signature symbol is defined in its SHT_GROUP 844 * section, which we consume into an ObjGroup and never keep as an 845 * obj section (so elf_to_obj is OBJ_SEC_NONE for it). The symbol just 846 * names the group; it is not a data location and is never a reloc 847 * target. Record it as an absolute defined symbol so it doesn't look 848 * like a phantom undefined reference -- FreeBSD's crt1.o brands the 849 * binary with such a symbol (.freebsd.note*). */ 850 sec_id = OBJ_SEC_NONE; 851 value = st_value; 852 kind = SK_ABS; 853 } else if (st_shndx < e_shnum) { 854 sec_id = elf_to_obj[st_shndx]; 855 value = st_value; 856 } else { 857 compiler_panic(c, SRCLOC_NONE, "read_elf: symbol shndx %u out of range", 858 (u32)st_shndx); 859 sec_id = OBJ_SEC_NONE; 860 value = 0; /* unreachable */ 861 } 862 863 ObjSymId id = 864 obj_symbol_ex(ob, sn, (SymBind)bind, (SymVis)vis, (SymKind)kind, 865 sec_id, value, st_size, cmnalign); 866 obj_sym_mark_referenced(ob, id); 867 sym_elf_to_obj[i] = id; 868 } 869 } 870 871 /* Pass 3: parse each SHT_RELA / SHT_REL into ObjBuilder relocations 872 * targeting the section the rela header's sh_info points at. */ 873 for (u32 i = 1; i < e_shnum; ++i) { 874 const ShdrRec* sh = &shdrs[i]; 875 int is_rela = (sh->sh_type == SHT_RELA); 876 int is_rel = (sh->sh_type == SHT_REL); 877 if (!is_rela && !is_rel) continue; 878 /* Allocatable rela/rel in ET_EXEC/ET_DYN are loader (dynamic) 879 * relocations — sh_info is 0 or a .got index, not a target section. 880 * They belong to the image's dynamic-reloc view (read_elf_image), not 881 * the section-relocation table. ET_REL link-time relocs are never 882 * SHF_ALLOC, so this is a no-op for relocatable objects. */ 883 if (sh->sh_flags & SHF_ALLOC) continue; 884 885 u32 entsize = is_rela ? rela_size : rel_size; 886 if (sh->sh_entsize != entsize) 887 compiler_panic(c, SRCLOC_NONE, "read_elf: rela entsize %llu != %u", 888 (unsigned long long)sh->sh_entsize, entsize); 889 if (sh->sh_info == 0 || sh->sh_info >= e_shnum) 890 compiler_panic(c, SRCLOC_NONE, "read_elf: rela sh_info %u out of range", 891 sh->sh_info); 892 ObjSecId target = elf_to_obj[sh->sh_info]; 893 if (target == OBJ_SEC_NONE) continue; 894 895 u32 nrec = (u32)(sh->sh_size / entsize); 896 const u8* base = data + sh->sh_offset; 897 for (u32 j = 0; j < nrec; ++j) { 898 /* Elf32_Rela (12B): r_offset@0, r_info@4 (ELF32 packing, 8-bit 899 * type), r_addend@8. Elf64_Rela (24B): r_offset@0, r_info@8, 900 * r_addend@16. */ 901 const u8* p = base + (u64)j * entsize; 902 u64 r_offset = elf_rd_addr(p + 0, is32); 903 u64 r_info = is32 ? (u64)rd_u32_le(p + 4) : rd_u64_le(p + 8); 904 i64 r_addend = 905 is_rela ? (is32 ? (i64)(i32)rd_u32_le(p + 8) : (i64)rd_u64_le(p + 16)) 906 : 0; 907 u32 esym = is32 ? ELF32_R_SYM(r_info) : ELF64_R_SYM(r_info); 908 u32 etype = is32 ? ELF32_R_TYPE(r_info) : ELF64_R_TYPE(r_info); 909 910 u32 kind = reloc_from(etype); 911 if (kind == (u32)-1) 912 compiler_panic(c, SRCLOC_NONE, 913 "read_elf: unsupported reloc type %u for e_machine 0x%x", 914 etype, (u32)e_machine); 915 916 ObjSymId target_sym = OBJ_SYM_NONE; 917 if (esym && sym_elf_to_obj && esym < nsyms) 918 target_sym = sym_elf_to_obj[esym]; 919 920 obj_reloc_ex(ob, target, (u32)r_offset, (RelocKind)kind, target_sym, 921 r_addend, is_rela ? 1 : 0, 0); 922 } 923 } 924 925 /* Pass 4: SHT_GROUP. Each GROUP section's body is a sequence of 926 * 4-byte LE indices: [flags, shndx, shndx, ...]. The signature is 927 * the symbol named by sh_link/sh_info convention (sh_link=symtab, 928 * sh_info=symbol index in that symtab). */ 929 for (u32 i = 1; i < e_shnum; ++i) { 930 const ShdrRec* sh = &shdrs[i]; 931 if (sh->sh_type != SHT_GROUP) continue; 932 933 if (sh->sh_size < 4 || (sh->sh_size % 4)) continue; 934 const u8* p = data + sh->sh_offset; 935 u32 flags = rd_u32_le(p); 936 u32 nm_len; 937 const char* gnm = 938 strtab_lookup(shstrtab, shstrtab_sz, sh->sh_name, &nm_len); 939 Sym gname = pool_intern_slice(c->global, (Slice){.s = gnm, .len = nm_len}); 940 941 ObjSymId signature = OBJ_SYM_NONE; 942 if (sym_elf_to_obj && sh->sh_info < nsyms) 943 signature = sym_elf_to_obj[sh->sh_info]; 944 945 ObjGroupId gid = obj_group(ob, gname, signature, flags); 946 u32 n = (u32)(sh->sh_size / 4) - 1; 947 for (u32 j = 0; j < n; ++j) { 948 u32 shndx = rd_u32_le(p + 4 + j * 4); 949 if (shndx < e_shnum && elf_to_obj[shndx] != OBJ_SEC_NONE) 950 obj_group_add_section(ob, gid, elf_to_obj[shndx]); 951 } 952 } 953 954 /* ET_EXEC / ET_DYN: attach the linked-image view (segments + dynamic). */ 955 if (e_type != ET_REL) 956 read_elf_image(c, ob, data, len, e_type, is32, shdrs, e_shnum, elf_to_obj, 957 reloc_from); 958 959 obj_finalize(ob); 960 return ob; 961 } 962 963 /* ---- ET_DYN (shared object) reader ---- 964 * 965 * Produces an ObjBuilder containing only the DSO's exported symbols 966 * (parsed from .dynsym, not .symtab). The DSO's sections, relocations, 967 * and groups are skipped — DSOs contribute no bytes to the output 968 * image. The DT_SONAME (if any) is interned and returned via 969 * `*soname_out` so the caller can record DT_NEEDED at link time. 970 * 971 * Symbol shape: each defined dynsym entry produces an ObjSym whose 972 * (bind, kind, vis) match the source. `section_id` is OBJ_SEC_NONE — 973 * the symbol's value is its DSO-internal vaddr, not meaningful to the 974 * consuming linker, so we record `value=0`. The linker layer 975 * (resolve_undefs) only consults the name and the defined-ness flag. 976 * 977 * Undefined dynsym entries (st_shndx==SHN_UNDEF) are imports the DSO 978 * itself has against other libraries; they're not relevant to a 979 * consumer that's linking against this DSO and are dropped. */ 980 981 static int parse_phdr(const u8* data, size_t len, u64 e_phoff, u16 e_phentsize, 982 u16 e_phnum, u32 want_type, u64* out_offset, 983 u64* out_filesz) { 984 u32 i; 985 if (e_phentsize != ELF64_PHDR_SIZE) return 0; 986 if (e_phoff + (u64)e_phnum * ELF64_PHDR_SIZE > len) return 0; 987 for (i = 0; i < e_phnum; ++i) { 988 const u8* p = data + e_phoff + (u64)i * ELF64_PHDR_SIZE; 989 u32 p_type = rd_u32_le(p + 0); 990 if (p_type != want_type) continue; 991 *out_offset = rd_u64_le(p + 8); 992 *out_filesz = rd_u64_le(p + 32); 993 return 1; 994 } 995 return 0; 996 } 997 998 ObjBuilder* read_elf_dso(Compiler* c, const char* name, const u8* data, 999 size_t len, Sym* soname_out) { 1000 (void)name; 1001 if (soname_out) *soname_out = 0; 1002 1003 if (len < ELF64_EHDR_SIZE) 1004 compiler_panic(c, SRCLOC_NONE, 1005 "read_elf_dso: input shorter than ELF header"); 1006 if (data[EI_MAG0] != ELFMAG0 || data[EI_MAG1] != ELFMAG1 || 1007 data[EI_MAG2] != ELFMAG2 || data[EI_MAG3] != ELFMAG3) 1008 compiler_panic(c, SRCLOC_NONE, "read_elf_dso: bad ELF magic"); 1009 if (data[EI_CLASS] != ELFCLASS64) 1010 compiler_panic(c, SRCLOC_NONE, "read_elf_dso: not ELFCLASS64"); 1011 if (data[EI_DATA] != ELFDATA2LSB) 1012 compiler_panic(c, SRCLOC_NONE, "read_elf_dso: not ELFDATA2LSB"); 1013 1014 u16 e_type = rd_u16_le(data + 16); 1015 if (e_type != ET_DYN) 1016 compiler_panic(c, SRCLOC_NONE, 1017 "read_elf_dso: expected ET_DYN, got e_type=%u", (u32)e_type); 1018 1019 u16 e_machine = rd_u16_le(data + 18); 1020 { 1021 const ObjFormatImpl* fmt = obj_format_lookup(KIT_OBJ_ELF); 1022 const ObjElfArchOps* arch = 1023 fmt && fmt->elf_machine ? fmt->elf_machine(e_machine) : NULL; 1024 if (!arch) 1025 compiler_panic(c, SRCLOC_NONE, "read_elf_dso: unsupported e_machine 0x%x", 1026 (u32)e_machine); 1027 } 1028 1029 u64 e_phoff = rd_u64_le(data + 32); 1030 u64 e_shoff = rd_u64_le(data + 40); 1031 u16 e_phentsize = rd_u16_le(data + 54); 1032 u16 e_phnum = rd_u16_le(data + 56); 1033 u16 e_shentsize = rd_u16_le(data + 58); 1034 u16 e_shnum = rd_u16_le(data + 60); 1035 u16 e_shstrndx = rd_u16_le(data + 62); 1036 1037 if (e_shentsize != ELF64_SHDR_SIZE) 1038 compiler_panic(c, SRCLOC_NONE, "read_elf_dso: unexpected e_shentsize %u", 1039 (u32)e_shentsize); 1040 if (e_shoff + (u64)e_shnum * ELF64_SHDR_SIZE > len) 1041 compiler_panic(c, SRCLOC_NONE, 1042 "read_elf_dso: section header table out of range"); 1043 if (e_shstrndx >= e_shnum) 1044 compiler_panic(c, SRCLOC_NONE, "read_elf_dso: e_shstrndx out of range"); 1045 1046 /* read_elf_dso is ELFCLASS64-only (panics above on other classes), so 1047 * parse with the ELF64 layout (is32 = 0). */ 1048 ShdrRec* shdrs = arena_array(c->scratch, ShdrRec, e_shnum); 1049 for (u32 i = 0; i < e_shnum; ++i) 1050 parse_shdr(data + e_shoff + (u64)i * ELF64_SHDR_SIZE, 0, &shdrs[i]); 1051 1052 /* Locate .dynsym (preferred over .symtab — a stripped DSO carries 1053 * only .dynsym) and its associated strtab via sh_link. */ 1054 u32 dynsym_idx = 0, dynamic_idx = 0; 1055 for (u32 i = 1; i < e_shnum; ++i) { 1056 if (shdrs[i].sh_type == SHT_DYNSYM && !dynsym_idx) dynsym_idx = i; 1057 if (shdrs[i].sh_type == SHT_DYNAMIC && !dynamic_idx) dynamic_idx = i; 1058 } 1059 1060 if (!dynsym_idx) 1061 compiler_panic(c, SRCLOC_NONE, 1062 "read_elf_dso: no SHT_DYNSYM in shared object"); 1063 1064 /* Parse PT_DYNAMIC for DT_SONAME. The .dynamic section gives us the 1065 * dynstr to resolve the SONAME's offset; if there's no .dynamic 1066 * section we fall back to scanning the PT_DYNAMIC segment. */ 1067 Sym soname = 0; 1068 if (dynamic_idx) { 1069 const ShdrRec* dsh = &shdrs[dynamic_idx]; 1070 if (dsh->sh_link >= e_shnum) 1071 compiler_panic(c, SRCLOC_NONE, 1072 "read_elf_dso: .dynamic sh_link %u out of range", 1073 dsh->sh_link); 1074 const ShdrRec* str_sh = &shdrs[dsh->sh_link]; 1075 if (str_sh->sh_offset + str_sh->sh_size > len) 1076 compiler_panic(c, SRCLOC_NONE, 1077 "read_elf_dso: .dynamic strtab out of range"); 1078 const u8* dynstr = data + str_sh->sh_offset; 1079 u64 dynstr_sz = str_sh->sh_size; 1080 1081 if (dsh->sh_offset + dsh->sh_size > len) 1082 compiler_panic(c, SRCLOC_NONE, 1083 "read_elf_dso: .dynamic body out of range"); 1084 const u8* dynp = data + dsh->sh_offset; 1085 u64 dynsz = dsh->sh_size; 1086 /* DT entries are 16 bytes: (d_tag: u64, d_un: u64). */ 1087 for (u64 off = 0; off + 16 <= dynsz; off += 16) { 1088 u64 tag = rd_u64_le(dynp + off); 1089 u64 val = rd_u64_le(dynp + off + 8); 1090 if (tag == DT_NULL) break; 1091 if (tag == DT_SONAME) { 1092 u32 nlen; 1093 const char* nm = strtab_lookup(dynstr, dynstr_sz, (u32)val, &nlen); 1094 if (nlen) 1095 soname = pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen}); 1096 break; 1097 } 1098 } 1099 } else if (e_phnum) { 1100 /* Fallback: walk PT_DYNAMIC straight from program headers. We 1101 * only need DT_SONAME, so skip if we can't find a strtab pointer 1102 * inline (DT_STRTAB carries a vaddr, not a file offset — stripped 1103 * DSOs without SHT_DYNAMIC are exceedingly rare in practice). */ 1104 u64 dyn_off, dyn_sz; 1105 (void)parse_phdr(data, len, e_phoff, e_phentsize, e_phnum, PT_DYNAMIC, 1106 &dyn_off, &dyn_sz); 1107 } 1108 if (soname_out) *soname_out = soname; 1109 1110 /* Now parse .dynsym. */ 1111 const ShdrRec* sh = &shdrs[dynsym_idx]; 1112 if (sh->sh_entsize != ELF64_SYM_SIZE) 1113 compiler_panic(c, SRCLOC_NONE, "read_elf_dso: .dynsym entsize %llu != %u", 1114 (unsigned long long)sh->sh_entsize, (u32)ELF64_SYM_SIZE); 1115 if (sh->sh_size % ELF64_SYM_SIZE) 1116 compiler_panic(c, SRCLOC_NONE, 1117 "read_elf_dso: .dynsym size not multiple of entry size"); 1118 if (sh->sh_link >= e_shnum) 1119 compiler_panic(c, SRCLOC_NONE, 1120 "read_elf_dso: .dynsym sh_link out of range"); 1121 const ShdrRec* str_sh = &shdrs[sh->sh_link]; 1122 if (str_sh->sh_offset + str_sh->sh_size > len) 1123 compiler_panic(c, SRCLOC_NONE, "read_elf_dso: .dynstr out of range"); 1124 const u8* strtab = data + str_sh->sh_offset; 1125 u64 strtab_sz = str_sh->sh_size; 1126 1127 ObjBuilder* ob = obj_new(c); 1128 if (!ob) compiler_panic(c, SRCLOC_NONE, "read_elf_dso: obj_new failed"); 1129 1130 /* The DSO always gets an ObjImage: its dynsyms record each export's default 1131 * version (so the linker can emit a matching .gnu.version_r — see 1132 * build_versions in link_dyn.c, harmless/empty for unversioned DSOs like 1133 * musl), and its undef list records the symbols this DSO references so 1134 * --gc-sections keeps the executable's definitions of them alive. */ 1135 u32 verdef_max = 0; 1136 Sym* verdef_tbl = read_elf_verdefs(c, data, len, shdrs, e_shnum, &verdef_max); 1137 const u8* versym = NULL; 1138 u32 nversym = 0; 1139 for (u32 i = 1; i < e_shnum; ++i) { 1140 if (shdrs[i].sh_type != SHT_GNU_VERSYM) continue; 1141 if (shdrs[i].sh_offset + shdrs[i].sh_size <= len && shdrs[i].sh_entsize == 2) 1142 versym = data + shdrs[i].sh_offset, 1143 nversym = (u32)(shdrs[i].sh_size / 2u); 1144 break; 1145 } 1146 ObjImage* im = obj_image_ensure(ob, OBJ_KIND_DYN); 1147 if (im && soname) obj_image_set_soname(im, soname); 1148 1149 u32 nsyms = (u32)(sh->sh_size / ELF64_SYM_SIZE); 1150 const u8* base = data + sh->sh_offset; 1151 for (u32 i = 1; i < nsyms; ++i) { /* skip index 0 */ 1152 const u8* p = base + (u64)i * ELF64_SYM_SIZE; 1153 u32 st_name = rd_u32_le(p + 0); 1154 u8 st_info = p[4]; 1155 u8 st_other = p[5]; 1156 u16 st_shndx = rd_u16_le(p + 6); 1157 u32 e_bind = ELF64_ST_BIND(st_info); 1158 u32 nlen; 1159 const char* nm; 1160 Sym sn; 1161 1162 /* Locals are neither exports nor reference dependencies we track. */ 1163 if (e_bind == STB_LOCAL) continue; 1164 nm = strtab_lookup(strtab, strtab_sz, st_name, &nlen); 1165 if (!nlen) continue; 1166 sn = pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen}); 1167 1168 /* The DSO's own undefined references: not exports, but if the executable 1169 * defines one (e.g. libc.so.7's `environ` / `__progname`, defined by the 1170 * crt) the static linker must keep that definition under --gc-sections. */ 1171 if (st_shndx == SHN_UNDEF) { 1172 obj_image_add_undef(im, sn); 1173 continue; 1174 } 1175 1176 u32 e_type_field = ELF64_ST_TYPE(st_info); 1177 u16 bind = elf_bind_to_obj(e_bind); 1178 u16 kind = elf_type_to_kind(e_type_field, st_shndx); 1179 u8 vis = elf_other_to_vis(st_other); 1180 1181 /* DSO exports land as defined symbols in OBJ_SEC_NONE with 1182 * value=0. The consumer treats them as imports — see 1183 * resolve_undefs in src/link/link_layout.c. */ 1184 { 1185 ObjSymId did = obj_symbol_ex(ob, sn, (SymBind)bind, (SymVis)vis, 1186 (SymKind)kind, OBJ_SEC_NONE, 0, 0, 0); 1187 obj_sym_mark_referenced(ob, did); 1188 } 1189 if (im) { 1190 ObjImageSym ds; 1191 ds.name = sn; 1192 ds.bind = (SymBind)bind; 1193 ds.kind = (SymKind)kind; 1194 ds.section = OBJ_SEC_NONE; 1195 ds.value = 0; 1196 ds.size = 0; 1197 ds.version = 0; 1198 if (i < nversym) { 1199 u16 v = rd_u16_le(versym + (u64)i * 2u); 1200 u32 ndx = (u32)(v & VERSYM_VERSION); 1201 if (!(v & VERSYM_HIDDEN) && ndx >= 2u && ndx <= verdef_max) 1202 ds.version = verdef_tbl[ndx]; 1203 } 1204 obj_image_add_dynsym(im, &ds); 1205 } 1206 } 1207 1208 obj_finalize(ob); 1209 return ob; 1210 }