read.c (33577B)
1 /* PE/COFF .obj (IMAGE_FILE_HEADER + sections) reader. Parses a 64-bit 2 * little-endian relocatable object back into a fresh ObjBuilder. Peer 3 * of read_elf / read_macho; the post-finalize ObjBuilder shape is the 4 * canonical superset doc/DESIGN.md §5.5 promises: read_coff of an 5 * emit_coff output produces an ObjBuilder shape-equivalent to the 6 * writer's input, modulo synthesized SECTION symbols and the COMDAT 7 * section-definition aux records. 8 * 9 * Scope: IMAGE_FILE_MACHINE_AMD64 and IMAGE_FILE_MACHINE_ARM64. PE 10 * *images* (executables / DLLs, beginning with the DOS 'MZ' stub) are 11 * detected at entry and dispatched to read_coff_image (read_image.c). 12 * Microsoft "short import" records (Sig1=0, Sig2=0xFFFF) found inside 13 * .lib archive members are likewise detected at entry and dispatched to 14 * read_coff_short_import, which synthesizes a DSO-shaped ObjBuilder 15 * annotated with the providing DLL name via obj_set_coff_import_dll. */ 16 17 #include <string.h> 18 19 #include "core/arena.h" 20 #include "core/heap.h" 21 #include "core/pool.h" 22 #include "core/slice.h" 23 #include "obj/coff/coff.h" 24 #include "obj/coff/read_util.h" 25 #include "obj/format.h" 26 27 /* ---- section-header scratch ---- */ 28 29 typedef struct CSecRec { 30 char raw_name[8]; 31 u32 virtual_size; 32 u32 size_of_raw_data; 33 u32 pointer_to_raw_data; 34 u32 pointer_to_relocations; 35 u16 number_of_relocations; 36 u32 characteristics; 37 ObjSecId obj_sec; /* OBJ_SEC_NONE if skipped */ 38 } CSecRec; 39 40 static void parse_shdr(const u8* p, CSecRec* out) { 41 memcpy(out->raw_name, p, 8); 42 out->virtual_size = coff_rd_u32(p + 8); 43 out->size_of_raw_data = coff_rd_u32(p + 16); 44 out->pointer_to_raw_data = coff_rd_u32(p + 20); 45 out->pointer_to_relocations = coff_rd_u32(p + 24); 46 out->number_of_relocations = coff_rd_u16(p + 32); 47 out->characteristics = coff_rd_u32(p + 36); 48 out->obj_sec = OBJ_SEC_NONE; 49 } 50 51 /* ---- string-table lookup (4-byte size prefix, NUL-terminated entries) ---- */ 52 53 static const char* strtab_lookup(const u8* tab, u32 tab_size, u32 off, 54 u32* len_out) { 55 if (off >= tab_size) { 56 *len_out = 0; 57 return ""; 58 } 59 const char* s = (const char*)(tab + off); 60 u32 max = tab_size - off; 61 u32 n = 0; 62 while (n < max && s[n] != '\0') ++n; 63 *len_out = n; 64 return s; 65 } 66 67 /* Resolve a section/symbol short-or-long name into (ptr, len). COFF 68 * section names use the "/<decimal>" convention for >8-byte names; COFF 69 * symbol names use the (Zeroes==0, Offset) form instead. This helper 70 * handles the section form (8 raw bytes; leading '/' triggers strtab 71 * lookup). */ 72 static void resolve_section_name(const char raw[8], const u8* strtab, 73 u32 strtab_size, const char** name_out, 74 u32* len_out) { 75 if (raw[0] == '/') { 76 /* Parse decimal offset. Up to 7 ASCII digits. */ 77 u32 off = 0; 78 for (u32 i = 1; i < 8 && raw[i] >= '0' && raw[i] <= '9'; ++i) { 79 off = off * 10u + (u32)(raw[i] - '0'); 80 } 81 *name_out = strtab_lookup(strtab, strtab_size, off, len_out); 82 return; 83 } 84 /* Inline: up to 8 bytes, NUL-padded (not necessarily NUL-terminated). */ 85 u32 n = 0; 86 while (n < 8 && raw[n] != '\0') ++n; 87 *name_out = raw; 88 *len_out = n; 89 } 90 91 /* characteristics -> SecKind / SecFlag / alignment live in read_util.c 92 * (coff_sec_kind / coff_sec_flags / coff_sec_align), shared with the 93 * image reader. */ 94 95 /* ---- symbol-name resolution ---- */ 96 97 static void resolve_sym_name(const u8* rec, const u8* strtab, u32 strtab_size, 98 const char** name_out, u32* len_out) { 99 /* ShortName: 8 bytes. If first 4 bytes are zero, second 4 bytes is 100 * the strtab offset (LongName form). */ 101 u32 z = coff_rd_u32(rec + 0); 102 if (z == 0) { 103 u32 off = coff_rd_u32(rec + 4); 104 *name_out = strtab_lookup(strtab, strtab_size, off, len_out); 105 return; 106 } 107 u32 n = 0; 108 while (n < 8 && rec[n] != '\0') ++n; 109 *name_out = (const char*)rec; 110 *len_out = n; 111 } 112 113 static int coff_reloc_inline_addend(const u8* data, size_t len, 114 const CSecRec* s, u32 off, u32 width, 115 i64* out) { 116 if (!s || !s->size_of_raw_data) return 0; 117 if ((u64)off + (u64)width > (u64)s->size_of_raw_data) return 0; 118 if ((u64)s->pointer_to_raw_data + (u64)off + (u64)width > (u64)len) 119 return 0; 120 const u8* p = data + s->pointer_to_raw_data + off; 121 switch (width) { 122 case 4: 123 *out = (i64)(i32)coff_rd_u32(p); 124 return 1; 125 case 8: 126 *out = (i64)coff_rd_u64(p); 127 return 1; 128 default: 129 return 0; 130 } 131 } 132 133 /* ---- short-import record handler ---- 134 * Microsoft "short import" format: a 20-byte ImportObjectHeader 135 * followed by SizeOfData bytes containing two NUL-terminated strings — 136 * the imported symbol name then the DLL name. These live as members 137 * of .lib archives (mingw's libkernel32.dll.a etc.) and stand in for 138 * a full long-form COFF import object. 139 * 140 * kit-side model: synthesize a DSO-shaped ObjBuilder with the 141 * imported symbol defined at section_id = OBJ_SEC_NONE (the same 142 * shape read_coff_dso / read_elf_dso produce for an exported name), 143 * and stash the providing DLL name on the builder via 144 * obj_set_coff_import_dll so the archive-ingestion layer can route 145 * the resulting LinkInput as a DSO with this name as the soname. 146 * 147 * We also synthesize the `__imp_<name>` alias mingw codegen uses to 148 * spell explicit IAT-slot access; both names ultimately resolve to 149 * the same DLL export at link time. */ 150 static ObjBuilder* read_coff_short_import(Compiler* c, const char* name, 151 const u8* data, size_t len) { 152 if (len < COFF_IMPORT_OBJECT_HEADER_SIZE) 153 compiler_panic(c, SRCLOC_NONE, 154 "read_coff: short-import record shorter than header"); 155 156 /* Sig1 / Sig2 already checked by the caller. */ 157 /* data + 4: Version (2 bytes, ignored). */ 158 u16 machine = coff_rd_u16(data + 6); 159 /* data + 8: TimeDateStamp (4 bytes, ignored). */ 160 u32 size_of_data = coff_rd_u32(data + 12); 161 u16 ordinal_or_hint = coff_rd_u16(data + 16); 162 u16 type_flags = coff_rd_u16(data + 18); 163 164 if ((u64)COFF_IMPORT_OBJECT_HEADER_SIZE + (u64)size_of_data > (u64)len) 165 compiler_panic(c, SRCLOC_NONE, 166 "read_coff: short-import SizeOfData=%u extends past input " 167 "(len=%zu)", 168 size_of_data, len); 169 170 if (machine != IMAGE_FILE_MACHINE_AMD64 && 171 machine != IMAGE_FILE_MACHINE_ARM64) 172 compiler_panic(c, SRCLOC_NONE, 173 "read_coff: short-import unsupported machine %#x", 174 (u32)machine); 175 176 /* Decode TypeFlags bitfield (Type:2, NameType:3, Reserved:11). */ 177 u32 import_type = (u32)(type_flags & 0x3u); 178 u32 name_type = (u32)((type_flags >> 2) & 0x7u); 179 180 /* Ordinal-only imports (NameType=IMPORT_OBJECT_ORDINAL) are not yet 181 * implemented in kit. None of the mingw / llvm-mingw system import 182 * archives use this shape — every libfoo.a member in the supported 183 * sysroots imports by name — so refusing here is a clean diagnostic, 184 * not an internal panic. When a real consumer surfaces, the work is 185 * to thread the ordinal through link_resolve and into the PE import 186 * directory hint/name tables. */ 187 if (name_type == IMPORT_OBJECT_ORDINAL) 188 compiler_panic( 189 c, SRCLOC_NONE, 190 "read_coff: short-import by ordinal not implemented " 191 "(archive member \"%.*s\", ordinal %u). kit links " 192 "imports by name only; rebuild the consumer to import " 193 "by name, or omit this archive from the link.", 194 SLICE_ARG(name ? slice_from_cstr(name) : SLICE_LIT("<unnamed>")), 195 (unsigned)ordinal_or_hint); 196 197 /* Symbol name: NUL-terminated starting at data + 20. */ 198 const u8* body = data + COFF_IMPORT_OBJECT_HEADER_SIZE; 199 u32 sym_name_max = size_of_data; 200 u32 sym_name_len = 0; 201 while (sym_name_len < sym_name_max && body[sym_name_len] != '\0') 202 ++sym_name_len; 203 if (sym_name_len == sym_name_max) 204 compiler_panic(c, SRCLOC_NONE, 205 "read_coff: short-import symbol name not NUL-terminated"); 206 207 /* DLL name: NUL-terminated starting after the symbol name's NUL. */ 208 u32 dll_name_off = sym_name_len + 1u; 209 if (dll_name_off >= size_of_data) 210 compiler_panic(c, SRCLOC_NONE, "read_coff: short-import missing DLL name"); 211 const u8* dll_p = body + dll_name_off; 212 u32 dll_name_max = size_of_data - dll_name_off; 213 u32 dll_name_len = 0; 214 while (dll_name_len < dll_name_max && dll_p[dll_name_len] != '\0') 215 ++dll_name_len; 216 if (dll_name_len == dll_name_max) 217 compiler_panic(c, SRCLOC_NONE, 218 "read_coff: short-import DLL name not NUL-terminated"); 219 220 ObjBuilder* ob = obj_new(c); 221 if (!ob) compiler_panic(c, SRCLOC_NONE, "read_coff: obj_new failed"); 222 223 /* Pick SymKind by import type: CODE -> function, DATA/CONST -> object. 224 * Both are defined at section_id=OBJ_SEC_NONE, value=0, size=0 — the 225 * shape read_coff_dso would produce for a DLL export. */ 226 SymKind k = (import_type == IMPORT_OBJECT_CODE) ? SK_FUNC : SK_OBJ; 227 228 Sym sn = pool_intern_slice( 229 c->global, (Slice){.s = (const char*)body, .len = sym_name_len}); 230 ObjSymId id = 231 obj_symbol_ex(ob, sn, SB_GLOBAL, SV_DEFAULT, k, OBJ_SEC_NONE, 0, 0, 0); 232 obj_sym_mark_referenced(ob, id); 233 234 /* `__imp_<name>` alias for codegen that refers to the IAT slot 235 * directly (mingw convention). Even code imports use an object-like 236 * `__imp_` symbol because references to it want the IAT data slot, not 237 * the callable import stub. */ 238 static const char kImpPrefix[] = "__imp_"; 239 u32 imp_len = (u32)(sizeof kImpPrefix - 1u) + sym_name_len; 240 char* imp_buf = arena_array(c->scratch, char, imp_len); 241 memcpy(imp_buf, kImpPrefix, sizeof kImpPrefix - 1u); 242 memcpy(imp_buf + (sizeof kImpPrefix - 1u), body, sym_name_len); 243 Sym imp_sn = 244 pool_intern_slice(c->global, (Slice){.s = imp_buf, .len = imp_len}); 245 ObjSymId imp_id = obj_symbol_ex(ob, imp_sn, SB_GLOBAL, SV_DEFAULT, SK_OBJ, 246 OBJ_SEC_NONE, 0, 0, 0); 247 obj_sym_mark_referenced(ob, imp_id); 248 249 /* Stash the DLL name so the archive-ingestion layer (Phase 4.3) can 250 * route this builder as a DSO with the DLL as soname. */ 251 Sym dll_sn = pool_intern_slice( 252 c->global, (Slice){.s = (const char*)dll_p, .len = dll_name_len}); 253 obj_set_coff_import_dll(ob, dll_sn); 254 255 /* NameType decides what the loader resolves IN THE DLL, which can differ 256 * from the local symbol name. The local symbol keeps its own name (so kit's 257 * references resolve); the PE hint/name-table entry must use the real 258 * export name. Record an override whenever they differ. */ 259 Slice imp_name = {.s = (const char*)body, .len = sym_name_len}; 260 if (name_type == IMPORT_OBJECT_NAME_NOPREFIX || 261 name_type == IMPORT_OBJECT_NAME_UNDECORATE) { 262 /* Strip one leading decoration char (?, @, or _). UNDECORATE also 263 * truncates at the first '@' (MS @argbytes stdcall/fastcall suffix). */ 264 if (imp_name.len > 0 && (imp_name.s[0] == '?' || imp_name.s[0] == '@' || 265 imp_name.s[0] == '_')) { 266 ++imp_name.s; 267 --imp_name.len; 268 } 269 if (name_type == IMPORT_OBJECT_NAME_UNDECORATE) { 270 u32 at = 0; 271 while (at < imp_name.len && imp_name.s[at] != '@') ++at; 272 imp_name.len = at; 273 } 274 } else if (name_type == IMPORT_OBJECT_NAME_EXPORTAS) { 275 /* The real export name is a third NUL-terminated string after the DLL. */ 276 u32 exp_off = dll_name_off + dll_name_len + 1u; 277 if (exp_off >= size_of_data) 278 compiler_panic(c, SRCLOC_NONE, 279 "read_coff: short-import EXPORTAS missing export name"); 280 const u8* exp_p = body + exp_off; 281 u32 exp_max = size_of_data - exp_off; 282 u32 exp_len = 0; 283 while (exp_len < exp_max && exp_p[exp_len] != '\0') ++exp_len; 284 if (exp_len == exp_max) 285 compiler_panic(c, SRCLOC_NONE, 286 "read_coff: short-import EXPORTAS name not NUL-terminated"); 287 imp_name.s = (const char*)exp_p; 288 imp_name.len = exp_len; 289 } 290 if (imp_name.len != sym_name_len || 291 memcmp(imp_name.s, body, sym_name_len) != 0) { 292 obj_set_coff_import_name(ob, pool_intern_slice(c->global, imp_name)); 293 } 294 295 obj_finalize(ob); 296 return ob; 297 } 298 299 ObjBuilder* read_coff(Compiler* c, const char* name, const u8* data, 300 size_t len) { 301 (void)name; 302 303 /* ---- Step 0: header validation ---- */ 304 if (len < COFF_FILE_HEADER_SIZE) 305 compiler_panic(c, SRCLOC_NONE, "read_coff: input shorter than COFF header"); 306 307 /* Microsoft short-import record? (Sig1=0, Sig2=0xFFFF.) These live 308 * as members of .lib archives and stand in for a long-form import 309 * object. Detect at entry; the rest of read_coff assumes the 310 * input is a real IMAGE_FILE_HEADER. */ 311 if (len >= 4 && coff_rd_u16(data + 0) == IMPORT_OBJECT_HDR_SIG1 && 312 coff_rd_u16(data + 2) == IMPORT_OBJECT_HDR_SIG2) { 313 return read_coff_short_import(c, name, data, len); 314 } 315 316 /* PE image? A linked .exe/.dll begins with the DOS 'MZ' stub, not a bare 317 * IMAGE_FILE_HEADER — dispatch to the image reader, which walks the 318 * DOS -> PE-sig -> file/optional headers. (Placed before the offset-0 319 * machine read below, which assumes a bare header, and before the 320 * optional-header rejection.) */ 321 if (len >= 2 && coff_rd_u16(data + 0) == IMAGE_DOS_SIGNATURE) 322 return read_coff_image(c, name, data, len); 323 324 u16 machine = coff_rd_u16(data + 0); 325 u16 nsections = coff_rd_u16(data + 2); 326 /* data + 4: TimeDateStamp (4 bytes, ignored). */ 327 u32 ptr_to_symtab = coff_rd_u32(data + 8); 328 u32 nsymbols = coff_rd_u32(data + 12); 329 u16 size_opt_hdr = coff_rd_u16(data + 16); 330 /* data + 18: Characteristics (2 bytes, currently ignored). */ 331 332 if (size_opt_hdr != 0) 333 compiler_panic(c, SRCLOC_NONE, 334 "read_coff: input has optional header (size=%u); " 335 "use read_coff_pe for executables", 336 (u32)size_opt_hdr); 337 338 if (machine != IMAGE_FILE_MACHINE_AMD64 && 339 machine != IMAGE_FILE_MACHINE_ARM64 && 340 machine != IMAGE_FILE_MACHINE_ARM64EC) 341 compiler_panic(c, SRCLOC_NONE, "read_coff: unsupported machine %#x", 342 (u32)machine); 343 344 const ObjFormatImpl* fmt = obj_format_lookup(KIT_OBJ_COFF); 345 const ObjCoffArchOps* coff = 346 fmt && fmt->coff_machine ? fmt->coff_machine(machine) : NULL; 347 if (!coff || !coff->reloc_from) 348 compiler_panic(c, SRCLOC_NONE, "read_coff: no arch impl for machine %#x", 349 (u32)machine); 350 u32 (*reloc_from)(u32) = coff->reloc_from; 351 352 if ((u64)COFF_FILE_HEADER_SIZE + 353 (u64)nsections * (u64)COFF_SECTION_HEADER_SIZE > 354 (u64)len) 355 compiler_panic(c, SRCLOC_NONE, "read_coff: section header table out of range"); 356 357 /* ---- Step 1: bootstrap, locate strtab ---- */ 358 /* Strtab is at PointerToSymbolTable + NumberOfSymbols * 18. When the 359 * file has no symbol table (ptr=0, n=0) we treat strtab as empty. */ 360 const u8* strtab = NULL; 361 u32 strtab_size = 0; 362 if (ptr_to_symtab && nsymbols) { 363 u64 symtab_end = (u64)ptr_to_symtab + (u64)nsymbols * (u64)COFF_SYMBOL_SIZE; 364 if (symtab_end + COFF_STRTAB_SIZE_FIELD_BYTES > (u64)len) 365 compiler_panic(c, SRCLOC_NONE, 366 "read_coff: symbol table / strtab header out of range"); 367 u32 declared = coff_rd_u32(data + symtab_end); 368 /* The size field is inclusive of the 4-byte prefix; treat <4 as 369 * "empty" (some tools write 0). */ 370 if (declared < COFF_STRTAB_SIZE_FIELD_BYTES) declared = 0; 371 if (declared) { 372 if (symtab_end + (u64)declared > (u64)len) 373 compiler_panic(c, SRCLOC_NONE, "read_coff: strtab body out of range"); 374 strtab = data + symtab_end; 375 strtab_size = declared; 376 } else { 377 strtab = data + symtab_end; 378 strtab_size = COFF_STRTAB_SIZE_FIELD_BYTES; 379 } 380 } 381 382 ObjBuilder* ob = obj_new(c); 383 if (!ob) compiler_panic(c, SRCLOC_NONE, "read_coff: obj_new failed"); 384 385 /* ---- Step 2: ingest sections ---- */ 386 CSecRec* secs = arena_array(c->scratch, CSecRec, nsections ? nsections : 1); 387 const u8* shdr_base = data + COFF_FILE_HEADER_SIZE; 388 for (u32 i = 0; i < nsections; ++i) { 389 CSecRec* s = &secs[i]; 390 parse_shdr(shdr_base + (u64)i * COFF_SECTION_HEADER_SIZE, s); 391 392 const char* nm; 393 u32 nlen; 394 resolve_section_name(s->raw_name, strtab, strtab_size, &nm, &nlen); 395 Sym sn = pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen}); 396 397 u16 kind = coff_sec_kind(nm, nlen, s->characteristics); 398 u16 flags = coff_sec_flags(nm, nlen, s->characteristics); 399 u32 align = coff_sec_align(s->characteristics); 400 401 int is_bss = (s->characteristics & IMAGE_SCN_CNT_UNINITIALIZED_DATA) != 0; 402 u16 sem = is_bss ? SSEM_NOBITS : SSEM_PROGBITS; 403 404 ObjSecId id = obj_section_ex(ob, sn, (SecKind)kind, (SecSem)sem, flags, 405 align, 0u, 0u, 0u); 406 if (id == OBJ_SEC_NONE) 407 compiler_panic(c, SRCLOC_NONE, 408 "read_coff: obj_section_ex failed for section %u", i); 409 s->obj_sec = id; 410 411 /* Preserve raw Characteristics so emit_coff can write back any bits 412 * the canonical SecFlag/SecSem mapping doesn't model (LNK_INFO, 413 * LNK_REMOVE, MEM_DISCARDABLE, MEM_SHARED, GPREL, alignment nibble). */ 414 obj_section_set_ext(ob, id, OBJ_EXT_COFF, s->characteristics, 0); 415 416 if (is_bss) { 417 u32 bss_size = s->virtual_size ? s->virtual_size : s->size_of_raw_data; 418 obj_reserve_bss(ob, id, bss_size, align); 419 } else if (s->size_of_raw_data) { 420 u64 end = (u64)s->pointer_to_raw_data + (u64)s->size_of_raw_data; 421 if (end > (u64)len) 422 compiler_panic(c, SRCLOC_NONE, "read_coff: section %u bytes out of range", 423 i); 424 u8* dst = obj_reserve(ob, id, s->size_of_raw_data); 425 memcpy(dst, data + s->pointer_to_raw_data, s->size_of_raw_data); 426 } 427 } 428 429 /* ---- Step 3: ingest symbols (with aux-record awareness) ---- 430 * sym_to_obj is indexed by RAW symbol-table index (including aux 431 * slots), so reloc.SymbolTableIndex resolves directly without 432 * adjusting for skipped aux records. Aux slots map to OBJ_SYM_NONE. */ 433 ObjSymId* sym_to_obj = 434 arena_zarray(c->scratch, ObjSymId, nsymbols ? nsymbols : 1); 435 436 /* Track section-symbol primary symtab index per section, stored as 437 * (raw_index + 1) so 0 can mean "not seen yet" without colliding 438 * with the (legitimate) first symbol-table slot — emit_coff always 439 * lays the first section's section-symbol at index 0. */ 440 u32* sec_sym_primary = arena_zarray(c->scratch, u32, nsections + 1u); 441 442 const u8* sym_base = data + ptr_to_symtab; 443 if (nsymbols) { 444 if ((u64)ptr_to_symtab + (u64)nsymbols * (u64)COFF_SYMBOL_SIZE > (u64)len) 445 compiler_panic(c, SRCLOC_NONE, "read_coff: symbol table body out of range"); 446 } 447 448 for (u32 i = 0; i < nsymbols;) { 449 const u8* p = sym_base + (u64)i * COFF_SYMBOL_SIZE; 450 const char* nm; 451 u32 nlen; 452 resolve_sym_name(p, strtab, strtab_size, &nm, &nlen); 453 454 u32 value = coff_rd_u32(p + 8); 455 i16 sec_num = (i16)coff_rd_u16(p + 12); 456 u16 type = coff_rd_u16(p + 14); 457 u8 sclass = p[16]; 458 u8 naux = p[17]; 459 460 /* FILE storage class: concatenate aux records' raw bytes (each 461 * 18 bytes, NUL-padded) for the source-file name. */ 462 if (sclass == IMAGE_SYM_CLASS_FILE) { 463 /* Build name from aux records (up to naux*18 bytes); fall back 464 * to the primary record's name if naux==0. */ 465 const char* fnm = nm; 466 u32 fnlen = nlen; 467 if (naux) { 468 /* Each aux record's 18 bytes are interpreted as raw file-name 469 * bytes; concatenate then trim trailing NULs. */ 470 u32 total = (u32)naux * COFF_SYMBOL_SIZE; 471 if ((u64)i + 1u + (u64)naux > (u64)nsymbols) 472 compiler_panic(c, SRCLOC_NONE, 473 "read_coff: FILE aux records extend past symbol " 474 "table"); 475 const u8* aux = p + COFF_SYMBOL_SIZE; 476 u32 n = 0; 477 while (n < total && aux[n] != '\0') ++n; 478 fnm = (const char*)aux; 479 fnlen = n; 480 } 481 Sym fsn = 482 fnlen ? pool_intern_slice(c->global, (Slice){.s = fnm, .len = fnlen}) 483 : 0; 484 ObjSymId id = obj_symbol_ex(ob, fsn, SB_LOCAL, SV_DEFAULT, SK_FILE, 485 OBJ_SEC_NONE, 0, 0, 0); 486 obj_sym_mark_referenced(ob, id); 487 sym_to_obj[i] = id; 488 i += 1u + naux; 489 continue; 490 } 491 492 /* Skip .bf/.ef debug pair primaries (FUNCTION storage class) and 493 * the END_OF_FUNCTION marker: they carry no symbol kit models. */ 494 if (sclass == IMAGE_SYM_CLASS_FUNCTION || 495 sclass == IMAGE_SYM_CLASS_END_OF_FUNCTION) { 496 sym_to_obj[i] = OBJ_SYM_NONE; 497 i += 1u + naux; 498 continue; 499 } 500 501 /* Resolve (bind, vis, kind, section_id, value, size, cmnalign). */ 502 SymBind bind = SB_LOCAL; 503 SymVis vis = SV_DEFAULT; 504 SymKind kind = SK_NOTYPE; 505 ObjSecId target_sec = OBJ_SEC_NONE; 506 u64 sym_value = 0; 507 u64 sym_size = 0; 508 u64 cmnalign = 0; 509 510 if (sec_num == IMAGE_SYM_UNDEFINED) { 511 /* Undef or common. EXTERNAL with Value > 0 is a common. */ 512 if (sclass == IMAGE_SYM_CLASS_EXTERNAL && value > 0) { 513 bind = SB_GLOBAL; 514 kind = SK_COMMON; 515 sym_size = value; 516 cmnalign = 1; /* COFF doesn't carry per-common alignment */ 517 } else { 518 bind = (sclass == IMAGE_SYM_CLASS_WEAK_EXTERNAL) ? SB_WEAK 519 : (sclass == IMAGE_SYM_CLASS_EXTERNAL) ? SB_GLOBAL 520 : SB_LOCAL; 521 kind = SK_UNDEF; 522 } 523 } else if (sec_num == IMAGE_SYM_ABSOLUTE) { 524 kind = SK_ABS; 525 sym_value = value; 526 bind = (sclass == IMAGE_SYM_CLASS_EXTERNAL) ? SB_GLOBAL : SB_LOCAL; 527 } else if (sec_num == IMAGE_SYM_DEBUG) { 528 /* Defined-in-debug — kit has no model for it. Skip with an 529 * OBJ_SYM_NONE entry; relocations against this slot will resolve 530 * to OBJ_SYM_NONE, which obj_reloc_ex tolerates. */ 531 sym_to_obj[i] = OBJ_SYM_NONE; 532 i += 1u + naux; 533 continue; 534 } else if (sec_num >= 1 && (u32)sec_num <= nsections) { 535 target_sec = secs[sec_num - 1].obj_sec; 536 sym_value = value; 537 switch (sclass) { 538 case IMAGE_SYM_CLASS_EXTERNAL: 539 bind = SB_GLOBAL; 540 break; 541 case IMAGE_SYM_CLASS_WEAK_EXTERNAL: 542 bind = SB_WEAK; 543 break; 544 case IMAGE_SYM_CLASS_STATIC: 545 case IMAGE_SYM_CLASS_LABEL: 546 default: 547 bind = SB_LOCAL; 548 break; 549 } 550 551 /* Detect SECTION symbols: STATIC, Value==0, name matches the 552 * section's own name, and the section has at least one aux 553 * record (the section-definition aux). Mark as SK_SECTION so 554 * emit_coff regenerates the synthetic entry. */ 555 int is_section_sym = 0; 556 if (sclass == IMAGE_SYM_CLASS_STATIC && value == 0 && naux >= 1) { 557 const CSecRec* cs = &secs[sec_num - 1]; 558 u32 raw_nlen = 0; 559 while (raw_nlen < 8 && cs->raw_name[raw_nlen] != '\0') ++raw_nlen; 560 if (raw_nlen == nlen && memcmp(cs->raw_name, nm, nlen) == 0) { 561 is_section_sym = 1; 562 } else if (cs->raw_name[0] == '/') { 563 /* Long-named section: compare the resolved name. */ 564 const char* rn; 565 u32 rnlen; 566 resolve_section_name(cs->raw_name, strtab, strtab_size, &rn, &rnlen); 567 if (rnlen == nlen && memcmp(rn, nm, nlen) == 0) is_section_sym = 1; 568 } 569 } 570 571 if (is_section_sym) { 572 kind = SK_SECTION; 573 sec_sym_primary[sec_num] = i + 1u; 574 } else if (sclass == IMAGE_SYM_CLASS_SECTION) { 575 kind = SK_SECTION; 576 } else if (sclass == IMAGE_SYM_CLASS_LABEL) { 577 kind = SK_NOTYPE; 578 } else if ((type >> 8) == IMAGE_SYM_DTYPE_FUNCTION) { 579 kind = SK_FUNC; 580 } else if (type == IMAGE_SYM_TYPE_NULL) { 581 kind = (bind == SB_LOCAL) ? SK_NOTYPE : SK_OBJ; 582 } else { 583 kind = SK_OBJ; 584 } 585 } else { 586 compiler_panic(c, SRCLOC_NONE, 587 "read_coff: symbol section number %d out of range", 588 (int)sec_num); 589 } 590 591 /* WEAK_EXTERNAL primary: aux record carries TagIndex + Characteristics. */ 592 if (sclass == IMAGE_SYM_CLASS_WEAK_EXTERNAL) bind = SB_WEAK; 593 594 Sym sn = 595 nlen ? pool_intern_slice(c->global, (Slice){.s = nm, .len = nlen}) : 0; 596 ObjSymId id = obj_symbol_ex(ob, sn, bind, vis, kind, target_sec, sym_value, 597 sym_size, cmnalign); 598 obj_sym_mark_referenced(ob, id); 599 sym_to_obj[i] = id; 600 601 /* Genuine WEAK_EXTERNAL alias declaration (IMAGE_WEAK_EXTERN_SEARCH_ALIAS): 602 * record the fall-back symbol (aux TagIndex) by name so the linker can 603 * resolve this weak symbol to its target directly. mingw x86_64 spells 604 * `_setjmp` this way, aliasing `__intrinsic_setjmp` — a redirection the 605 * link-time single-underscore heuristic can't derive. Other weak-external 606 * search policies (kit's own SB_WEAK emit uses SEARCH_LIBRARY with a 607 * self/zero TagIndex, i.e. "weak, no fallback") are left to that heuristic 608 * and the plain SB_WEAK-undef path. */ 609 if (sclass == IMAGE_SYM_CLASS_WEAK_EXTERNAL && naux >= 1 && sn != 0) { 610 const u8* aux = p + COFF_SYMBOL_SIZE; 611 u32 tag_index = coff_rd_u32(aux + 0); 612 u32 characteristics = coff_rd_u32(aux + 4); 613 if (characteristics == IMAGE_WEAK_EXTERN_SEARCH_ALIAS && 614 tag_index < nsymbols && tag_index != i) { 615 const u8* tp = sym_base + (u64)tag_index * COFF_SYMBOL_SIZE; 616 const char* tnm; 617 u32 tnlen; 618 resolve_sym_name(tp, strtab, strtab_size, &tnm, &tnlen); 619 if (tnlen != 0 && (tnlen != nlen || memcmp(tnm, nm, nlen) != 0)) { 620 Sym target = 621 pool_intern_slice(c->global, (Slice){.s = tnm, .len = tnlen}); 622 obj_set_weak_alias(ob, id, target); 623 } 624 } 625 } 626 i += 1u + naux; 627 } 628 629 /* ---- Step 4: stitch COMDAT groups from section-definition aux ---- 630 * Each COMDAT section has a STATIC primary symbol (the section 631 * symbol) followed by one section-definition aux record. Selection 632 * != 0 marks the section as a COMDAT member; the signature symbol 633 * is the section symbol itself (Number field's selection variant 634 * controls dedup policy at link time). */ 635 for (u32 s = 1; s <= nsections; ++s) { 636 u32 prim_plus1 = sec_sym_primary[s]; 637 if (!prim_plus1) continue; 638 u32 prim = prim_plus1 - 1u; 639 const CSecRec* cs = &secs[s - 1]; 640 if (!(cs->characteristics & IMAGE_SCN_LNK_COMDAT)) continue; 641 const u8* p = sym_base + (u64)prim * COFF_SYMBOL_SIZE; 642 u8 naux = p[17]; 643 if (!naux) continue; 644 const u8* aux = p + COFF_SYMBOL_SIZE; 645 /* Aux layout: Length(4), NumberOfRelocations(2), NumberOfLinenumbers(2), 646 * CheckSum(4), Number(2), Selection(1), Unused(3). */ 647 u16 assoc_number = coff_rd_u16(aux + 12); 648 u8 selection = aux[14]; 649 if (selection == 0) continue; 650 651 ObjSymId sig = sym_to_obj[prim]; 652 const ObjSym* sigsym = obj_symbol_get(ob, sig); 653 Sym gname = sigsym ? sigsym->name : 0; 654 ObjGroupId gid = obj_group(ob, gname, sig, (u32)selection); 655 obj_group_add_section(ob, gid, cs->obj_sec); 656 obj_section_set_group(ob, cs->obj_sec, gid); 657 658 /* ASSOCIATIVE: the COMDAT member is associated with another 659 * section's group. Add this section to that group's list too so 660 * dead-strip keeps them paired. */ 661 if (selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE && assoc_number >= 1 && 662 (u32)assoc_number <= nsections) { 663 u32 other_prim_plus1 = sec_sym_primary[assoc_number]; 664 if (other_prim_plus1) { 665 u32 other_prim = other_prim_plus1 - 1u; 666 const u8* op = sym_base + (u64)other_prim * COFF_SYMBOL_SIZE; 667 if (op[17]) { 668 const u8* oaux = op + COFF_SYMBOL_SIZE; 669 u8 osel = oaux[14]; 670 if (osel != 0) { 671 ObjSymId osig = sym_to_obj[other_prim]; 672 const ObjSym* osigsym = obj_symbol_get(ob, osig); 673 Sym ogname = osigsym ? osigsym->name : 0; 674 ObjGroupId ogid = obj_group(ob, ogname, osig, (u32)osel); 675 obj_group_add_section(ob, ogid, cs->obj_sec); 676 } 677 } 678 } 679 } 680 } 681 682 /* ---- Step 5: per-section relocations ---- */ 683 for (u32 i = 0; i < nsections; ++i) { 684 const CSecRec* s = &secs[i]; 685 if (!s->number_of_relocations) continue; 686 u64 reloc_end = (u64)s->pointer_to_relocations + 687 (u64)s->number_of_relocations * (u64)COFF_RELOC_SIZE; 688 if (reloc_end > (u64)len) 689 compiler_panic(c, SRCLOC_NONE, 690 "read_coff: relocation table for section %u out of range", 691 i); 692 const u8* rbase = data + s->pointer_to_relocations; 693 for (u32 j = 0; j < s->number_of_relocations; ++j) { 694 const u8* rp = rbase + (u64)j * COFF_RELOC_SIZE; 695 u32 r_va = coff_rd_u32(rp + 0); 696 u32 r_sym = coff_rd_u32(rp + 4); 697 u16 r_type = coff_rd_u16(rp + 8); 698 699 u32 kind = reloc_from(r_type); 700 if (kind == (u32)-1) 701 compiler_panic(c, SRCLOC_NONE, 702 "read_coff: unsupported reloc type %u for machine %#x", 703 (u32)r_type, (u32)machine); 704 705 ObjSymId target = OBJ_SYM_NONE; 706 if (r_sym < nsymbols) target = sym_to_obj[r_sym]; 707 708 /* COFF stores addends inline in the relocated field. Fold those 709 * bytes into Reloc.addend for the reloc kinds whose apply path 710 * overwrites the field. AMD64 REL32 also subtracts from a PC after 711 * the relocated field: plain REL32 is relative to P+4, and REL32_N is 712 * relative to P+N. Record that convention as an implicit negative 713 * addend so link_reloc_apply can stay format neutral. */ 714 /* ARM64 PAGEOFFSET_12L is one wire code for LDST{8,16,32,64,128}. 715 * The per-arch translator returns R_AARCH64_LDST64_ABS_LO12_NC by 716 * default; recover the actual access width from the patched LDR/ 717 * STR instruction's size field at bits [31:30] (and a SIMD/FP 718 * extension via bit 26 + opc[23]) so the linker applies the right 719 * scale. Mismatch panics at apply-time with "misaligned 720 * address" otherwise — see link_reloc.c. */ 721 if ((machine == IMAGE_FILE_MACHINE_ARM64 || 722 machine == IMAGE_FILE_MACHINE_ARM64EC) && 723 r_type == IMAGE_REL_ARM64_PAGEOFFSET_12L && s->size_of_raw_data && 724 (u64)r_va + 4u <= (u64)s->size_of_raw_data) { 725 const u8* ibytes = data + s->pointer_to_raw_data + r_va; 726 u32 instr = (u32)ibytes[0] | ((u32)ibytes[1] << 8) | 727 ((u32)ibytes[2] << 16) | ((u32)ibytes[3] << 24); 728 u32 sz = (instr >> 30) & 0x3u; 729 int is_simd = (instr >> 26) & 0x1u; 730 if (is_simd && ((instr >> 23) & 0x1u)) { 731 kind = R_AARCH64_LDST128_ABS_LO12_NC; 732 } else { 733 switch (sz) { 734 case 0: 735 kind = R_AARCH64_LDST8_ABS_LO12_NC; 736 break; 737 case 1: 738 kind = R_AARCH64_LDST16_ABS_LO12_NC; 739 break; 740 case 2: 741 kind = R_AARCH64_LDST32_ABS_LO12_NC; 742 break; 743 default: 744 kind = R_AARCH64_LDST64_ABS_LO12_NC; 745 break; 746 } 747 } 748 } 749 750 i64 addend = 0; 751 int has_explicit = 0; 752 if (machine == IMAGE_FILE_MACHINE_AMD64) { 753 i64 inline_addend = 0; 754 switch (r_type) { 755 case IMAGE_REL_AMD64_ADDR64: 756 if (coff_reloc_inline_addend(data, len, s, r_va, 8, 757 &inline_addend)) 758 addend = inline_addend; 759 break; 760 case IMAGE_REL_AMD64_ADDR32: 761 if (coff_reloc_inline_addend(data, len, s, r_va, 4, 762 &inline_addend)) 763 addend = inline_addend; 764 break; 765 case IMAGE_REL_AMD64_REL32: 766 if (coff_reloc_inline_addend(data, len, s, r_va, 4, 767 &inline_addend)) 768 addend = inline_addend; 769 addend -= 4; 770 break; 771 case IMAGE_REL_AMD64_REL32_1: 772 if (coff_reloc_inline_addend(data, len, s, r_va, 4, 773 &inline_addend)) 774 addend = inline_addend; 775 addend -= 1; 776 break; 777 case IMAGE_REL_AMD64_REL32_2: 778 if (coff_reloc_inline_addend(data, len, s, r_va, 4, 779 &inline_addend)) 780 addend = inline_addend; 781 addend -= 2; 782 break; 783 case IMAGE_REL_AMD64_REL32_3: 784 if (coff_reloc_inline_addend(data, len, s, r_va, 4, 785 &inline_addend)) 786 addend = inline_addend; 787 addend -= 3; 788 break; 789 case IMAGE_REL_AMD64_REL32_4: 790 if (coff_reloc_inline_addend(data, len, s, r_va, 4, 791 &inline_addend)) 792 addend = inline_addend; 793 addend -= 4; 794 break; 795 case IMAGE_REL_AMD64_REL32_5: 796 if (coff_reloc_inline_addend(data, len, s, r_va, 4, 797 &inline_addend)) 798 addend = inline_addend; 799 addend -= 5; 800 break; 801 default: 802 break; 803 } 804 } 805 806 obj_reloc_ex(ob, s->obj_sec, r_va, (RelocKind)kind, target, addend, 807 has_explicit, 0); 808 } 809 } 810 811 /* ---- Step 6: finalize and return ---- */ 812 obj_finalize(ob); 813 return ob; 814 }