link.c (72660B)
/* link_emit_coff: write a PE32+ executable image (the PE counterpart of
 * a Mach-O MH_EXECUTE file) to the caller-provided Writer.
 *
 * Phase 3.1 deliverable per doc/OBJ.md: skeleton + base-reloc
 * handling for the four standard PE sections. Import-table synthesis
 * (.idata / IAT) lands in Phase 3.2; per-arch IAT stub bytes in 3.3;
 * TLS directory in 3.5; debug directory in 3.6 — those code paths
 * panic loudly here so the strict-by-default posture surfaces them.
 *
 * File layout (in write order):
 *
 *   [DOS stub IMAGE_DOS_HEADER]             -- 64 bytes; e_lfanew=0x40
 *   [PE signature "PE\0\0"]                 -- 4 bytes
 *   [IMAGE_FILE_HEADER]                     -- 20 bytes
 *   [IMAGE_OPTIONAL_HEADER64]               -- 240 bytes (PE32+)
 *   [IMAGE_SECTION_HEADER * nsec]           -- 40 bytes each
 *   [pad to FileAlignment]
 *   [.text bytes, padded to FileAlignment]
 *   [.rdata bytes, padded to FileAlignment]
 *   [.data bytes, padded to FileAlignment]
 *   [.reloc bytes, padded to FileAlignment]
 *
 * .bss is uninitialized — it has a section header (with VirtualSize)
 * but no file bytes and PointerToRawData=0.
 *
 * RVAs follow SectionAlignment (0x1000); FileAlignment is 0x200; the
 * first section starts at RVA 0x1000 (right after the headers map).
 * ImageBase is the Win64 convention 0x140000000.
 *
 * Reloc strategy. The link layout pass has already placed every kept
 * input section into img->sections / img->segments under the ELF/Mach-O
 * coordinate system (image-relative vaddrs, often packed by permission
 * bucket). COFF wants a different packing — the four standard
 * sections at SectionAlignment-aligned RVAs — so this writer re-derives
 * per-input-section vaddrs from scratch and shifts each LinkSection /
 * symbol / LinkRelocApply by its section's per-section delta before
 * applying relocations. link_emit_macho takes the same tack for its
 * __DATA_CONST splits; the ELF writer leaves vaddrs alone because the
 * link layout already matches ELF's PT_LOAD shape.
*/ 40 41 #include "link/link.h" 42 43 #include <stdlib.h> 44 #include <string.h> 45 46 #include "core/core.h" 47 #include "core/heap.h" 48 #include "core/pool.h" 49 #include "core/slice.h" 50 #include "core/util.h" 51 #include "core/vec.h" 52 #include "link/link_internal.h" 53 #include "obj/coff/coff.h" 54 #include "obj/format.h" 55 56 /* ---- .idata layout constants ---- 57 * 58 * Per doc/OBJ.md: the .idata content is a concatenation of an 59 * IMAGE_IMPORT_DESCRIPTOR table (NULL-terminated), one ILT per DLL 60 * (each NULL-terminated u64 array), one IAT per DLL (same shape), 61 * a hint/name table, and a DLL-name string pool. Each block is 62 * pointer-sized aligned within the section. AArch64 import thunks use 63 * PAGEOFFSET_12L for 64-bit ILT/IAT slots, so those sub-blocks must be 64 * 8-byte aligned. */ 65 #define PE_IDATA_BLOCK_ALIGN 8u 66 /* Hint field on IMAGE_IMPORT_BY_NAME records. kit never has a real 67 * hint (the OS loader doesn't need one to do the bsearch on the DLL's 68 * export name table), so 0 is the canonical "no hint" value. */ 69 #define PE_IMPORT_HINT_NONE 0u 70 71 /* ---- PE/Win64 layout constants ---- 72 * 73 * Centralised here so the wire-format numbers in this TU stay named 74 * (and the magic-numbers rule in CLAUDE.md is honoured). Values match 75 * the PE/COFF spec + Win64 conventions; mingw-w64's ld defaults agree. 
*/ 76 #define PE_IMAGE_BASE LINK_PE_IMAGE_BASE 77 #define PE_SECTION_ALIGNMENT 0x1000u 78 #define PE_FILE_ALIGNMENT 0x200u 79 #define PE_FIRST_SECTION_RVA 0x1000u 80 #define PE_DOS_E_LFANEW 0x40u 81 #define PE_NUM_DATA_DIRS COFF_NUM_DATA_DIRECTORIES 82 #define PE_OPT_HDR_SIZE COFF_OPT_HDR64_SIZE 83 #define PE_LINKER_MAJOR 0u 84 #define PE_LINKER_MINOR 1u 85 #define PE_OS_MAJOR 6u /* Windows Vista+ — mingw default */ 86 #define PE_OS_MINOR 0u 87 #define PE_SUBSYS_MAJOR 6u 88 #define PE_SUBSYS_MINOR 0u 89 #define PE_STACK_RESERVE 0x100000ULL 90 #define PE_STACK_COMMIT 0x1000ULL 91 #define PE_HEAP_RESERVE 0x100000ULL 92 #define PE_HEAP_COMMIT 0x1000ULL 93 /* DllCharacteristics bits that apply regardless of relocatability. */ 94 #define PE_DLL_CHARS_BASE \ 95 (IMAGE_DLLCHARACTERISTICS_NX_COMPAT | \ 96 IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE) 97 /* ASLR bits — only valid when the image carries base relocations (PIE). 98 * Advertising DYNAMIC_BASE / HIGH_ENTROPY_VA alongside RELOCS_STRIPPED is 99 * contradictory: the loader has nothing to fix up, so it can't relocate. */ 100 #define PE_DLL_CHARS_ASLR \ 101 (IMAGE_DLLCHARACTERISTICS_HIGH_ENTROPY_VA | \ 102 IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE) 103 104 /* PE32+ DOS-stub-to-PE-signature offsets (manual, since we marshal 105 * field-by-field rather than memcpy'ing the packed struct). */ 106 #define PE_DOS_HDR_SIZE COFF_DOS_HEADER_SIZE 107 #define PE_SIG_SIZE 4u 108 #define PE_FILE_HDR_SIZE COFF_FILE_HEADER_SIZE 109 #define PE_SECTION_HDR_SIZE COFF_SECTION_HEADER_SIZE 110 111 /* Standard PE output buckets, plus .idata (import directory) and 112 * .reloc — both synthesised here rather than copied from input 113 * sections. Order matters: it's the on-image RVA order. 
*/ 114 typedef enum CoffBucket { 115 COFF_BUCKET_TEXT = 0, 116 COFF_BUCKET_RDATA = 1, 117 COFF_BUCKET_IDATA = 2, 118 COFF_BUCKET_DATA = 3, 119 COFF_BUCKET_TLS = 4, 120 COFF_BUCKET_BSS = 5, 121 COFF_BUCKET_PDATA = 6, 122 COFF_BUCKET_RELOC = 7, 123 COFF_NBUCKETS = 8, 124 } CoffBucket; 125 126 /* IMAGE_TLS_DIRECTORY64 wire size: u64*4 + u32*2 = 40 bytes. */ 127 #define COFF_TLS_DIRECTORY64_SIZE 40u 128 /* Byte offsets of the four u64 VA fields within IMAGE_TLS_DIRECTORY64 129 * — they need base relocations so ASLR can fix them up. */ 130 #define COFF_TLSDIR_OFF_START_ADDR 0u 131 #define COFF_TLSDIR_OFF_END_ADDR 8u 132 #define COFF_TLSDIR_OFF_INDEX_ADDR 16u 133 #define COFF_TLSDIR_OFF_CALLBACKS 24u 134 135 typedef struct CoffSection { 136 const char* name; /* short ASCII; <= 8 bytes including NUL pad */ 137 u32 characteristics; 138 u8* bytes; /* NULL for .bss / .reloc-before-build */ 139 u32 size; /* VirtualSize (real bytes; for .bss, mem size) */ 140 u32 size_raw; /* SizeOfRawData (file size, FileAlignment-padded) */ 141 u32 rva; /* VirtualAddress in image */ 142 u32 file_offset; /* PointerToRawData; 0 for .bss */ 143 u8 in_image; /* 1 if this bucket is emitted as a section */ 144 u8 has_file_bytes; /* 0 for .bss */ 145 u8 pad[2]; 146 } CoffSection; 147 148 /* ---- byte writer helpers ---- */ 149 150 static void coff_write_zeroes(Writer* w, u64 n) { 151 static const u8 zeroes[256] = {0}; 152 while (n) { 153 u64 step = n > sizeof(zeroes) ? sizeof(zeroes) : n; 154 kit_writer_write(w, zeroes, (size_t)step); 155 n -= step; 156 } 157 } 158 159 /* Return the COFF bucket for a kept LinkSection. SF_TLS sections route 160 * into the dedicated .tls bucket so SECREL relocations from TLS access 161 * code resolve against the merged TLS image, not against .data. 162 * Everything else partitions on SF_EXEC / SF_WRITE plus the SSEM_NOBITS 163 * bit for .bss. 
*/ 164 static CoffBucket coff_bucket_for(Compiler* c, const LinkSection* ls) { 165 if (ls->name) { 166 Slice nm = pool_slice(c->global, ls->name); 167 if (nm.s && nm.len >= 6u && memcmp(nm.s, ".pdata", 6u) == 0) 168 return COFF_BUCKET_PDATA; 169 } 170 if (ls->flags & SF_EXEC) return COFF_BUCKET_TEXT; 171 if (ls->flags & SF_TLS) return COFF_BUCKET_TLS; 172 if (ls->sem == SSEM_NOBITS) return COFF_BUCKET_BSS; 173 if (ls->flags & SF_WRITE) return COFF_BUCKET_DATA; 174 return COFF_BUCKET_RDATA; 175 } 176 177 /* True for relocation kinds that need an entry in .reloc so the OS 178 * loader can patch the site after ASLR picks a runtime ImageBase. 179 * PC-relative fixups don't need base-relocs — the displacement is 180 * load-invariant. */ 181 static int coff_reloc_needs_base_reloc(RelocKind k) { 182 return k == R_ABS64 || k == R_ABS32; 183 } 184 185 /* Look up the LinkSection whose [vaddr, vaddr+size] range covers the 186 * given image-relative address `v`, or return NULL. Used to attribute 187 * symbol vaddrs to a containing section so we can apply per-section 188 * vaddr deltas after re-laying out for PE. */ 189 static const LinkSection* coff_section_at(const LinkImage* img, u64 v) { 190 u32 i; 191 for (i = 0; i < img->nsections; ++i) { 192 const LinkSection* ls = &img->sections[i]; 193 if (v >= ls->vaddr && v <= ls->vaddr + ls->size) return ls; 194 } 195 return NULL; 196 } 197 198 /* Per-input-section delta map. Indexed by `LinkSection.id - 1`. 199 * Populated by coff_build_buckets. Consumed by every subsequent pass 200 * that needs to translate input-coordinate offsets (the world that 201 * img->sections / img->relocs live in) into PE-coordinate ones (where 202 * the writer plants bytes). delta is stored explicitly so callers 203 * avoid recomputing (new_rva + bucket.rva - old_vaddr) for every 204 * LinkRelocApply whose link_section_id points at the section. 
*/ 205 typedef struct CoffSecMap { 206 u32 new_rva; /* image-relative RVA after PE relayout */ 207 u32 new_file_off; /* file offset of the patched byte */ 208 i64 delta; /* new_rva - old_vaddr */ 209 u8 bucket; 210 u8 pad[3]; 211 } CoffSecMap; 212 213 /* TLS directory placement state. Populated when at least one SF_TLS 214 * section survives dead-strip; consumed by the optional-header writer, 215 * the .reloc builder (base-relocs for the four absolute VA fields), 216 * and the .rdata emit pass that writes the final 40-byte record. */ 217 typedef struct CoffTlsLayout { 218 int present; /* 1 iff at least one TLS section was kept */ 219 u32 dir_rdata_off; /* byte offset of the IMAGE_TLS_DIRECTORY64 within .rdata 220 */ 221 u32 tls_size; /* size of the merged .tls bucket */ 222 LinkSymId tls_index_sym; /* resolved _tls_index LinkSymbol */ 223 LinkSymId callbacks_sym; /* __xl_a when mingw's TLS callbacks are linked */ 224 u64 callbacks_addend; /* mingw points past the leading NULL sentinel */ 225 } CoffTlsLayout; 226 227 static LinkSymId coff_find_sym(LinkImage* img, const char* name) { 228 Sym sym = pool_intern_slice(img->c->global, slice_from_cstr(name)); 229 u32 n = LinkSyms_count(&img->syms); 230 u32 i; 231 for (i = 0; i < n; ++i) { 232 const LinkSymbol* s = LinkSyms_at(&img->syms, i); 233 if (s->name == sym) return (LinkSymId)(i + 1); 234 } 235 return LINK_SYM_NONE; 236 } 237 238 /* Locate _tls_index by name in the resolved symbol table. mingw's 239 * libmingwex defines it (as part of tlsmcrt); without a CRT the link 240 * fails here with a clear message rather than producing a TLS 241 * directory pointing at a stale address. 
*/ 242 static LinkSymId coff_find_tls_index_sym(LinkImage* img) { 243 return coff_find_sym(img, "_tls_index"); 244 } 245 246 static const LinkSection* coff_symbol_section(const LinkImage* img, 247 const LinkSymbol* s) { 248 if (s->name) { 249 Slice nm_s = pool_slice(img->c->global, s->name); 250 const char* nm = nm_s.s; 251 size_t n = nm_s.len; 252 const char* sec_name = NULL; 253 if (nm && n == 6 && memcmp(nm, "__xd_a", 6) == 0) 254 sec_name = ".CRT$XDA"; 255 else if (nm && n == 6 && memcmp(nm, "__xd_z", 6) == 0) 256 sec_name = ".CRT$XDZ"; 257 else if (nm && n == 6 && memcmp(nm, "__xl_a", 6) == 0) 258 sec_name = ".CRT$XLA"; 259 else if (nm && n == 6 && memcmp(nm, "__xl_c", 6) == 0) 260 sec_name = ".CRT$XLC"; 261 else if (nm && n == 6 && memcmp(nm, "__xl_d", 6) == 0) 262 sec_name = ".CRT$XLD"; 263 else if (nm && n == 6 && memcmp(nm, "__xl_z", 6) == 0) 264 sec_name = ".CRT$XLZ"; 265 if (sec_name) { 266 u32 i; 267 for (i = 0; i < img->nsections; ++i) { 268 const LinkSection* ls = &img->sections[i]; 269 if (ls->name && 270 slice_eq_cstr(pool_slice(img->c->global, ls->name), sec_name)) 271 return ls; 272 } 273 } 274 } 275 if (s->section_id != LINK_SEC_NONE && s->section_id <= img->nsections) 276 return &img->sections[s->section_id - 1]; 277 return coff_section_at(img, s->vaddr); 278 } 279 280 static u64 coff_symbol_final_va(const LinkImage* img, 281 const CoffSection out[COFF_NBUCKETS], 282 const CoffSecMap* map, LinkSymId id, 283 const char* what) { 284 const LinkSymbol* s = LinkSyms_at(&img->syms, id - 1); 285 if (!s->defined || s->kind == SK_ABS) { 286 compiler_panic(img->c, SRCLOC_NONE, 287 "link_emit_coff: `%.*s` is not a defined section-bound " 288 "symbol", 289 SLICE_ARG(slice_from_cstr(what))); 290 } 291 const LinkSection* sec = coff_symbol_section(img, s); 292 if (!sec) { 293 compiler_panic(img->c, SRCLOC_NONE, 294 "link_emit_coff: `%.*s` has no containing section", 295 SLICE_ARG(slice_from_cstr(what))); 296 } 297 u8 b = map[sec->id - 1].bucket; 298 return 
PE_IMAGE_BASE + (u64)out[b].rva + (u64)map[sec->id - 1].new_rva + 299 (s->vaddr - sec->vaddr); 300 } 301 302 /* Reserve 40 bytes at the tail of the .rdata bucket for the 303 * IMAGE_TLS_DIRECTORY64 record. Records the offset for later emit and 304 * grows the bucket if needed. The bytes start zeroed; coff_emit_tls_dir 305 * fills them in once final RVAs are known. */ 306 static void coff_plan_tls_layout(LinkImage* img, CoffSection out[COFF_NBUCKETS], 307 u32* rdata_cap, CoffTlsLayout* tls) { 308 memset(tls, 0, sizeof(*tls)); 309 if (out[COFF_BUCKET_TLS].size == 0) return; 310 tls->present = 1; 311 tls->tls_size = out[COFF_BUCKET_TLS].size; 312 tls->tls_index_sym = coff_find_tls_index_sym(img); 313 if (tls->tls_index_sym == LINK_SYM_NONE) { 314 compiler_panic(img->c, SRCLOC_NONE, 315 "link_emit_coff: .tls section requires `_tls_index` " 316 "(provided by mingw libmingwex / tlsmcrt.o) — none of " 317 "the linked inputs define it"); 318 } 319 /* IMAGE_TLS_DIRECTORY64 needs 8-byte alignment for its u64 fields; 320 * round the .rdata size up before reserving the 40-byte record. */ 321 tls->callbacks_sym = coff_find_sym(img, "__xl_a"); 322 if (tls->callbacks_sym != LINK_SYM_NONE) { 323 tls->callbacks_addend = 8; 324 } else { 325 tls->callbacks_sym = coff_find_sym(img, "__xl_c"); 326 tls->callbacks_addend = 0; 327 } 328 u32 rdata_size = (u32)ALIGN_UP((u64)out[COFF_BUCKET_RDATA].size, 8ull); 329 u32 need = rdata_size + COFF_TLS_DIRECTORY64_SIZE; 330 if (need > *rdata_cap) { 331 (void)VEC_GROW(img->heap, out[COFF_BUCKET_RDATA].bytes, *rdata_cap, need); 332 } 333 /* Zero any padding bytes introduced by the alignment bump and the 334 * directory slot itself. 
*/ 335 if (rdata_size > out[COFF_BUCKET_RDATA].size) { 336 memset(out[COFF_BUCKET_RDATA].bytes + out[COFF_BUCKET_RDATA].size, 0, 337 rdata_size - out[COFF_BUCKET_RDATA].size); 338 } 339 memset(out[COFF_BUCKET_RDATA].bytes + rdata_size, 0, 340 COFF_TLS_DIRECTORY64_SIZE); 341 tls->dir_rdata_off = rdata_size; 342 out[COFF_BUCKET_RDATA].size = need; 343 } 344 345 /* Write the IMAGE_TLS_DIRECTORY64 bytes once all bucket RVAs are 346 * final. Each u64 VA field gets ImageBase + RVA; the base-reloc pass 347 * will emit IMAGE_REL_BASED_DIR64 entries so ASLR keeps them valid. */ 348 static void coff_emit_tls_dir(const LinkImage* img, 349 const CoffSection out[COFF_NBUCKETS], 350 const CoffSecMap* map, const CoffTlsLayout* tls) { 351 if (!tls->present) return; 352 u64 tls_start = PE_IMAGE_BASE + (u64)out[COFF_BUCKET_TLS].rva; 353 u64 tls_end = tls_start + (u64)tls->tls_size; 354 u64 idx_vaddr = 355 coff_symbol_final_va(img, out, map, tls->tls_index_sym, "_tls_index"); 356 const char* callbacks_name = tls->callbacks_addend ? "__xl_a" : "__xl_c"; 357 u64 callbacks_vaddr = 358 tls->callbacks_sym 359 ? 
coff_symbol_final_va(img, out, map, tls->callbacks_sym, 360 callbacks_name) + 361 tls->callbacks_addend 362 : 0; 363 364 u8* p = out[COFF_BUCKET_RDATA].bytes + tls->dir_rdata_off; 365 wr_u64_le(p + COFF_TLSDIR_OFF_START_ADDR, tls_start); 366 wr_u64_le(p + COFF_TLSDIR_OFF_END_ADDR, tls_end); 367 wr_u64_le(p + COFF_TLSDIR_OFF_INDEX_ADDR, idx_vaddr); 368 wr_u64_le(p + COFF_TLSDIR_OFF_CALLBACKS, callbacks_vaddr); 369 wr_u32_le(p + 32, 0); /* SizeOfZeroFill */ 370 wr_u32_le(p + 36, 0); /* Characteristics */ 371 } 372 373 static void coff_define_tls_used(LinkImage* img, 374 const CoffSection out[COFF_NBUCKETS], 375 const CoffTlsLayout* tls) { 376 if (!tls->present) return; 377 if (!img->linker) return; 378 link_emit_boundary_sym(img->linker, img, "_tls_used", 379 PE_IMAGE_BASE + (u64)out[COFF_BUCKET_RDATA].rva + 380 (u64)tls->dir_rdata_off); 381 } 382 383 /* ---- import-table synthesis (Phase 3.2) --------------------------- 384 * 385 * Per doc/OBJ.md: every LinkSymbol with `imported = 1` gets 386 * routed through an IAT slot synthesized in `.idata`. Function 387 * imports additionally receive a small per-arch stub in `.text` 388 * (`ff 25 disp32` on x64 / `adrp;ldr;br` on aa64) so a direct CALL26 389 * or PC32 against the symbol lands on a stub that indirects through 390 * the IAT. Data imports skip the stub — the symbol's final vaddr is 391 * just the IAT slot vaddr, and code-gen emits a `mov rax, [slot]` 392 * sequence the same way it would for any other GOT-style load. 393 * 394 * kit's COFF code-gen uses direct symbol references; there is no 395 * separate `__imp_<name>` LinkSymbol consulted at link time. The 396 * IAT-slot rewrite happens entirely by overriding the imported 397 * symbol's vaddr in apply_all_relocs. 
*/ 398 399 typedef struct CoffImport { 400 LinkSymId sym; /* canonical LinkSymId from img->syms */ 401 Sym import_name; /* DLL export name override (short-import NameType); 0=use sym */ 402 u32 dll_idx; /* index into CoffImportTable.dlls */ 403 u32 stub_off; /* offset in .text bucket (functions only) */ 404 u32 iat_off; /* offset in .idata IAT block */ 405 u32 ilt_off; /* offset in .idata ILT block */ 406 u32 hint_off; /* offset in .idata hint/name table */ 407 u8 is_func; 408 u8 pad[3]; 409 } CoffImport; 410 411 typedef struct CoffImportDll { 412 Sym soname; 413 u32 first; /* index of first import in CoffImportTable.imports */ 414 u32 count; 415 u32 ilt_off; /* offset of this DLL's ILT block in .idata */ 416 u32 iat_off; /* offset of this DLL's IAT block in .idata */ 417 u32 name_off; /* offset of DLL name string in .idata */ 418 } CoffImportDll; 419 420 typedef struct CoffImportTable { 421 CoffImport* imports; 422 u32 nimports; 423 u32 imports_cap; /* heap-allocation size for cleanup */ 424 u32 nfunc_imports; /* subset of nimports that needs a .text stub */ 425 CoffImportDll* dlls; 426 u32 ndlls; 427 u32 dlls_cap; /* heap-allocation size for cleanup */ 428 /* Offsets within .idata of the five sub-blocks. Filled in by 429 * coff_plan_idata_layout once nimports / ndlls is known. */ 430 u32 desc_off; /* always 0 — descriptors come first */ 431 u32 desc_size; 432 u32 ilt_base; 433 u32 ilt_total; 434 u32 iat_base; 435 u32 iat_total; 436 u32 hint_base; 437 u32 hint_total; 438 u32 name_base; 439 u32 name_total; 440 u32 idata_size; 441 /* Stub region in .text bucket. Stubs are appended after every 442 * input .text section has been bucketed. stub_text_off is the 443 * bucket-local offset of the first stub; per-import stub offsets 444 * are stored in CoffImport.stub_off. 
*/ 445 u32 stub_text_off; 446 u32 stub_total; 447 } CoffImportTable; 448 449 /* Sort comparator: imports grouped by DLL slot, stable on input 450 * order within a DLL (sort is stable enough via secondary key). */ 451 static int coff_import_cmp(const void* a, const void* b) { 452 const CoffImport* ia = (const CoffImport*)a; 453 const CoffImport* ib = (const CoffImport*)b; 454 if (ia->dll_idx < ib->dll_idx) return -1; 455 if (ia->dll_idx > ib->dll_idx) return 1; 456 /* Secondary: LinkSymId so the order is reproducible. */ 457 if (ia->sym < ib->sym) return -1; 458 if (ia->sym > ib->sym) return 1; 459 return 0; 460 } 461 462 static const char* coff_import_lookup_name(Compiler* c, const LinkSymbol* s, 463 size_t* nlen_out) { 464 Slice nm_s = s->name ? pool_slice(c->global, s->name) : SLICE_NULL; 465 const char* nm = nm_s.s; 466 size_t nlen = nm_s.len; 467 static const char kImpPrefix[] = "__imp_"; 468 const size_t kImpPrefixLen = sizeof(kImpPrefix) - 1u; 469 if (nm && nlen > kImpPrefixLen && 470 memcmp(nm, kImpPrefix, kImpPrefixLen) == 0) { 471 nm += kImpPrefixLen; 472 nlen -= kImpPrefixLen; 473 } 474 if (nlen_out) *nlen_out = nlen; 475 return nm; 476 } 477 478 /* The name placed in the PE hint/name table for an import. Honors the 479 * short-import NameType override (CoffImport.import_name, e.g. EXPORTAS's real 480 * DLL export name) when present, else derives it from the symbol name. */ 481 static const char* coff_import_emit_name(Compiler* c, const CoffImport* imp, 482 const LinkSymbol* s, size_t* nlen_out) { 483 if (imp->import_name) { 484 Slice nm_s = pool_slice(c->global, imp->import_name); 485 if (nlen_out) *nlen_out = nm_s.len; 486 return nm_s.s; 487 } 488 return coff_import_lookup_name(c, s, nlen_out); 489 } 490 491 /* True iff the import classifies as function-like. 
Mirrors the ELF 492 * `sym_is_func_import` heuristic: if the canonical kind is known 493 * we trust it, otherwise we default to function (which matches the 494 * COFF code-gen contract — direct calls are by far the common case 495 * and a data import wrongly stubbed would still fail loudly via the 496 * IAT-routed call). */ 497 static int coff_import_is_func(Compiler* c, const LinkSymbol* s) { 498 if (s->name) { 499 Slice nm_s = pool_slice(c->global, s->name); 500 const char* nm = nm_s.s; 501 size_t nlen = nm_s.len; 502 if (nm && nlen > 6u && memcmp(nm, "__imp_", 6u) == 0) return 0; 503 } 504 if (s->kind == SK_FUNC || s->kind == SK_IFUNC) return 1; 505 if (s->kind == SK_OBJ) return 0; 506 /* SK_UNDEF / SK_NOTYPE: assume function (the common case). */ 507 return 1; 508 } 509 510 /* Walk LinkSyms, collect imports, group by DLL soname. Returns 1 if 511 * any imports were collected, 0 otherwise (caller skips the entire 512 * .idata path). */ 513 static int coff_collect_imports(LinkImage* img, CoffImportTable* it) { 514 Heap* heap = img->heap; 515 Compiler* c = img->c; 516 Linker* l = img->linker; 517 u32 nsyms = LinkSyms_count(&img->syms); 518 u32 imp_cap = 0; 519 u32 dll_cap = 0; 520 u32 i; 521 522 memset(it, 0, sizeof(*it)); 523 if (!l) return 0; 524 for (i = 0; i < nsyms; ++i) { 525 LinkSymbol* s = LinkSyms_at(&img->syms, i); 526 LinkInput* in; 527 u32 dll_idx = (u32)-1; 528 u32 d; 529 if (!s->imported) continue; 530 if (s->name == 0) continue; 531 if (s->dso_input_id == LINK_INPUT_NONE) { 532 compiler_panic(c, SRCLOC_NONE, 533 "link_emit_coff: imported symbol has no providing DSO"); 534 } 535 /* img->globals only carries defined globals/weaks; imported undefs 536 * never land there. Dedup by name: skip if any earlier slot 537 * already collected this name. 
*/ 538 { 539 int dup = 0; 540 for (u32 k = 0; k < it->nimports; ++k) { 541 LinkSymbol* prev = LinkSyms_at(&img->syms, it->imports[k].sym - 1); 542 if (prev->name == s->name) { 543 dup = 1; 544 break; 545 } 546 } 547 if (dup) continue; 548 } 549 if (s->dso_input_id - 1u >= LinkInputs_count(&l->inputs)) { 550 compiler_panic(c, SRCLOC_NONE, 551 "link_emit_coff: import dso_input_id out of range"); 552 } 553 in = LinkInputs_at(&l->inputs, s->dso_input_id - 1u); 554 if (in->soname == 0) { 555 compiler_panic(c, SRCLOC_NONE, 556 "link_emit_coff: providing DSO has no soname; cannot " 557 "emit IMAGE_IMPORT_DESCRIPTOR.Name"); 558 } 559 /* Find-or-add the DLL slot. */ 560 for (d = 0; d < it->ndlls; ++d) { 561 if (it->dlls[d].soname == in->soname) { 562 dll_idx = d; 563 break; 564 } 565 } 566 if (dll_idx == (u32)-1) { 567 if (VEC_GROW(heap, it->dlls, dll_cap, it->ndlls + 1u)) 568 compiler_panic(c, SRCLOC_NONE, "link_emit_coff: oom on import dlls"); 569 dll_idx = it->ndlls++; 570 memset(&it->dlls[dll_idx], 0, sizeof(it->dlls[dll_idx])); 571 it->dlls[dll_idx].soname = in->soname; 572 } 573 if (VEC_GROW(heap, it->imports, imp_cap, it->nimports + 1u)) 574 compiler_panic(c, SRCLOC_NONE, "link_emit_coff: oom on imports"); 575 memset(&it->imports[it->nimports], 0, sizeof(it->imports[it->nimports])); 576 it->imports[it->nimports].sym = s->id; 577 it->imports[it->nimports].import_name = in->coff_import_name; 578 it->imports[it->nimports].dll_idx = dll_idx; 579 it->imports[it->nimports].is_func = (u8)coff_import_is_func(c, s); 580 if (it->imports[it->nimports].is_func) ++it->nfunc_imports; 581 ++it->nimports; 582 it->dlls[dll_idx].count++; 583 } 584 if (it->nimports == 0) return 0; 585 /* Re-bucket the imports array so each DLL's run is contiguous. */ 586 qsort(it->imports, it->nimports, sizeof(*it->imports), coff_import_cmp); 587 /* Fix up CoffImportDll.first now that imports[] is sorted. 
*/ 588 { 589 u32 cur = 0; 590 for (u32 d = 0; d < it->ndlls; ++d) { 591 it->dlls[d].first = cur; 592 cur += it->dlls[d].count; 593 } 594 } 595 it->imports_cap = imp_cap; 596 it->dlls_cap = dll_cap; 597 return 1; 598 } 599 600 static void coff_imports_free(LinkImage* img, CoffImportTable* it) { 601 Heap* heap = img->heap; 602 if (it->imports) { 603 heap->free(heap, it->imports, 604 (size_t)it->imports_cap * sizeof(*it->imports)); 605 } 606 if (it->dlls) { 607 heap->free(heap, it->dlls, (size_t)it->dlls_cap * sizeof(*it->dlls)); 608 } 609 } 610 611 /* Compute every per-block / per-import offset inside .idata and the 612 * total .idata size in bytes. Also assigns per-import hint/name and 613 * dll-name offsets so the descriptor table can reference them by RVA 614 * later (RVAs need the bucket's final RVA, added in coff_emit_idata). */ 615 static void coff_plan_idata_layout(LinkImage* img, CoffImportTable* it) { 616 Compiler* c = img->c; 617 u32 off; 618 619 /* Block 1: import descriptors (one per DLL + zero terminator). */ 620 it->desc_off = 0; 621 it->desc_size = (it->ndlls + 1u) * COFF_IMPORT_DESCRIPTOR_SIZE; 622 off = (u32)ALIGN_UP((u64)it->desc_size, (u64)PE_IDATA_BLOCK_ALIGN); 623 624 /* Block 2: ILTs. Per DLL: count entries + 1 (terminator), 8 B each. */ 625 it->ilt_base = off; 626 for (u32 d = 0; d < it->ndlls; ++d) { 627 it->dlls[d].ilt_off = off; 628 /* Per-import: assign ilt_off within this DLL's block. */ 629 for (u32 k = 0; k < it->dlls[d].count; ++k) { 630 it->imports[it->dlls[d].first + k].ilt_off = 631 off + k * (u32)COFF_THUNK_DATA64_SIZE; 632 } 633 off += (it->dlls[d].count + 1u) * (u32)COFF_THUNK_DATA64_SIZE; 634 } 635 it->ilt_total = off - it->ilt_base; 636 off = (u32)ALIGN_UP((u64)off, (u64)PE_IDATA_BLOCK_ALIGN); 637 638 /* Block 3: IATs (same shape as ILTs). 
*/ 639 it->iat_base = off; 640 for (u32 d = 0; d < it->ndlls; ++d) { 641 it->dlls[d].iat_off = off; 642 for (u32 k = 0; k < it->dlls[d].count; ++k) { 643 it->imports[it->dlls[d].first + k].iat_off = 644 off + k * (u32)COFF_THUNK_DATA64_SIZE; 645 } 646 off += (it->dlls[d].count + 1u) * (u32)COFF_THUNK_DATA64_SIZE; 647 } 648 it->iat_total = off - it->iat_base; 649 off = (u32)ALIGN_UP((u64)off, (u64)PE_IDATA_BLOCK_ALIGN); 650 651 /* Block 4: hint/name records. Each: u16 hint + NUL-term name + 652 * 1-byte pad if the resulting size is odd (PE/COFF spec). */ 653 it->hint_base = off; 654 for (u32 i = 0; i < it->nimports; ++i) { 655 LinkSymbol* s = LinkSyms_at(&img->syms, it->imports[i].sym - 1); 656 size_t nlen = 0; 657 const char* nm = coff_import_emit_name(c, &it->imports[i], s, &nlen); 658 if (!nm || nlen == 0) 659 compiler_panic(c, SRCLOC_NONE, 660 "link_emit_coff: imported symbol has empty name"); 661 it->imports[i].hint_off = off; 662 /* hint (2 B) + name (nlen + 1) + optional pad to even. */ 663 u32 rec = 2u + (u32)nlen + 1u; 664 if (rec & 1u) ++rec; 665 off += rec; 666 } 667 it->hint_total = off - it->hint_base; 668 off = (u32)ALIGN_UP((u64)off, (u64)PE_IDATA_BLOCK_ALIGN); 669 670 /* Block 5: DLL name strings (NUL-terminated). */ 671 it->name_base = off; 672 for (u32 d = 0; d < it->ndlls; ++d) { 673 Slice nm_s = pool_slice(c->global, it->dlls[d].soname); 674 const char* nm = nm_s.s; 675 size_t nlen = nm_s.len; 676 if (!nm || nlen == 0) 677 compiler_panic(c, SRCLOC_NONE, 678 "link_emit_coff: providing DSO has empty soname"); 679 it->dlls[d].name_off = off; 680 off += (u32)nlen + 1u; 681 } 682 it->name_total = off - it->name_base; 683 it->idata_size = off; 684 } 685 686 /* Append the function-import stubs to the .text bucket. Each stub is 687 * the format arch descriptor's stub size. Records each stub's bucket- 688 * local offset on the matching CoffImport so the per-symbol stub vaddr 689 * can be computed once the .text bucket's RVA is final. 
*/ 690 static void coff_append_stubs(LinkImage* img, CoffImportTable* it, 691 CoffSection* text_bucket, u32* text_bucket_cap) { 692 Heap* heap = img->heap; 693 Compiler* c = img->c; 694 const ObjFormatImpl* fmt = obj_format_lookup(KIT_OBJ_COFF); 695 const ObjCoffArchOps* arch = 696 fmt && fmt->coff_arch ? fmt->coff_arch(c->target.arch) : NULL; 697 u32 stub_size; 698 u32 stub_align; 699 u64 cur; 700 if (!arch || arch->stub_size == 0 || !arch->emit_iat_stub) { 701 compiler_panic(c, SRCLOC_NONE, 702 "link_emit_coff: arch has no COFF IAT stub emitter"); 703 } 704 stub_size = arch->stub_size; 705 /* Stubs are pure code; aligning to instruction alignment is enough. 706 * x64 wants byte-granular, aa64 wants 4 B; align to stub size as a 707 * convenient upper bound. */ 708 stub_align = stub_size; 709 cur = (u64)text_bucket->size; 710 cur = ALIGN_UP(cur, (u64)stub_align); 711 it->stub_text_off = (u32)cur; 712 for (u32 i = 0; i < it->nimports; ++i) { 713 if (!it->imports[i].is_func) continue; 714 it->imports[i].stub_off = (u32)cur; 715 cur += stub_size; 716 } 717 it->stub_total = (u32)cur - it->stub_text_off; 718 if (it->stub_total == 0) return; 719 /* Grow the .text bucket buffer to hold the new region. */ 720 u32 need = (u32)cur; 721 if (need > *text_bucket_cap) { 722 (void)VEC_GROW(heap, text_bucket->bytes, *text_bucket_cap, need); 723 } 724 /* Zero the alignment pad; stub bytes are written later by 725 * coff_emit_stubs once vaddrs are known. */ 726 if ((u32)cur > text_bucket->size) { 727 memset(text_bucket->bytes + text_bucket->size, 0, 728 (size_t)((u32)cur - text_bucket->size)); 729 } 730 text_bucket->size = (u32)cur; 731 } 732 733 /* Emit each function import's IAT stub into the .text bucket. Must 734 * run after coff_assign_layout has fixed both .text's RVA and 735 * .idata's RVA, since the stub bakes in the post-shift IAT slot 736 * displacement. 
*/ 737 static void coff_emit_stubs(LinkImage* img, const CoffImportTable* it, 738 const CoffSection out[COFF_NBUCKETS]) { 739 Compiler* c = img->c; 740 const ObjFormatImpl* fmt = obj_format_lookup(KIT_OBJ_COFF); 741 const ObjCoffArchOps* arch = 742 fmt && fmt->coff_arch ? fmt->coff_arch(c->target.arch) : NULL; 743 u64 img_base = PE_IMAGE_BASE; 744 u32 text_rva = out[COFF_BUCKET_TEXT].rva; 745 u32 idata_rva = out[COFF_BUCKET_IDATA].rva; 746 if (!arch || !arch->emit_iat_stub) { 747 compiler_panic(c, SRCLOC_NONE, 748 "link_emit_coff: arch has no COFF IAT stub emitter"); 749 } 750 for (u32 i = 0; i < it->nimports; ++i) { 751 u64 stub_va, slot_va; 752 if (!it->imports[i].is_func) continue; 753 stub_va = img_base + (u64)text_rva + (u64)it->imports[i].stub_off; 754 slot_va = img_base + (u64)idata_rva + (u64)it->imports[i].iat_off; 755 arch->emit_iat_stub(out[COFF_BUCKET_TEXT].bytes + it->imports[i].stub_off, 756 stub_va, slot_va); 757 } 758 } 759 760 /* Emit .idata content into the bucket buffer. Allocates the buffer 761 * here (size is already known from coff_plan_idata_layout). */ 762 static void coff_emit_idata(LinkImage* img, const CoffImportTable* it, 763 CoffSection out[COFF_NBUCKETS], 764 u32* idata_bucket_cap) { 765 Heap* heap = img->heap; 766 Compiler* c = img->c; 767 CoffSection* idata = &out[COFF_BUCKET_IDATA]; 768 u32 idata_rva = idata->rva; 769 u8* buf; 770 /* Allocate the bucket buffer (idata_size is already block-aligned). */ 771 buf = (u8*)heap->alloc(heap, it->idata_size, _Alignof(u64)); 772 if (!buf) 773 compiler_panic(c, SRCLOC_NONE, "link_emit_coff: oom on .idata buffer"); 774 memset(buf, 0, it->idata_size); 775 idata->bytes = buf; 776 idata->size = it->idata_size; 777 *idata_bucket_cap = it->idata_size; 778 779 /* Block 1: IMAGE_IMPORT_DESCRIPTOR table. 
*/ 780 for (u32 d = 0; d < it->ndlls; ++d) { 781 u8* p = buf + d * (u32)COFF_IMPORT_DESCRIPTOR_SIZE; 782 u32 ilt_rva = idata_rva + it->dlls[d].ilt_off; 783 u32 iat_rva = idata_rva + it->dlls[d].iat_off; 784 u32 name_rva = idata_rva + it->dlls[d].name_off; 785 wr_u32_le(p + 0, ilt_rva); /* OriginalFirstThunk */ 786 wr_u32_le(p + 4, 0u); /* TimeDateStamp */ 787 wr_u32_le(p + 8, 0u); /* ForwarderChain */ 788 wr_u32_le(p + 12, name_rva); /* Name */ 789 wr_u32_le(p + 16, iat_rva); /* FirstThunk */ 790 } 791 /* Trailing zero descriptor already zero-filled by memset. */ 792 793 /* Blocks 2+3: ILT + IAT. Both initially point at the same hint/name 794 * record for each import; the OS loader rewrites IAT entries at 795 * load time. */ 796 for (u32 i = 0; i < it->nimports; ++i) { 797 u64 hint_rva = (u64)idata_rva + (u64)it->imports[i].hint_off; 798 wr_u64_le(buf + it->imports[i].ilt_off, hint_rva); 799 wr_u64_le(buf + it->imports[i].iat_off, hint_rva); 800 } 801 /* Per-DLL ILT/IAT terminators are u64 0, already zero-filled. */ 802 803 /* Block 4: hint/name records. */ 804 for (u32 i = 0; i < it->nimports; ++i) { 805 LinkSymbol* s = LinkSyms_at(&img->syms, it->imports[i].sym - 1); 806 size_t nlen = 0; 807 const char* nm = coff_import_emit_name(c, &it->imports[i], s, &nlen); 808 u8* p = buf + it->imports[i].hint_off; 809 wr_u16_le(p, PE_IMPORT_HINT_NONE); 810 memcpy(p + 2, nm, nlen); 811 /* NUL terminator + optional pad already zero. */ 812 } 813 814 /* Block 5: DLL name strings. */ 815 for (u32 d = 0; d < it->ndlls; ++d) { 816 Slice nm_s = pool_slice(c->global, it->dlls[d].soname); 817 const char* nm = nm_s.s; 818 size_t nlen = nm_s.len; 819 memcpy(buf + it->dlls[d].name_off, nm, nlen); 820 /* NUL already zero. */ 821 } 822 } 823 824 /* Per-LinkSymId vaddr override table for imports. Indexed by 825 * LinkSymId-1; 0 means "not an import". Built once after the .idata 826 * bucket RVA is final. 
Consumed by coff_apply_all_relocs in lieu of 827 * the symbol's own vaddr field (which is 0 for imports). */ 828 typedef struct CoffImportVaddr { 829 u64* by_sym; /* size = nsyms; 0 entries mean "not imported" */ 830 u32 nsyms; 831 } CoffImportVaddr; 832 833 static void coff_import_vaddr_build(LinkImage* img, const CoffImportTable* it, 834 const CoffSection out[COFF_NBUCKETS], 835 CoffImportVaddr* iv) { 836 Heap* heap = img->heap; 837 u64 img_base = PE_IMAGE_BASE; 838 u32 text_rva = out[COFF_BUCKET_TEXT].rva; 839 u32 idata_rva = out[COFF_BUCKET_IDATA].rva; 840 iv->nsyms = LinkSyms_count(&img->syms); 841 iv->by_sym = (u64*)heap->alloc(heap, sizeof(u64) * (size_t)(iv->nsyms + 1u), 842 _Alignof(u64)); 843 if (!iv->by_sym) 844 compiler_panic(img->c, SRCLOC_NONE, 845 "link_emit_coff: oom on import vaddr table"); 846 memset(iv->by_sym, 0, sizeof(u64) * (size_t)(iv->nsyms + 1u)); 847 for (u32 i = 0; i < it->nimports; ++i) { 848 LinkSymId sid = it->imports[i].sym; 849 u64 va; 850 if (it->imports[i].is_func) { 851 va = img_base + (u64)text_rva + (u64)it->imports[i].stub_off; 852 } else { 853 va = img_base + (u64)idata_rva + (u64)it->imports[i].iat_off; 854 } 855 iv->by_sym[sid - 1u] = va; 856 /* Fan out across every shadow LinkSymId with the same name so a 857 * per-input undef reference resolves to the same import slot. */ 858 { 859 LinkSymbol* canonical = LinkSyms_at(&img->syms, sid - 1u); 860 for (u32 j = 0; j < iv->nsyms; ++j) { 861 LinkSymbol* s = LinkSyms_at(&img->syms, j); 862 if (s->name == canonical->name && s->imported) { 863 iv->by_sym[s->id - 1u] = va; 864 } 865 } 866 } 867 } 868 } 869 870 static void coff_import_vaddr_free(LinkImage* img, CoffImportVaddr* iv) { 871 Heap* heap = img->heap; 872 if (iv->by_sym) { 873 heap->free(heap, iv->by_sym, sizeof(u64) * (size_t)(iv->nsyms + 1u)); 874 } 875 } 876 877 /* Resolve Compiler.target.arch -> IMAGE_FILE_MACHINE_* via the per-arch 878 * coff ops table. 
Panic if the arch has no COFF descriptor or the 879 * machine value is one kit doesn't ship (Phase 1 supports AMD64 and 880 * ARM64 only). */ 881 static u16 coff_machine_or_panic(Compiler* c) { 882 const ObjFormatImpl* fmt = obj_format_lookup(KIT_OBJ_COFF); 883 const ObjCoffArchOps* arch = 884 fmt && fmt->coff_arch ? fmt->coff_arch(c->target.arch) : NULL; 885 u16 m; 886 if (!arch) 887 compiler_panic(c, SRCLOC_NONE, "link_emit_coff: no COFF arch descriptor"); 888 m = arch->machine; 889 if (m != IMAGE_FILE_MACHINE_AMD64 && m != IMAGE_FILE_MACHINE_ARM64) 890 compiler_panic(c, SRCLOC_NONE, "link_emit_coff: unsupported machine 0x%x", 891 (unsigned)m); 892 return m; 893 } 894 895 static int coff_section_name_starts(Compiler* c, const LinkSection* ls, 896 const char* prefix) { 897 size_t pn = slice_from_cstr(prefix).len; 898 Slice s_s = ls->name ? pool_slice(c->global, ls->name) : SLICE_NULL; 899 const char* s = s_s.s; 900 size_t n = s_s.len; 901 return s && n >= pn && memcmp(s, prefix, pn) == 0; 902 } 903 904 static int coff_section_name_cmp(Compiler* c, const LinkSection* a, 905 const LinkSection* b) { 906 Slice as_s = a->name ? pool_slice(c->global, a->name) : SLICE_NULL; 907 Slice bs_s = b->name ? pool_slice(c->global, b->name) : SLICE_NULL; 908 const char* as = as_s.s ? as_s.s : ""; 909 const char* bs = bs_s.s ? bs_s.s : ""; 910 size_t an = as_s.len, bn = bs_s.len; 911 size_t n = an < bn ? an : bn; 912 int cmp = n ? memcmp(as, bs, n) : 0; 913 if (cmp) return cmp; 914 if (an < bn) return -1; 915 if (an > bn) return 1; 916 if (a->id < b->id) return -1; 917 if (a->id > b->id) return 1; 918 return 0; 919 } 920 921 static void coff_place_section(LinkImage* img, CoffSection out[COFF_NBUCKETS], 922 CoffSecMap* map, u64 bucket_cur[COFF_NBUCKETS], 923 u32 bucket_cap[COFF_NBUCKETS], 924 const LinkSection* ls) { 925 Heap* heap = img->heap; 926 CoffBucket b2 = coff_bucket_for(img->c, ls); 927 u32 align = ls->align ? 
ls->align : 1u; 928 u64 cur = bucket_cur[b2]; 929 cur = ALIGN_UP(cur, (u64)align); 930 map[ls->id - 1].bucket = (u8)b2; 931 /* Record the bucket-local offset; the absolute RVA / file offset 932 * are filled in after bucket placement (RVAs need 933 * SectionAlignment, file offsets need FileAlignment). */ 934 map[ls->id - 1].new_rva = (u32)cur; 935 if (b2 != COFF_BUCKET_BSS) { 936 /* Copy bytes from the source segment buffer into the bucket. */ 937 if (ls->size) { 938 u32 need = (u32)(cur + ls->size); 939 if (need > bucket_cap[b2]) { 940 (void)VEC_GROW(heap, out[b2].bytes, bucket_cap[b2], need); 941 } 942 memset(out[b2].bytes + bucket_cur[b2], 0, (size_t)(cur - bucket_cur[b2])); 943 if (ls->sem != SSEM_NOBITS) { 944 const LinkSegment* seg = &img->segments[ls->segment_id - 1]; 945 const u8* src = img->segment_bytes[seg->id - 1] + 946 (size_t)(ls->file_offset - seg->file_offset); 947 memcpy(out[b2].bytes + cur, src, (size_t)ls->size); 948 } else { 949 memset(out[b2].bytes + cur, 0, (size_t)ls->size); 950 } 951 } 952 } 953 cur += ls->size; 954 bucket_cur[b2] = cur; 955 out[b2].size = (u32)cur; 956 } 957 958 static void coff_insert_sorted_section(Compiler* c, const LinkSection** a, 959 u32* n, const LinkSection* ls) { 960 u32 i = *n; 961 while (i > 0 && coff_section_name_cmp(c, ls, a[i - 1u]) < 0) { 962 a[i] = a[i - 1u]; 963 --i; 964 } 965 a[i] = ls; 966 *n += 1u; 967 } 968 969 /* ---- pass 1: bucket input sections, assemble bytes, assign deltas ---- 970 * CoffSecMap is defined above (alongside CoffTlsLayout) because the 971 * TLS planning helpers need to consume one. */ 972 973 /* Build the four payload buckets (.text/.rdata/.data/.bss). 974 * 975 * `map[secid-1]` is populated for every kept LinkSection with the 976 * section's new RVA, new file offset, the bucket it landed in, and the 977 * delta to add to in-section vaddrs. Bucket buffers are 978 * heap-allocated; the caller frees them after emit. 
*/
static void coff_build_buckets(LinkImage* img, CoffSection out[COFF_NBUCKETS],
                               CoffSecMap* map) {
  Heap* heap = img->heap;
  Compiler* c = img->c;
  /* Characteristic masks shared by several buckets. */
  const u32 ro_data = IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ;
  const u32 rw_data = IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ |
                      IMAGE_SCN_MEM_WRITE;
  /* Deferred placement lists: SF_TLS sections and ".CRT$" sections are
   * collected in name order and placed after everything else. */
  const LinkSection** tls_sorted = NULL;
  const LinkSection** crt_sorted = NULL;
  u32 ntls_sorted = 0;
  u32 ncrt_sorted = 0;
  /* Per-bucket write cursor and buffer capacity; buffers grow lazily
   * inside coff_place_section. */
  u64 bucket_cur[COFF_NBUCKETS];
  u32 bucket_cap[COFF_NBUCKETS];
  u32 i, b;

  for (b = 0; b < COFF_NBUCKETS; ++b) {
    memset(&out[b], 0, sizeof(out[b]));
    bucket_cur[b] = 0;
    bucket_cap[b] = 0;
  }

  out[COFF_BUCKET_TEXT].name = ".text";
  out[COFF_BUCKET_TEXT].characteristics =
      IMAGE_SCN_CNT_CODE | IMAGE_SCN_MEM_EXECUTE | IMAGE_SCN_MEM_READ;
  out[COFF_BUCKET_TEXT].has_file_bytes = 1;

  out[COFF_BUCKET_RDATA].name = ".rdata";
  out[COFF_BUCKET_RDATA].characteristics = ro_data;
  out[COFF_BUCKET_RDATA].has_file_bytes = 1;

  out[COFF_BUCKET_IDATA].name = ".idata";
  out[COFF_BUCKET_IDATA].characteristics = ro_data;
  out[COFF_BUCKET_IDATA].has_file_bytes = 1;

  out[COFF_BUCKET_DATA].name = ".data";
  out[COFF_BUCKET_DATA].characteristics = rw_data;
  out[COFF_BUCKET_DATA].has_file_bytes = 1;

  /* .tls is a template: the loader seeds each thread's TLS block from
   * the on-disk bytes and threads write to their own copies.  The
   * section is still marked writable because that is what mingw and
   * link.exe emit; the loader special-cases it via the TLS directory. */
  out[COFF_BUCKET_TLS].name = ".tls";
  out[COFF_BUCKET_TLS].characteristics = rw_data;
  out[COFF_BUCKET_TLS].has_file_bytes = 1;

  /* .bss is the only bucket without file bytes. */
  out[COFF_BUCKET_BSS].name = ".bss";
  out[COFF_BUCKET_BSS].characteristics = IMAGE_SCN_CNT_UNINITIALIZED_DATA |
                                         IMAGE_SCN_MEM_READ |
                                         IMAGE_SCN_MEM_WRITE;
  out[COFF_BUCKET_BSS].has_file_bytes = 0;

  out[COFF_BUCKET_PDATA].name = ".pdata";
  out[COFF_BUCKET_PDATA].characteristics = ro_data;
  out[COFF_BUCKET_PDATA].has_file_bytes = 1;

  out[COFF_BUCKET_RELOC].name = ".reloc";
  out[COFF_BUCKET_RELOC].characteristics = IMAGE_SCN_CNT_INITIALIZED_DATA |
                                           IMAGE_SCN_MEM_READ |
                                           IMAGE_SCN_MEM_DISCARDABLE;
  out[COFF_BUCKET_RELOC].has_file_bytes = 1;

  /* Scratch arrays sized for the worst case (every section deferred). */
  if (img->nsections) {
    tls_sorted = (const LinkSection**)heap->alloc(
        heap, sizeof(*tls_sorted) * img->nsections,
        _Alignof(const LinkSection*));
    crt_sorted = (const LinkSection**)heap->alloc(
        heap, sizeof(*crt_sorted) * img->nsections,
        _Alignof(const LinkSection*));
    if (!tls_sorted || !crt_sorted)
      compiler_panic(c, SRCLOC_NONE, "link_emit_coff: oom sorting sections");
  }

  /* Non-SF_ALLOC sections are dropped; TLS and .CRT$ sections are
   * queued in sorted order; everything else is placed immediately in
   * input order. */
  for (i = 0; i < img->nsections; ++i) {
    const LinkSection* ls = &img->sections[i];
    if (!(ls->flags & SF_ALLOC)) continue;
    if (ls->flags & SF_TLS) {
      coff_insert_sorted_section(c, tls_sorted, &ntls_sorted, ls);
    } else if (coff_section_name_starts(c, ls, ".CRT$")) {
      coff_insert_sorted_section(c, crt_sorted, &ncrt_sorted, ls);
    } else {
      coff_place_section(img, out, map, bucket_cur, bucket_cap, ls);
    }
  }
  for (i = 0; i < ntls_sorted; ++i) {
    coff_place_section(img, out, map, bucket_cur, bucket_cap, tls_sorted[i]);
  }
  for (i = 0; i < ncrt_sorted; ++i) {
    coff_place_section(img, out, map, bucket_cur, bucket_cap, crt_sorted[i]);
  }

  /* Hand the buffer capacities back through size_raw (heap->free needs
   * the original allocation size).  This is a temporary stash: the
   * field is later overwritten with the proper PE value. */
  for (b = 0; b < COFF_NBUCKETS; ++b) {
    out[b].size_raw = bucket_cap[b];
  }

  if (tls_sorted)
    heap->free(heap, tls_sorted, sizeof(*tls_sorted) * img->nsections);
  if (crt_sorted)
    heap->free(heap, crt_sorted, sizeof(*crt_sorted) * img->nsections);
}

/* Assign RVAs and file offsets to the buckets that participate in the
 * image. Returns the file offset at which trailing pad-to-EOF should
 * land (== file size).
*/
static u64 coff_assign_layout(CoffSection out[COFF_NBUCKETS],
                              u32 headers_file_size, u32 first_section_rva) {
  /* File cursor starts right after the FileAlignment-padded headers;
   * RVA cursor starts at the caller-supplied first section RVA. */
  u64 file_cursor = ALIGN_UP((u64)headers_file_size, (u64)PE_FILE_ALIGNMENT);
  u32 rva_cursor = first_section_rva;
  u32 b;

  for (b = 0; b < COFF_NBUCKETS; ++b) {
    CoffSection* s = &out[b];

    /* Empty buckets are excluded from the image and get all-zero
     * placement fields. */
    if (s->size == 0) {
      s->in_image = 0;
      s->rva = 0;
      s->file_offset = 0;
      s->size_raw = 0;
      continue;
    }

    s->in_image = 1;
    s->rva = (u32)ALIGN_UP((u64)rva_cursor, (u64)PE_SECTION_ALIGNMENT);
    /* Only buckets with file bytes consume file space; the rest keep a
     * zero file offset and raw size. */
    s->file_offset = 0;
    s->size_raw = 0;
    if (s->has_file_bytes) {
      s->file_offset = (u32)file_cursor;
      s->size_raw = (u32)ALIGN_UP((u64)s->size, (u64)PE_FILE_ALIGNMENT);
      file_cursor += s->size_raw;
    }
    /* Next bucket's RVA is aligned up from the end of this one's
     * virtual extent. */
    rva_cursor = s->rva + s->size;
  }
  return file_cursor;
}

/* Build the .reloc bytes by grouping absolute relocs by 4-KiB page.
 * The map[] array maps LinkSectionId-1 to the per-section post-PE-relayout
 * RVA, so we can compute each reloc's site_rva = section_rva + (orig
 * write_vaddr - orig section_vaddr).
1119 * 1120 * Layout per page: 1121 * u32 page_rva 1122 * u32 size_of_block (8 + n_entries*2, padded to a multiple of 4) 1123 * u16 entries[]: (type << 12) | (offset & 0xfff) 1124 * optional trailing u16 = 0 (IMAGE_REL_BASED_ABSOLUTE) for u32 alignment */ 1125 typedef struct CoffRelocEntry { 1126 u32 site_rva; 1127 u16 type; 1128 u16 pad; 1129 } CoffRelocEntry; 1130 1131 static int coff_reloc_entry_cmp(const void* a, const void* b) { 1132 const CoffRelocEntry* ea = (const CoffRelocEntry*)a; 1133 const CoffRelocEntry* eb = (const CoffRelocEntry*)b; 1134 if (ea->site_rva < eb->site_rva) return -1; 1135 if (ea->site_rva > eb->site_rva) return 1; 1136 return 0; 1137 } 1138 1139 static void coff_build_reloc_section(LinkImage* img, 1140 const CoffSection out[COFF_NBUCKETS], 1141 const CoffSecMap* map, CoffSection* reloc, 1142 const CoffRelocEntry* extras, 1143 u32 n_extras) { 1144 Heap* heap = img->heap; 1145 Compiler* c = img->c; 1146 u32 nrel = LinkRelocs_count(&img->relocs); 1147 CoffRelocEntry* entries = NULL; 1148 u32 nentries = 0; 1149 u32 cap = 0; 1150 u32 i; 1151 1152 for (i = 0; i < nrel; ++i) { 1153 const LinkRelocApply* r = LinkRelocs_at(&img->relocs, i); 1154 const LinkSection* ls; 1155 u64 site_old_vaddr; 1156 u32 site_rva; 1157 u16 type; 1158 if (!coff_reloc_needs_base_reloc(r->kind)) continue; 1159 if (r->link_section_id == LINK_SEC_NONE) continue; 1160 ls = &img->sections[r->link_section_id - 1]; 1161 /* r->write_vaddr is in the pre-relayout coordinate system (same as 1162 * ls->vaddr), so the offset into the section is stable. Add the 1163 * containing bucket's final RVA to land at the image RVA. 
*/ 1164 site_old_vaddr = r->write_vaddr; 1165 u8 sb = map[ls->id - 1].bucket; 1166 site_rva = out[sb].rva + map[ls->id - 1].new_rva + 1167 (u32)(site_old_vaddr - ls->vaddr); 1168 if (r->kind == R_ABS64) { 1169 type = (u16)IMAGE_REL_BASED_DIR64; 1170 } else { 1171 type = (u16)IMAGE_REL_BASED_HIGHLOW; 1172 } 1173 if (nentries == cap) { 1174 (void)VEC_GROW(heap, entries, cap, nentries + 1u); 1175 } 1176 entries[nentries].site_rva = site_rva; 1177 entries[nentries].type = type; 1178 entries[nentries].pad = 0; 1179 ++nentries; 1180 } 1181 /* Append caller-supplied extras (TLS directory absolute-VA fields, 1182 * etc.). These are already site-RVAs in the final image. */ 1183 for (i = 0; i < n_extras; ++i) { 1184 if (nentries == cap) { 1185 (void)VEC_GROW(heap, entries, cap, nentries + 1u); 1186 } 1187 entries[nentries] = extras[i]; 1188 ++nentries; 1189 } 1190 if (nentries == 0) { 1191 reloc->bytes = NULL; 1192 reloc->size = 0; 1193 if (entries) heap->free(heap, entries, cap * sizeof(*entries)); 1194 (void)c; 1195 return; 1196 } 1197 /* Sort entries by RVA so we can group runs sharing a 4-KiB page. */ 1198 qsort(entries, nentries, sizeof(*entries), coff_reloc_entry_cmp); 1199 1200 /* Two-pass: first compute the total size (so we can allocate the 1201 * blob exactly), then emit. 
*/ 1202 u32 blob_size = 0; 1203 u32 run_start = 0; 1204 while (run_start < nentries) { 1205 u32 page = entries[run_start].site_rva & ~0xfffu; 1206 u32 run_end = run_start; 1207 while (run_end < nentries && 1208 (entries[run_end].site_rva & ~0xfffu) == page) { 1209 ++run_end; 1210 } 1211 u32 n = run_end - run_start; 1212 u32 block = COFF_BASE_RELOCATION_SIZE + n * 2u; 1213 block = (u32)ALIGN_UP((u64)block, 4ull); 1214 blob_size += block; 1215 run_start = run_end; 1216 } 1217 reloc->bytes = (u8*)heap->alloc(heap, blob_size, 4); 1218 if (!reloc->bytes && blob_size) 1219 compiler_panic(c, SRCLOC_NONE, "link_emit_coff: oom on .reloc blob"); 1220 memset(reloc->bytes, 0, blob_size); 1221 reloc->size = blob_size; 1222 /* Stash allocation size for free path. */ 1223 reloc->size_raw = blob_size; 1224 1225 u32 cursor = 0; 1226 run_start = 0; 1227 while (run_start < nentries) { 1228 u32 page = entries[run_start].site_rva & ~0xfffu; 1229 u32 run_end = run_start; 1230 while (run_end < nentries && 1231 (entries[run_end].site_rva & ~0xfffu) == page) { 1232 ++run_end; 1233 } 1234 u32 n = run_end - run_start; 1235 u32 raw_size = COFF_BASE_RELOCATION_SIZE + n * 2u; 1236 u32 block = (u32)ALIGN_UP((u64)raw_size, 4ull); 1237 u8* p = reloc->bytes + cursor; 1238 wr_u32_le(p, page); 1239 wr_u32_le(p + 4, block); 1240 u32 k; 1241 for (k = 0; k < n; ++k) { 1242 u16 entry = (u16)(((u16)entries[run_start + k].type << 12) | 1243 (entries[run_start + k].site_rva & 0xfffu)); 1244 wr_u16_le(p + 8 + k * 2u, entry); 1245 } 1246 /* Optional trailing pad: a single IMAGE_REL_BASED_ABSOLUTE (0). */ 1247 if (block > raw_size) { 1248 wr_u16_le(p + 8 + n * 2u, 0); 1249 } 1250 cursor += block; 1251 run_start = run_end; 1252 } 1253 heap->free(heap, entries, cap * sizeof(*entries)); 1254 } 1255 1256 /* Patch each LinkRelocApply against the PE-relayout coordinates and 1257 * apply. 
`bucket_bytes[bucket]` gives the writable buffer for that 1258 * bucket; the per-section delta in map[] turns the old in-section 1259 * offsets into bucket-local offsets. 1260 * 1261 * Imported targets (LinkSymbol.imported == 1) have no vaddr of their 1262 * own — instead the .idata pass populated `iv->by_sym[id-1]` with the 1263 * function stub's vaddr (for callable imports) or the IAT slot's 1264 * vaddr (for data imports). This is the spot where that table is 1265 * consulted in lieu of the symbol's own zero vaddr. */ 1266 static void coff_apply_all_relocs(LinkImage* img, 1267 const CoffSection out[COFF_NBUCKETS], 1268 const CoffSecMap* map, 1269 const CoffImportVaddr* iv) { 1270 Compiler* c = img->c; 1271 u32 i; 1272 u64 img_base = PE_IMAGE_BASE; 1273 u32 nrel = LinkRelocs_count(&img->relocs); 1274 for (i = 0; i < nrel; ++i) { 1275 LinkRelocApply* r = LinkRelocs_at(&img->relocs, i); 1276 const LinkSymbol* tgt = LinkSyms_at(&img->syms, r->target - 1); 1277 const LinkSection* sec; 1278 const LinkSection* tgt_sec; 1279 u64 S, P; 1280 u8* P_bytes; 1281 u8 bucket; 1282 u32 site_off_in_sec; 1283 u32 site_bucket_off; 1284 if (r->link_section_id == LINK_SEC_NONE) continue; 1285 sec = &img->sections[r->link_section_id - 1]; 1286 bucket = map[sec->id - 1].bucket; 1287 if (!out[bucket].has_file_bytes || !out[bucket].bytes) { 1288 /* Shouldn't happen — .bss has no relocations. */ 1289 continue; 1290 } 1291 site_off_in_sec = (u32)(r->write_vaddr - sec->vaddr); 1292 site_bucket_off = map[sec->id - 1].new_rva + site_off_in_sec; 1293 P_bytes = out[bucket].bytes + site_bucket_off; 1294 /* P = ImageBase + bucket_rva + map[].new_rva + site_off_in_sec 1295 * — i.e. the final runtime address of the patch site. */ 1296 P = img_base + (u64)out[bucket].rva + (u64)map[sec->id - 1].new_rva + 1297 site_off_in_sec; 1298 1299 /* Resolve S: target symbol's new image-relative address. 
Look up 1300 * the LinkSection that contains the symbol's original vaddr, then 1301 * apply that section's delta. */ 1302 if (tgt->imported) { 1303 /* IAT-routed: stub vaddr (functions) / slot vaddr (data). */ 1304 if (!iv || iv->by_sym[r->target - 1u] == 0) 1305 compiler_panic(c, SRCLOC_NONE, 1306 "link_emit_coff: imported target lacks IAT slot"); 1307 S = iv->by_sym[r->target - 1u]; 1308 } else if (tgt->kind == SK_ABS) { 1309 S = tgt->vaddr; 1310 } else if (tgt->defined) { 1311 tgt_sec = coff_symbol_section(img, tgt); 1312 if (!tgt_sec) { 1313 compiler_panic(c, SRCLOC_NONE, 1314 "link_emit_coff: symbol vaddr 0x%llx has no " 1315 "containing section", 1316 (unsigned long long)tgt->vaddr); 1317 } 1318 u8 tb = map[tgt_sec->id - 1].bucket; 1319 u64 sym_off = tgt->vaddr - tgt_sec->vaddr; 1320 S = img_base + (u64)out[tb].rva + (u64)map[tgt_sec->id - 1].new_rva + 1321 sym_off; 1322 } else { 1323 /* Undef and not imported — shouldn't survive resolve_undefs. */ 1324 compiler_panic(c, SRCLOC_NONE, 1325 "link_emit_coff: unresolved non-imported symbol"); 1326 } 1327 /* COFF-only section-relative kinds: the SECREL value is the 1328 * symbol's offset from the start of its containing output section 1329 * (PE bucket), and SECTION is the 1-based PE section index. 1330 * link_reloc_apply only sees S and P, so we patch these inline 1331 * before delegating common kinds. 
*/ 1332 if (r->kind == R_COFF_SECREL || r->kind == R_COFF_SECTION || 1333 r->kind == R_COFF_AARCH64_SECREL_LOW12A || 1334 r->kind == R_COFF_AARCH64_SECREL_HIGH12A) { 1335 if (!tgt->defined || tgt->kind == SK_ABS) { 1336 compiler_panic(c, SRCLOC_NONE, 1337 "link_emit_coff: COFF SECREL/SECTION requires a " 1338 "defined section-bound target symbol"); 1339 } 1340 u8 tb = map[tgt_sec->id - 1].bucket; 1341 u64 sym_off_in_bucket = 1342 (u64)map[tgt_sec->id - 1].new_rva + (tgt->vaddr - tgt_sec->vaddr); 1343 if (r->kind == R_COFF_SECREL) { 1344 u64 v = sym_off_in_bucket + (u64)r->addend; 1345 wr_u32_le(P_bytes, (u32)(v & 0xffffffffu)); 1346 } else if (r->kind == R_COFF_SECTION) { 1347 /* PE section indices are 1-based; buckets are 0-based, so add 1. */ 1348 wr_u16_le(P_bytes, (u16)((tb + 1u) & 0xffffu)); 1349 } else { 1350 /* AArch64 SECREL_{LOW,HIGH}12A: patch the imm12 field of an 1351 * existing ADD-imm12 instruction. LOW12A = bits [11:0] of the 1352 * SECREL; HIGH12A = bits [23:12]. The instruction's sh bit was 1353 * already set by the codegen (0 for LOW, 1 for HIGH). */ 1354 u64 v = sym_off_in_bucket + (u64)r->addend; 1355 u32 imm12 = (r->kind == R_COFF_AARCH64_SECREL_HIGH12A) 1356 ? (u32)((v >> 12) & 0xfffu) 1357 : (u32)(v & 0xfffu); 1358 u32 instr = rd_u32_le(P_bytes); 1359 instr = (instr & ~(0xfffu << 10)) | (imm12 << 10); 1360 wr_u32_le(P_bytes, instr); 1361 } 1362 continue; 1363 } 1364 if (r->kind == R_COFF_ADDR32NB) { 1365 u64 inline_addend = rd_u32_le(P_bytes); 1366 u64 v = (S - img_base) + inline_addend + (u64)r->addend; 1367 wr_u32_le(P_bytes, (u32)(v & 0xffffffffu)); 1368 continue; 1369 } 1370 if (tgt->bind == SB_WEAK && tgt->kind == SK_ABS && tgt->vaddr == 0) { 1371 /* AArch64 cannot generally ADRP from a PE image base down to absolute 1372 * NULL. Materialize the weak-undef address as zero directly; the paired 1373 * ADD low-12 relocation is already a no-op. 
*/ 1374 if (r->kind == R_AARCH64_ADR_PREL_PG_HI21 || 1375 r->kind == R_AARCH64_ADR_PREL_PG_HI21_NC) { 1376 u32 instr = rd_u32_le(P_bytes); 1377 u32 rd = instr & 0x1fu; 1378 wr_u32_le(P_bytes, 0xd2800000u | rd); /* movz Xrd, #0 */ 1379 continue; 1380 } 1381 if (r->kind == R_AARCH64_ADD_ABS_LO12_NC) continue; 1382 } 1383 link_reloc_apply(c, r->kind, P_bytes, S, r->addend, P); 1384 } 1385 } 1386 1387 /* ---- header marshalling ---- 1388 * 1389 * Each helper streams its on-disk shape to the writer field-by-field; 1390 * we avoid sizeof(struct) on the packed PE wire types since they carry 1391 * implicit-padding hazards on hosts that disagree with #pragma pack(1) 1392 * defaults. */ 1393 1394 static void coff_write_dos_stub(Writer* w) { 1395 u8 buf[PE_DOS_HDR_SIZE]; 1396 memset(buf, 0, sizeof(buf)); 1397 /* e_magic ("MZ") + e_lfanew (offset of PE signature). All other 1398 * legacy fields zero. */ 1399 buf[0] = (u8)(IMAGE_DOS_SIGNATURE & 0xffu); 1400 buf[1] = (u8)((IMAGE_DOS_SIGNATURE >> 8) & 0xffu); 1401 wr_u32_le(buf + 0x3c, PE_DOS_E_LFANEW); 1402 kit_writer_write(w, buf, sizeof(buf)); 1403 } 1404 1405 static void coff_write_file_header(Writer* w, u16 machine, u16 nsec, 1406 u16 characteristics) { 1407 coff_wr_u16(w, machine); 1408 coff_wr_u16(w, nsec); 1409 coff_wr_u32(w, 0u); /* TimeDateStamp */ 1410 coff_wr_u32(w, 0u); /* PointerToSymbolTable */ 1411 coff_wr_u32(w, 0u); /* NumberOfSymbols */ 1412 coff_wr_u16(w, (u16)PE_OPT_HDR_SIZE); /* SizeOfOptionalHeader */ 1413 coff_wr_u16(w, characteristics); 1414 } 1415 1416 /* Per-section meta used by both the data-directory fill and the 1417 * IMAGE_SECTION_HEADER emit. Compactly captures everything the writer 1418 * needs to know about the four-or-five output sections. 
*/ 1419 typedef struct CoffOutHdr { 1420 const char* name; 1421 u32 vsize; 1422 u32 rva; 1423 u32 size_raw; 1424 u32 file_offset; 1425 u32 characteristics; 1426 } CoffOutHdr; 1427 1428 static void coff_write_optional_header(Writer* w, u32 entry_rva, 1429 const CoffSection out[COFF_NBUCKETS], 1430 u32 headers_size_padded, u32 image_size, 1431 int dynamic_base, u16 subsystem, 1432 const CoffImportTable* it, 1433 const CoffTlsLayout* tls) { 1434 /* Standard fields. */ 1435 coff_wr_u16(w, IMAGE_NT_OPTIONAL_HDR64_MAGIC); 1436 coff_wr_u8(w, PE_LINKER_MAJOR); 1437 coff_wr_u8(w, PE_LINKER_MINOR); 1438 /* SizeOfCode / SizeOfInitializedData / SizeOfUninitializedData. */ 1439 u32 size_code = 1440 out[COFF_BUCKET_TEXT].in_image ? out[COFF_BUCKET_TEXT].size_raw : 0; 1441 u32 size_init = 1442 (out[COFF_BUCKET_RDATA].in_image ? out[COFF_BUCKET_RDATA].size_raw : 0) + 1443 (out[COFF_BUCKET_IDATA].in_image ? out[COFF_BUCKET_IDATA].size_raw : 0) + 1444 (out[COFF_BUCKET_DATA].in_image ? out[COFF_BUCKET_DATA].size_raw : 0) + 1445 (out[COFF_BUCKET_TLS].in_image ? out[COFF_BUCKET_TLS].size_raw : 0) + 1446 (out[COFF_BUCKET_PDATA].in_image ? out[COFF_BUCKET_PDATA].size_raw : 0) + 1447 (out[COFF_BUCKET_RELOC].in_image ? out[COFF_BUCKET_RELOC].size_raw : 0); 1448 u32 size_uninit = 1449 out[COFF_BUCKET_BSS].in_image ? out[COFF_BUCKET_BSS].size : 0; 1450 coff_wr_u32(w, size_code); 1451 coff_wr_u32(w, size_init); 1452 coff_wr_u32(w, size_uninit); 1453 coff_wr_u32(w, entry_rva); 1454 coff_wr_u32(w, 1455 out[COFF_BUCKET_TEXT].in_image ? out[COFF_BUCKET_TEXT].rva : 0); 1456 /* Windows-specific fields. 
*/ 1457 coff_wr_u64(w, PE_IMAGE_BASE); 1458 coff_wr_u32(w, PE_SECTION_ALIGNMENT); 1459 coff_wr_u32(w, PE_FILE_ALIGNMENT); 1460 coff_wr_u16(w, PE_OS_MAJOR); 1461 coff_wr_u16(w, PE_OS_MINOR); 1462 coff_wr_u16(w, 0u); /* MajorImageVersion */ 1463 coff_wr_u16(w, 0u); /* MinorImageVersion */ 1464 coff_wr_u16(w, PE_SUBSYS_MAJOR); 1465 coff_wr_u16(w, PE_SUBSYS_MINOR); 1466 coff_wr_u32(w, 0u); /* Win32VersionValue */ 1467 coff_wr_u32(w, image_size); 1468 coff_wr_u32(w, headers_size_padded); 1469 coff_wr_u32(w, 0u); /* CheckSum */ 1470 coff_wr_u16(w, subsystem ? subsystem : IMAGE_SUBSYSTEM_WINDOWS_CUI); 1471 coff_wr_u16( 1472 w, (u16)(PE_DLL_CHARS_BASE | (dynamic_base ? PE_DLL_CHARS_ASLR : 0))); 1473 coff_wr_u64(w, PE_STACK_RESERVE); 1474 coff_wr_u64(w, PE_STACK_COMMIT); 1475 coff_wr_u64(w, PE_HEAP_RESERVE); 1476 coff_wr_u64(w, PE_HEAP_COMMIT); 1477 coff_wr_u32(w, 0u); /* LoaderFlags */ 1478 coff_wr_u32(w, (u32)PE_NUM_DATA_DIRS); 1479 /* DataDirectory[16]. Populated entries: 1480 * [1] IMPORT — descriptor table RVA + total descriptor bytes 1481 * [3] EXCEPTION — .pdata runtime-function table 1482 * [5] BASERELOC — when PIE and .reloc is in the image 1483 * [12] IAT — first IAT block RVA + sum of per-DLL IAT sizes 1484 * Everything else stays zero. 
*/ 1485 u32 i; 1486 int has_idata = it && it->nimports > 0 && out[COFF_BUCKET_IDATA].in_image; 1487 for (i = 0; i < PE_NUM_DATA_DIRS; ++i) { 1488 if (i == IMAGE_DIRECTORY_ENTRY_IMPORT && has_idata) { 1489 coff_wr_u32(w, out[COFF_BUCKET_IDATA].rva + it->desc_off); 1490 coff_wr_u32(w, it->desc_size); 1491 } else if (i == IMAGE_DIRECTORY_ENTRY_EXCEPTION && 1492 out[COFF_BUCKET_PDATA].in_image) { 1493 coff_wr_u32(w, out[COFF_BUCKET_PDATA].rva); 1494 coff_wr_u32(w, out[COFF_BUCKET_PDATA].size); 1495 } else if (i == IMAGE_DIRECTORY_ENTRY_IAT && has_idata) { 1496 coff_wr_u32(w, out[COFF_BUCKET_IDATA].rva + it->iat_base); 1497 coff_wr_u32(w, it->iat_total); 1498 } else if (i == IMAGE_DIRECTORY_ENTRY_BASERELOC && dynamic_base && 1499 out[COFF_BUCKET_RELOC].in_image) { 1500 coff_wr_u32(w, out[COFF_BUCKET_RELOC].rva); 1501 coff_wr_u32(w, out[COFF_BUCKET_RELOC].size); 1502 } else if (i == IMAGE_DIRECTORY_ENTRY_TLS && tls && tls->present) { 1503 coff_wr_u32(w, out[COFF_BUCKET_RDATA].rva + tls->dir_rdata_off); 1504 coff_wr_u32(w, COFF_TLS_DIRECTORY64_SIZE); 1505 } else { 1506 coff_wr_u32(w, 0u); 1507 coff_wr_u32(w, 0u); 1508 } 1509 } 1510 } 1511 1512 static void coff_write_section_header(Writer* w, const char* name, u32 vsize, 1513 u32 rva, u32 size_raw, u32 file_offset, 1514 u32 characteristics) { 1515 u8 nm[8] = {0, 0, 0, 0, 0, 0, 0, 0}; 1516 size_t n = slice_from_cstr(name).len; 1517 if (n > 8) n = 8; 1518 memcpy(nm, name, n); 1519 kit_writer_write(w, nm, 8); 1520 coff_wr_u32(w, vsize); 1521 coff_wr_u32(w, rva); 1522 coff_wr_u32(w, size_raw); 1523 coff_wr_u32(w, file_offset); 1524 coff_wr_u32(w, 0u); /* PointerToRelocations */ 1525 coff_wr_u32(w, 0u); /* PointerToLinenumbers */ 1526 coff_wr_u16(w, 0u); /* NumberOfRelocations */ 1527 coff_wr_u16(w, 0u); /* NumberOfLinenumbers */ 1528 coff_wr_u32(w, characteristics); 1529 } 1530 1531 /* ---- main entry ---- */ 1532 1533 void link_emit_coff(LinkImage* img, Writer* w) { 1534 Heap* heap = img->heap; 1535 Compiler* c = img->c; 1536 
u16 machine = coff_machine_or_panic(c); 1537 if (img->entry_sym == LINK_SYM_NONE) 1538 compiler_panic(c, SRCLOC_NONE, "link_emit_coff: no resolved entry symbol"); 1539 1540 /* ---- pass 1: build buckets + per-section delta map ---- */ 1541 CoffSection out[COFF_NBUCKETS]; 1542 CoffSecMap* map = (CoffSecMap*)heap->alloc( 1543 heap, sizeof(CoffSecMap) * (img->nsections + 1u), _Alignof(CoffSecMap)); 1544 if (!map && img->nsections) 1545 compiler_panic(c, SRCLOC_NONE, "link_emit_coff: oom on section map"); 1546 memset(map, 0, sizeof(CoffSecMap) * (img->nsections + 1u)); 1547 1548 /* coff_build_buckets stashes per-bucket allocation caps in size_raw; 1549 * we read them back into a local before size_raw is recomputed by 1550 * coff_assign_layout so the cleanup path can free with the right 1551 * size. */ 1552 coff_build_buckets(img, out, map); 1553 /* coff_build_buckets stashes per-bucket allocation caps in size_raw 1554 * (the only bucket field we own for the duration of layout); read 1555 * them out before coff_assign_layout overwrites the field. .reloc 1556 * and .idata aren't touched by coff_build_buckets — their caps are 1557 * filled in below once coff_build_reloc_section / coff_emit_idata 1558 * run. */ 1559 u32 bucket_caps[COFF_NBUCKETS]; 1560 u32 b; 1561 for (b = 0; b < COFF_NBUCKETS; ++b) bucket_caps[b] = out[b].size_raw; 1562 1563 /* ---- pass 1b: collect imports and reserve .idata + .text stubs ---- 1564 * 1565 * Builds the per-DLL / per-import layout and appends one IAT-routing 1566 * stub per imported function to the .text bucket. The .idata bucket 1567 * size is set here (so it counts in nsec); the stub vaddrs and 1568 * IAT-slot vaddrs are finalised after coff_assign_layout. 
*/ 1569 CoffImportTable imports; 1570 int have_imports = coff_collect_imports(img, &imports); 1571 if (have_imports) { 1572 coff_plan_idata_layout(img, &imports); 1573 coff_append_stubs(img, &imports, &out[COFF_BUCKET_TEXT], 1574 &bucket_caps[COFF_BUCKET_TEXT]); 1575 /* Reserve the .idata bucket size so coff_assign_layout / nsec 1576 * accounting sees it. Actual bytes are written by coff_emit_idata 1577 * once the bucket RVA is known. */ 1578 out[COFF_BUCKET_IDATA].size = imports.idata_size; 1579 } 1580 1581 /* ---- pass 1c: plan the TLS directory record ---- 1582 * 1583 * If any SF_TLS sections survived, reserve 40 bytes at the tail of 1584 * .rdata for the IMAGE_TLS_DIRECTORY64. Bytes are zeroed now and 1585 * filled in by coff_emit_tls_dir once the bucket RVAs are final. */ 1586 CoffTlsLayout tls; 1587 coff_plan_tls_layout(img, out, &bucket_caps[COFF_BUCKET_RDATA], &tls); 1588 1589 /* ---- pass 2: decide whether .reloc will be in the image ---- 1590 * 1591 * The headers' file size (and therefore every section's file 1592 * offset) depends on the section-table entry count, so we need to 1593 * commit to "is .reloc emitted?" before laying out file offsets. 1594 * .reloc lights up iff at least one absolute VA reloc points into a kept 1595 * section, OR a TLS directory is emitted (its VA fields need base-relocs). 1596 * ARM64 Windows rejects fixed images (/dynamicbase:no), and x64 Windows 1597 * accepts ASLR images by default, so PE images advertise DYNAMIC_BASE when 1598 * this table is present instead of tying the table to the generic ELF/Mach-O 1599 * img->pie flag. 
*/ 1600 int emit_reloc = 0; 1601 { 1602 u32 i; 1603 u32 nrel = LinkRelocs_count(&img->relocs); 1604 for (i = 0; i < nrel; ++i) { 1605 const LinkRelocApply* r = LinkRelocs_at(&img->relocs, i); 1606 if (!coff_reloc_needs_base_reloc(r->kind)) continue; 1607 if (r->link_section_id == LINK_SEC_NONE) continue; 1608 emit_reloc = 1; 1609 break; 1610 } 1611 if (!emit_reloc && tls.present) emit_reloc = 1; 1612 } 1613 1614 u32 nsec = 0; 1615 for (b = 0; b < COFF_NBUCKETS; ++b) { 1616 if (b == COFF_BUCKET_RELOC) { 1617 if (emit_reloc) ++nsec; /* tentative; size set below */ 1618 continue; 1619 } 1620 if (out[b].size) ++nsec; 1621 } 1622 u32 headers_size_unpadded = PE_DOS_HDR_SIZE + PE_SIG_SIZE + PE_FILE_HDR_SIZE + 1623 PE_OPT_HDR_SIZE + nsec * PE_SECTION_HDR_SIZE; 1624 u32 headers_size_padded = 1625 (u32)ALIGN_UP((u64)headers_size_unpadded, (u64)PE_FILE_ALIGNMENT); 1626 1627 /* First layout pass: fixes RVAs / file offsets for buckets that 1628 * already have a finalised size (.text, .rdata, .idata, .data, .bss). 1629 * .reloc's RVA is provisional — it depends on .reloc's own size, 1630 * which is still 0 at this point. */ 1631 (void)coff_assign_layout(out, headers_size_padded, PE_FIRST_SECTION_RVA); 1632 1633 /* ---- pass 2b: emit .idata bytes + per-arch IAT stubs ---- 1634 * 1635 * The .idata bucket's RVA is final after the first assign_layout; 1636 * stubs need it (the indirect-jump displacement targets an IAT slot) 1637 * and .idata's own descriptor / ILT / IAT records all carry RVAs. 1638 * coff_import_vaddr_build builds the per-LinkSymId override table 1639 * that apply_all_relocs consults in place of the (zero) symbol 1640 * vaddr for imported targets. 
*/ 1641 CoffImportVaddr import_vaddr; 1642 memset(&import_vaddr, 0, sizeof(import_vaddr)); 1643 if (have_imports) { 1644 coff_emit_idata(img, &imports, out, &bucket_caps[COFF_BUCKET_IDATA]); 1645 coff_emit_stubs(img, &imports, out); 1646 coff_import_vaddr_build(img, &imports, out, &import_vaddr); 1647 } 1648 1649 /* Write the TLS directory bytes now that bucket RVAs are final. */ 1650 coff_emit_tls_dir(img, out, map, &tls); 1651 1652 /* ---- pass 3: build .reloc using the now-final bucket RVAs ---- 1653 * 1654 * coff_build_reloc_section reads out[bucket].rva indirectly via 1655 * map[].new_rva + (write_vaddr - sec->vaddr) → site offset within 1656 * the bucket; the absolute site_rva is bucket.rva + that offset. 1657 * Patch site RVAs are page-quantised in the emitted blob, so this 1658 * is the spot where the bucket RVAs need to be already final. 1659 * 1660 * TLS directory's four absolute-VA fields ride into the entries via 1661 * the `extras` array — they aren't ordinary symbol relocations, so 1662 * they don't show up in img->relocs. 
*/ 1663 if (emit_reloc) { 1664 CoffRelocEntry tls_extras[4]; 1665 u32 n_tls_extras = 0; 1666 if (tls.present) { 1667 u32 dir_rva = out[COFF_BUCKET_RDATA].rva + tls.dir_rdata_off; 1668 static const u32 field_offs[4] = { 1669 COFF_TLSDIR_OFF_START_ADDR, 1670 COFF_TLSDIR_OFF_END_ADDR, 1671 COFF_TLSDIR_OFF_INDEX_ADDR, 1672 COFF_TLSDIR_OFF_CALLBACKS, 1673 }; 1674 u32 k; 1675 for (k = 0; k < 4; ++k) { 1676 if (field_offs[k] == COFF_TLSDIR_OFF_CALLBACKS && !tls.callbacks_sym) 1677 continue; 1678 tls_extras[n_tls_extras].site_rva = dir_rva + field_offs[k]; 1679 tls_extras[n_tls_extras].type = (u16)IMAGE_REL_BASED_DIR64; 1680 tls_extras[n_tls_extras].pad = 0; 1681 ++n_tls_extras; 1682 } 1683 } 1684 coff_build_reloc_section(img, out, map, &out[COFF_BUCKET_RELOC], tls_extras, 1685 n_tls_extras); 1686 bucket_caps[COFF_BUCKET_RELOC] = out[COFF_BUCKET_RELOC].size_raw; 1687 /* size_raw was stashed by build; assign_layout below recomputes it 1688 * as the FileAlignment-padded length. */ 1689 (void)coff_assign_layout(out, headers_size_padded, PE_FIRST_SECTION_RVA); 1690 } 1691 1692 /* `_tls_used` is the public mingw/PE name for the TLS directory 1693 * record. Keep it in lockstep with the optional-header TLS data 1694 * directory, rather than leaving references bound to mingw's tlssup.o 1695 * placeholder record. */ 1696 coff_define_tls_used(img, out, &tls); 1697 1698 /* ---- pass 4: resolve entry symbol's PE RVA ---- 1699 * 1700 * Done before apply so the optional-header field has its final 1701 * value. 
*/ 1702 const LinkSymbol* entry_sym = LinkSyms_at(&img->syms, img->entry_sym - 1); 1703 if (!entry_sym->defined || entry_sym->kind == SK_ABS) 1704 compiler_panic(c, SRCLOC_NONE, 1705 "link_emit_coff: entry symbol is not a defined " 1706 "image-relative function"); 1707 const LinkSection* entry_sec = coff_section_at(img, entry_sym->vaddr); 1708 if (!entry_sec) 1709 compiler_panic(c, SRCLOC_NONE, 1710 "link_emit_coff: entry symbol has no containing " 1711 "section"); 1712 u8 entry_bucket = map[entry_sec->id - 1].bucket; 1713 u32 entry_rva = out[entry_bucket].rva + map[entry_sec->id - 1].new_rva + 1714 (u32)(entry_sym->vaddr - entry_sec->vaddr); 1715 1716 /* ---- pass 5: apply all relocations into bucket bytes ---- */ 1717 coff_apply_all_relocs(img, out, map, have_imports ? &import_vaddr : NULL); 1718 1719 /* ---- pass 6: compute SizeOfImage (in-memory size) ---- */ 1720 u32 image_size = 0; 1721 for (b = 0; b < COFF_NBUCKETS; ++b) { 1722 if (!out[b].in_image) continue; 1723 u32 end = out[b].rva + out[b].size; 1724 if (end > image_size) image_size = end; 1725 } 1726 image_size = (u32)ALIGN_UP((u64)image_size, (u64)PE_SECTION_ALIGNMENT); 1727 1728 /* ---- pass 7: write everything ---- */ 1729 u16 file_chars = IMAGE_FILE_EXECUTABLE_IMAGE | IMAGE_FILE_LARGE_ADDRESS_AWARE; 1730 int dynamic_base = out[COFF_BUCKET_RELOC].in_image; 1731 if (!dynamic_base) { 1732 file_chars |= IMAGE_FILE_RELOCS_STRIPPED; 1733 } 1734 1735 coff_write_dos_stub(w); 1736 /* PE signature. */ 1737 coff_wr_u32(w, IMAGE_NT_SIGNATURE); 1738 coff_write_file_header(w, machine, (u16)nsec, file_chars); 1739 u16 subsystem = img->linker ? img->linker->pe_subsystem : 0; 1740 coff_write_optional_header(w, entry_rva, out, headers_size_padded, image_size, 1741 dynamic_base, subsystem, 1742 have_imports ? &imports : NULL, &tls); 1743 1744 /* Section table. 
*/ 1745 for (b = 0; b < COFF_NBUCKETS; ++b) { 1746 if (!out[b].in_image) continue; 1747 coff_write_section_header(w, out[b].name, out[b].size, out[b].rva, 1748 out[b].size_raw, out[b].file_offset, 1749 out[b].characteristics); 1750 } 1751 1752 /* Pad to first section's file offset. */ 1753 u64 cur = (u64)headers_size_unpadded; 1754 u64 first_file_off = headers_size_padded; 1755 if (cur < first_file_off) { 1756 coff_write_zeroes(w, first_file_off - cur); 1757 cur = first_file_off; 1758 } 1759 1760 /* Section bodies. */ 1761 for (b = 0; b < COFF_NBUCKETS; ++b) { 1762 if (!out[b].in_image) continue; 1763 if (!out[b].has_file_bytes) continue; 1764 if (cur < out[b].file_offset) { 1765 coff_write_zeroes(w, out[b].file_offset - cur); 1766 cur = out[b].file_offset; 1767 } 1768 kit_writer_write(w, out[b].bytes, out[b].size); 1769 cur += out[b].size; 1770 if (out[b].size_raw > out[b].size) { 1771 coff_write_zeroes(w, out[b].size_raw - out[b].size); 1772 cur += out[b].size_raw - out[b].size; 1773 } 1774 } 1775 1776 /* ---- cleanup ---- */ 1777 for (b = 0; b < COFF_NBUCKETS; ++b) { 1778 if (out[b].bytes) heap->free(heap, out[b].bytes, bucket_caps[b]); 1779 } 1780 heap->free(heap, map, sizeof(CoffSecMap) * (img->nsections + 1u)); 1781 if (have_imports) { 1782 coff_import_vaddr_free(img, &import_vaddr); 1783 coff_imports_free(img, &imports); 1784 } 1785 }