link.c (70552B)
/* link_emit_elf: write a static ET_EXEC ELF64 image to the
 * caller-provided Writer.
 *
 * Little-endian only. Output is ELFCLASS64 by default; a target with
 * 4-byte pointers (RV32) is serialized as ELFCLASS32, static only. The
 * per-arch ELF reloc-type tables in obj/elf_reloc_<arch>.c handle
 * RelocKind <-> ELF translation; this file gets e_machine from the
 * link arch descriptor.
 *
 * File layout (in write order):
 *
 *   [headers PT_LOAD, PF_R, mapped at IMAGE_BASE]
 *     Ehdr64
 *     Phdr64[nphdr]        -- one per loaded segment + headers +
 *                             PT_NOTE
 *     .note.gnu.build-id   -- 12 + 4 + 16 = 32 bytes (12-byte note
 *                             header, 4-byte "GNU\0" name,
 *                             deterministic 16-byte id)
 *     pad to PAGE
 *
 *   [PT_LOAD per kept image segment, in img->segments order]
 *     segment bytes (padded to its file_offset)
 *
 *   [non-allocatable sections, file-only]
 *     .symtab              -- ELF64_SYM_SIZE * nsyms
 *     .strtab              -- NUL-led blob
 *     .shstrtab            -- NUL-led blob
 *
 *   [section header table at e_shoff]
 *     Shdr64[nshdr]
 *
 * Section header schema (for nm / objdump -t / gdb consumption):
 *
 *   0  SHN_UNDEF (zero entry)
 *   N  one shdr per loaded sub-region: .text/.rodata/.data/.bss as
 *      the corresponding RX/R/RW segments materialize (.bss split
 *      out as the trailing memsz>filesz tail of the RW segment).
 *   1  .note.gnu.build-id (allocatable, in headers PT_LOAD)
 *   1  .symtab (sh_link -> .strtab; sh_info = first non-local idx)
 *   1  .strtab
 *   1  .shstrtab (Ehdr64.e_shstrndx)
 *
 * Build-id is computed deterministically over the post-relocation
 * segment bytes (FNV-1a 64 over each segment, mixed into a 128-bit
 * accumulator). The 16-byte digest is written into the note before the
 * note is emitted to the Writer.
 *
 * The image-relative addresses on entry are bumped by
 * align_up(headers_size, PAGE) before relocs are applied, exactly as
 * before — segment bytes / symbol vaddrs land at their final IMAGE_BASE
 * absolute addresses by the time relocs run.
*/

#include "link/link.h"

#include <string.h>

#include "core/heap.h"
#include "core/pool.h"
#include "core/slice.h"
#include "core/util.h"
#include "core/vec.h"
#include "link/link_arch.h"
#include "link/link_internal.h"
#include "obj/elf/elf.h"
#include "obj/format.h"

/* ---- ELF64 wire structs (subset) ----
 *
 * These are packed so that the in-memory struct IS the on-disk record:
 * the ELFCLASS64 serializers in this file write them out verbatim with
 * a single byte copy. Field order and width are therefore part of the
 * file format and must not be rearranged. */

/* Number of bytes in the e_ident array at the start of the file header. */
#define EI_NIDENT 16

/* ELF64 file header. Also used as the class-independent in-memory form:
 * the writer fills this wide struct and narrows it to Ehdr32 when the
 * output is ELFCLASS32. */
typedef struct __attribute__((packed)) Ehdr64 {
  u8 e_ident[EI_NIDENT];
  u16 e_type;
  u16 e_machine;
  u32 e_version;
  u64 e_entry;
  u64 e_phoff;
  u64 e_shoff;
  u32 e_flags;
  u16 e_ehsize;
  u16 e_phentsize;
  u16 e_phnum;
  u16 e_shentsize;
  u16 e_shnum;
  u16 e_shstrndx;
} Ehdr64;

/* ELF64 program header. Note p_flags is the second field here; the
 * 32-bit record places it after the sizes. */
typedef struct __attribute__((packed)) Phdr64 {
  u32 p_type;
  u32 p_flags;
  u64 p_offset;
  u64 p_vaddr;
  u64 p_paddr;
  u64 p_filesz;
  u64 p_memsz;
  u64 p_align;
} Phdr64;

/* ELF64 section header. */
typedef struct __attribute__((packed)) Shdr64 {
  u32 sh_name;
  u32 sh_type;
  u64 sh_flags;
  u64 sh_addr;
  u64 sh_offset;
  u64 sh_size;
  u32 sh_link;
  u32 sh_info;
  u64 sh_addralign;
  u64 sh_entsize;
} Shdr64;

/* ---- ELF32 wire structs (RV32 static ET_EXEC) ----
 *
 * Ehdr32/Shdr32 keep the ELF64 field ORDER, only narrowing the
 * native-width members to u32. Phdr32 REORDERS p_flags to AFTER the
 * sizes (vs Phdr64 where p_flags is field #2) — the packed struct below
 * encodes that order, so the by-name field assignments in the phdr build
 * loop stay correct under either class.
*/ 114 typedef struct __attribute__((packed)) Ehdr32 { 115 u8 e_ident[EI_NIDENT]; 116 u16 e_type; 117 u16 e_machine; 118 u32 e_version; 119 u32 e_entry; 120 u32 e_phoff; 121 u32 e_shoff; 122 u32 e_flags; 123 u16 e_ehsize; 124 u16 e_phentsize; 125 u16 e_phnum; 126 u16 e_shentsize; 127 u16 e_shnum; 128 u16 e_shstrndx; 129 } Ehdr32; 130 131 typedef struct __attribute__((packed)) Phdr32 { 132 u32 p_type; 133 u32 p_offset; 134 u32 p_vaddr; 135 u32 p_paddr; 136 u32 p_filesz; 137 u32 p_memsz; 138 u32 p_flags; 139 u32 p_align; 140 } Phdr32; 141 142 typedef struct __attribute__((packed)) Shdr32 { 143 u32 sh_name; 144 u32 sh_type; 145 u32 sh_flags; 146 u32 sh_addr; 147 u32 sh_offset; 148 u32 sh_size; 149 u32 sh_link; 150 u32 sh_info; 151 u32 sh_addralign; 152 u32 sh_entsize; 153 } Shdr32; 154 155 #define PT_NOTE 4 156 #define PT_TLS 7 157 158 /* Static ET_EXEC base. ET_DYN (PIE) uses 0 — the loader picks the 159 * runtime base. The active value lives in `img_base` below; the macro 160 * stays for the static path's hard-coded vaddrs. */ 161 #define IMAGE_BASE_STATIC 0x400000ULL 162 163 #define BUILD_ID_DESC_LEN 16u 164 #define NOTE_NAME_GNU "GNU" 165 #define NOTE_NAME_GNU_LEN 4u /* "GNU\0" */ 166 #define NOTE_BUILD_ID_TYPE 3u 167 #define BUILD_ID_NOTE_BYTES (12u + NOTE_NAME_GNU_LEN + BUILD_ID_DESC_LEN) 168 169 /* ---- byte writer helpers ---- */ 170 171 static void write_bytes(Writer* w, const void* data, size_t n) { 172 w->write(w, data, n); 173 } 174 175 static void write_zeroes(Writer* w, size_t n) { 176 static const u8 zeroes[256] = {0}; 177 while (n) { 178 size_t step = n > sizeof(zeroes) ? 
sizeof(zeroes) : n; 179 w->write(w, zeroes, step); 180 n -= step; 181 } 182 } 183 184 static u32 perms_to_pflags(u32 secflags) { 185 u32 f = PF_R; 186 if (secflags & SF_EXEC) f |= PF_X; 187 if (secflags & SF_WRITE) f |= PF_W; 188 return f; 189 } 190 191 /* ---- class-aware header serializers ---- 192 * 193 * The writer builds every header in the wide Ehdr64/Phdr64/Shdr64 194 * in-memory form, then serializes to the on-disk class. On ELFCLASS64 195 * the bytes are the wide struct verbatim (preserving the existing 196 * byte-exact RV64/x86_64/aa64 output). On ELFCLASS32 the native-width 197 * fields narrow to u32 and Phdr fields reorder (p_flags after sizes). 198 * The serialized record sizes are the wire constants in elf.h. */ 199 static size_t elf_ehdr_sz(int class32) { 200 return class32 ? ELF32_EHDR_SIZE : sizeof(Ehdr64); 201 } 202 static size_t elf_phdr_sz(int class32) { 203 return class32 ? ELF32_PHDR_SIZE : sizeof(Phdr64); 204 } 205 static size_t elf_shdr_sz(int class32) { 206 return class32 ? 
ELF32_SHDR_SIZE : sizeof(Shdr64); 207 } 208 209 static void write_ehdr(Writer* w, const Ehdr64* e, int class32) { 210 if (!class32) { 211 write_bytes(w, e, sizeof(*e)); 212 return; 213 } 214 Ehdr32 e32; 215 memset(&e32, 0, sizeof e32); 216 memcpy(e32.e_ident, e->e_ident, EI_NIDENT); 217 e32.e_type = e->e_type; 218 e32.e_machine = e->e_machine; 219 e32.e_version = e->e_version; 220 e32.e_entry = (u32)e->e_entry; 221 e32.e_phoff = (u32)e->e_phoff; 222 e32.e_shoff = (u32)e->e_shoff; 223 e32.e_flags = e->e_flags; 224 e32.e_ehsize = e->e_ehsize; 225 e32.e_phentsize = e->e_phentsize; 226 e32.e_phnum = e->e_phnum; 227 e32.e_shentsize = e->e_shentsize; 228 e32.e_shnum = e->e_shnum; 229 e32.e_shstrndx = e->e_shstrndx; 230 write_bytes(w, &e32, sizeof e32); 231 } 232 233 static void write_phdrs(Writer* w, const Phdr64* phdrs, u32 n, int class32) { 234 if (!class32) { 235 write_bytes(w, phdrs, sizeof(Phdr64) * n); 236 return; 237 } 238 for (u32 i = 0; i < n; ++i) { 239 const Phdr64* p = &phdrs[i]; 240 Phdr32 p32; 241 p32.p_type = p->p_type; 242 p32.p_offset = (u32)p->p_offset; 243 p32.p_vaddr = (u32)p->p_vaddr; 244 p32.p_paddr = (u32)p->p_paddr; 245 p32.p_filesz = (u32)p->p_filesz; 246 p32.p_memsz = (u32)p->p_memsz; 247 p32.p_flags = p->p_flags; 248 p32.p_align = (u32)p->p_align; 249 write_bytes(w, &p32, sizeof p32); 250 } 251 } 252 253 static void write_shdr(Writer* w, const Shdr64* s, int class32) { 254 if (!class32) { 255 write_bytes(w, s, sizeof(*s)); 256 return; 257 } 258 Shdr32 s32; 259 s32.sh_name = s->sh_name; 260 s32.sh_type = s->sh_type; 261 s32.sh_flags = (u32)s->sh_flags; 262 s32.sh_addr = (u32)s->sh_addr; 263 s32.sh_offset = (u32)s->sh_offset; 264 s32.sh_size = (u32)s->sh_size; 265 s32.sh_link = s->sh_link; 266 s32.sh_info = s->sh_info; 267 s32.sh_addralign = (u32)s->sh_addralign; 268 s32.sh_entsize = (u32)s->sh_entsize; 269 write_bytes(w, &s32, sizeof s32); 270 } 271 272 /* Scripted-layout post-pass: vaddrs are already final (the script 273 * pinned them via `. 
= …`), so only file offsets need to bump to 274 * leave room for ehdr+phdrs. Mirror of shift_image_addresses but 275 * touches only the file dimension. */ 276 static void shift_image_file_offsets(LinkImage* img, u64 delta) { 277 u32 i; 278 for (i = 0; i < img->nsegments; ++i) img->segments[i].file_offset += delta; 279 for (i = 0; i < img->nsections; ++i) img->sections[i].file_offset += delta; 280 for (i = 0; i < LinkRelocs_count(&img->relocs); ++i) 281 LinkRelocs_at(&img->relocs, i)->write_file_offset += delta; 282 } 283 284 static void shift_image_addresses(LinkImage* img, u64 delta) { 285 u32 i; 286 for (i = 0; i < img->nsegments; ++i) { 287 img->segments[i].file_offset += delta; 288 img->segments[i].vaddr += delta; 289 } 290 for (i = 0; i < img->nsections; ++i) { 291 /* File-only debug sections carry DWARF-section-relative bases, not 292 * load addresses — they live outside any PT_LOAD and must not shift 293 * with the loaded image. Their file_offset is assigned fresh by the 294 * trailing-offset pass below. */ 295 if (img->sections[i].file_only) continue; 296 img->sections[i].file_offset += delta; 297 img->sections[i].vaddr += delta; 298 } 299 for (i = 0; i < LinkRelocs_count(&img->relocs); ++i) { 300 LinkRelocs_at(&img->relocs, i)->write_file_offset += delta; 301 LinkRelocs_at(&img->relocs, i)->write_vaddr += delta; 302 } 303 for (i = 0; i < LinkSyms_count(&img->syms); ++i) { 304 LinkSymbol* s = LinkSyms_at(&img->syms, i); 305 if (s->kind == SK_ABS) continue; 306 if (!s->defined) continue; 307 /* A symbol resolved into a file-only debug section (e.g. the local 308 * SK_SECTION symbol a DWARF R_ABS32 targets) holds a sec-relative 309 * offset, not a load address — leave it unshifted so the apply pass 310 * writes the right DWARF offset. 
*/ 311 if (s->section_id != LINK_SEC_NONE && s->section_id <= img->nsections && 312 img->sections[s->section_id - 1].file_only) 313 continue; 314 s->vaddr += delta; 315 } 316 /* tls_vaddr lives in the same image-relative coordinate system as 317 * the segments it tracks, so it bumps with them. */ 318 if (img->tls_memsz) img->tls_vaddr += delta; 319 /* Dyn-link state mirrors a few segment / section vaddrs and pre- 320 * populated DynRela.r_offset values from layout_dyn. Bump them so 321 * the post-shift .rela.plt / .dynamic emit and apply_all_relocs see 322 * the right addresses (sym_plt_vaddr is read to redirect CALL26 323 * against imports). */ 324 if (img->dyn) { 325 LinkDynState* dyn = img->dyn; 326 if (dyn->plt_vaddr) dyn->plt_vaddr += delta; 327 if (dyn->got_plt_vaddr) dyn->got_plt_vaddr += delta; 328 if (dyn->dynamic_vaddr) dyn->dynamic_vaddr += delta; 329 if (dyn->sym_plt_vaddr) { 330 u32 j; 331 for (j = 0; j < dyn->sym_dynidx_size; ++j) 332 if (dyn->sym_plt_vaddr[j]) dyn->sym_plt_vaddr[j] += delta; 333 } 334 if (dyn->rela_plt) { 335 u32 j; 336 for (j = 0; j < dyn->nrela_plt; ++j) dyn->rela_plt[j].r_offset += delta; 337 } 338 /* rela_dyn is populated by apply_all_relocs (which runs after this 339 * shift), so its records are already in post-shift coordinates. */ 340 } 341 } 342 343 /* AArch64 ELF ABI: the per-thread TLS block starts at TP + 16 bytes 344 * (the TCB sits ahead of the TLS image). RISC-V psABI normally points 345 * tp at the start of the TLS image; the kit harness's start.c 346 * places a 16-byte TCB ahead of .tdata and biases tp accordingly, so 347 * the TPREL offset for both arches is (target - tls_vaddr) + 16. 
*/ 348 #define TLS_TCB_SIZE 16ull 349 350 static int reloc_is_tlsle(RelocKind k, int tls_variant_ii) { 351 if (k == R_TPOFF64 && !tls_variant_ii) return 1; 352 return k == R_AARCH64_TLSLE_ADD_TPREL_HI12 || 353 k == R_AARCH64_TLSLE_ADD_TPREL_LO12_NC || 354 k == R_RV_TPREL_HI20 || k == R_RV_TPREL_LO12_I || 355 k == R_RV_TPREL_LO12_S; 356 } 357 358 /* Variant-I TP bias: distance from the TLS image start to where `tp` points. 359 * - AArch64 (AAPCS64): tp points at a 16-byte TCB ahead of the image -> +16 360 * for both hosted and freestanding. 361 * - RISC-V: the psABI points tp at the *start* of the image, so hosted libcs 362 * (FreeBSD/Linux _init_tls) want +0; kit's own freestanding start.c places 363 * a 16-byte TCB ahead of .tdata and biases tp to match AArch64, so 364 * freestanding rv64/rv32 keep +16. */ 365 static u64 tls_tcb_bias(Compiler* c) { 366 /* The per-arch freestanding bias lives in the ELF arch descriptor 367 * (ObjElfArchOps.tls_tp_bias): 16 for AArch64/RISC-V variant-I, 0 for 368 * x86_64 variant-II. RISC-V is the only arch whose *hosted* bias 369 * differs from its freestanding bias (the psABI points tp at the image 370 * start, so hosted libcs want +0); that split stays here. */ 371 const ObjFormatImpl* fmt = obj_format_lookup(KIT_OBJ_ELF); 372 const ObjElfArchOps* arch = 373 (fmt && fmt->elf_arch) ? fmt->elf_arch(c->target.arch) : NULL; 374 u64 bias = arch ? (u64)arch->tls_tp_bias : TLS_TCB_SIZE; 375 if ((c->target.arch == KIT_ARCH_RV64 || c->target.arch == KIT_ARCH_RV32) && 376 c->target.os != KIT_OS_FREESTANDING) 377 return 0ull; 378 return bias; 379 } 380 381 /* x86_64 SysV ABI: TLS variant II — the per-thread TLS image sits at 382 * *negative* offsets from %fs (which points at the TCB). start.c 383 * lays out [tdata | tbss | TCB] and arch_prctl(ARCH_SET_FS, &TCB), so 384 * a symbol at offset X within the TLS image is at fs-relative offset 385 * (X - tls_memsz). 
The two ELF reloc kinds R_X86_64_TPOFF32/_TPOFF64 386 * encode that signed offset directly at the reloc site (no TCB bias — 387 * variant II's TCB sits *after* the image, so TPOFF is negative). */ 388 static int reloc_is_x64_tlsle(RelocKind k, int tls_variant_ii) { 389 if (k == R_TPOFF64 && tls_variant_ii) return 1; 390 return k == R_X64_TPOFF32; 391 } 392 393 static int reloc_is_abs(RelocKind k) { return k == R_ABS32 || k == R_ABS64; } 394 395 /* Function-call relocs that may route through the PLT when the target 396 * is imported. aarch64 CALL26/JUMP26, x86_64 PLT32, and risc-v CALL_PLT 397 * (which kit maps to R_PLT32) all carry the "call this address; if 398 * it's not resolvable here use the PLT trampoline" contract; the apply 399 * pass overwrites S with the PLT entry vaddr in that case. */ 400 static int reloc_is_branch26(RelocKind k) { 401 return k == R_AARCH64_CALL26 || k == R_AARCH64_JUMP26 || k == R_X64_PLT32 || 402 k == R_PLT32 || k == R_RV_CALL; 403 } 404 405 static void emit_dyn_record(LinkImage* img, u64 site_vaddr, u32 reloc_type, 406 u32 dynidx, i64 addend) { 407 LinkDynState* dyn = img->dyn; 408 if (!dyn || !dyn->rela_dyn) return; 409 if (dyn->nrela_dyn >= dyn->cap_rela_dyn) { 410 compiler_panic(img->c, SRCLOC_NONE, 411 "link: too many .rela.dyn records (%u >= %u); raise " 412 "cap_rela_dyn in layout_dyn", 413 dyn->nrela_dyn, dyn->cap_rela_dyn); 414 } 415 DynRela* r = &dyn->rela_dyn[dyn->nrela_dyn++]; 416 r->r_offset = site_vaddr; 417 r->r_info = ELF64_R_INFO((u64)dynidx, reloc_type); 418 r->r_addend = addend; 419 } 420 421 static const ObjElfArchOps* elf_arch_or_panic(Compiler* c, const char* where) { 422 const ObjFormatImpl* fmt = obj_format_lookup(KIT_OBJ_ELF); 423 const ObjElfArchOps* arch = 424 fmt && fmt->elf_arch ? 
fmt->elf_arch(c->target.arch) : NULL; 425 if (!arch) 426 compiler_panic(c, SRCLOC_NONE, "%.*s: no ELF arch descriptor", 427 SLICE_ARG(slice_from_cstr(where))); 428 return arch; 429 } 430 431 static void emit_relative_record(LinkImage* img, u64 site_vaddr, u64 addend) { 432 const ObjElfArchOps* arch = elf_arch_or_panic(img->c, "link"); 433 emit_dyn_record(img, site_vaddr, arch->r_relative, 0, (i64)addend); 434 } 435 436 static void emit_globdat_record(LinkImage* img, u64 site_vaddr, u32 dynidx, 437 i64 addend) { 438 const ObjElfArchOps* arch = elf_arch_or_panic(img->c, "link"); 439 emit_dyn_record(img, site_vaddr, arch->r_glob_dat, dynidx, addend); 440 } 441 442 /* RISC-V PCREL_LO12_* references the address of an AUIPC carrying the 443 * paired PCREL_HI20. Given the AUIPC's site vaddr (post-shift), find 444 * its PCREL_HI20 reloc and compute the displacement that AUIPC 445 * encoded — the LO12 then takes the low 12 bits of the same disp. 446 * 447 * Linear scan over img->relocs is fine in practice: kernel images and 448 * cg cases produce at most a few hundred relocs total. */ 449 static i64 rv_pcrel_lo12_disp(LinkImage* img, u64 auipc_vaddr, u64 img_base) { 450 u32 i; 451 for (i = 0; i < LinkRelocs_count(&img->relocs); ++i) { 452 const LinkRelocApply* hi = LinkRelocs_at(&img->relocs, i); 453 const LinkSymbol* hi_tgt; 454 u64 hi_S, hi_P; 455 if (hi->kind != R_RV_PCREL_HI20 && hi->kind != R_RV_GOT_HI20 && 456 hi->kind != R_RV_TLS_GOT_HI20) 457 continue; 458 if (hi->write_vaddr + img_base != auipc_vaddr) continue; 459 hi_tgt = LinkSyms_at(&img->syms, hi->target - 1); 460 hi_S = (hi_tgt->kind == SK_ABS) ? 
hi_tgt->vaddr : hi_tgt->vaddr + img_base; 461 hi_P = hi->write_vaddr + img_base; 462 return (i64)hi_S + hi->addend - (i64)hi_P; 463 } 464 compiler_panic(img->c, SRCLOC_NONE, 465 "link: PCREL_LO12 at 0x%llx has no paired PCREL_HI20", 466 (unsigned long long)auipc_vaddr); 467 return 0; 468 } 469 470 static void apply_all_relocs(LinkImage* img, u64 img_base) { 471 u32 i; 472 int pie = img->pie; 473 int tls_vi = (int)link_arch_desc_for(img->c)->tls_variant_ii; 474 for (i = 0; i < LinkRelocs_count(&img->relocs); ++i) { 475 LinkRelocApply* r = LinkRelocs_at(&img->relocs, i); 476 const LinkSymbol* tgt = LinkSyms_at(&img->syms, r->target - 1); 477 const LinkSection* sec = &img->sections[r->link_section_id - 1]; 478 const LinkSegment* seg; 479 u64 S, P; 480 u8* P_bytes; 481 482 /* File-only debug section: not loaded, so no dynamic reloc and no 483 * img_base. Write the final value straight into the registry buffer 484 * at the reloc offset. A SK_SECTION target resolves to its DWARF 485 * sec-relative base (tgt->vaddr, kept unshifted); a code/data symbol 486 * resolves to its link-time vaddr (img_base + vaddr) for low_pc / 487 * set_address. Mirrors the JIT debug view (link_jit.c). */ 488 if (sec->segment_id == LINK_SEG_NONE) { 489 u8* dbg = link_fileonly_bytes(img, r->link_section_id); 490 if (!dbg) continue; 491 if (tgt->kind == SK_SECTION || tgt->kind == SK_ABS) 492 S = tgt->vaddr; 493 else 494 S = img_base + tgt->vaddr; 495 link_reloc_apply(img->c, r->kind, dbg + r->offset, S, r->addend, 0); 496 continue; 497 } 498 seg = &img->segments[sec->segment_id - 1]; 499 if (reloc_is_tlsle(r->kind, tls_vi) || reloc_is_x64_tlsle(r->kind, tls_vi)) 500 /* Both the direct local-exec relocs and the internal R_TPOFF64 used to 501 * fill a TLS-IE GOT slot resolve to a tp-relative offset within THIS 502 * image's TLS block — only meaningful for a thread-local defined here. 
503 * Reject an imported or non-thread-local target rather than emit a 504 * bogus offset (kit has no initial-exec/global-dynamic fallback). */ 505 link_require_local_tls(img->c, tgt); 506 if (reloc_is_tlsle(r->kind, tls_vi)) { 507 /* S is the target's TP-relative offset: distance from the TLS image 508 * start plus the arch/OS TCB bias (see tls_tcb_bias). Both vaddrs are 509 * in the same (post-shift, image-relative) coordinate system, so 510 * img_base cancels out. */ 511 S = (tgt->vaddr - img->tls_vaddr) + tls_tcb_bias(img->c); 512 } else if (reloc_is_x64_tlsle(r->kind, tls_vi)) { 513 /* x86_64 variant II: TP points just past the TLS image, so a symbol at 514 * offset X within the image is at TP-relative offset (X - tls_size). 515 * The runtime (FreeBSD/glibc _init_tls) allocates the block rounded up 516 * to the TLS alignment, so tls_size must be round_up(memsz, align) -- 517 * using the raw memsz is off by the rounding remainder whenever memsz 518 * is not a multiple of align, handing back a garbage TLS address (e.g. 519 * jemalloc's tsd, faulting non-canonical). Cast through i64/u64 so the 520 * apply writes the full 32- or 64-bit signed value. */ 521 u64 a = img->tls_align ? img->tls_align : 1u; 522 u64 tls_size = (img->tls_memsz + a - 1u) & ~(a - 1u); 523 i64 off = (i64)(tgt->vaddr - img->tls_vaddr) - (i64)tls_size; 524 S = (u64)off; 525 } else if (r->kind == R_RV_PCREL_LO12_I || r->kind == R_RV_PCREL_LO12_S) { 526 /* PCREL_LO12: rewrite S so that link_reloc_apply's existing 527 * LO12_I/LO12_S encoder produces the right low 12 bits of the 528 * paired AUIPC's PC-relative displacement. The reloc's own 529 * addend is unused; signed lo12 = disp & 0xfff. */ 530 P = r->write_vaddr + img_base; 531 P_bytes = img->segment_bytes[seg->id - 1] + 532 (size_t)(r->write_file_offset - seg->file_offset); 533 { 534 i64 disp = rv_pcrel_lo12_disp(img, tgt->vaddr + img_base, img_base); 535 RelocKind alias = 536 (r->kind == R_RV_PCREL_LO12_I) ? 
R_RV_LO12_I : R_RV_LO12_S; 537 link_reloc_apply(img->c, alias, P_bytes, (u64)disp, 0, P); 538 } 539 continue; 540 } else { 541 S = tgt->vaddr + img_base; 542 if (tgt->kind == SK_ABS) S = tgt->vaddr; 543 } 544 P = r->write_vaddr + img_base; 545 P_bytes = img->segment_bytes[seg->id - 1] + 546 (size_t)(r->write_file_offset - seg->file_offset); 547 548 /* Imported target: redirect / rewrite per reloc kind (Phase 5). 549 * 550 * - CALL26 / JUMP26: target the import's PLT entry. The PLT stub 551 * reads .got.plt[3+i], which the loader pre-fills via JUMP_SLOT 552 * (.rela.plt). S becomes the PLT-entry vaddr; the existing 553 * apply path computes the disp from there. 554 * - R_ABS{32,64}: leave the patch site at zero and emit a 555 * GLOB_DAT record so the loader writes the resolved address 556 * into the site at load time. This covers both 557 * layout_got-emitted .got slot fills (target = import) and any 558 * direct absolute reference in user data (e.g. a function 559 * pointer initializer). 560 * - GOT-page / LO12-NC against an import: emit_reloc_records has 561 * already redirected the target from the import to the 562 * synthetic .got slot symbol, so the apply path here sees the 563 * slot, not the import — nothing special needed; the slot's 564 * own R_ABS64 fill against the (vaddr=0) import will trip the 565 * abs-import branch above and emit GLOB_DAT. 566 * 567 * Anything else against an imported symbol (e.g. PREL19 / ADR 568 * etc.) is rare in real binaries and would need its own 569 * dynamic-reloc kind; panic loudly so a future test that needs 570 * it announces itself. */ 571 if (tgt->imported) { 572 /* `tgt` may be a per-input shadow LinkSymbol — resolve_undefs 573 * stamps `imported = 1` on every undef matched by name, but 574 * collect_imports only stashes plt_vaddr / dynidx on the 575 * canonical entry registered in img->globals. Resolve to the 576 * canonical id before indexing the dyn-state arrays. 
*/ 577 LinkSymId canon_id = tgt->id; 578 if (tgt->name != 0) { 579 LinkSymId hit = symhash_get(&img->globals, tgt->name); 580 if (hit != LINK_SYM_NONE) canon_id = hit; 581 } 582 u32 dynidx = (img->dyn && canon_id < img->dyn->sym_dynidx_size) 583 ? img->dyn->sym_dynidx[canon_id] 584 : 0u; 585 if (reloc_is_branch26(r->kind)) { 586 u64 plt_v = (img->dyn && canon_id < img->dyn->sym_dynidx_size) 587 ? img->dyn->sym_plt_vaddr[canon_id] 588 : 0u; 589 if (plt_v == 0) 590 compiler_panic(img->c, SRCLOC_NONE, 591 "link: imported sym has no PLT entry (CALL26)"); 592 S = plt_v + img_base; 593 link_reloc_apply(img->c, r->kind, P_bytes, S, r->addend, P); 594 continue; 595 } 596 if (reloc_is_abs(r->kind)) { 597 if (dynidx == 0) 598 compiler_panic(img->c, SRCLOC_NONE, 599 "link: imported sym has no .dynsym entry"); 600 emit_globdat_record(img, r->write_vaddr, dynidx, r->addend); 601 /* Site bytes are irrelevant: the loader's GLOB_DAT writes 602 * (sym_value + r_addend) into r_offset before user code runs, 603 * overwriting whatever's there. Leaving the existing zero 604 * fill saves a write. */ 605 continue; 606 } 607 { 608 Slice nm_s = 609 tgt->name ? pool_slice(img->c->global, tgt->name) : SLICE_NULL; 610 const char* nm = nm_s.s ? nm_s.s : ""; 611 size_t nl = nm_s.len; 612 compiler_panic( 613 img->c, SRCLOC_NONE, 614 "link: unhandled reloc kind %u against imported symbol '%.*s'", 615 (unsigned)r->kind, (int)nl, nm); 616 } 617 } 618 619 /* PIE: an absolute reloc against a defined non-imported symbol 620 * stays image-relative in the file (the loader adds load-base via 621 * a synthesized R_AARCH64_RELATIVE). img_base is 0 for PIE so 622 * S above is already image-relative — the apply writes that into 623 * the site, and the RELATIVE record tells the loader to add 624 * load_base on top. */ 625 if (pie && reloc_is_abs(r->kind) && tgt->defined && tgt->kind != SK_ABS) { 626 /* RELA RELATIVE ignores the in-place site value: the loader writes 627 * (load_base + r_addend) into the slot. 
So the addend must be the 628 * full image-relative target — symbol vaddr plus the reloc's own 629 * addend — not just the symbol vaddr. Dropping r->addend collapses 630 * every entry of an addend-bearing table (jump tables, labeladdr 631 * arrays, &sym+off initializers) onto the symbol base. */ 632 emit_relative_record(img, r->write_vaddr, tgt->vaddr + (u64)r->addend); 633 } 634 link_reloc_apply(img->c, r->kind, P_bytes, S, r->addend, P); 635 } 636 } 637 638 /* The build-id payload is a format-agnostic image identity hash — 639 * see link_image_id_compute in link_image_id.c. Mach-O wraps the 640 * same bytes in LC_UUID; ELF wraps them in a .note.gnu.build-id. */ 641 642 /* ---- string-table builder ---- */ 643 644 typedef struct StrBuilder { 645 Heap* heap; 646 u8* data; 647 u32 len; 648 u32 cap; 649 } StrBuilder; 650 651 static void strb_init(StrBuilder* s, Heap* h, u32 reserve) { 652 s->heap = h; 653 s->cap = reserve > 16u ? reserve : 16u; 654 s->data = (u8*)h->alloc(h, s->cap, 1); 655 if (!s->data) s->cap = 0; 656 s->len = 0; 657 if (s->cap) { 658 s->data[0] = 0; 659 s->len = 1; 660 } /* leading NUL */ 661 } 662 663 static void strb_fini(StrBuilder* s) { 664 if (s->data) s->heap->free(s->heap, s->data, s->cap); 665 s->data = NULL; 666 s->cap = s->len = 0; 667 } 668 669 static void strb_grow(StrBuilder* s, u32 need) { 670 (void)VEC_GROW(s->heap, s->data, s->cap, need); 671 } 672 673 static u32 strb_add(StrBuilder* s, const char* str, u32 slen) { 674 u32 off; 675 if (slen == 0) return 0; 676 /* Linear dedup: scan existing data for a matching NUL-terminated 677 * substring. Strtabs are small enough to make this acceptable. 
*/ 678 if (s->len > slen) { 679 u32 i; 680 for (i = 0; i + slen < s->len; ++i) { 681 if (s->data[i + slen] == 0 && memcmp(s->data + i, str, slen) == 0) 682 return i; 683 } 684 } 685 off = s->len; 686 strb_grow(s, s->len + slen + 1u); 687 memcpy(s->data + s->len, str, slen); 688 s->data[s->len + slen] = 0; 689 s->len += slen + 1u; 690 return off; 691 } 692 693 static u32 strb_add_cstr(StrBuilder* s, const char* str) { 694 return strb_add(s, str, (u32)slice_from_cstr(str).len); 695 } 696 697 /* ---- symtab builder ---- */ 698 699 typedef struct SymRec { 700 u32 st_name; 701 u8 st_info; 702 u8 st_other; 703 u16 st_shndx; 704 u64 st_value; 705 u64 st_size; 706 } SymRec; 707 708 static u8 sym_kind_to_st_type(u8 kind) { 709 /* Shared elf.h table maps SK_COMMON -> STT_OBJECT (the on-disk shape 710 * for tentative definitions). The linker, however, writes COMMON as 711 * STT_NOTYPE; override that one entry locally. */ 712 if (kind == SK_COMMON) return STT_NOTYPE; 713 return elf_st_type(kind); 714 } 715 716 static u8 sym_bind_to_st_bind(u8 bind) { return elf_st_bind(bind); } 717 718 /* Produces one symbol record on the wire from a SymRec. Elf32_Sym (16B) 719 * REORDERS fields vs Elf64_Sym (24B): st_value/st_size come BEFORE 720 * st_info/st_other/st_shndx, so select the byte layout by class32. 
*/ 721 static void write_sym_rec(Writer* w, const SymRec* r, int class32) { 722 if (class32) { 723 u8 buf[ELF32_SYM_SIZE]; 724 u32 i; 725 buf[0] = (u8)(r->st_name); 726 buf[1] = (u8)(r->st_name >> 8); 727 buf[2] = (u8)(r->st_name >> 16); 728 buf[3] = (u8)(r->st_name >> 24); 729 for (i = 0; i < 4; ++i) buf[4 + i] = (u8)(r->st_value >> (i * 8)); 730 for (i = 0; i < 4; ++i) buf[8 + i] = (u8)(r->st_size >> (i * 8)); 731 buf[12] = r->st_info; 732 buf[13] = r->st_other; 733 buf[14] = (u8)(r->st_shndx); 734 buf[15] = (u8)(r->st_shndx >> 8); 735 write_bytes(w, buf, sizeof buf); 736 return; 737 } 738 u8 buf[ELF64_SYM_SIZE]; 739 buf[0] = (u8)(r->st_name); 740 buf[1] = (u8)(r->st_name >> 8); 741 buf[2] = (u8)(r->st_name >> 16); 742 buf[3] = (u8)(r->st_name >> 24); 743 buf[4] = r->st_info; 744 buf[5] = r->st_other; 745 buf[6] = (u8)(r->st_shndx); 746 buf[7] = (u8)(r->st_shndx >> 8); 747 { 748 u32 i; 749 for (i = 0; i < 8; ++i) buf[8 + i] = (u8)(r->st_value >> (i * 8)); 750 for (i = 0; i < 8; ++i) buf[16 + i] = (u8)(r->st_size >> (i * 8)); 751 } 752 write_bytes(w, buf, sizeof buf); 753 } 754 755 static void refresh_dynsym_exports(LinkImage* img, u64 img_base) { 756 LinkDynState* dyn; 757 const LinkSection* sec_dynsym; 758 const LinkSegment* seg; 759 u8* bytes; 760 u32 i; 761 if (!img || !img->dyn) return; 762 dyn = img->dyn; 763 if (!dyn->sym_dynidx || dyn->sec_dynsym == LINK_SEC_NONE) return; 764 sec_dynsym = &img->sections[dyn->sec_dynsym - 1]; 765 seg = &img->segments[sec_dynsym->segment_id - 1]; 766 bytes = img->segment_bytes[seg->id - 1] + 767 (size_t)(sec_dynsym->file_offset - seg->file_offset); 768 769 for (i = 0; i < LinkSyms_count(&img->syms); ++i) { 770 const LinkSymbol* s = LinkSyms_at(&img->syms, i); 771 u32 dynidx; 772 DynSymRec* r; 773 if (!s->defined || s->imported) continue; 774 if (s->id >= dyn->sym_dynidx_size) continue; 775 dynidx = dyn->sym_dynidx[s->id]; 776 if (dynidx == 0 || dynidx >= dyn->ndynsym) continue; 777 778 r = &dyn->dynsym[dynidx]; 779 
r->st_value = (s->kind == SK_ABS) ? s->vaddr : img_base + s->vaddr; 780 r->st_size = s->size; 781 r->st_shndx = (s->kind == SK_ABS) ? SHN_ABS : 1u; 782 } 783 784 for (i = 0; i < dyn->ndynsym; ++i) { 785 u8* p = bytes + (u64)i * ELF64_SYM_SIZE; 786 const DynSymRec* r = &dyn->dynsym[i]; 787 wr_u32_le(p + 0, r->st_name); 788 p[4] = r->st_info; 789 p[5] = r->st_other; 790 wr_u16_le(p + 6, r->st_shndx); 791 wr_u64_le(p + 8, r->st_value); 792 wr_u64_le(p + 16, r->st_size); 793 } 794 } 795 796 /* ---- section header layout ---- * 797 * 798 * Per-segment cuts: each kept image segment contributes 1 .text/.rodata 799 * shdr for its file portion, plus a separate .bss shdr for the trailing 800 * NOBITS portion of an RW segment (memsz > filesz). The headers PT_LOAD 801 * contributes a single .note.gnu.build-id shdr. Trailing non-alloc 802 * shdrs: .symtab .strtab .shstrtab (always 3). */ 803 804 typedef struct OutShdr { 805 u32 shdr_idx; /* 1-based; assigned during planning */ 806 LinkSegmentId segment_id; 807 Sym name; 808 u16 sem; /* SecSem from source LinkSection */ 809 u32 flags; /* SF_* from source LinkSection */ 810 u32 align; 811 u64 vaddr; 812 u64 file_offset; 813 u64 size; 814 int is_nobits; 815 int is_fileonly; /* non-allocatable .debug_* section (no PT_LOAD) */ 816 } OutShdr; 817 818 static u16 sym_shndx_for(const LinkSymbol* s, const OutShdr* outshdrs, 819 u32 noutshdr) { 820 if (!s->defined) return SHN_UNDEF; 821 if (s->kind == SK_ABS) return SHN_ABS; 822 if (s->kind == SK_FILE) return SHN_ABS; 823 if (s->kind == SK_COMMON) return SHN_COMMON; 824 /* Find an output shdr whose [vaddr, vaddr+size) covers s->vaddr. 825 * Boundary symbols match at the upper edge. */ 826 { 827 u32 i; 828 for (i = 0; i < noutshdr; ++i) { 829 u64 lo, hi; 830 /* File-only debug shdrs sit at sec-relative vaddrs (0-based) and 831 * never contain a loaded symbol — skip them so a low-vaddr code 832 * symbol (e.g. 
PIE, img_base 0) isn't mis-attributed to a 833 * .debug_* section whose [0,size) range happens to overlap. */ 834 if (outshdrs[i].is_fileonly) continue; 835 lo = outshdrs[i].vaddr; 836 hi = lo + outshdrs[i].size; 837 if (s->vaddr >= lo && s->vaddr <= hi) return (u16)outshdrs[i].shdr_idx; 838 } 839 } 840 return SHN_ABS; 841 } 842 843 static u32 sec_sem_to_sht(u16 sem) { 844 switch (sem) { 845 case SSEM_PROGBITS: 846 return SHT_PROGBITS; 847 case SSEM_NOBITS: 848 return SHT_NOBITS; 849 case SSEM_NOTE: 850 return SHT_NOTE; 851 case SSEM_INIT_ARRAY: 852 return SHT_INIT_ARRAY; 853 case SSEM_FINI_ARRAY: 854 return SHT_FINI_ARRAY; 855 case SSEM_PREINIT_ARRAY: 856 return SHT_PREINIT_ARRAY; 857 default: 858 return SHT_PROGBITS; 859 } 860 } 861 862 static u64 sec_flags_to_shf(u32 flags) { 863 u64 r = 0; 864 if (flags & SF_ALLOC) r |= SHF_ALLOC; 865 if (flags & SF_EXEC) r |= SHF_EXECINSTR; 866 if (flags & SF_WRITE) r |= SHF_WRITE; 867 if (flags & SF_TLS) r |= SHF_TLS; 868 if (flags & SF_MERGE) r |= SHF_MERGE; 869 if (flags & SF_STRINGS) r |= SHF_STRINGS; 870 if (flags & SF_LINK_ORDER) r |= SHF_LINK_ORDER; 871 if (flags & SF_RETAIN) r |= SHF_GNU_RETAIN; 872 return r; 873 } 874 875 /* Output-shdr sort order: loaded sections first, by (segment_id, vaddr); 876 * then file-only debug sections after all segments, grouped by name (so 877 * same-name multi-input contributions are adjacent and merge into one 878 * output section) and ordered by their sec-relative base. Returns 1 if 879 * `a` should sort before `b`. 
*/ 880 static int shdr_sort_less(const LinkSection* a, const LinkSection* b) { 881 if (a->file_only != b->file_only) return b->file_only; /* loaded first */ 882 if (a->file_only) { 883 if (a->name != b->name) return a->name < b->name; 884 return a->vaddr < b->vaddr; 885 } 886 if (a->segment_id != b->segment_id) return a->segment_id < b->segment_id; 887 return a->vaddr < b->vaddr; 888 } 889 890 void link_emit_elf(LinkImage* img, Writer* w) { 891 Heap* heap = img->heap; 892 Compiler* c = img->c; 893 const ObjElfArchOps* arch = elf_arch_or_panic(c, "link_emit_elf"); 894 u32 e_machine = arch->e_machine; 895 /* class32: ELFCLASS32 (RV32) output, derived from target ptr width. 896 * ptr_size must be 4 or 8 (every supported arch sets one of these). */ 897 int class32 = (c->target.ptr_size == 4); 898 /* RV32 is static-only in v1: dynamic linking (link_dyn.c) and the 899 * PIE re-serialize block below remain ELFCLASS64. Gate here so the 900 * dynamic path is never reached for a 32-bit image. */ 901 if (class32 && (img->pie || img->dyn)) 902 compiler_panic(c, SRCLOC_NONE, 903 "rv32: dynamic/PIE linking unsupported; static only"); 904 if (img->entry_sym == LINK_SYM_NONE) 905 compiler_panic(c, SRCLOC_NONE, "link_emit_elf: no resolved entry symbol"); 906 /* IFUNC trampolines: layout_iplt builds the .iplt stubs + .igot.plt 907 * slots and (when emit_static_exe was set) synthesizes a 908 * .init_array entry that calls __kit_ifunc_init at startup. The 909 * rt member walks .iplt.pairs and fills each slot before user code 910 * runs. The ELF writer doesn't have to do anything special here. */ 911 912 /* PIE / ET_DYN: img_base is 0 (the loader picks the runtime base; 913 * absolute relocs against internal symbols are emitted as 914 * R_AARCH64_RELATIVE in .rela.dyn). Otherwise classic ET_EXEC at 915 * IMAGE_BASE_STATIC. 916 * 917 * Scripted: the linker script pinned absolute vaddrs (e.g. 918 * `. 
= 0x40080000`); img_base stays 0 and the headers PT_LOAD / 919 * build-id note are dropped — the script's image is consumed by a 920 * raw loader (qemu -kernel, a bootloader) that doesn't need a 921 * self-describing memory image. */ 922 int pie = img->pie; 923 int scripted = img->scripted; 924 /* Static ET_EXEC base: a `kit ld -Ttext ADDR` override (e.g. 0x80000000 for a 925 * qemu `virt` image) wins over IMAGE_BASE_STATIC; PIE/scripted keep base 0. 926 */ 927 u64 img_base = (pie || scripted) ? 0ULL 928 : img->text_base_set ? img->text_base 929 : IMAGE_BASE_STATIC; 930 931 /* ---- plan number of program headers ---- 932 * 933 * 1 headers PT_LOAD + nsegments PT_LOAD + 1 PT_NOTE (build-id) 934 * + 1 PT_TLS when this image carries any TLS sections. 935 * + 4 dyn phdrs (PT_PHDR / PT_INTERP / PT_DYNAMIC / PT_GNU_STACK) when PIE 936 * also has real dynamic-link state. 937 * 938 * Scripted images skip the headers PT_LOAD and PT_NOTE: phdrs are 939 * just the per-segment PT_LOADs. */ 940 u32 has_tls = img->tls_memsz ? 1u : 0u; 941 u32 nphdr_section_notes = 0; 942 { 943 u32 i; 944 for (i = 0; i < img->nsections; ++i) { 945 const LinkSection* s = &img->sections[i]; 946 if (!s->file_only && s->sem == SSEM_NOTE && s->size) { 947 nphdr_section_notes++; 948 } 949 } 950 } 951 u32 nphdr_extra_dyn = (pie && img->dyn) ? 4u : 0u; 952 u32 nphdr_headers = scripted ? 0u : 1u; 953 u32 nphdr_buildid = scripted ? 0u : 1u; 954 u32 nphdr_total = nphdr_headers + img->nsegments + nphdr_buildid + 955 nphdr_section_notes + has_tls + nphdr_extra_dyn; 956 u64 build_id_note_bytes = scripted ? 0ULL : BUILD_ID_NOTE_BYTES; 957 /* Class-selected on-disk header sizes (ELF32: 52/32/40, ELF64: 64/56/64). 
*/ 958 u64 ehdr_sz = elf_ehdr_sz(class32); 959 u64 phent_sz = elf_phdr_sz(class32); 960 u64 headers_size = 961 ehdr_sz + (u64)nphdr_total * phent_sz + build_id_note_bytes; 962 u64 headers_load = ALIGN_UP(headers_size, (u64)PAGE_SIZE); 963 964 /* The build-id note lives inside the headers PT_LOAD at this offset. */ 965 u64 build_id_off = ehdr_sz + (u64)nphdr_total * phent_sz; 966 u64 build_id_addr = img_base + build_id_off; 967 968 /* ---- shift image addresses, apply relocations ---- 969 * 970 * Must happen before segshdrs/symtab construction so they observe 971 * post-shift vaddrs (the values that will land in the file). */ 972 if (scripted) 973 shift_image_file_offsets(img, headers_load); 974 else 975 shift_image_addresses(img, headers_load); 976 apply_all_relocs(img, img_base); 977 978 /* ---- write .dynamic body + re-serialize .rela.dyn (PIE only) ---- 979 * 980 * Both depend on post-shift vaddrs. .dynamic embeds image-relative 981 * pointers to .dynsym/.dynstr/.gnu.hash/.rela.dyn/.rela.plt/.got.plt 982 * (the loader adds load_base at runtime). .rela.dyn picked up 983 * RELATIVE records during apply_all_relocs; rewrite the section 984 * bytes to include them. */ 985 if (pie && img->dyn) { 986 LinkDynState* dyn = img->dyn; 987 const LinkSection* sec_dynamic = &img->sections[dyn->sec_dynamic - 1]; 988 const LinkSection* sec_dynsym = &img->sections[dyn->sec_dynsym - 1]; 989 const LinkSection* sec_dynstr = &img->sections[dyn->sec_dynstr - 1]; 990 const LinkSection* sec_gnuhash = &img->sections[dyn->sec_gnu_hash - 1]; 991 const LinkSection* sec_reladyn = &img->sections[dyn->sec_rela_dyn - 1]; 992 const LinkSection* sec_relaplt = (dyn->sec_rela_plt != LINK_SEC_NONE) 993 ? &img->sections[dyn->sec_rela_plt - 1] 994 : NULL; 995 const LinkSection* sec_gotplt = (dyn->sec_got_plt != LINK_SEC_NONE) 996 ? &img->sections[dyn->sec_got_plt - 1] 997 : NULL; 998 const LinkSection* sec_versym = 999 (dyn->sec_gnu_version != LINK_SEC_NONE) 1000 ? 
&img->sections[dyn->sec_gnu_version - 1] 1001 : NULL; 1002 const LinkSection* sec_verneed = 1003 (dyn->sec_gnu_version_r != LINK_SEC_NONE) 1004 ? &img->sections[dyn->sec_gnu_version_r - 1] 1005 : NULL; 1006 const LinkSegment* dseg = &img->segments[sec_dynamic->segment_id - 1]; 1007 u8* dyn_bytes_at = img->segment_bytes[dseg->id - 1] + 1008 (size_t)(sec_dynamic->file_offset - dseg->file_offset); 1009 1010 refresh_dynsym_exports(img, img_base); 1011 1012 /* Build DT_* entries in order. Layout matches count_dynamic_entries. */ 1013 u32 written = 0; 1014 u8* p = dyn_bytes_at; 1015 #define DT_PUT(TAG, VAL) \ 1016 do { \ 1017 wr_u64_le(p, (u64)(TAG)); \ 1018 wr_u64_le(p + 8, (u64)(VAL)); \ 1019 p += 16; \ 1020 written++; \ 1021 } while (0) 1022 1023 /* DT_NEEDED entries — d_un.d_val is the offset of the soname 1024 * within .dynstr. The dynstr was built in layout_dyn with 1025 * dedup; look each soname up by name to compute its offset. */ 1026 { 1027 u32 ni; 1028 for (ni = 0; ni < dyn->nneeded; ++ni) { 1029 Sym soname = dyn->needed[ni]; 1030 Slice nm_s = pool_slice(c->global, soname); 1031 const char* nm = nm_s.s; 1032 size_t namelen = nm_s.len; 1033 /* Linear search dynstr for this name. */ 1034 u32 off = 0; 1035 if (nm && namelen) { 1036 u32 si; 1037 for (si = 0; si + namelen < dyn->dynstr_len; ++si) { 1038 if (dyn->dynstr[si + namelen] == 0 && 1039 memcmp(dyn->dynstr + si, nm, namelen) == 0) { 1040 off = si; 1041 break; 1042 } 1043 } 1044 /* Should always be present — collect_needed populated dynstr 1045 * via build_dynsym? Actually build_dynsym only added import 1046 * names. We need to also add NEEDED sonames. */ 1047 if (off == 0) { 1048 /* Fallback: append to dynstr. Phase 4 layout_dyn pre-sized 1049 * .dynstr exactly to its current content; appending here 1050 * would overflow the section. Instead, panic with a clear 1051 * message — the soname was supposed to be added during 1052 * layout. 
*/ 1053 compiler_panic(c, SRCLOC_NONE, 1054 "link_emit_elf: DT_NEEDED soname missing from " 1055 ".dynstr"); 1056 } 1057 } 1058 DT_PUT(DT_NEEDED, off); 1059 } 1060 } 1061 1062 DT_PUT(DT_STRTAB, img_base + sec_dynstr->vaddr); 1063 DT_PUT(DT_STRSZ, sec_dynstr->size); 1064 DT_PUT(DT_SYMTAB, img_base + sec_dynsym->vaddr); 1065 DT_PUT(DT_SYMENT, 24); 1066 DT_PUT(DT_GNU_HASH, img_base + sec_gnuhash->vaddr); 1067 /* Symbol-version tables (only when an import bound a versioned export). */ 1068 if (dyn->nverneed && sec_versym && sec_verneed) { 1069 DT_PUT(DT_VERSYM, img_base + sec_versym->vaddr); 1070 DT_PUT(DT_VERNEED, img_base + sec_verneed->vaddr); 1071 DT_PUT(DT_VERNEEDNUM, dyn->nverneed); 1072 } 1073 /* DT_PLT* / DT_JMPREL only make sense when there's a PLT. Emitting 1074 * them with size=0 / vaddr=0 (or pointing past the end of any 1075 * PT_LOAD) trips llvm-readelf's "address not in any segment" check 1076 * and confuses some loaders' DT walk. */ 1077 if (dyn->nrela_plt) { 1078 DT_PUT(DT_PLTGOT, sec_gotplt ? (img_base + sec_gotplt->vaddr) : 0); 1079 DT_PUT(DT_PLTRELSZ, sec_relaplt ? sec_relaplt->size : 0); 1080 DT_PUT(DT_PLTREL, DT_RELA); 1081 DT_PUT(DT_JMPREL, sec_relaplt ? (img_base + sec_relaplt->vaddr) : 0); 1082 } 1083 if (dyn->cap_rela_dyn) { 1084 DT_PUT(DT_RELA, img_base + sec_reladyn->vaddr); 1085 DT_PUT(DT_RELASZ, sec_reladyn->size); 1086 DT_PUT(DT_RELAENT, 24); 1087 } 1088 DT_PUT(DT_FLAGS_1, DF_1_NOW); 1089 DT_PUT(DT_NULL, 0); 1090 #undef DT_PUT 1091 1092 /* Pad any pre-allocated tail with DT_NULL. */ 1093 while (written < dyn->ndyn_entries) { 1094 wr_u64_le(p, 0); 1095 wr_u64_le(p + 8, 0); 1096 p += 16; 1097 written++; 1098 } 1099 1100 /* Re-serialize .rela.dyn body. GLOB_DAT records (imports against 1101 * .got slots) and RELATIVE records (PIE internal abs64 fixups) 1102 * are both populated during apply_all_relocs; layout_dyn pre-counts 1103 * the exact number of runtime relocation records. 
*/ 1104 { 1105 const LinkSegment* rdseg = &img->segments[sec_reladyn->segment_id - 1]; 1106 u8* rd_bytes = img->segment_bytes[rdseg->id - 1] + 1107 (size_t)(sec_reladyn->file_offset - rdseg->file_offset); 1108 u32 i; 1109 for (i = 0; i < dyn->nrela_dyn; ++i) { 1110 const DynRela* rr = &dyn->rela_dyn[i]; 1111 u8* rp = rd_bytes + (u64)i * ELF64_RELA_SIZE; 1112 wr_u64_le(rp + 0, rr->r_offset); 1113 wr_u64_le(rp + 8, rr->r_info); 1114 wr_u64_le(rp + 16, (u64)rr->r_addend); 1115 } 1116 } 1117 1118 /* Re-serialize .rela.plt body. JUMP_SLOT records were written by 1119 * layout_dyn at pre-shift vaddrs; shift_image_addresses bumped 1120 * dyn->rela_plt[i].r_offset along with the rest, so the post-shift 1121 * values match the .got.plt slot vaddrs the loader will patch. */ 1122 if (sec_relaplt && dyn->nrela_plt) { 1123 const LinkSegment* rpseg = &img->segments[sec_relaplt->segment_id - 1]; 1124 u8* rp_bytes = img->segment_bytes[rpseg->id - 1] + 1125 (size_t)(sec_relaplt->file_offset - rpseg->file_offset); 1126 u32 i; 1127 for (i = 0; i < dyn->nrela_plt; ++i) { 1128 const DynRela* rr = &dyn->rela_plt[i]; 1129 u8* rp = rp_bytes + (u64)i * ELF64_RELA_SIZE; 1130 wr_u64_le(rp + 0, rr->r_offset); 1131 wr_u64_le(rp + 8, rr->r_info); 1132 wr_u64_le(rp + 16, (u64)rr->r_addend); 1133 } 1134 } 1135 1136 /* Re-write .got.plt[0] = &.dynamic with the post-shift vaddr. 1137 * layout_dyn wrote the pre-shift value into the segment bytes; 1138 * shift_image_addresses bumped dyn->dynamic_vaddr so we can refill 1139 * the slot here. Slots 1 and 2 (link_map cookie, 1140 * _dl_runtime_resolve) are loader-owned for lazy binding; under 1141 * DF_1_NOW they're never read so leaving them zero is fine. 
*/ 1142 if (sec_gotplt && dyn->dynamic_vaddr) { 1143 const LinkSegment* gpseg = &img->segments[sec_gotplt->segment_id - 1]; 1144 u8* gp_bytes = img->segment_bytes[gpseg->id - 1] + 1145 (size_t)(sec_gotplt->file_offset - gpseg->file_offset); 1146 wr_u64_le(gp_bytes, dyn->dynamic_vaddr); 1147 } 1148 } 1149 1150 /* ---- compute build-id (post-reloc, deterministic) ---- 1151 * 1152 * Format-agnostic — Mach-O LC_UUID will hash the same bytes. */ 1153 u8 build_id[BUILD_ID_DESC_LEN]; 1154 link_image_id_compute(img, build_id); 1155 1156 /* ---- plan section headers covering loaded segments ---- 1157 * 1158 * Worst case: 1 file shdr per segment + 1 .bss shdr if RW has a tail. 1159 * shdr indices: 0=NULL, 1..nsegshdr=these, then build-id/symtab/... 1160 */ 1161 /* Walk img->sections sorted by (segment_id, vaddr) and merge into 1162 * one OutShdr per (segment_id, name) run. layout already places 1163 * same-name sections adjacent within a segment, so a stable 1164 * by-vaddr sort followed by run-length grouping captures it. */ 1165 OutShdr* outshdrs; 1166 u32 noutshdr = 0; 1167 u32 outshdr_cap = img->nsections + 1u; 1168 outshdrs = (OutShdr*)heap->alloc(heap, sizeof(*outshdrs) * outshdr_cap, 1169 _Alignof(OutShdr)); 1170 if (!outshdrs) 1171 compiler_panic(c, SRCLOC_NONE, "link_emit_elf: oom on outshdrs"); 1172 memset(outshdrs, 0, sizeof(*outshdrs) * outshdr_cap); 1173 { 1174 /* Build a sort index over LinkSection ids by (segment_id, vaddr). */ 1175 u32* order = (u32*)heap->alloc(heap, sizeof(u32) * (img->nsections + 1u), 1176 _Alignof(u32)); 1177 if (!order && img->nsections) 1178 compiler_panic(c, SRCLOC_NONE, "link_emit_elf: oom on shdr sort"); 1179 u32 i, j; 1180 for (i = 0; i < img->nsections; ++i) order[i] = i; 1181 /* Insertion sort — section count is small. 
*/ 1182 for (i = 1; i < img->nsections; ++i) { 1183 u32 cur = order[i]; 1184 const LinkSection* a = &img->sections[cur]; 1185 j = i; 1186 while (j > 0) { 1187 const LinkSection* b = &img->sections[order[j - 1]]; 1188 if (!shdr_sort_less(a, b)) break; /* a not before b → stop (stable) */ 1189 order[j] = order[j - 1]; 1190 --j; 1191 } 1192 order[j] = cur; 1193 } 1194 for (i = 0; i < img->nsections; ++i) { 1195 const LinkSection* ls = &img->sections[order[i]]; 1196 OutShdr* tail = noutshdr ? &outshdrs[noutshdr - 1] : NULL; 1197 int merge = tail && tail->segment_id == ls->segment_id && 1198 tail->name == ls->name && 1199 tail->is_nobits == (ls->sem == SSEM_NOBITS); 1200 if (merge) { 1201 u64 end = ls->vaddr + ls->size; 1202 u64 prev_end = tail->vaddr + tail->size; 1203 if (end > prev_end) tail->size = end - tail->vaddr; 1204 if (ls->align > tail->align) tail->align = ls->align; 1205 } else { 1206 OutShdr* o = &outshdrs[noutshdr]; 1207 o->shdr_idx = 1u + noutshdr; 1208 o->segment_id = ls->segment_id; 1209 o->name = ls->name; 1210 o->sem = ls->sem; 1211 o->flags = ls->flags; 1212 o->align = ls->align; 1213 o->vaddr = ls->vaddr; 1214 o->file_offset = ls->file_offset; 1215 o->size = ls->size; 1216 o->is_nobits = (ls->sem == SSEM_NOBITS); 1217 o->is_fileonly = ls->file_only; 1218 noutshdr++; 1219 } 1220 } 1221 heap->free(heap, order, sizeof(u32) * (img->nsections + 1u)); 1222 } 1223 1224 /* ---- build .shstrtab ---- */ 1225 StrBuilder shstrtab; 1226 strb_init(&shstrtab, heap, 128); 1227 u32 sh_name_symtab = strb_add_cstr(&shstrtab, ".symtab"); 1228 u32 sh_name_strtab = strb_add_cstr(&shstrtab, ".strtab"); 1229 u32 sh_name_shstrtab = strb_add_cstr(&shstrtab, ".shstrtab"); 1230 u32 sh_name_buildid = 1231 scripted ? 0u : strb_add_cstr(&shstrtab, ".note.gnu.build-id"); 1232 /* Per-output-shdr names — interned strings from input section names. 
*/ 1233 u32* outshdr_name_off = 1234 (u32*)heap->alloc(heap, sizeof(u32) * (noutshdr + 1u), _Alignof(u32)); 1235 if (!outshdr_name_off && noutshdr) 1236 compiler_panic(c, SRCLOC_NONE, "link_emit_elf: oom on shdr name table"); 1237 { 1238 u32 i; 1239 for (i = 0; i < noutshdr; ++i) { 1240 const OutShdr* o = &outshdrs[i]; 1241 if (o->name) { 1242 Slice nm_s = pool_slice(c->global, o->name); 1243 const char* nm = nm_s.s; 1244 size_t nlen = nm_s.len; 1245 outshdr_name_off[i] = 1246 nm && nlen ? strb_add(&shstrtab, nm, (u32)nlen) : 0; 1247 } else { 1248 outshdr_name_off[i] = 0; 1249 } 1250 } 1251 } 1252 1253 u32 nbuildid_shdr = scripted ? 0u : 1u; 1254 u32 nshdr = 1u + noutshdr + nbuildid_shdr + 3u; 1255 u32 shndx_symtab = 1u + noutshdr + nbuildid_shdr; 1256 u32 shndx_strtab = shndx_symtab + 1u; 1257 u32 shndx_shstrtab = shndx_strtab + 1u; 1258 1259 /* ---- build .symtab + .strtab ---- 1260 * 1261 * Two passes (locals first, then globals/weaks). Slot 0 is 1262 * STN_UNDEF. Globals are deduped via img->globals — only the 1263 * canonical entry per name is emitted, since per-input undef 1264 * records keep their own LinkSymId after resolve_undefs's 1265 * "copy fields from canonical def" step. sh_info = first non-local 1266 * idx. 
*/ 1267 StrBuilder strtab; 1268 strb_init(&strtab, heap, 256); 1269 1270 SymRec* recs = (SymRec*)heap->alloc( 1271 heap, sizeof(*recs) * (LinkSyms_count(&img->syms) + 1u), 1272 _Alignof(SymRec)); 1273 if (!recs) compiler_panic(c, SRCLOC_NONE, "link_emit_elf: oom on symrecs"); 1274 u32 nsyms_emit = 0; 1275 u32 first_global_idx; 1276 memset(&recs[nsyms_emit++], 0, sizeof(*recs)); /* slot 0 */ 1277 first_global_idx = nsyms_emit; 1278 1279 { 1280 u32 pass, i; 1281 for (pass = 0; pass < 2; ++pass) { 1282 int want_local = (pass == 0); 1283 if (!want_local) first_global_idx = nsyms_emit; 1284 for (i = 0; i < LinkSyms_count(&img->syms); ++i) { 1285 const LinkSymbol* s = LinkSyms_at(&img->syms, i); 1286 int is_local = (s->bind == SB_LOCAL); 1287 size_t namelen = 0; 1288 const char* nm; 1289 u8 st_type, st_bind; 1290 u16 shndx; 1291 u64 st_value; 1292 SymRec* r; 1293 if (want_local != is_local) continue; 1294 if (s->name == 0 && s->kind != SK_FILE) continue; 1295 /* Dedupe globals: per-input undef-of-X and the canonical 1296 * def-of-X are separate img->syms entries (resolve_undefs 1297 * mirrors fields onto the undef). Only the canonical 1298 * (first registered) entry is in img->globals. Skip the 1299 * shadow copies. */ 1300 if (!is_local && s->name) { 1301 LinkSymId canonical = symhash_get(&img->globals, s->name); 1302 if (canonical != LINK_SYM_NONE && canonical != s->id) continue; 1303 } 1304 { 1305 Slice nm_s = s->name ? pool_slice(c->global, s->name) : SLICE_NULL; 1306 nm = nm_s.s ? nm_s.s : ""; 1307 namelen = nm_s.len; 1308 } 1309 shndx = sym_shndx_for(s, outshdrs, noutshdr); 1310 /* st_value: in ET_EXEC, defined non-ABS symbols carry 1311 * absolute virtual addresses (IMAGE_BASE + image 1312 * vaddr); ABS symbols carry their own value verbatim. 
*/ 1313 if (s->kind == SK_FILE) 1314 st_value = 0; 1315 else if (s->kind == SK_ABS) 1316 st_value = s->vaddr; 1317 else if (s->defined) 1318 st_value = img_base + s->vaddr; 1319 else 1320 st_value = 0; 1321 st_type = sym_kind_to_st_type(s->kind); 1322 st_bind = sym_bind_to_st_bind(s->bind); 1323 r = &recs[nsyms_emit++]; 1324 memset(r, 0, sizeof(*r)); 1325 r->st_name = (nm && namelen) ? strb_add(&strtab, nm, (u32)namelen) : 0; 1326 r->st_info = ELF64_ST_INFO(st_bind, st_type); 1327 r->st_other = STV_DEFAULT; 1328 r->st_shndx = shndx; 1329 r->st_value = st_value; 1330 r->st_size = s->size; 1331 } 1332 } 1333 } 1334 1335 /* ---- compute file offsets for trailing non-alloc sections ---- */ 1336 /* End of segment data: the highest (file_offset + file_size) across 1337 * loaded segments. */ 1338 u64 end_of_segs = headers_load; 1339 { 1340 u32 i; 1341 for (i = 0; i < img->nsegments; ++i) { 1342 const LinkSegment* seg = &img->segments[i]; 1343 u64 e = seg->file_offset + seg->file_size; 1344 if (e > end_of_segs) end_of_segs = e; 1345 } 1346 } 1347 /* File-only debug sections go in the trailing non-alloc region, after 1348 * the loaded segments and before .symtab. Assign each merged debug 1349 * OutShdr a file offset (and propagate it back to its constituent 1350 * LinkSections for the byte-write pass). */ 1351 u64 dbg_cursor = end_of_segs; 1352 if (img->dbg_count) { 1353 u32 oi; 1354 for (oi = 0; oi < noutshdr; ++oi) { 1355 OutShdr* o = &outshdrs[oi]; 1356 u32 si; 1357 if (!o->is_fileonly) continue; 1358 o->file_offset = ALIGN_UP(dbg_cursor, o->align ? o->align : 1u); 1359 for (si = 0; si < img->dbg_count; ++si) { 1360 LinkSection* ls = &img->sections[img->dbg_first_lsid - 1 + si]; 1361 if (ls->name == o->name) 1362 ls->file_offset = o->file_offset + ls->vaddr; /* base within run */ 1363 } 1364 dbg_cursor = o->file_offset + o->size; 1365 } 1366 } 1367 u64 symtab_off = ALIGN_UP(dbg_cursor, (u64)8u); 1368 u32 sym_size = class32 ? 
ELF32_SYM_SIZE : ELF64_SYM_SIZE; 1369 u64 symtab_size = (u64)sym_size * nsyms_emit; 1370 u64 strtab_off = symtab_off + symtab_size; 1371 u64 strtab_size = strtab.len; 1372 u64 shstrtab_off = strtab_off + strtab_size; 1373 u64 shstrtab_size = shstrtab.len; 1374 u64 shdr_off = ALIGN_UP(shstrtab_off + shstrtab_size, (u64)8u); 1375 1376 /* ---- build phdrs ---- */ 1377 Phdr64* phdrs = (Phdr64*)heap->alloc(heap, sizeof(Phdr64) * nphdr_total, 1378 _Alignof(Phdr64)); 1379 if (!phdrs) compiler_panic(c, SRCLOC_NONE, "link_emit_elf: oom on phdrs"); 1380 memset(phdrs, 0, sizeof(Phdr64) * nphdr_total); 1381 { 1382 u32 pi = 0; 1383 /* PT_PHDR points at the phdr table itself within the headers 1384 * PT_LOAD. Required by the runtime loader for ET_DYN to know 1385 * where its own program headers live. Must appear before the 1386 * first PT_LOAD on dynamic exes (musl checks). */ 1387 if (pie && img->dyn) { 1388 phdrs[pi].p_type = PT_PHDR; 1389 phdrs[pi].p_flags = PF_R; 1390 phdrs[pi].p_offset = sizeof(Ehdr64); 1391 phdrs[pi].p_vaddr = img_base + sizeof(Ehdr64); 1392 phdrs[pi].p_paddr = phdrs[pi].p_vaddr; 1393 phdrs[pi].p_filesz = (u64)nphdr_total * sizeof(Phdr64); 1394 phdrs[pi].p_memsz = phdrs[pi].p_filesz; 1395 phdrs[pi].p_align = 8; 1396 pi++; 1397 } 1398 /* Headers PT_LOAD (covers ehdr + phdrs + build-id note). 1399 * Scripted images don't emit one — see plan note above. */ 1400 if (!scripted) { 1401 phdrs[pi].p_type = PT_LOAD; 1402 phdrs[pi].p_flags = PF_R; 1403 phdrs[pi].p_offset = 0; 1404 phdrs[pi].p_vaddr = img_base; 1405 phdrs[pi].p_paddr = img_base; 1406 phdrs[pi].p_filesz = headers_size; 1407 phdrs[pi].p_memsz = headers_size; 1408 phdrs[pi].p_align = PAGE_SIZE; 1409 pi++; 1410 } 1411 /* Per-segment PT_LOAD. 
*/ 1412 u32 i; 1413 for (i = 0; i < img->nsegments; ++i) { 1414 const LinkSegment* seg = &img->segments[i]; 1415 Phdr64* p = &phdrs[pi++]; 1416 p->p_type = PT_LOAD; 1417 p->p_flags = perms_to_pflags(seg->flags); 1418 p->p_offset = seg->file_offset; 1419 p->p_vaddr = img_base + seg->vaddr; /* post-shift */ 1420 p->p_paddr = p->p_vaddr; 1421 p->p_filesz = seg->file_size; 1422 /* TLS .tbss is per-thread template space, not a loadable bss 1423 * region — PT_TLS already records the full memsz (incl. .tbss) 1424 * for the loader's per-thread allocation, so the matching 1425 * PT_LOAD must not extend memsz past filesz. qemu-riscv64 1426 * rejects PT_LOADs with memsz>filesz on non-writable mappings 1427 * ("PT_LOAD with non-writable bss"), and the SEG_TLS perms are 1428 * SF_ALLOC|SF_TLS only. */ 1429 p->p_memsz = (seg->flags & SF_TLS) ? seg->file_size : seg->mem_size; 1430 p->p_align = seg->align ? seg->align : PAGE_SIZE; 1431 } 1432 /* Allocatable SHT_NOTE sections can also be addressed by PT_NOTE. QEMU's 1433 * x86 PVH loader uses this for XEN_ELFNOTE_PHYS32_ENTRY while the bytes 1434 * still live in the normal read-only PT_LOAD. */ 1435 for (i = 0; i < img->nsections; ++i) { 1436 const LinkSection* s = &img->sections[i]; 1437 Phdr64* p; 1438 if (s->file_only || s->sem != SSEM_NOTE || s->size == 0) continue; 1439 p = &phdrs[pi++]; 1440 p->p_type = PT_NOTE; 1441 p->p_flags = PF_R; 1442 p->p_offset = s->file_offset; 1443 p->p_vaddr = img_base + s->vaddr; 1444 p->p_paddr = p->p_vaddr; 1445 p->p_filesz = s->size; 1446 p->p_memsz = s->size; 1447 p->p_align = s->align ? s->align : 4; 1448 } 1449 /* PT_NOTE for build-id. Scripted images skip the build-id entirely. 
*/ 1450 if (!scripted) { 1451 phdrs[pi].p_type = PT_NOTE; 1452 phdrs[pi].p_flags = PF_R; 1453 phdrs[pi].p_offset = build_id_off; 1454 phdrs[pi].p_vaddr = build_id_addr; 1455 phdrs[pi].p_paddr = build_id_addr; 1456 phdrs[pi].p_filesz = BUILD_ID_NOTE_BYTES; 1457 phdrs[pi].p_memsz = BUILD_ID_NOTE_BYTES; 1458 phdrs[pi].p_align = 4; 1459 pi++; 1460 } 1461 /* PT_TLS describing the .tdata template + .tbss zero-fill. 1462 * vaddr/file_offset point at the same bytes the matching 1463 * PT_LOAD already covers — the loader uses PT_TLS to size 1464 * each thread's TLS block and to seed it from .tdata. */ 1465 if (has_tls) { 1466 phdrs[pi].p_type = PT_TLS; 1467 phdrs[pi].p_flags = PF_R; 1468 phdrs[pi].p_offset = img->tls_vaddr; 1469 phdrs[pi].p_vaddr = img_base + img->tls_vaddr; 1470 phdrs[pi].p_paddr = phdrs[pi].p_vaddr; 1471 phdrs[pi].p_filesz = img->tls_filesz; 1472 phdrs[pi].p_memsz = img->tls_memsz; 1473 phdrs[pi].p_align = img->tls_align ? img->tls_align : 1u; 1474 pi++; 1475 } 1476 /* Dynamic phdrs. PT_INTERP and PT_DYNAMIC point at the matching 1477 * sections (which layout_dyn placed in the ro/rw_dyn segments). 1478 * PT_GNU_STACK marks the stack as non-executable (filesz=0). 
*/ 1479 if (pie && img->dyn) { 1480 LinkDynState* dyn = img->dyn; 1481 const LinkSection* sec_interp = &img->sections[dyn->sec_interp - 1]; 1482 const LinkSection* sec_dynamic = &img->sections[dyn->sec_dynamic - 1]; 1483 phdrs[pi].p_type = PT_INTERP; 1484 phdrs[pi].p_flags = PF_R; 1485 phdrs[pi].p_offset = sec_interp->file_offset; 1486 phdrs[pi].p_vaddr = img_base + sec_interp->vaddr; 1487 phdrs[pi].p_paddr = phdrs[pi].p_vaddr; 1488 phdrs[pi].p_filesz = sec_interp->size; 1489 phdrs[pi].p_memsz = sec_interp->size; 1490 phdrs[pi].p_align = 1; 1491 pi++; 1492 phdrs[pi].p_type = PT_DYNAMIC; 1493 phdrs[pi].p_flags = PF_R | PF_W; 1494 phdrs[pi].p_offset = sec_dynamic->file_offset; 1495 phdrs[pi].p_vaddr = img_base + sec_dynamic->vaddr; 1496 phdrs[pi].p_paddr = phdrs[pi].p_vaddr; 1497 phdrs[pi].p_filesz = sec_dynamic->size; 1498 phdrs[pi].p_memsz = sec_dynamic->size; 1499 phdrs[pi].p_align = 8; 1500 pi++; 1501 phdrs[pi].p_type = PT_GNU_STACK; 1502 phdrs[pi].p_flags = PF_R | PF_W; 1503 phdrs[pi].p_offset = 0; 1504 phdrs[pi].p_vaddr = 0; 1505 phdrs[pi].p_paddr = 0; 1506 phdrs[pi].p_filesz = 0; 1507 phdrs[pi].p_memsz = 0; 1508 phdrs[pi].p_align = 16; 1509 pi++; 1510 /* PT_GNU_RELRO would mark the read-only-after-relocation span 1511 * here. Phase 6 leaves it out — it's an optimization the loader 1512 * can live without, and our ro_seg already lives in a PF_R 1513 * PT_LOAD that's never made writable. */ 1514 } else if (pie) { 1515 /* dyn was nominally requested but layout_dyn early-out — no 1516 * imports and no DSO inputs. INTERP/DYNAMIC loader headers are skipped. */ 1517 (void)0; 1518 } 1519 (void)pi; 1520 } 1521 1522 /* ---- build ehdr ---- */ 1523 Ehdr64 ehdr; 1524 memset(&ehdr, 0, sizeof(ehdr)); 1525 ehdr.e_ident[0] = ELFMAG0; 1526 ehdr.e_ident[1] = ELFMAG1; 1527 ehdr.e_ident[2] = ELFMAG2; 1528 ehdr.e_ident[3] = ELFMAG3; 1529 ehdr.e_ident[4] = class32 ? 
ELFCLASS32 : ELFCLASS64; 1530 ehdr.e_ident[5] = ELFDATA2LSB; 1531 ehdr.e_ident[6] = EV_CURRENT; 1532 ehdr.e_ident[7] = ELFOSABI_NONE; 1533 /* Brand FreeBSD executables with EI_OSABI=ELFOSABI_FREEBSD; the kernel 1534 * matches that brand directly. Without it a static binary is rejected with 1535 * ENOEXEC -- the FreeBSD ABI note crt1.o carries is not sufficient on its 1536 * own for kit's images (the kernel's note scan does not recognize the 1537 * layout), so we set the OSABI on every arch (FreeBSD/clang only sets it on 1538 * amd64/aarch64, but the riscv64 kernel accepts it too). */ 1539 if (img->c->target.os == KIT_OS_FREEBSD) ehdr.e_ident[7] = ELFOSABI_FREEBSD; 1540 ehdr.e_type = pie ? ET_DYN : ET_EXEC; 1541 ehdr.e_machine = (u16)e_machine; 1542 ehdr.e_version = EV_CURRENT; 1543 ehdr.e_entry = img_base + LinkSyms_at(&img->syms, img->entry_sym - 1)->vaddr; 1544 ehdr.e_phoff = ehdr_sz; 1545 ehdr.e_shoff = shdr_off; 1546 /* e_flags carries the arch ABI bits (RISC-V float-ABI / RVC). This was 1547 * previously hardcoded 0 for all arches; writing arch->e_flags lands 1548 * the RV32/RV64 flags. (RV64 descriptor e_flags is unchanged, so its 1549 * header now reflects RVC|FLOAT_ABI_DOUBLE — see integration notes.) */ 1550 ehdr.e_flags = arch->e_flags; 1551 /* rv32: ilp32 and ilp32f share KIT_ARCH_RV32, so the descriptor's float-ABI 1552 * bits are a placeholder. Override them from -mabi so the executable's ABI 1553 * matches its objects (and a soft ilp32 image isn't mislabelled single). 
*/ 1554 if (e_machine == EM_RISCV && class32) { 1555 u32 fa = EF_RISCV_FLOAT_ABI_SOFT; 1556 if (c->target.float_abi == KIT_FLOAT_ABI_SINGLE) 1557 fa = EF_RISCV_FLOAT_ABI_SINGLE; 1558 else if (c->target.float_abi == KIT_FLOAT_ABI_DOUBLE) 1559 fa = EF_RISCV_FLOAT_ABI_DOUBLE; 1560 else if (c->target.float_abi == KIT_FLOAT_ABI_DEFAULT) 1561 fa = EF_RISCV_FLOAT_ABI_SINGLE; 1562 ehdr.e_flags = (ehdr.e_flags & ~(u32)EF_RISCV_FLOAT_ABI_MASK) | fa; 1563 } 1564 ehdr.e_ehsize = (u16)ehdr_sz; 1565 ehdr.e_phentsize = (u16)phent_sz; 1566 ehdr.e_phnum = (u16)nphdr_total; 1567 ehdr.e_shentsize = (u16)elf_shdr_sz(class32); 1568 ehdr.e_shnum = (u16)nshdr; 1569 ehdr.e_shstrndx = (u16)shndx_shstrtab; 1570 1571 /* ---- write ehdr, phdrs, build-id note, pad ---- */ 1572 u64 cur_off; 1573 write_ehdr(w, &ehdr, class32); 1574 write_phdrs(w, phdrs, nphdr_total, class32); 1575 cur_off = ehdr_sz + phent_sz * nphdr_total; 1576 1577 /* .note.gnu.build-id wire format: 1578 * u32 namesz = 4 ("GNU\0") 1579 * u32 descsz = 16 1580 * u32 type = NT_GNU_BUILD_ID (3) 1581 * "GNU\0" 1582 * <16 bytes of build-id> 1583 * 1584 * Scripted images don't carry build-id; they have no PT_NOTE phdr to 1585 * point at it and the file payload would just be dead bytes. */ 1586 if (!scripted) { 1587 u8 nh[12]; 1588 u32 v; 1589 v = NOTE_NAME_GNU_LEN; 1590 nh[0] = (u8)v; 1591 nh[1] = (u8)(v >> 8); 1592 nh[2] = (u8)(v >> 16); 1593 nh[3] = (u8)(v >> 24); 1594 v = BUILD_ID_DESC_LEN; 1595 nh[4] = (u8)v; 1596 nh[5] = (u8)(v >> 8); 1597 nh[6] = (u8)(v >> 16); 1598 nh[7] = (u8)(v >> 24); 1599 v = NOTE_BUILD_ID_TYPE; 1600 nh[8] = (u8)v; 1601 nh[9] = (u8)(v >> 8); 1602 nh[10] = (u8)(v >> 16); 1603 nh[11] = (u8)(v >> 24); 1604 write_bytes(w, nh, sizeof nh); 1605 write_bytes(w, NOTE_NAME_GNU "\0", NOTE_NAME_GNU_LEN); 1606 write_bytes(w, build_id, BUILD_ID_DESC_LEN); 1607 cur_off += BUILD_ID_NOTE_BYTES; 1608 } 1609 1610 /* Pad to first segment file_offset (== headers_load). 
*/ 1611 { 1612 u32 i; 1613 for (i = 0; i < img->nsegments; ++i) { 1614 const LinkSegment* seg = &img->segments[i]; 1615 if (seg->file_size == 0) continue; 1616 if (cur_off < seg->file_offset) { 1617 write_zeroes(w, (size_t)(seg->file_offset - cur_off)); 1618 cur_off = seg->file_offset; 1619 } 1620 write_bytes(w, img->segment_bytes[seg->id - 1], (size_t)seg->file_size); 1621 cur_off += seg->file_size; 1622 } 1623 } 1624 1625 /* ---- write file-only debug sections ---- * 1626 * 1627 * Emit each merged debug OutShdr at its assigned file offset by 1628 * writing its constituent contributions in base order (the registry 1629 * is per-name base-ascending). OutShdrs were placed at ascending file 1630 * offsets, so cur_off advances monotonically. */ 1631 if (img->dbg_count) { 1632 u32 oi; 1633 for (oi = 0; oi < noutshdr; ++oi) { 1634 const OutShdr* o = &outshdrs[oi]; 1635 u32 si; 1636 if (!o->is_fileonly) continue; 1637 if (cur_off < o->file_offset) { 1638 write_zeroes(w, (size_t)(o->file_offset - cur_off)); 1639 cur_off = o->file_offset; 1640 } 1641 for (si = 0; si < img->dbg_count; ++si) { 1642 const LinkSection* ls = &img->sections[img->dbg_first_lsid - 1 + si]; 1643 if (ls->name != o->name || img->dbg_size[si] == 0) continue; 1644 write_bytes(w, img->dbg_bytes[si], (size_t)img->dbg_size[si]); 1645 cur_off += img->dbg_size[si]; 1646 } 1647 } 1648 } 1649 1650 /* ---- write trailing non-alloc sections ---- */ 1651 if (cur_off < symtab_off) { 1652 write_zeroes(w, (size_t)(symtab_off - cur_off)); 1653 cur_off = symtab_off; 1654 } 1655 { 1656 u32 i; 1657 for (i = 0; i < nsyms_emit; ++i) write_sym_rec(w, &recs[i], class32); 1658 cur_off += symtab_size; 1659 } 1660 if (strtab.len) { 1661 write_bytes(w, strtab.data, strtab.len); 1662 cur_off += strtab.len; 1663 } 1664 if (shstrtab.len) { 1665 write_bytes(w, shstrtab.data, shstrtab.len); 1666 cur_off += shstrtab.len; 1667 } 1668 1669 /* ---- write section header table ---- */ 1670 if (cur_off < shdr_off) { 1671 write_zeroes(w, 
(size_t)(shdr_off - cur_off)); 1672 cur_off = shdr_off; 1673 } 1674 { 1675 Shdr64 sh; 1676 u32 i; 1677 /* shdr 0: NULL */ 1678 memset(&sh, 0, sizeof(sh)); 1679 write_shdr(w, &sh, class32); 1680 /* Locate dyn-section names (interned earlier in layout_dyn) so 1681 * we can override sh_type / sh_link / sh_info / sh_entsize for 1682 * .dynsym / .dynstr / .gnu.hash / .rela.dyn / .rela.plt / 1683 * .dynamic. The sh_link cross-references (e.g., .dynsym -> 1684 * .dynstr) need the matching shdr indices, which we look up by 1685 * comparing OutShdr.name to the same Sym values. */ 1686 Sym n_dynsym = 0, n_dynstr = 0, n_gnuhash = 0; 1687 Sym n_reladyn = 0, n_relaplt = 0, n_dynamic = 0; 1688 Sym n_gotplt = 0, n_gnuver = 0, n_gnuver_r = 0; 1689 if (pie && img->dyn) { 1690 n_dynsym = pool_intern_slice(c->global, SLICE_LIT(".dynsym")); 1691 n_dynstr = pool_intern_slice(c->global, SLICE_LIT(".dynstr")); 1692 n_gnuhash = pool_intern_slice(c->global, SLICE_LIT(".gnu.hash")); 1693 n_reladyn = pool_intern_slice(c->global, SLICE_LIT(".rela.dyn")); 1694 n_relaplt = pool_intern_slice(c->global, SLICE_LIT(".rela.plt")); 1695 n_dynamic = pool_intern_slice(c->global, SLICE_LIT(".dynamic")); 1696 n_gotplt = pool_intern_slice(c->global, SLICE_LIT(".got.plt")); 1697 n_gnuver = pool_intern_slice(c->global, SLICE_LIT(".gnu.version")); 1698 n_gnuver_r = pool_intern_slice(c->global, SLICE_LIT(".gnu.version_r")); 1699 } 1700 /* Two-pass: first find dynsym/dynstr/gotplt indices for sh_link 1701 * fixups, then emit. 
*/ 1702 u32 idx_dynsym = 0, idx_dynstr = 0, idx_gotplt = 0; 1703 if (pie && img->dyn) { 1704 for (i = 0; i < noutshdr; ++i) { 1705 Sym nm = outshdrs[i].name; 1706 u32 ix = outshdrs[i].shdr_idx; 1707 if (nm == n_dynsym) 1708 idx_dynsym = ix; 1709 else if (nm == n_dynstr) 1710 idx_dynstr = ix; 1711 else if (nm == n_gotplt) 1712 idx_gotplt = ix; 1713 } 1714 } 1715 /* per-name output shdrs */ 1716 for (i = 0; i < noutshdr; ++i) { 1717 const OutShdr* o = &outshdrs[i]; 1718 memset(&sh, 0, sizeof(sh)); 1719 sh.sh_name = outshdr_name_off[i]; 1720 sh.sh_type = sec_sem_to_sht(o->sem); 1721 sh.sh_flags = sec_flags_to_shf(o->flags); 1722 sh.sh_addr = img_base + o->vaddr; 1723 /* File-only debug sections aren't loaded: SHT_PROGBITS, no 1724 * SHF_ALLOC, sh_addr 0. addr2line / gdb read them by file offset. */ 1725 if (o->is_fileonly) { 1726 sh.sh_type = SHT_PROGBITS; 1727 sh.sh_flags = 0; 1728 sh.sh_addr = 0; 1729 } 1730 sh.sh_offset = o->file_offset; 1731 sh.sh_size = o->size; 1732 sh.sh_link = 0; 1733 sh.sh_info = 0; 1734 sh.sh_addralign = o->align ? o->align : 1; 1735 sh.sh_entsize = (o->sem == SSEM_INIT_ARRAY || o->sem == SSEM_FINI_ARRAY || 1736 o->sem == SSEM_PREINIT_ARRAY) 1737 ? 8 1738 : 0; 1739 /* Dyn-section overrides: sh_type / sh_link / sh_info / entsize. 
*/ 1740 if (pie && img->dyn) { 1741 if (o->name == n_dynsym) { 1742 sh.sh_type = SHT_DYNSYM; 1743 sh.sh_link = idx_dynstr; 1744 sh.sh_info = img->dyn->first_global; 1745 sh.sh_entsize = 24; 1746 } else if (o->name == n_dynstr) { 1747 sh.sh_type = SHT_STRTAB; 1748 } else if (o->name == n_gnuhash) { 1749 sh.sh_type = SHT_GNU_HASH; 1750 sh.sh_link = idx_dynsym; 1751 } else if (o->name == n_reladyn) { 1752 sh.sh_type = SHT_RELA; 1753 sh.sh_link = idx_dynsym; 1754 sh.sh_entsize = 24; 1755 } else if (o->name == n_relaplt) { 1756 sh.sh_type = SHT_RELA; 1757 sh.sh_link = idx_dynsym; 1758 sh.sh_info = idx_gotplt; 1759 sh.sh_entsize = 24; 1760 sh.sh_flags |= SHF_INFO_LINK; 1761 } else if (o->name == n_dynamic) { 1762 sh.sh_type = SHT_DYNAMIC; 1763 sh.sh_link = idx_dynstr; 1764 sh.sh_entsize = 16; 1765 } else if (o->name == n_gnuver) { 1766 sh.sh_type = SHT_GNU_VERSYM; 1767 sh.sh_link = idx_dynsym; 1768 sh.sh_entsize = 2; 1769 } else if (o->name == n_gnuver_r) { 1770 sh.sh_type = SHT_GNU_VERNEED; 1771 sh.sh_link = idx_dynstr; 1772 sh.sh_info = img->dyn->nverneed; 1773 } else if (o->name == n_gotplt) { 1774 sh.sh_entsize = 8; 1775 } 1776 } 1777 write_shdr(w, &sh, class32); 1778 } 1779 /* shdr: .note.gnu.build-id (allocatable; in headers PT_LOAD) */ 1780 if (!scripted) { 1781 memset(&sh, 0, sizeof(sh)); 1782 sh.sh_name = sh_name_buildid; 1783 sh.sh_type = SHT_NOTE; 1784 sh.sh_flags = SHF_ALLOC; 1785 sh.sh_addr = build_id_addr; 1786 sh.sh_offset = build_id_off; 1787 sh.sh_size = BUILD_ID_NOTE_BYTES; 1788 sh.sh_addralign = 4; 1789 write_shdr(w, &sh, class32); 1790 } 1791 /* shdr: .symtab */ 1792 memset(&sh, 0, sizeof(sh)); 1793 sh.sh_name = sh_name_symtab; 1794 sh.sh_type = SHT_SYMTAB; 1795 sh.sh_flags = 0; 1796 sh.sh_addr = 0; 1797 sh.sh_offset = symtab_off; 1798 sh.sh_size = symtab_size; 1799 sh.sh_link = shndx_strtab; 1800 sh.sh_info = first_global_idx; 1801 sh.sh_addralign = 8; 1802 sh.sh_entsize = sym_size; 1803 write_shdr(w, &sh, class32); 1804 /* shdr: .strtab */ 1805 
memset(&sh, 0, sizeof(sh)); 1806 sh.sh_name = sh_name_strtab; 1807 sh.sh_type = SHT_STRTAB; 1808 sh.sh_offset = strtab_off; 1809 sh.sh_size = strtab_size; 1810 sh.sh_addralign = 1; 1811 write_shdr(w, &sh, class32); 1812 /* shdr: .shstrtab */ 1813 memset(&sh, 0, sizeof(sh)); 1814 sh.sh_name = sh_name_shstrtab; 1815 sh.sh_type = SHT_STRTAB; 1816 sh.sh_offset = shstrtab_off; 1817 sh.sh_size = shstrtab_size; 1818 sh.sh_addralign = 1; 1819 write_shdr(w, &sh, class32); 1820 } 1821 1822 heap->free(heap, phdrs, sizeof(Phdr64) * nphdr_total); 1823 heap->free(heap, recs, sizeof(*recs) * (LinkSyms_count(&img->syms) + 1u)); 1824 heap->free(heap, outshdrs, sizeof(*outshdrs) * outshdr_cap); 1825 if (outshdr_name_off) 1826 heap->free(heap, outshdr_name_off, sizeof(u32) * (noutshdr + 1u)); 1827 strb_fini(&strtab); 1828 strb_fini(&shstrtab); 1829 }