link_dyn.c (51602B)
1 /* Phase 4 of dynamic linking: synthesize the dyn-link tables and 2 * sections an ET_DYN ELF exe needs to be loadable by a real runtime 3 * loader (musl ld-musl-aarch64.so.1). 4 * 5 * Inputs (computed by earlier passes): 6 * - LinkSymbol entries with `imported = 1` (set by resolve_undefs's 7 * DSO-search path; their dso_input_id names the providing DSO). 8 * - LinkInputs of kind LINK_INPUT_DSO_BYTES carrying SONAMEs. 9 * 10 * Outputs (deposited on LinkImage.dyn): 11 * - .interp PT_INTERP target string 12 * - .dynsym + .dynstr symbol table + name pool 13 * - .gnu.hash GNU-style hash for the loader 14 * - .rela.dyn GLOB_DAT (data imports) + space for 15 * R_AARCH64_RELATIVE records that 16 * Phase 6 emit fills in 17 * - .rela.plt JUMP_SLOT records (one per imported func) 18 * - .plt allocated, body NOT emitted (Phase 5) 19 * - .got.plt 3 reserved slots + 1 per PLT slot, 20 * allocated, body NOT emitted 21 * - .dynamic PT_DYNAMIC body, populated 22 * 23 * The .plt body / GOT-slot fill / CALL26 reloc rewriting are Phase 5; 24 * they're called out at the relevant allocation site so the missing 25 * pieces are obvious to anyone reading the output. The static-exe path 26 * is unaffected — layout_dyn early-outs when emit_pie is 0. 27 * 28 * Allocator pattern follows layout_iplt (link_layout.c): grow segments 29 * + sections via realloc, then page-align each new segment after the 30 * existing image span. Synthetic sections carry input_id == LINK_INPUT_NONE 31 * so downstream passes (emit_reloc_records, GC) leave them alone. 32 */ 33 34 #include <string.h> 35 36 #include "core/bytes.h" 37 #include "core/heap.h" 38 #include "core/pool.h" 39 #include "core/slice.h" 40 #include "core/util.h" 41 #include "core/vec.h" 42 #include "link/link.h" 43 #include "link/link_arch.h" 44 #include "link/link_internal.h" 45 #include "obj/elf/elf.h" 46 #include "obj/format.h" 47 48 /* ---- small allocators (mirror layout_iplt's helpers) ---- */ 49 50 static u32 dyn_alloc_segments(LinkImage* img, u32 nseg) { 51 Heap* h = img->heap; 52 u32 base = img->nsegments; 53 u32 new_nseg = base + nseg; 54 LinkSegment* nsegs = (LinkSegment*)h->realloc( 55 h, img->segments, sizeof(*img->segments) * img->nsegments, 56 sizeof(*img->segments) * new_nseg, _Alignof(LinkSegment)); 57 u8** nsbufs = (u8**)h->realloc( 58 h, img->segment_bytes, sizeof(*img->segment_bytes) * img->nsegments, 59 sizeof(*img->segment_bytes) * new_nseg, _Alignof(u8*)); 60 size_t* nscaps = (size_t*)h->realloc( 61 h, img->segment_bytes_cap, 62 sizeof(*img->segment_bytes_cap) * img->nsegments, 63 sizeof(*img->segment_bytes_cap) * new_nseg, _Alignof(size_t)); 64 if (!nsegs || !nsbufs || !nscaps) 65 compiler_panic(img->c, SRCLOC_NONE, "link: oom on dyn segments"); 66 img->segments = nsegs; 67 img->segment_bytes = nsbufs; 68 img->segment_bytes_cap = nscaps; 69 return base; 70 } 71 72 static u32 dyn_alloc_sections(LinkImage* img, u32 nsec) { 73 Heap* h = img->heap; 74 u32 base = img->nsections; 75 u32 new_nsec = base + nsec; 76 LinkSection* nsections = (LinkSection*)h->realloc( 77 h, img->sections, sizeof(*img->sections) * img->nsections, 78 sizeof(*img->sections) * new_nsec, _Alignof(LinkSection)); 79 if (!nsections) 80 compiler_panic(img->c, SRCLOC_NONE, "link: oom on dyn sections"); 81 img->sections = nsections; 82 return base; 83 } 84 85 /* ---- byte-builder for .dynstr / .gnu.hash ---- */ 86 87 typedef struct ByteBuf { 88 Heap* heap; 89 u8* data; 90 u32 len; 91 u32 cap; 92 } ByteBuf; 93 94 static void bb_init(ByteBuf* b, Heap* h) { 95 b->heap = h; 96 b->data = NULL; 97 b->len = 0; 98 b->cap = 0; 99 } 100 static void bb_reserve(ByteBuf* b, u32 need) { 101 if (need <= b->cap) return; 102 (void)VEC_GROW(b->heap, b->data, b->cap, need); 103 } 104 static u32 bb_append(ByteBuf* b, const void* src, u32 n) { 105 u32 off = b->len; 106 bb_reserve(b, b->len + n); 107 if (n) memcpy(b->data + b->len, src, n); 108 b->len += n; 109 return off; 110 } 111 static u32 bb_append_str(ByteBuf* b, const char* s, u32 n) { 112 /* Linear dedup over what we've appended so far. Strtabs are small. */ 113 if (n == 0) return 0; 114 if (b->len > n) { 115 u32 i; 116 for (i = 0; i + n < b->len; ++i) { 117 if (b->data[i + n] == 0 && memcmp(b->data + i, s, n) == 0) return i; 118 } 119 } 120 u32 off = b->len; 121 bb_reserve(b, b->len + n + 1u); 122 memcpy(b->data + b->len, s, n); 123 b->data[b->len + n] = 0; 124 b->len += n + 1u; 125 return off; 126 } 127 128 /* ---- GNU-hash computation (psABI v1 hash) ---- 129 * Body layout: 130 * u32 nbuckets 131 * u32 symoffset (first hashed dynsym index) 132 * u32 bloom_size (in 64-bit words) 133 * u32 bloom_shift 134 * u64 bloom[bloom_size] 135 * u32 buckets[nbuckets] 136 * u32 chains[ndynsym - symoffset] 137 * 138 * For Phase 4 we keep this very small: nbuckets = max(1, n/2), 139 * bloom_size = 1, bloom_shift = 6 (64-bit ELFCLASS64). All hashed 140 * symbols (sym_offset..ndynsym-1) participate in bloom + buckets + 141 * chains. Slot 0..symoffset-1 are STN_UNDEF + locals, which the 142 * loader doesn't hash. */ 143 144 static u32 gnu_hash_name(const char* s, u32 n) { 145 /* h = 5381; for c in s: h = h * 33 + c */ 146 u32 h = 5381u; 147 u32 i; 148 for (i = 0; i < n; ++i) h = (h * 33u) + (u8)s[i]; 149 return h; 150 } 151 152 /* ---- partition: enumerate imports ---- 153 * 154 * Walks LinkSyms and collects each `imported` symbol that's the 155 * canonical entry in img->globals (resolve_undefs may stamp `imported` 156 * onto multiple shadow slots of the same name; only the canonical one 157 * lands in dynsym). The two output arrays are LinkSymIds: funcs first 158 * (PLT-bound), then data (GOT-bound via GLOB_DAT). */ 159 160 typedef struct ImportLists { 161 LinkSymId* exports; 162 LinkSymId* funcs; 163 u32 nfuncs; 164 LinkSymId* datas; 165 u32 ndatas; 166 u32 nexports; 167 } ImportLists; 168 169 static int sym_is_func_import(const LinkSymbol* s) { 170 /* Most undef shadows have kind = SK_UNDEF (the obj reader keys kind 171 * off shndx, not STT_*). Only useful when the canonical entry 172 * carried a real type — fall through to the DSO lookup otherwise. */ 173 return s->kind == SK_FUNC || s->kind == SK_IFUNC; 174 } 175 176 /* Resolve an import's classifier kind by consulting its providing 177 * DSO's dynsym. read_elf_dso preserves STT_FUNC / STT_OBJECT / etc. 178 * on each defined export; the consumer's undef may have arrived as 179 * SK_UNDEF (clang emits external refs as SHN_UNDEF, which the reader 180 * collapses to SK_UNDEF regardless of STT_*). Returns 1 for func / 181 * ifunc, 0 for everything else (or if the DSO export is missing). */ 182 static int dso_export_is_func(Linker* l, const LinkSymbol* s) { 183 if (s->dso_input_id == LINK_INPUT_NONE) return 0; 184 if (s->dso_input_id - 1u >= LinkInputs_count(&l->inputs)) return 0; 185 LinkInput* in = LinkInputs_at(&l->inputs, s->dso_input_id - 1u); 186 if (!in->obj) return 0; 187 ObjSymIter* it = obj_symiter_new(in->obj); 188 ObjSymEntry e; 189 int is_func = 0; 190 while (obj_symiter_next(it, &e)) { 191 const ObjSym* es = e.sym; 192 if (!es || es->name != s->name) continue; 193 if (es->kind == SK_UNDEF) continue; 194 is_func = (es->kind == SK_FUNC || es->kind == SK_IFUNC); 195 break; 196 } 197 obj_symiter_free(it); 198 return is_func; 199 } 200 201 static void collect_imports(Linker* l, LinkImage* img, Heap* h, 202 ImportLists* il) { 203 u32 i; 204 u32 cap_e = 0, cap_f = 0, cap_d = 0; 205 il->exports = NULL; 206 il->funcs = NULL; 207 il->datas = NULL; 208 il->nexports = il->nfuncs = il->ndatas = 0; 209 for (i = 0; i < LinkSyms_count(&img->syms); ++i) { 210 LinkSymbol* s = LinkSyms_at(&img->syms, i); 211 if (s->name == 0) continue; 212 /* Only the canonical (img->globals) entry per name. */ 213 LinkSymId canonical = symhash_get(&img->globals, s->name); 214 if (canonical != LINK_SYM_NONE && canonical != s->id) continue; 215 if (s->defined && !s->imported && 216 (s->bind == SB_GLOBAL || s->bind == SB_WEAK) && s->kind != SK_FILE && 217 s->kind != SK_SECTION) { 218 if (VEC_GROW(h, il->exports, cap_e, il->nexports + 1u)) 219 compiler_panic(img->c, SRCLOC_NONE, "link: oom on exports"); 220 il->exports[il->nexports++] = s->id; 221 continue; 222 } 223 if (!s->imported) continue; 224 int is_func = sym_is_func_import(s) || dso_export_is_func(l, s); 225 if (is_func) { 226 if (VEC_GROW(h, il->funcs, cap_f, il->nfuncs + 1u)) 227 compiler_panic(img->c, SRCLOC_NONE, "link: oom on import-funcs"); 228 il->funcs[il->nfuncs++] = s->id; 229 } else { 230 if (VEC_GROW(h, il->datas, cap_d, il->ndatas + 1u)) 231 compiler_panic(img->c, SRCLOC_NONE, "link: oom on import-datas"); 232 il->datas[il->ndatas++] = s->id; 233 } 234 } 235 } 236 237 static void free_imports(Heap* h, ImportLists* il) { 238 if (il->exports) h->free(h, il->exports, sizeof(*il->exports) * il->nexports); 239 if (il->funcs) h->free(h, il->funcs, sizeof(*il->funcs) * il->nfuncs); 240 if (il->datas) h->free(h, il->datas, sizeof(*il->datas) * il->ndatas); 241 } 242 243 /* ---- DT_NEEDED set: each DSO input that contributed at least one 244 * import. Order is input order so the loader sees deps in declaration 245 * order. */ 246 static void collect_needed(Linker* l, LinkImage* img, LinkDynState* dyn) { 247 Heap* h = img->heap; 248 u8* used; 249 u32 ninputs = LinkInputs_count(&l->inputs); 250 u32 i, nused = 0; 251 252 used = (u8*)h->alloc(h, ninputs ? ninputs : 1u, 1); 253 if (!used) compiler_panic(img->c, SRCLOC_NONE, "link: oom on needed map"); 254 memset(used, 0, ninputs ? ninputs : 1u); 255 256 /* Mark every DSO that ended up satisfying at least one import. */ 257 for (i = 0; i < LinkSyms_count(&img->syms); ++i) { 258 LinkSymbol* s = LinkSyms_at(&img->syms, i); 259 if (!s->imported) continue; 260 if (s->dso_input_id == LINK_INPUT_NONE) continue; 261 if (s->dso_input_id - 1u >= ninputs) continue; 262 used[s->dso_input_id - 1u] = 1; 263 } 264 /* Always pull every explicitly-supplied DSO into DT_NEEDED, even if 265 * no import landed on it — matches GNU ld without --as-needed. 266 * Phase 4 doesn't plumb --as-needed through to the resolver, so the 267 * default "needed" behavior is the right baseline. */ 268 for (i = 0; i < ninputs; ++i) { 269 LinkInput* in = LinkInputs_at(&l->inputs, i); 270 if (in->kind == LINK_INPUT_DSO_BYTES && in->soname != 0) used[i] = 1; 271 } 272 for (i = 0; i < ninputs; ++i) 273 if (used[i]) ++nused; 274 275 dyn->needed = 276 nused ? (Sym*)h->alloc(h, sizeof(Sym) * nused, _Alignof(Sym)) : NULL; 277 if (nused && !dyn->needed) 278 compiler_panic(img->c, SRCLOC_NONE, "link: oom on needed list"); 279 dyn->nneeded = 0; 280 for (i = 0; i < ninputs; ++i) { 281 LinkInput* in = LinkInputs_at(&l->inputs, i); 282 if (!used[i]) continue; 283 if (in->soname == 0) continue; 284 dyn->needed[dyn->nneeded++] = in->soname; 285 } 286 h->free(h, used, ninputs ? ninputs : 1u); 287 } 288 289 /* ---- dynsym + dynstr build ---- 290 * 291 * Slot 0: STN_UNDEF (zero entry). The loader ignores names with index 292 * 0; we still emit a dynstr entry at offset 0 (the leading NUL). 293 * 294 * Slots 1..nexports: executable-defined globals exported for DSO lookup. 295 * Slots after exports: imported symbols (functions first, then data). 296 * st_shndx = SHN_UNDEF; the loader fills in the value at bind time. 297 * st_value/size are zero — the static linker has no value for an 298 * imported symbol. 299 * 300 * Defined executable globals must be present too: ELF DSOs can resolve 301 * references back to the main executable, and FreeBSD libc depends on that 302 * for Scrt1.o's `environ` and `__progname` definitions. */ 303 304 static void build_dynsym(LinkImage* img, LinkDynState* dyn, 305 const ImportLists* il, ByteBuf* dynstr) { 306 Heap* h = img->heap; 307 u32 nimports = il->nfuncs + il->ndatas; 308 u32 ndynsym = 1u + il->nexports + nimports; /* +1 for null slot */ 309 u32 i; 310 311 dyn->ndynsym = ndynsym; 312 dyn->dynsym = (DynSymRec*)h->alloc(h, sizeof(*dyn->dynsym) * ndynsym, 313 _Alignof(DynSymRec)); 314 if (!dyn->dynsym) compiler_panic(img->c, SRCLOC_NONE, "link: oom on dynsym"); 315 memset(dyn->dynsym, 0, sizeof(*dyn->dynsym) * ndynsym); 316 317 /* Slot 0: STN_UNDEF. dynstr leads with a NUL so st_name=0 reads as 318 * the empty string. */ 319 { 320 u8 z = 0; 321 bb_append(dynstr, &z, 1); 322 } 323 324 /* Per-symbol: dedupe `sym_dynidx` lookup table. Sized to LinkSymId 325 * upper bound. Clean (zero-filled) by alloc convention; we set 326 * indices for imports below. */ 327 dyn->sym_dynidx_size = LinkSyms_count(&img->syms) + 1u; 328 dyn->sym_dynidx = (u32*)h->alloc( 329 h, sizeof(*dyn->sym_dynidx) * dyn->sym_dynidx_size, _Alignof(u32)); 330 if (!dyn->sym_dynidx) 331 compiler_panic(img->c, SRCLOC_NONE, "link: oom on sym_dynidx"); 332 memset(dyn->sym_dynidx, 0, sizeof(*dyn->sym_dynidx) * dyn->sym_dynidx_size); 333 /* sym_plt_vaddr is populated alongside the PLT body emit below; here 334 * we only allocate the parallel array. */ 335 dyn->sym_plt_vaddr = (u64*)h->alloc( 336 h, sizeof(*dyn->sym_plt_vaddr) * dyn->sym_dynidx_size, _Alignof(u64)); 337 if (!dyn->sym_plt_vaddr) 338 compiler_panic(img->c, SRCLOC_NONE, "link: oom on sym_plt_vaddr"); 339 memset(dyn->sym_plt_vaddr, 0, 340 sizeof(*dyn->sym_plt_vaddr) * dyn->sym_dynidx_size); 341 342 /* All dynamic entries we emit today are non-local, so first_global is 343 * right after the single STN_UNDEF slot. */ 344 dyn->first_global = 1u; 345 346 u32 idx = 1u; 347 for (i = 0; i < il->nexports; ++i) { 348 LinkSymId lsid = il->exports[i]; 349 LinkSymbol* s = LinkSyms_at(&img->syms, lsid - 1); 350 DynSymRec* r = &dyn->dynsym[idx]; 351 Slice nm_s = pool_slice(img->c->global, s->name); 352 const char* nm = nm_s.s; 353 size_t namelen = nm_s.len; 354 u8 elf_type = elf_st_type(s->kind); 355 u8 elf_bind = elf_st_bind(s->bind); 356 r->st_name = bb_append_str(dynstr, nm, (u32)namelen); 357 r->st_info = ELF64_ST_INFO(elf_bind, elf_type); 358 r->st_other = STV_DEFAULT; 359 /* The emitter refreshes defined-symbol values after the final header 360 * shift. Any nonzero, non-special section index is enough for rtld to 361 * treat the symbol as defined; section headers are not part of runtime 362 * loading. */ 363 r->st_shndx = 1; 364 r->st_value = s->vaddr; 365 r->st_size = s->size; 366 dyn->sym_dynidx[lsid] = idx; 367 ++idx; 368 } 369 for (i = 0; i < il->nfuncs; ++i) { 370 LinkSymId lsid = il->funcs[i]; 371 LinkSymbol* s = LinkSyms_at(&img->syms, lsid - 1); 372 DynSymRec* r = &dyn->dynsym[idx]; 373 Slice nm_s = pool_slice(img->c->global, s->name); 374 const char* nm = nm_s.s; 375 size_t namelen = nm_s.len; 376 r->st_name = bb_append_str(dynstr, nm, (u32)namelen); 377 r->st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC); 378 r->st_other = STV_DEFAULT; 379 r->st_shndx = SHN_UNDEF; 380 r->st_value = 0; 381 r->st_size = 0; 382 dyn->sym_dynidx[lsid] = idx; 383 ++idx; 384 } 385 for (i = 0; i < il->ndatas; ++i) { 386 LinkSymId lsid = il->datas[i]; 387 LinkSymbol* s = LinkSyms_at(&img->syms, lsid - 1); 388 DynSymRec* r = &dyn->dynsym[idx]; 389 Slice nm_s = pool_slice(img->c->global, s->name); 390 const char* nm = nm_s.s; 391 size_t namelen = nm_s.len; 392 u8 elf_type = STT_OBJECT; 393 if (s->kind == SK_TLS) 394 elf_type = STT_TLS; 395 else if (s->kind == SK_NOTYPE) 396 elf_type = STT_NOTYPE; 397 r->st_name = bb_append_str(dynstr, nm, (u32)namelen); 398 r->st_info = ELF64_ST_INFO(STB_GLOBAL, elf_type); 399 r->st_other = STV_DEFAULT; 400 r->st_shndx = SHN_UNDEF; 401 r->st_value = 0; 402 r->st_size = 0; 403 dyn->sym_dynidx[lsid] = idx; 404 ++idx; 405 } 406 } 407 408 /* ---- GNU symbol versioning (.gnu.version + .gnu.version_r) ---- 409 * 410 * For each imported symbol that binds to a versioned DSO export, require that 411 * export's *default* version (read into ObjImageSym.version at input time) so 412 * the runtime binds the right one. On FreeBSD this is mandatory: the INO64 413 * transition left `stat`/`fstat`/... as two incompatible struct-stat ABIs, the 414 * compat behind a hidden FBSD_1.0 and the modern one as the default FBSD_1.5; 415 * an unversioned reference binds the compat and reads st_size at the wrong 416 * offset. We emit: 417 * .gnu.version — one u16 per .dynsym entry: 0 (null/unversioned import), 418 * 1 (defined export), or >=2 (a version requirement index). 419 * .gnu.version_r — Verneed per DT_NEEDED soname + Vernaux per required 420 * version, numbered 2.. in first-seen order. 421 * Both reference only .dynstr offsets and indices (no vaddrs), so the bytes are 422 * final at layout time. Nothing is emitted when no import is versioned, leaving 423 * musl/glibc-without-version and static links byte-for-byte unchanged. */ 424 425 static u32 elf_sysv_hash(const char* s, u32 n) { 426 u32 h = 0, g, i; 427 for (i = 0; i < n; ++i) { 428 h = (h << 4) + (u8)s[i]; 429 g = h & 0xf0000000u; 430 if (g) h ^= g >> 24; 431 h &= ~g; 432 } 433 return h; 434 } 435 436 /* Default version name the DSO `in` exports for `name`, or 0 if `in` carries no 437 * versioning / doesn't export `name` with a default version. */ 438 static Sym dso_default_version(LinkInput* in, Sym name) { 439 const ObjImage* im = in->obj ? obj_image(in->obj) : NULL; 440 u32 i, n; 441 if (!im) return 0; 442 n = obj_image_ndynsyms(im); 443 for (i = 0; i < n; ++i) { 444 const ObjImageSym* s = obj_image_dynsym(im, i); 445 if (s->name == name && s->version != 0) return s->version; 446 } 447 return 0; 448 } 449 450 typedef struct VerReq { 451 Sym soname; 452 Sym version; 453 u16 index; 454 } VerReq; 455 456 typedef struct VerBuild { 457 Heap* h; 458 Linker* l; 459 LinkImage* img; 460 LinkDynState* dyn; 461 u8* vs; /* versym bytes being filled */ 462 VerReq* reqs; 463 u32 nreq; 464 u32 capreq; 465 } VerBuild; 466 467 /* Resolve one imported symbol's version requirement: look up its providing 468 * DSO's default version for the name, intern a (soname, version) requirement 469 * (assigning the next index), and stamp the symbol's versym slot. */ 470 static void ver_process_import(VerBuild* vb, LinkSymId lsid) { 471 LinkSymbol* s = LinkSyms_at(&vb->img->syms, lsid - 1); 472 u32 di = vb->dyn->sym_dynidx[lsid]; 473 LinkInput* in; 474 Sym ver; 475 u16 vidx = 0; 476 u32 r; 477 if (!di || s->dso_input_id == LINK_INPUT_NONE) return; 478 if (s->dso_input_id - 1u >= LinkInputs_count(&vb->l->inputs)) return; 479 in = LinkInputs_at(&vb->l->inputs, s->dso_input_id - 1u); 480 if (in->soname == 0) return; 481 ver = dso_default_version(in, s->name); 482 if (ver == 0) return; 483 for (r = 0; r < vb->nreq; ++r) 484 if (vb->reqs[r].soname == in->soname && vb->reqs[r].version == ver) { 485 vidx = vb->reqs[r].index; 486 break; 487 } 488 if (!vidx) { 489 if (VEC_GROW(vb->h, vb->reqs, vb->capreq, vb->nreq + 1u)) 490 compiler_panic(vb->img->c, SRCLOC_NONE, "link: oom on version reqs"); 491 vidx = (u16)(2u + vb->nreq); 492 vb->reqs[vb->nreq].soname = in->soname; 493 vb->reqs[vb->nreq].version = ver; 494 vb->reqs[vb->nreq].index = vidx; 495 vb->nreq++; 496 } 497 wr_u16_le(vb->vs + (u64)di * 2u, vidx); 498 } 499 500 static void build_versions(Linker* l, LinkImage* img, LinkDynState* dyn, 501 const ImportLists* il, ByteBuf* dynstr) { 502 Heap* h = img->heap; 503 VerBuild vb; 504 u32 i; 505 506 dyn->versym = NULL; 507 dyn->versym_len = 0; 508 dyn->verneed = NULL; 509 dyn->verneed_len = 0; 510 dyn->nverneed = 0; 511 if (dyn->ndynsym == 0) return; 512 513 /* versym: default 0 (local/unversioned); defined exports -> GLOBAL. */ 514 vb.h = h; 515 vb.l = l; 516 vb.img = img; 517 vb.dyn = dyn; 518 vb.reqs = NULL; 519 vb.nreq = 0; 520 vb.capreq = 0; 521 vb.vs = (u8*)h->alloc(h, (size_t)dyn->ndynsym * 2u, 2); 522 if (!vb.vs) compiler_panic(img->c, SRCLOC_NONE, "link: oom on versym"); 523 memset(vb.vs, 0, (size_t)dyn->ndynsym * 2u); 524 for (i = 0; i < il->nexports; ++i) { 525 u32 di = dyn->sym_dynidx[il->exports[i]]; 526 if (di) wr_u16_le(vb.vs + (u64)di * 2u, (u16)VER_NDX_GLOBAL); 527 } 528 for (i = 0; i < il->nfuncs; ++i) ver_process_import(&vb, il->funcs[i]); 529 for (i = 0; i < il->ndatas; ++i) ver_process_import(&vb, il->datas[i]); 530 531 if (vb.nreq == 0) { 532 /* No versioned imports: emit nothing, keep the link unchanged. */ 533 h->free(h, vb.vs, (size_t)dyn->ndynsym * 2u); 534 if (vb.reqs) h->free(h, vb.reqs, sizeof(*vb.reqs) * vb.capreq); 535 return; 536 } 537 dyn->versym = vb.vs; 538 dyn->versym_len = dyn->ndynsym * 2u; 539 540 /* Group requirements by soname (first-seen order) into Verneed/Vernaux. */ 541 { 542 Sym* sonames = NULL; 543 u32 nson = 0, capson = 0; 544 u32 r; 545 for (r = 0; r < vb.nreq; ++r) { 546 u32 k; 547 int seen = 0; 548 for (k = 0; k < nson; ++k) 549 if (sonames[k] == vb.reqs[r].soname) { 550 seen = 1; 551 break; 552 } 553 if (!seen) { 554 if (VEC_GROW(h, sonames, capson, nson + 1u)) 555 compiler_panic(img->c, SRCLOC_NONE, "link: oom on verneed sonames"); 556 sonames[nson++] = vb.reqs[r].soname; 557 } 558 } 559 { 560 u32 total = nson * (u32)ELF_VERNEED_SIZE + vb.nreq * (u32)ELF_VERNAUX_SIZE; 561 u8* vn = (u8*)h->alloc(h, total, 4); 562 u8* p; 563 u32 si; 564 if (!vn) compiler_panic(img->c, SRCLOC_NONE, "link: oom on verneed"); 565 memset(vn, 0, total); 566 p = vn; 567 for (si = 0; si < nson; ++si) { 568 Slice so_s = pool_slice(l->c->global, sonames[si]); 569 u32 file_off = bb_append_str(dynstr, so_s.s, (u32)so_s.len); 570 u8* vn_rec = p; 571 u32 cnt = 0; 572 u8* aux; 573 p += ELF_VERNEED_SIZE; 574 aux = p; 575 for (r = 0; r < vb.nreq; ++r) { 576 Slice ver_s; 577 u32 name_off; 578 if (vb.reqs[r].soname != sonames[si]) continue; 579 ver_s = pool_slice(l->c->global, vb.reqs[r].version); 580 name_off = bb_append_str(dynstr, ver_s.s, (u32)ver_s.len); 581 wr_u32_le(p + 0, elf_sysv_hash(ver_s.s, (u32)ver_s.len)); /* vna_hash */ 582 wr_u16_le(p + 4, 0); /* vna_flags */ 583 wr_u16_le(p + 6, vb.reqs[r].index); /* vna_other */ 584 wr_u32_le(p + 8, name_off); /* vna_name */ 585 /* vna_next: filled after we know if another aux follows. */ 586 p += ELF_VERNAUX_SIZE; 587 ++cnt; 588 } 589 /* Verneed header. vn_aux is the byte offset to the first Vernaux. */ 590 wr_u16_le(vn_rec + 0, 1); /* vn_version */ 591 wr_u16_le(vn_rec + 2, (u16)cnt); /* vn_cnt */ 592 wr_u32_le(vn_rec + 4, file_off); /* vn_file */ 593 wr_u32_le(vn_rec + 8, (u32)(aux - vn_rec)); /* vn_aux */ 594 wr_u32_le(vn_rec + 12, 595 si + 1u < nson ? (u32)(p - vn_rec) : 0u); /* vn_next */ 596 /* Link the Vernaux chain (each entry -> next, last -> 0). */ 597 { 598 u8* a = aux; 599 u32 j; 600 for (j = 0; j < cnt; ++j) { 601 wr_u32_le(a + 12, j + 1u < cnt ? (u32)ELF_VERNAUX_SIZE : 0u); 602 a += ELF_VERNAUX_SIZE; 603 } 604 } 605 } 606 dyn->verneed = vn; 607 dyn->verneed_len = total; 608 dyn->nverneed = nson; 609 } 610 if (sonames) h->free(h, sonames, sizeof(*sonames) * capson); 611 } 612 if (vb.reqs) h->free(h, vb.reqs, sizeof(*vb.reqs) * vb.capreq); 613 } 614 615 /* ---- .gnu.hash builder ---- 616 * 617 * Hashed range is [first_global, ndynsym) — slot 0 (STN_UNDEF) is 618 * unhashed. Layout matches loader expectations (musl, glibc, FreeBSD). 619 * 620 * Bucket count: one. That keeps the required chain ordering trivial even as 621 * we mix executable exports and imports without sorting the dynsym table by 622 * hash bucket. Bloom is 1 word for Phase 4 — a real implementation would 623 * scale with hashed_count, but 1 word with shift=6 still satisfies the 624 * loader's correctness check (false positives only cost a chain scan). */ 625 626 static void build_gnu_hash(Heap* h, LinkImage* img, LinkDynState* dyn, 627 const ByteBuf* dynstr) { 628 u32 hashed = (dyn->ndynsym > dyn->first_global) 629 ? (dyn->ndynsym - dyn->first_global) 630 : 0u; 631 u32 nbuckets = 1u; 632 u32 bloom_size = 1u; /* 64-bit word */ 633 u32 bloom_shift = 6u; 634 u32 sym_offset = dyn->first_global; 635 u32 hdr_bytes = 16u; /* nbuckets/symoff/bloomsz/bloomshift */ 636 u32 bloom_bytes = bloom_size * 8u; 637 u32 buckets_bytes = nbuckets * 4u; 638 u32 chains_bytes = hashed * 4u; 639 u32 total = hdr_bytes + bloom_bytes + buckets_bytes + chains_bytes; 640 641 u8* buf = (u8*)h->alloc(h, total ? total : 1u, 4); 642 if (!buf) compiler_panic(img->c, SRCLOC_NONE, "link: oom on .gnu.hash"); 643 memset(buf, 0, total); 644 645 wr_u32_le(buf + 0, nbuckets); 646 wr_u32_le(buf + 4, sym_offset); 647 wr_u32_le(buf + 8, bloom_size); 648 wr_u32_le(buf + 12, bloom_shift); 649 650 /* Bloom + buckets + chains. We need each hashed symbol's hash. */ 651 if (hashed) { 652 u32 i; 653 u32* hashes = (u32*)h->alloc(h, sizeof(u32) * hashed, _Alignof(u32)); 654 if (!hashes) 655 compiler_panic(img->c, SRCLOC_NONE, "link: oom on .gnu.hash hashes"); 656 for (i = 0; i < hashed; ++i) { 657 const DynSymRec* r = &dyn->dynsym[sym_offset + i]; 658 const char* name = (const char*)dynstr->data + r->st_name; 659 size_t n = name ? slice_from_cstr(name).len : 0; 660 hashes[i] = gnu_hash_name(name, (u32)n); 661 } 662 663 /* Bloom filter: H[i] / H[i] >> shift */ 664 u64 bloom = 0; 665 for (i = 0; i < hashed; ++i) { 666 u32 h1 = hashes[i] % 64u; 667 u32 h2 = (hashes[i] >> bloom_shift) % 64u; 668 bloom |= ((u64)1 << h1) | ((u64)1 << h2); 669 } 670 wr_u64_le(buf + hdr_bytes, bloom); 671 672 /* Buckets/chains: for each hashed sym, append to its bucket's 673 * chain. The chain encodes (hash & ~1) per entry; the LSB is set 674 * on the LAST entry in a bucket to terminate. Buckets are filled 675 * with the first chain index that hashes there (1-based into the 676 * dynsym, i.e. `sym_offset + i`). */ 677 u32* buckets = (u32*)(buf + hdr_bytes + bloom_bytes); 678 u32* chains = (u32*)(buf + hdr_bytes + bloom_bytes + buckets_bytes); 679 /* First pass: bucket = first sym index that hashes there. */ 680 for (i = 0; i < hashed; ++i) { 681 u32 b = hashes[i] % nbuckets; 682 if (buckets[b] == 0) buckets[b] = sym_offset + i; 683 } 684 /* Second pass: chain[i] = hash with LSB cleared; LSB set if next 685 * sym is in a different bucket. Walk symbols in order; LSB on 686 * chain[i] when sym i+1 is in a different bucket OR is the end. */ 687 for (i = 0; i < hashed; ++i) { 688 u32 v = hashes[i] & ~1u; 689 int last = (i + 1 == hashed) || 690 ((hashes[i + 1] % nbuckets) != (hashes[i] % nbuckets)); 691 if (last) v |= 1u; 692 chains[i] = v; 693 } 694 h->free(h, hashes, sizeof(u32) * hashed); 695 } 696 697 dyn->gnu_hash = buf; 698 dyn->gnu_hash_len = total; 699 } 700 701 /* ---- .dynamic body builder ---- 702 * 703 * Computed at layout time so the size is known before segments are 704 * placed. Each entry is two u64s (d_tag, d_un.d_val|d_un.d_ptr). 705 * Final entry is DT_NULL. The d_ptr fields that point at other 706 * synthetic sections are filled with image-relative vaddrs; the emit 707 * pass adds load-base / IMAGE_BASE only when ET_EXEC. */ 708 709 typedef struct DynEntry { 710 u64 tag; 711 u64 val; /* either d_val or d_ptr; emit just writes 8 bytes */ 712 } DynEntry; 713 714 static u32 count_dynamic_entries(const LinkDynState* dyn) { 715 /* Required: DT_STRTAB DT_STRSZ DT_SYMTAB DT_SYMENT DT_GNU_HASH 716 * DT_FLAGS_1 (DF_1_NOW for eager binding) 717 * DT_NULL terminator 718 * Optional (only when there are .rela.dyn records): 719 * DT_RELA DT_RELASZ DT_RELAENT 720 * Optional (only when there are imported functions / a PLT): 721 * DT_PLTGOT DT_PLTRELSZ DT_PLTREL DT_JMPREL 722 * Plus DT_NEEDED per dependency. */ 723 u32 n = dyn->nneeded; 724 n += 7; /* 5 fixed + DT_FLAGS_1 + DT_NULL */ 725 if (dyn->cap_rela_dyn) n += 3; /* DT_RELA + DT_RELASZ + DT_RELAENT */ 726 if (dyn->nrela_plt) n += 4; /* PLT-only entries */ 727 if (dyn->nverneed) n += 3; /* DT_VERSYM + DT_VERNEED + DT_VERNEEDNUM */ 728 return n; 729 } 730 731 /* ---- main entry ---- */ 732 733 void layout_dyn(Linker* l, LinkImage* img) { 734 Heap* h = img->heap; 735 LinkDynState* dyn; 736 LinkDynState dyn_probe; 737 ImportLists imports; 738 ByteBuf dynstr; 739 u64 page; 740 const LinkArchDesc* arch; 741 const ObjElfArchOps* elf_arch; 742 743 if (!l->emit_pie) return; 744 745 /* The dynamic-section layout below is ELF64-only (Elf64_Sym/Dyn/Rela wire 746 * sizes, 8-byte GOT slots). rv32 is a static-only v1 target, so a dynamic / 747 * PIE rv32 link is unsupported — fail with a clear diagnostic instead of 748 * crashing on the ELF64 assumptions. Link rv32 images statically (kit ld 749 * -no-pie, or a -T script for bare-metal section placement). */ 750 if (img->c->target.ptr_size == 4u) { 751 compiler_panic(img->c, SRCLOC_NONE, 752 "link: dynamic/PIE linking is not supported for 32-bit " 753 "RISC-V (ELFCLASS32); link statically (kit ld -no-pie)"); 754 } 755 756 arch = link_arch_desc_for(l->c); 757 if (!arch) 758 compiler_panic(img->c, SRCLOC_NONE, "link: layout_dyn: no arch descriptor"); 759 { 760 const ObjFormatImpl* fmt = obj_format_lookup(KIT_OBJ_ELF); 761 elf_arch = fmt && fmt->elf_arch ? fmt->elf_arch(l->c->target.arch) : NULL; 762 if (!elf_arch) 763 compiler_panic(img->c, SRCLOC_NONE, 764 "link: layout_dyn: no ELF arch descriptor"); 765 } 766 767 /* Step 1: enumerate imports + DT_NEEDED. A PIE with no imports and no 768 * DSO inputs is effectively static; keep ET_DYN output but do not stamp 769 * PT_INTERP/PT_DYNAMIC or an empty .dynamic section. */ 770 memset(&dyn_probe, 0, sizeof dyn_probe); 771 collect_imports(l, img, h, &imports); 772 collect_needed(l, img, &dyn_probe); 773 if (l->emit_static_exe && imports.nfuncs == 0 && imports.ndatas == 0 && 774 dyn_probe.nneeded == 0) { 775 img->pie = 1; 776 free_imports(h, &imports); 777 return; 778 } 779 780 dyn = (LinkDynState*)h->alloc(h, sizeof(*dyn), _Alignof(LinkDynState)); 781 if (!dyn) compiler_panic(img->c, SRCLOC_NONE, "link: oom on dyn state"); 782 *dyn = dyn_probe; 783 img->dyn = dyn; 784 img->pie = 1; 785 786 /* PT_INTERP path. Default to the canonical musl loader matching the 787 * target arch (per-arch table in src/arch/<arch>/link.c) when the caller 788 * didn't set one. Drivers like kit-cc always override via 789 * link_set_interp_path; this default is correctness for direct 790 * libkit consumers. glibc users have to set their interp 791 * explicitly — we don't pick a default for them. */ 792 dyn->interp_path = 793 l->interp_path 794 ? l->interp_path 795 : pool_intern_slice(l->c->global, 796 slice_from_cstr(elf_arch->default_musl_interp)); 797 798 /* Step 2: build .dynstr + .dynsym. .dynstr must also carry the 799 * DT_NEEDED soname strings the .dynamic body references; intern 800 * them after the import names so build_dynsym's de-dup also covers 801 * any name that happens to collide with a soname. */ 802 bb_init(&dynstr, h); 803 build_dynsym(img, dyn, &imports, &dynstr); 804 { 805 u32 ni; 806 for (ni = 0; ni < dyn->nneeded; ++ni) { 807 Slice s_s = pool_slice(l->c->global, dyn->needed[ni]); 808 const char* s = s_s.s; 809 size_t slen = s_s.len; 810 if (s && slen) (void)bb_append_str(&dynstr, s, (u32)slen); 811 } 812 } 813 /* Symbol versioning: assign per-import version requirements and append the 814 * version strings ("FBSD_1.5", ...) to .dynstr. Must run before .dynstr is 815 * finalized below; emits nothing when no import is versioned. */ 816 build_versions(l, img, dyn, &imports, &dynstr); 817 dyn->dynstr = dynstr.data; 818 dyn->dynstr_len = dynstr.len; 819 820 /* Step 3: .gnu.hash. */ 821 build_gnu_hash(h, img, dyn, &dynstr); 822 823 /* Step 4: pre-size all the synthetic sections. 824 * .interp: strlen + 1 825 * .dynsym: 24 * ndynsym 826 * .dynstr: dynstr_len 827 * .gnu.hash: gnu_hash_len 828 * .rela.dyn: 24 * (runtime GLOB_DAT + RELATIVE records) 829 * .rela.plt: 24 * nfuncs 830 * .plt: 32 + 16 * nfuncs (PLT0 + per-slot) 831 * .got.plt: 8 * (3 + nfuncs) 832 * .dynamic: 16 * count_dynamic_entries 833 */ 834 dyn->nplt = imports.nfuncs; 835 dyn->nrela_plt = imports.nfuncs; 836 dyn->rela_plt = imports.nfuncs 837 ? (DynRela*)h->alloc(h, sizeof(DynRela) * imports.nfuncs, 838 _Alignof(DynRela)) 839 : NULL; 840 if (imports.nfuncs && !dyn->rela_plt) 841 compiler_panic(img->c, SRCLOC_NONE, "link: oom on rela_plt"); 842 843 /* RELA dyn: GLOB_DAT (one per imported abs-relocated symbol) + 844 * RELATIVE (one per PIE internal abs reloc against a defined sym). 845 * Phase 5 emits these dynamically during reloc-apply; pre-count the 846 * exact total here (img->relocs and the resolve-time `imported` flags 847 * are already settled by the time layout_dyn runs) so the section 848 * isn't padded with hundreds of trailing R_*_NONE records. */ 849 u32 cap_rel = 0; 850 { 851 u32 ri; 852 for (ri = 0; ri < LinkRelocs_count(&img->relocs); ++ri) { 853 const LinkRelocApply* r = LinkRelocs_at(&img->relocs, ri); 854 const LinkSymbol* tgt = LinkSyms_at(&img->syms, r->target - 1); 855 const LinkSection* sec; 856 if (r->kind != R_ABS32 && r->kind != R_ABS64) continue; 857 if (r->link_section_id == LINK_SEC_NONE || 858 r->link_section_id > img->nsections) 859 continue; 860 sec = &img->sections[r->link_section_id - 1]; 861 if (sec->segment_id == LINK_SEG_NONE || sec->file_only) continue; 862 if (tgt->imported) { 863 cap_rel++; /* GLOB_DAT */ 864 } else if (tgt->defined && tgt->kind != SK_ABS) { 865 cap_rel++; /* RELATIVE */ 866 } 867 } 868 } 869 dyn->cap_rela_dyn = cap_rel; 870 dyn->rela_dyn = 871 dyn->cap_rela_dyn 872 ? (DynRela*)h->alloc(h, sizeof(DynRela) * dyn->cap_rela_dyn, 873 _Alignof(DynRela)) 874 : NULL; 875 if (dyn->cap_rela_dyn && !dyn->rela_dyn) 876 compiler_panic(img->c, SRCLOC_NONE, "link: oom on rela_dyn"); 877 dyn->nrela_dyn = 0; 878 879 Slice interp_s = pool_slice(l->c->global, dyn->interp_path); 880 const char* interp_str = interp_s.s; 881 size_t namelen = interp_s.len; 882 u64 interp_bytes = (u64)namelen + 1u; 883 u64 dynsym_bytes = (u64)dyn->ndynsym * ELF64_SYM_SIZE; 884 u64 dynstr_bytes = (u64)dyn->dynstr_len; 885 u64 gnuhash_bytes = (u64)dyn->gnu_hash_len; 886 int has_ver = dyn->nverneed > 0; 887 u64 versym_bytes = (u64)dyn->versym_len; 888 u64 verneed_bytes = (u64)dyn->verneed_len; 889 /* rela.dyn is pre-counted exactly; rela.plt is one record per PLT slot. */ 890 u64 rela_dyn_bytes = (u64)dyn->cap_rela_dyn * ELF64_RELA_SIZE; 891 u64 rela_plt_bytes = (u64)dyn->nrela_plt * ELF64_RELA_SIZE; 892 u64 plt_bytes = 893 (u64)(imports.nfuncs 894 ? arch->plt0_size + arch->plt_entry_size * imports.nfuncs 895 : 0u); 896 u64 gotplt_bytes = (u64)(imports.nfuncs ? 8u * (3u + imports.nfuncs) : 0u); 897 dyn->ndyn_entries = count_dynamic_entries(dyn); 898 u64 dynamic_bytes = (u64)dyn->ndyn_entries * ELF64_DYN_SIZE; 899 900 /* Step 5: place segments, page-aligned after the existing image 901 * span. New segments: 902 * ro_seg (PF_R) — .interp + .dynsym + .dynstr + .gnu.hash + 903 * .rela.dyn + .rela.plt 904 * rx_seg (PF_R+X)— .plt (only when imports.nfuncs > 0) 905 * rw_seg (PF_R+W)— .got.plt + .dynamic 906 * 907 * .dynamic lives in rw_seg because glibc's loader patches DT_* 908 * d_un.d_ptr fields in-place at startup (elf_get_dynamic_info 909 * adjusts STRTAB/SYMTAB/etc. by l_addr); a PF_R-only segment 910 * causes SEGV_ACCERR. musl's loader doesn't do this rewrite, but 911 * the RW placement is conventional and works for both. 912 */ 913 page = 0x4000u; /* keep aligned with layout_page_size default */ 914 { 915 /* Read the page size from layout_page_size by re-using the 916 * configured execmem if present — duplicates the helper rather 917 * than expose it; the value is only used for alignment. */ 918 const KitExecMem* m = (l && l->jit_host) ? l->jit_host->execmem : NULL; 919 if (m && m->page_size) page = (u64)m->page_size; 920 } 921 922 u64 base_vaddr = 0; 923 u32 i; 924 for (i = 0; i < img->nsegments; ++i) { 925 u64 end = img->segments[i].vaddr + img->segments[i].mem_size; 926 if (end > base_vaddr) base_vaddr = end; 927 } 928 base_vaddr = ALIGN_UP(base_vaddr, page); 929 930 /* Pack ro section offsets (relative to ro_seg.vaddr). 8-byte 931 * alignment for tables; 4-byte for .interp string. */ 932 u64 off = 0; 933 u64 interp_off = off; 934 off = ALIGN_UP(off + interp_bytes, 8u); 935 u64 dynsym_off = off; 936 off = ALIGN_UP(off + dynsym_bytes, 8u); 937 u64 dynstr_off = off; 938 off = ALIGN_UP(off + dynstr_bytes, 8u); 939 u64 gnuhash_off = off; 940 off = ALIGN_UP(off + gnuhash_bytes, 8u); 941 u64 rela_dyn_off = off; 942 off = ALIGN_UP(off + rela_dyn_bytes, 8u); 943 u64 rela_plt_off = off; 944 off = ALIGN_UP(off + rela_plt_bytes, 8u); 945 /* .gnu.version + .gnu.version_r (zero-sized and skipped when no import is 946 * versioned, so the ro segment is unchanged for unversioned links). */ 947 u64 versym_off = off; 948 off = ALIGN_UP(off + versym_bytes, 8u); 949 u64 verneed_off = off; 950 off = ALIGN_UP(off + verneed_bytes, 8u); 951 u64 ro_seg_size = off; 952 953 /* When no PLT is needed, suppress the RX/.plt segment entirely. */ 954 int has_plt = imports.nfuncs > 0; 955 956 /* Pack rw_seg offsets: .got.plt (when has_plt) followed by .dynamic. */ 957 u64 rw_off = 0; 958 u64 gotplt_off = rw_off; 959 if (has_plt) rw_off = ALIGN_UP(rw_off + gotplt_bytes, 8u); 960 u64 dynamic_off = rw_off; 961 rw_off = ALIGN_UP(rw_off + dynamic_bytes, 8u); 962 u64 rw_seg_size = rw_off; 963 964 u64 ro_vaddr = base_vaddr; 965 u64 rx_vaddr = ALIGN_UP(ro_vaddr + ro_seg_size, page); 966 u64 rw_vaddr = ALIGN_UP(rx_vaddr + (has_plt ? plt_bytes : 0u), page); 967 968 /* rw_seg always exists (it carries .dynamic). */ 969 u32 nseg = 2u + (has_plt ? 1u : 0u); 970 u32 seg_base = dyn_alloc_segments(img, nseg); 971 u32 ro_seg_idx = seg_base + 0u; 972 u32 rx_seg_idx = has_plt ? seg_base + 1u : 0u; 973 u32 rw_seg_idx = seg_base + (has_plt ? 2u : 1u); 974 975 LinkSegment* ro_seg = &img->segments[ro_seg_idx]; 976 memset(ro_seg, 0, sizeof(*ro_seg)); 977 ro_seg->id = (LinkSegmentId)(ro_seg_idx + 1u); 978 ro_seg->flags = SF_ALLOC; /* PF_R */ 979 ro_seg->file_offset = ro_vaddr; 980 ro_seg->vaddr = ro_vaddr; 981 ro_seg->file_size = ro_seg_size; 982 ro_seg->mem_size = ro_seg_size; 983 ro_seg->align = (u32)page; 984 ro_seg->nsections = 6u + (has_ver ? 2u : 0u); 985 img->segment_bytes[ro_seg_idx] = 986 ro_seg_size ? (u8*)h->alloc(h, (size_t)ro_seg_size, 16) : NULL; 987 img->segment_bytes_cap[ro_seg_idx] = (size_t)ro_seg_size; 988 if (ro_seg_size && !img->segment_bytes[ro_seg_idx]) 989 compiler_panic(img->c, SRCLOC_NONE, "link: oom on ro dyn segment"); 990 if (ro_seg_size) 991 memset(img->segment_bytes[ro_seg_idx], 0, (size_t)ro_seg_size); 992 993 if (has_plt) { 994 LinkSegment* rx_seg = &img->segments[rx_seg_idx]; 995 memset(rx_seg, 0, sizeof(*rx_seg)); 996 rx_seg->id = (LinkSegmentId)(rx_seg_idx + 1u); 997 rx_seg->flags = SF_ALLOC | SF_EXEC; 998 rx_seg->file_offset = rx_vaddr; 999 rx_seg->vaddr = rx_vaddr; 1000 rx_seg->file_size = plt_bytes; 1001 rx_seg->mem_size = plt_bytes; 1002 rx_seg->align = (u32)page; 1003 rx_seg->nsections = 1; 1004 img->segment_bytes[rx_seg_idx] = (u8*)h->alloc(h, (size_t)plt_bytes, 16); 1005 img->segment_bytes_cap[rx_seg_idx] = (size_t)plt_bytes; 1006 if (!img->segment_bytes[rx_seg_idx]) 1007 compiler_panic(img->c, SRCLOC_NONE, "link: oom on .plt segment"); 1008 memset(img->segment_bytes[rx_seg_idx], 0, (size_t)plt_bytes); 1009 /* Stash plt / got.plt vaddrs now — the PLT body emit just below 1010 * reads them, and the post-shift fixup in shift_image_addresses 1011 * (link_elf.c) keys on these fields too. */ 1012 dyn->plt_vaddr = rx_vaddr; 1013 dyn->plt_size = plt_bytes; 1014 dyn->got_plt_vaddr = rw_vaddr; 1015 dyn->got_plt_size = gotplt_bytes; 1016 /* PLT body emit: the descriptor owns the psABI-specific bytes. */ 1017 if (!arch->emit_plt0 || !arch->emit_plt_entry) 1018 compiler_panic(l->c, SRCLOC_NONE, "link: PLT emit not configured"); 1019 { 1020 u8* plt_b = img->segment_bytes[rx_seg_idx]; 1021 u32 ki; 1022 arch->emit_plt0(plt_b, dyn->plt_vaddr, dyn->got_plt_vaddr); 1023 for (ki = 0; ki < imports.nfuncs; ++ki) { 1024 u64 entry_vaddr = dyn->plt_vaddr + arch->plt0_size + 1025 (u64)arch->plt_entry_size * (u64)ki; 1026 u64 slot_vaddr = dyn->got_plt_vaddr + 8u * (3u + ki); 1027 u8* p = 1028 plt_b + arch->plt0_size + (size_t)arch->plt_entry_size * (size_t)ki; 1029 arch->emit_plt_entry(p, entry_vaddr, slot_vaddr); 1030 } 1031 } 1032 } 1033 /* rw_seg always exists — it carries .dynamic, plus .got.plt when 1034 * imports are present. */ 1035 { 1036 LinkSegment* rw_seg = &img->segments[rw_seg_idx]; 1037 memset(rw_seg, 0, sizeof(*rw_seg)); 1038 rw_seg->id = (LinkSegmentId)(rw_seg_idx + 1u); 1039 rw_seg->flags = SF_ALLOC | SF_WRITE; 1040 rw_seg->file_offset = rw_vaddr; 1041 rw_seg->vaddr = rw_vaddr; 1042 rw_seg->file_size = rw_seg_size; 1043 rw_seg->mem_size = rw_seg_size; 1044 rw_seg->align = (u32)page; 1045 rw_seg->nsections = has_plt ? 2u : 1u; 1046 img->segment_bytes[rw_seg_idx] = (u8*)h->alloc(h, (size_t)rw_seg_size, 16); 1047 img->segment_bytes_cap[rw_seg_idx] = (size_t)rw_seg_size; 1048 if (!img->segment_bytes[rw_seg_idx]) 1049 compiler_panic(img->c, SRCLOC_NONE, "link: oom on rw dyn segment"); 1050 /* Zero-initialize. .got.plt[0] (&.dynamic) is filled later, after 1051 * shift_image_addresses has bumped dyn->dynamic_vaddr. .dynamic 1052 * body is built post-shift in link_emit_elf. Loader 1053 * patches all .got.plt slots from .rela.plt before user code 1054 * under DF_1_NOW. */ 1055 memset(img->segment_bytes[rw_seg_idx], 0, (size_t)rw_seg_size); 1056 } 1057 img->nsegments += nseg; 1058 1059 /* Step 6: synthetic LinkSection entries. Order in img->sections 1060 * matches the loader-friendly file order and feeds emit's 1061 * outshdr-merge pass. */ 1062 u32 nsec = 7u + (has_plt ? 2u : 0u) + (has_ver ? 2u : 0u); 1063 u32 sec_base = dyn_alloc_sections(img, nsec); 1064 1065 /* helper: populate a fresh LinkSection for a segment-internal range */ 1066 /* Inline because the args differ enough (sem, name) per slot. */ 1067 Sym name_interp = pool_intern_slice(l->c->global, SLICE_LIT(".interp")); 1068 Sym name_dynsym = pool_intern_slice(l->c->global, SLICE_LIT(".dynsym")); 1069 Sym name_dynstr = pool_intern_slice(l->c->global, SLICE_LIT(".dynstr")); 1070 Sym name_gnu_hash = pool_intern_slice(l->c->global, SLICE_LIT(".gnu.hash")); 1071 Sym name_rela_dyn = pool_intern_slice(l->c->global, SLICE_LIT(".rela.dyn")); 1072 Sym name_rela_plt = pool_intern_slice(l->c->global, SLICE_LIT(".rela.plt")); 1073 Sym name_dynamic = pool_intern_slice(l->c->global, SLICE_LIT(".dynamic")); 1074 Sym name_plt = pool_intern_slice(l->c->global, SLICE_LIT(".plt")); 1075 Sym name_got_plt = pool_intern_slice(l->c->global, SLICE_LIT(".got.plt")); 1076 Sym name_gnu_version = pool_intern_slice(l->c->global, SLICE_LIT(".gnu.version")); 1077 Sym name_gnu_version_r = 1078 pool_intern_slice(l->c->global, SLICE_LIT(".gnu.version_r")); 1079 1080 #define INIT_SEC(IDX, NAME, SEG_IDX, OFF_IN_SEG, SIZE, ALIGN, FLAGS, SEM) \ 1081 do { \ 1082 LinkSection* ls = &img->sections[sec_base + (IDX)]; \ 1083 memset(ls, 0, sizeof(*ls)); \ 1084 ls->id = (LinkSectionId)(sec_base + (IDX) + 1u); \ 1085 ls->input_id = LINK_INPUT_NONE; \ 1086 ls->obj_section_id = OBJ_SEC_NONE; \ 1087 ls->segment_id = img->segments[(SEG_IDX)].id; \ 1088 ls->input_offset = (OFF_IN_SEG); \ 1089 ls->file_offset = img->segments[(SEG_IDX)].file_offset + (OFF_IN_SEG); \ 1090 ls->vaddr = img->segments[(SEG_IDX)].vaddr + (OFF_IN_SEG); \ 1091 ls->size = (SIZE); \ 1092 ls->flags = (FLAGS); \ 1093 ls->align = (ALIGN); \ 1094 ls->name = (NAME); \ 1095 ls->sem = (SEM); \ 1096 } while (0) 1097 1098 INIT_SEC(0, name_interp, ro_seg_idx, interp_off, interp_bytes, 1, SF_ALLOC, 1099 SSEM_PROGBITS); 1100 INIT_SEC(1, name_dynsym, ro_seg_idx, dynsym_off, dynsym_bytes, 8, SF_ALLOC, 1101 SSEM_PROGBITS); 1102 INIT_SEC(2, name_dynstr, ro_seg_idx, dynstr_off, dynstr_bytes, 1, SF_ALLOC, 1103 SSEM_PROGBITS); 1104 INIT_SEC(3, name_gnu_hash, ro_seg_idx, gnuhash_off, gnuhash_bytes, 8, 1105 SF_ALLOC, SSEM_PROGBITS); 1106 INIT_SEC(4, name_rela_dyn, ro_seg_idx, rela_dyn_off, rela_dyn_bytes, 8, 1107 SF_ALLOC, SSEM_PROGBITS); 1108 INIT_SEC(5, name_rela_plt, ro_seg_idx, rela_plt_off, rela_plt_bytes, 8, 1109 SF_ALLOC, SSEM_PROGBITS); 1110 INIT_SEC(6, name_dynamic, rw_seg_idx, dynamic_off, dynamic_bytes, 8, 1111 SF_ALLOC | SF_WRITE, SSEM_PROGBITS); 1112 1113 dyn->sec_interp = (LinkSectionId)(sec_base + 0 + 1u); 1114 dyn->sec_dynsym = (LinkSectionId)(sec_base + 1 + 1u); 1115 dyn->sec_dynstr = (LinkSectionId)(sec_base + 2 + 1u); 1116 dyn->sec_gnu_hash = (LinkSectionId)(sec_base + 3 + 1u); 1117 dyn->sec_rela_dyn = (LinkSectionId)(sec_base + 4 + 1u); 1118 dyn->sec_rela_plt = (LinkSectionId)(sec_base + 5 + 1u); 1119 dyn->sec_dynamic = (LinkSectionId)(sec_base + 6 + 1u); 1120 dyn->dynamic_vaddr = img->segments[rw_seg_idx].vaddr + dynamic_off; 1121 dyn->dynamic_size = dynamic_bytes; 1122 1123 if (has_plt) { 1124 INIT_SEC(7, name_plt, rx_seg_idx, 0, plt_bytes, 16, SF_ALLOC | SF_EXEC, 1125 SSEM_PROGBITS); 1126 INIT_SEC(8, name_got_plt, rw_seg_idx, gotplt_off, gotplt_bytes, 8, 1127 SF_ALLOC | SF_WRITE, SSEM_PROGBITS); 1128 dyn->sec_plt = (LinkSectionId)(sec_base + 7 + 1u); 1129 dyn->sec_got_plt = (LinkSectionId)(sec_base + 8 + 1u); 1130 } 1131 if (has_ver) { 1132 /* Appended after the optional PLT slots; emit sorts the section-header 1133 * table by (segment, vaddr), so array order here is not load-bearing. The 1134 * SSEM_PROGBITS sem just parks the bytes in the ro segment — the runtime 1135 * reads them via DT_VERSYM/DT_VERNEED, not the section headers. */ 1136 u32 vb0 = 7u + (has_plt ? 2u : 0u); 1137 INIT_SEC(vb0, name_gnu_version, ro_seg_idx, versym_off, versym_bytes, 2, 1138 SF_ALLOC, SSEM_PROGBITS); 1139 INIT_SEC(vb0 + 1u, name_gnu_version_r, ro_seg_idx, verneed_off, 1140 verneed_bytes, 4, SF_ALLOC, SSEM_PROGBITS); 1141 dyn->sec_gnu_version = (LinkSectionId)(sec_base + vb0 + 1u); 1142 dyn->sec_gnu_version_r = (LinkSectionId)(sec_base + vb0 + 1u + 1u); 1143 } 1144 #undef INIT_SEC 1145 1146 img->nsections += nsec; 1147 1148 /* Step 7: copy .interp / .dynsym / .dynstr / .gnu.hash bytes into 1149 * the ro segment. .dynamic body is built during emit (it embeds 1150 * runtime vaddrs that PIE keeps image-relative; emit just reads 1151 * the section ids' final vaddrs). */ 1152 u8* ro_bytes = img->segment_bytes[ro_seg_idx]; 1153 1154 /* .interp */ 1155 if (interp_bytes && ro_bytes) 1156 memcpy(ro_bytes + interp_off, interp_str, (size_t)interp_bytes); 1157 1158 /* .dynsym: serialize DynSymRec to ELF64 wire layout. */ 1159 { 1160 u32 si; 1161 for (si = 0; si < dyn->ndynsym; ++si) { 1162 u8* p = ro_bytes + dynsym_off + (u64)si * ELF64_SYM_SIZE; 1163 const DynSymRec* r = &dyn->dynsym[si]; 1164 wr_u32_le(p + 0, r->st_name); 1165 p[4] = r->st_info; 1166 p[5] = r->st_other; 1167 wr_u16_le(p + 6, r->st_shndx); 1168 wr_u64_le(p + 8, r->st_value); 1169 wr_u64_le(p + 16, r->st_size); 1170 } 1171 } 1172 1173 /* .dynstr */ 1174 if (dynstr_bytes && ro_bytes && dyn->dynstr) 1175 memcpy(ro_bytes + dynstr_off, dyn->dynstr, dyn->dynstr_len); 1176 1177 /* .gnu.hash */ 1178 if (gnuhash_bytes && ro_bytes && dyn->gnu_hash) 1179 memcpy(ro_bytes + gnuhash_off, dyn->gnu_hash, dyn->gnu_hash_len); 1180 1181 /* .gnu.version + .gnu.version_r (no vaddrs inside; copied verbatim). */ 1182 if (has_ver && ro_bytes) { 1183 if (versym_bytes && dyn->versym) 1184 memcpy(ro_bytes + versym_off, dyn->versym, dyn->versym_len); 1185 if (verneed_bytes && dyn->verneed) 1186 memcpy(ro_bytes + verneed_off, dyn->verneed, dyn->verneed_len); 1187 } 1188 1189 /* .rela.plt: emit JUMP_SLOT records, one per imported function, and 1190 * stash each import's PLT-entry vaddr in `sym_plt_vaddr` so the 1191 * apply pass can redirect CALL26/JUMP26 against the import. The 1192 * record's r_offset addresses the .got.plt slot the PLT stub reads 1193 * through; the loader patches that slot to the resolved runtime 1194 * address before user code runs (DF_1_NOW, BIND_NOW). Bytes are 1195 * written here at pre-shift vaddrs; link_emit re-serializes them 1196 * after shift_image_addresses bumps the dyn vaddrs by headers_load. */ 1197 { 1198 u32 ki; 1199 for (ki = 0; ki < imports.nfuncs; ++ki) { 1200 LinkSymId lsid = imports.funcs[ki]; 1201 u32 dynidx = dyn->sym_dynidx[lsid]; 1202 u64 slot_vaddr = dyn->got_plt_vaddr + 8u * (3u + ki); 1203 u64 plt_entry_vaddr = dyn->plt_vaddr + arch->plt0_size + 1204 (u64)arch->plt_entry_size * (u64)ki; 1205 DynRela* r = &dyn->rela_plt[ki]; 1206 r->r_offset = slot_vaddr; 1207 r->r_info = ELF64_R_INFO((u64)dynidx, elf_arch->r_jump_slot); 1208 r->r_addend = 0; 1209 /* Serialize into segment bytes (will be re-serialized post-shift). */ 1210 u8* p = ro_bytes + rela_plt_off + (u64)ki * ELF64_RELA_SIZE; 1211 wr_u64_le(p + 0, r->r_offset); 1212 wr_u64_le(p + 8, r->r_info); 1213 wr_u64_le(p + 16, (u64)r->r_addend); 1214 /* sym_plt_vaddr is consulted by apply_all_relocs. */ 1215 dyn->sym_plt_vaddr[lsid] = plt_entry_vaddr; 1216 } 1217 } 1218 1219 /* .rela.dyn entries (GLOB_DAT for imports referenced via .got, and 1220 * RELATIVE for PIE internal abs fixups) are emitted by 1221 * apply_all_relocs as it walks every relocation. layout_dyn 1222 * leaves .rela.dyn empty here; the bytes are written post-shift in 1223 * link_emit_elf. */ 1224 1225 /* .got.plt prelude: for BIND_NOW we leave the body zero — the 1226 * loader patches every slot from .rela.plt before user code. Some 1227 * loaders still inspect slot 0 (&.dynamic) at startup; provide it 1228 * so glibc-style loaders don't fault. The loader writes the link_map 1229 * cookie into slot 1 at load time. */ 1230 if (has_plt) { 1231 u8* gp_bytes = img->segment_bytes[rw_seg_idx]; 1232 if (gp_bytes && gotplt_bytes >= 8u) { 1233 wr_u64_le(gp_bytes, dyn->dynamic_vaddr); 1234 /* Slots 1, 2, and per-PLT slots stay zero until the loader 1235 * fills them. Phase 5 would prefill the per-PLT slots with 1236 * the address of PLT0 to support lazy binding. */ 1237 } 1238 } 1239 1240 /* The .dynamic body is built later, after segment shifts are 1241 * applied during emit (link_elf.c). emit_dynamic_body takes the 1242 * post-shift vaddrs of every other dyn section and writes one 1243 * DT_* entry per index. */ 1244 1245 /* Synthesize linker-defined symbols that reference the .dynamic 1246 * vaddr. Scrt1.o on Linux loads `_DYNAMIC` via ADRP+ADD, and 1247 * libc_nonshared.a's atexit shim takes `__dso_handle` as the 1248 * per-image identity (we use the .dynamic vaddr — any stable 1249 * per-image address satisfies the contract since the shim only 1250 * passes it through to __cxa_atexit, which the program-side glibc 1251 * just stashes). */ 1252 link_define_boundary(l, img, "_DYNAMIC", dyn->dynamic_vaddr); 1253 link_define_boundary(l, img, "__dso_handle", dyn->dynamic_vaddr); 1254 1255 free_imports(h, &imports); 1256 } 1257 1258 /* ---- cleanup ---- */ 1259 1260 void link_dyn_state_free(LinkImage* img) { 1261 Heap* h = img->heap; 1262 LinkDynState* dyn = img->dyn; 1263 if (!dyn) return; 1264 if (dyn->dynsym) h->free(h, dyn->dynsym, sizeof(*dyn->dynsym) * dyn->ndynsym); 1265 if (dyn->dynstr) h->free(h, dyn->dynstr, dyn->dynstr_len); 1266 if (dyn->gnu_hash) h->free(h, dyn->gnu_hash, dyn->gnu_hash_len); 1267 if (dyn->versym) h->free(h, dyn->versym, dyn->versym_len); 1268 if (dyn->verneed) h->free(h, dyn->verneed, dyn->verneed_len); 1269 if (dyn->rela_dyn) 1270 h->free(h, dyn->rela_dyn, sizeof(*dyn->rela_dyn) * dyn->cap_rela_dyn); 1271 if (dyn->rela_plt) 1272 h->free(h, dyn->rela_plt, sizeof(*dyn->rela_plt) * dyn->nrela_plt); 1273 if (dyn->needed) h->free(h, dyn->needed, sizeof(*dyn->needed) * dyn->nneeded); 1274 if (dyn->sym_dynidx) 1275 h->free(h, dyn->sym_dynidx, 1276 sizeof(*dyn->sym_dynidx) * dyn->sym_dynidx_size); 1277 if (dyn->sym_plt_vaddr) 1278 h->free(h, dyn->sym_plt_vaddr, 1279 sizeof(*dyn->sym_plt_vaddr) * dyn->sym_dynidx_size); 1280 h->free(h, dyn, sizeof(*dyn)); 1281 img->dyn = NULL; 1282 }