kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

link_dyn.c (51602B)


      1 /* Phase 4 of dynamic linking: synthesize the dyn-link tables and
      2  * sections an ET_DYN ELF exe needs to be loadable by a real runtime
      3  * loader (musl ld-musl-aarch64.so.1).
      4  *
      5  * Inputs (computed by earlier passes):
      6  *   - LinkSymbol entries with `imported = 1` (set by resolve_undefs's
      7  *     DSO-search path; their dso_input_id names the providing DSO).
      8  *   - LinkInputs of kind LINK_INPUT_DSO_BYTES carrying SONAMEs.
      9  *
     10  * Outputs (deposited on LinkImage.dyn):
     11  *   - .interp                    PT_INTERP target string
     12  *   - .dynsym + .dynstr          symbol table + name pool
     13  *   - .gnu.hash                  GNU-style hash for the loader
     14  *   - .rela.dyn                  GLOB_DAT (data imports) + space for
     15  *                                R_AARCH64_RELATIVE records that
     16  *                                Phase 6 emit fills in
     17  *   - .rela.plt                  JUMP_SLOT records (one per imported func)
     18  *   - .plt                       allocated, body NOT emitted (Phase 5)
     19  *   - .got.plt                   3 reserved slots + 1 per PLT slot,
     20  *                                allocated, body NOT emitted
     21  *   - .dynamic                   PT_DYNAMIC body, populated
     22  *
     23  * The .plt body / GOT-slot fill / CALL26 reloc rewriting are Phase 5;
     24  * they're called out at the relevant allocation site so the missing
     25  * pieces are obvious to anyone reading the output. The static-exe path
     26  * is unaffected — layout_dyn early-outs when emit_pie is 0.
     27  *
     28  * Allocator pattern follows layout_iplt (link_layout.c): grow segments
     29  * + sections via realloc, then page-align each new segment after the
     30  * existing image span. Synthetic sections carry input_id == LINK_INPUT_NONE
     31  * so downstream passes (emit_reloc_records, GC) leave them alone.
     32  */
     33 
     34 #include <string.h>
     35 
     36 #include "core/bytes.h"
     37 #include "core/heap.h"
     38 #include "core/pool.h"
     39 #include "core/slice.h"
     40 #include "core/util.h"
     41 #include "core/vec.h"
     42 #include "link/link.h"
     43 #include "link/link_arch.h"
     44 #include "link/link_internal.h"
     45 #include "obj/elf/elf.h"
     46 #include "obj/format.h"
     47 
     48 /* ---- small allocators (mirror layout_iplt's helpers) ---- */
     49 
     50 static u32 dyn_alloc_segments(LinkImage* img, u32 nseg) {
     51   Heap* h = img->heap;
     52   u32 base = img->nsegments;
     53   u32 new_nseg = base + nseg;
     54   LinkSegment* nsegs = (LinkSegment*)h->realloc(
     55       h, img->segments, sizeof(*img->segments) * img->nsegments,
     56       sizeof(*img->segments) * new_nseg, _Alignof(LinkSegment));
     57   u8** nsbufs = (u8**)h->realloc(
     58       h, img->segment_bytes, sizeof(*img->segment_bytes) * img->nsegments,
     59       sizeof(*img->segment_bytes) * new_nseg, _Alignof(u8*));
     60   size_t* nscaps = (size_t*)h->realloc(
     61       h, img->segment_bytes_cap,
     62       sizeof(*img->segment_bytes_cap) * img->nsegments,
     63       sizeof(*img->segment_bytes_cap) * new_nseg, _Alignof(size_t));
     64   if (!nsegs || !nsbufs || !nscaps)
     65     compiler_panic(img->c, SRCLOC_NONE, "link: oom on dyn segments");
     66   img->segments = nsegs;
     67   img->segment_bytes = nsbufs;
     68   img->segment_bytes_cap = nscaps;
     69   return base;
     70 }
     71 
     72 static u32 dyn_alloc_sections(LinkImage* img, u32 nsec) {
     73   Heap* h = img->heap;
     74   u32 base = img->nsections;
     75   u32 new_nsec = base + nsec;
     76   LinkSection* nsections = (LinkSection*)h->realloc(
     77       h, img->sections, sizeof(*img->sections) * img->nsections,
     78       sizeof(*img->sections) * new_nsec, _Alignof(LinkSection));
     79   if (!nsections)
     80     compiler_panic(img->c, SRCLOC_NONE, "link: oom on dyn sections");
     81   img->sections = nsections;
     82   return base;
     83 }
     84 
     85 /* ---- byte-builder for .dynstr / .gnu.hash ---- */
     86 
/* Growable byte buffer used to assemble string-table style blobs. */
typedef struct ByteBuf {
  Heap* heap; /* allocator handed to VEC_GROW when the buffer grows */
  u8* data;   /* backing storage; NULL until the first growth */
  u32 len;    /* number of bytes appended so far */
  u32 cap;    /* capacity tracked by VEC_GROW */
} ByteBuf;
     93 
     94 static void bb_init(ByteBuf* b, Heap* h) {
     95   b->heap = h;
     96   b->data = NULL;
     97   b->len = 0;
     98   b->cap = 0;
     99 }
    100 static void bb_reserve(ByteBuf* b, u32 need) {
    101   if (need <= b->cap) return;
    102   (void)VEC_GROW(b->heap, b->data, b->cap, need);
    103 }
    104 static u32 bb_append(ByteBuf* b, const void* src, u32 n) {
    105   u32 off = b->len;
    106   bb_reserve(b, b->len + n);
    107   if (n) memcpy(b->data + b->len, src, n);
    108   b->len += n;
    109   return off;
    110 }
    111 static u32 bb_append_str(ByteBuf* b, const char* s, u32 n) {
    112   /* Linear dedup over what we've appended so far. Strtabs are small. */
    113   if (n == 0) return 0;
    114   if (b->len > n) {
    115     u32 i;
    116     for (i = 0; i + n < b->len; ++i) {
    117       if (b->data[i + n] == 0 && memcmp(b->data + i, s, n) == 0) return i;
    118     }
    119   }
    120   u32 off = b->len;
    121   bb_reserve(b, b->len + n + 1u);
    122   memcpy(b->data + b->len, s, n);
    123   b->data[b->len + n] = 0;
    124   b->len += n + 1u;
    125   return off;
    126 }
    127 
    128 /* ---- GNU-hash computation (psABI v1 hash) ----
    129  * Body layout:
    130  *   u32 nbuckets
    131  *   u32 symoffset              (first hashed dynsym index)
    132  *   u32 bloom_size              (in 64-bit words)
    133  *   u32 bloom_shift
    134  *   u64 bloom[bloom_size]
    135  *   u32 buckets[nbuckets]
    136  *   u32 chains[ndynsym - symoffset]
    137  *
 * For Phase 4 we keep this very small: nbuckets = 1, bloom_size = 1,
 * bloom_shift = 6 (64-bit ELFCLASS64). All hashed symbols
 * (sym_offset..ndynsym-1) participate in bloom + buckets + chains.
 * Slots 0..symoffset-1 are STN_UNDEF + locals, which the loader
 * doesn't hash. */
    143 
    144 static u32 gnu_hash_name(const char* s, u32 n) {
    145   /* h = 5381; for c in s: h = h * 33 + c */
    146   u32 h = 5381u;
    147   u32 i;
    148   for (i = 0; i < n; ++i) h = (h * 33u) + (u8)s[i];
    149   return h;
    150 }
    151 
/* ---- partition: enumerate exports and imports ----
 *
 * Walks LinkSyms and skips any entry that is shadowed by a different
 * canonical entry in img->globals (resolve_undefs may stamp `imported`
 * onto multiple shadow slots of the same name; only the canonical one
 * lands in dynsym). Defined global/weak symbols are collected as
 * exports; imported symbols are split into funcs (PLT-bound) and data
 * (GOT-bound via GLOB_DAT). All output arrays hold LinkSymIds. */
    159 
    160 typedef struct ImportLists {
    161   LinkSymId* exports;
    162   LinkSymId* funcs;
    163   u32 nfuncs;
    164   LinkSymId* datas;
    165   u32 ndatas;
    166   u32 nexports;
    167 } ImportLists;
    168 
    169 static int sym_is_func_import(const LinkSymbol* s) {
    170   /* Most undef shadows have kind = SK_UNDEF (the obj reader keys kind
    171    * off shndx, not STT_*).  Only useful when the canonical entry
    172    * carried a real type — fall through to the DSO lookup otherwise. */
    173   return s->kind == SK_FUNC || s->kind == SK_IFUNC;
    174 }
    175 
    176 /* Resolve an import's classifier kind by consulting its providing
    177  * DSO's dynsym.  read_elf_dso preserves STT_FUNC / STT_OBJECT / etc.
    178  * on each defined export; the consumer's undef may have arrived as
    179  * SK_UNDEF (clang emits external refs as SHN_UNDEF, which the reader
    180  * collapses to SK_UNDEF regardless of STT_*).  Returns 1 for func /
    181  * ifunc, 0 for everything else (or if the DSO export is missing). */
    182 static int dso_export_is_func(Linker* l, const LinkSymbol* s) {
    183   if (s->dso_input_id == LINK_INPUT_NONE) return 0;
    184   if (s->dso_input_id - 1u >= LinkInputs_count(&l->inputs)) return 0;
    185   LinkInput* in = LinkInputs_at(&l->inputs, s->dso_input_id - 1u);
    186   if (!in->obj) return 0;
    187   ObjSymIter* it = obj_symiter_new(in->obj);
    188   ObjSymEntry e;
    189   int is_func = 0;
    190   while (obj_symiter_next(it, &e)) {
    191     const ObjSym* es = e.sym;
    192     if (!es || es->name != s->name) continue;
    193     if (es->kind == SK_UNDEF) continue;
    194     is_func = (es->kind == SK_FUNC || es->kind == SK_IFUNC);
    195     break;
    196   }
    197   obj_symiter_free(it);
    198   return is_func;
    199 }
    200 
    201 static void collect_imports(Linker* l, LinkImage* img, Heap* h,
    202                             ImportLists* il) {
    203   u32 i;
    204   u32 cap_e = 0, cap_f = 0, cap_d = 0;
    205   il->exports = NULL;
    206   il->funcs = NULL;
    207   il->datas = NULL;
    208   il->nexports = il->nfuncs = il->ndatas = 0;
    209   for (i = 0; i < LinkSyms_count(&img->syms); ++i) {
    210     LinkSymbol* s = LinkSyms_at(&img->syms, i);
    211     if (s->name == 0) continue;
    212     /* Only the canonical (img->globals) entry per name. */
    213     LinkSymId canonical = symhash_get(&img->globals, s->name);
    214     if (canonical != LINK_SYM_NONE && canonical != s->id) continue;
    215     if (s->defined && !s->imported &&
    216         (s->bind == SB_GLOBAL || s->bind == SB_WEAK) && s->kind != SK_FILE &&
    217         s->kind != SK_SECTION) {
    218       if (VEC_GROW(h, il->exports, cap_e, il->nexports + 1u))
    219         compiler_panic(img->c, SRCLOC_NONE, "link: oom on exports");
    220       il->exports[il->nexports++] = s->id;
    221       continue;
    222     }
    223     if (!s->imported) continue;
    224     int is_func = sym_is_func_import(s) || dso_export_is_func(l, s);
    225     if (is_func) {
    226       if (VEC_GROW(h, il->funcs, cap_f, il->nfuncs + 1u))
    227         compiler_panic(img->c, SRCLOC_NONE, "link: oom on import-funcs");
    228       il->funcs[il->nfuncs++] = s->id;
    229     } else {
    230       if (VEC_GROW(h, il->datas, cap_d, il->ndatas + 1u))
    231         compiler_panic(img->c, SRCLOC_NONE, "link: oom on import-datas");
    232       il->datas[il->ndatas++] = s->id;
    233     }
    234   }
    235 }
    236 
    237 static void free_imports(Heap* h, ImportLists* il) {
    238   if (il->exports) h->free(h, il->exports, sizeof(*il->exports) * il->nexports);
    239   if (il->funcs) h->free(h, il->funcs, sizeof(*il->funcs) * il->nfuncs);
    240   if (il->datas) h->free(h, il->datas, sizeof(*il->datas) * il->ndatas);
    241 }
    242 
    243 /* ---- DT_NEEDED set: each DSO input that contributed at least one
    244  * import. Order is input order so the loader sees deps in declaration
    245  * order. */
    246 static void collect_needed(Linker* l, LinkImage* img, LinkDynState* dyn) {
    247   Heap* h = img->heap;
    248   u8* used;
    249   u32 ninputs = LinkInputs_count(&l->inputs);
    250   u32 i, nused = 0;
    251 
    252   used = (u8*)h->alloc(h, ninputs ? ninputs : 1u, 1);
    253   if (!used) compiler_panic(img->c, SRCLOC_NONE, "link: oom on needed map");
    254   memset(used, 0, ninputs ? ninputs : 1u);
    255 
    256   /* Mark every DSO that ended up satisfying at least one import. */
    257   for (i = 0; i < LinkSyms_count(&img->syms); ++i) {
    258     LinkSymbol* s = LinkSyms_at(&img->syms, i);
    259     if (!s->imported) continue;
    260     if (s->dso_input_id == LINK_INPUT_NONE) continue;
    261     if (s->dso_input_id - 1u >= ninputs) continue;
    262     used[s->dso_input_id - 1u] = 1;
    263   }
    264   /* Always pull every explicitly-supplied DSO into DT_NEEDED, even if
    265    * no import landed on it — matches GNU ld without --as-needed.
    266    * Phase 4 doesn't plumb --as-needed through to the resolver, so the
    267    * default "needed" behavior is the right baseline. */
    268   for (i = 0; i < ninputs; ++i) {
    269     LinkInput* in = LinkInputs_at(&l->inputs, i);
    270     if (in->kind == LINK_INPUT_DSO_BYTES && in->soname != 0) used[i] = 1;
    271   }
    272   for (i = 0; i < ninputs; ++i)
    273     if (used[i]) ++nused;
    274 
    275   dyn->needed =
    276       nused ? (Sym*)h->alloc(h, sizeof(Sym) * nused, _Alignof(Sym)) : NULL;
    277   if (nused && !dyn->needed)
    278     compiler_panic(img->c, SRCLOC_NONE, "link: oom on needed list");
    279   dyn->nneeded = 0;
    280   for (i = 0; i < ninputs; ++i) {
    281     LinkInput* in = LinkInputs_at(&l->inputs, i);
    282     if (!used[i]) continue;
    283     if (in->soname == 0) continue;
    284     dyn->needed[dyn->nneeded++] = in->soname;
    285   }
    286   h->free(h, used, ninputs ? ninputs : 1u);
    287 }
    288 
    289 /* ---- dynsym + dynstr build ----
    290  *
    291  * Slot 0: STN_UNDEF (zero entry). The loader ignores names with index
    292  * 0; we still emit a dynstr entry at offset 0 (the leading NUL).
    293  *
    294  * Slots 1..nexports: executable-defined globals exported for DSO lookup.
    295  * Slots after exports: imported symbols (functions first, then data).
    296  * st_shndx = SHN_UNDEF; the loader fills in the value at bind time.
    297  * st_value/size are zero — the static linker has no value for an
    298  * imported symbol.
    299  *
    300  * Defined executable globals must be present too: ELF DSOs can resolve
    301  * references back to the main executable, and FreeBSD libc depends on that
    302  * for Scrt1.o's `environ` and `__progname` definitions. */
    303 
    304 static void build_dynsym(LinkImage* img, LinkDynState* dyn,
    305                          const ImportLists* il, ByteBuf* dynstr) {
    306   Heap* h = img->heap;
    307   u32 nimports = il->nfuncs + il->ndatas;
    308   u32 ndynsym = 1u + il->nexports + nimports; /* +1 for null slot */
    309   u32 i;
    310 
    311   dyn->ndynsym = ndynsym;
    312   dyn->dynsym = (DynSymRec*)h->alloc(h, sizeof(*dyn->dynsym) * ndynsym,
    313                                      _Alignof(DynSymRec));
    314   if (!dyn->dynsym) compiler_panic(img->c, SRCLOC_NONE, "link: oom on dynsym");
    315   memset(dyn->dynsym, 0, sizeof(*dyn->dynsym) * ndynsym);
    316 
    317   /* Slot 0: STN_UNDEF. dynstr leads with a NUL so st_name=0 reads as
    318    * the empty string. */
    319   {
    320     u8 z = 0;
    321     bb_append(dynstr, &z, 1);
    322   }
    323 
    324   /* Per-symbol: dedupe `sym_dynidx` lookup table. Sized to LinkSymId
    325    * upper bound. Clean (zero-filled) by alloc convention; we set
    326    * indices for imports below. */
    327   dyn->sym_dynidx_size = LinkSyms_count(&img->syms) + 1u;
    328   dyn->sym_dynidx = (u32*)h->alloc(
    329       h, sizeof(*dyn->sym_dynidx) * dyn->sym_dynidx_size, _Alignof(u32));
    330   if (!dyn->sym_dynidx)
    331     compiler_panic(img->c, SRCLOC_NONE, "link: oom on sym_dynidx");
    332   memset(dyn->sym_dynidx, 0, sizeof(*dyn->sym_dynidx) * dyn->sym_dynidx_size);
    333   /* sym_plt_vaddr is populated alongside the PLT body emit below; here
    334    * we only allocate the parallel array. */
    335   dyn->sym_plt_vaddr = (u64*)h->alloc(
    336       h, sizeof(*dyn->sym_plt_vaddr) * dyn->sym_dynidx_size, _Alignof(u64));
    337   if (!dyn->sym_plt_vaddr)
    338     compiler_panic(img->c, SRCLOC_NONE, "link: oom on sym_plt_vaddr");
    339   memset(dyn->sym_plt_vaddr, 0,
    340          sizeof(*dyn->sym_plt_vaddr) * dyn->sym_dynidx_size);
    341 
    342   /* All dynamic entries we emit today are non-local, so first_global is
    343    * right after the single STN_UNDEF slot. */
    344   dyn->first_global = 1u;
    345 
    346   u32 idx = 1u;
    347   for (i = 0; i < il->nexports; ++i) {
    348     LinkSymId lsid = il->exports[i];
    349     LinkSymbol* s = LinkSyms_at(&img->syms, lsid - 1);
    350     DynSymRec* r = &dyn->dynsym[idx];
    351     Slice nm_s = pool_slice(img->c->global, s->name);
    352     const char* nm = nm_s.s;
    353     size_t namelen = nm_s.len;
    354     u8 elf_type = elf_st_type(s->kind);
    355     u8 elf_bind = elf_st_bind(s->bind);
    356     r->st_name = bb_append_str(dynstr, nm, (u32)namelen);
    357     r->st_info = ELF64_ST_INFO(elf_bind, elf_type);
    358     r->st_other = STV_DEFAULT;
    359     /* The emitter refreshes defined-symbol values after the final header
    360      * shift. Any nonzero, non-special section index is enough for rtld to
    361      * treat the symbol as defined; section headers are not part of runtime
    362      * loading. */
    363     r->st_shndx = 1;
    364     r->st_value = s->vaddr;
    365     r->st_size = s->size;
    366     dyn->sym_dynidx[lsid] = idx;
    367     ++idx;
    368   }
    369   for (i = 0; i < il->nfuncs; ++i) {
    370     LinkSymId lsid = il->funcs[i];
    371     LinkSymbol* s = LinkSyms_at(&img->syms, lsid - 1);
    372     DynSymRec* r = &dyn->dynsym[idx];
    373     Slice nm_s = pool_slice(img->c->global, s->name);
    374     const char* nm = nm_s.s;
    375     size_t namelen = nm_s.len;
    376     r->st_name = bb_append_str(dynstr, nm, (u32)namelen);
    377     r->st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
    378     r->st_other = STV_DEFAULT;
    379     r->st_shndx = SHN_UNDEF;
    380     r->st_value = 0;
    381     r->st_size = 0;
    382     dyn->sym_dynidx[lsid] = idx;
    383     ++idx;
    384   }
    385   for (i = 0; i < il->ndatas; ++i) {
    386     LinkSymId lsid = il->datas[i];
    387     LinkSymbol* s = LinkSyms_at(&img->syms, lsid - 1);
    388     DynSymRec* r = &dyn->dynsym[idx];
    389     Slice nm_s = pool_slice(img->c->global, s->name);
    390     const char* nm = nm_s.s;
    391     size_t namelen = nm_s.len;
    392     u8 elf_type = STT_OBJECT;
    393     if (s->kind == SK_TLS)
    394       elf_type = STT_TLS;
    395     else if (s->kind == SK_NOTYPE)
    396       elf_type = STT_NOTYPE;
    397     r->st_name = bb_append_str(dynstr, nm, (u32)namelen);
    398     r->st_info = ELF64_ST_INFO(STB_GLOBAL, elf_type);
    399     r->st_other = STV_DEFAULT;
    400     r->st_shndx = SHN_UNDEF;
    401     r->st_value = 0;
    402     r->st_size = 0;
    403     dyn->sym_dynidx[lsid] = idx;
    404     ++idx;
    405   }
    406 }
    407 
    408 /* ---- GNU symbol versioning (.gnu.version + .gnu.version_r) ----
    409  *
    410  * For each imported symbol that binds to a versioned DSO export, require that
    411  * export's *default* version (read into ObjImageSym.version at input time) so
    412  * the runtime binds the right one. On FreeBSD this is mandatory: the INO64
    413  * transition left `stat`/`fstat`/... as two incompatible struct-stat ABIs, the
    414  * compat behind a hidden FBSD_1.0 and the modern one as the default FBSD_1.5;
    415  * an unversioned reference binds the compat and reads st_size at the wrong
    416  * offset. We emit:
    417  *   .gnu.version   — one u16 per .dynsym entry: 0 (null/unversioned import),
    418  *                    1 (defined export), or >=2 (a version requirement index).
    419  *   .gnu.version_r — Verneed per DT_NEEDED soname + Vernaux per required
    420  *                    version, numbered 2.. in first-seen order.
    421  * Both reference only .dynstr offsets and indices (no vaddrs), so the bytes are
    422  * final at layout time. Nothing is emitted when no import is versioned, leaving
    423  * musl/glibc-without-version and static links byte-for-byte unchanged. */
    424 
    425 static u32 elf_sysv_hash(const char* s, u32 n) {
    426   u32 h = 0, g, i;
    427   for (i = 0; i < n; ++i) {
    428     h = (h << 4) + (u8)s[i];
    429     g = h & 0xf0000000u;
    430     if (g) h ^= g >> 24;
    431     h &= ~g;
    432   }
    433   return h;
    434 }
    435 
    436 /* Default version name the DSO `in` exports for `name`, or 0 if `in` carries no
    437  * versioning / doesn't export `name` with a default version. */
    438 static Sym dso_default_version(LinkInput* in, Sym name) {
    439   const ObjImage* im = in->obj ? obj_image(in->obj) : NULL;
    440   u32 i, n;
    441   if (!im) return 0;
    442   n = obj_image_ndynsyms(im);
    443   for (i = 0; i < n; ++i) {
    444     const ObjImageSym* s = obj_image_dynsym(im, i);
    445     if (s->name == name && s->version != 0) return s->version;
    446   }
    447   return 0;
    448 }
    449 
/* One interned (soname, version) requirement. */
typedef struct VerReq {
  Sym soname;  /* SONAME of the DSO that provides the version */
  Sym version; /* version name required from that DSO */
  u16 index;   /* versym index assigned to this requirement (2 + position) */
} VerReq;
    455 
/* Working state shared by build_versions and ver_process_import. */
typedef struct VerBuild {
  Heap* h;           /* allocator for `reqs` growth */
  Linker* l;         /* source of the input list */
  LinkImage* img;    /* source of the link symbols */
  LinkDynState* dyn; /* provides sym_dynidx for slot lookup */
  u8* vs; /* versym bytes being filled */
  VerReq* reqs;      /* interned requirements, in first-seen order */
  u32 nreq;          /* number of entries used in `reqs` */
  u32 capreq;        /* capacity of `reqs` as tracked by VEC_GROW */
} VerBuild;
    466 
    467 /* Resolve one imported symbol's version requirement: look up its providing
    468  * DSO's default version for the name, intern a (soname, version) requirement
    469  * (assigning the next index), and stamp the symbol's versym slot. */
    470 static void ver_process_import(VerBuild* vb, LinkSymId lsid) {
    471   LinkSymbol* s = LinkSyms_at(&vb->img->syms, lsid - 1);
    472   u32 di = vb->dyn->sym_dynidx[lsid];
    473   LinkInput* in;
    474   Sym ver;
    475   u16 vidx = 0;
    476   u32 r;
    477   if (!di || s->dso_input_id == LINK_INPUT_NONE) return;
    478   if (s->dso_input_id - 1u >= LinkInputs_count(&vb->l->inputs)) return;
    479   in = LinkInputs_at(&vb->l->inputs, s->dso_input_id - 1u);
    480   if (in->soname == 0) return;
    481   ver = dso_default_version(in, s->name);
    482   if (ver == 0) return;
    483   for (r = 0; r < vb->nreq; ++r)
    484     if (vb->reqs[r].soname == in->soname && vb->reqs[r].version == ver) {
    485       vidx = vb->reqs[r].index;
    486       break;
    487     }
    488   if (!vidx) {
    489     if (VEC_GROW(vb->h, vb->reqs, vb->capreq, vb->nreq + 1u))
    490       compiler_panic(vb->img->c, SRCLOC_NONE, "link: oom on version reqs");
    491     vidx = (u16)(2u + vb->nreq);
    492     vb->reqs[vb->nreq].soname = in->soname;
    493     vb->reqs[vb->nreq].version = ver;
    494     vb->reqs[vb->nreq].index = vidx;
    495     vb->nreq++;
    496   }
    497   wr_u16_le(vb->vs + (u64)di * 2u, vidx);
    498 }
    499 
/* Build the symbol-version tables for the dynsym already on `dyn`.
 *
 * Always resets dyn->versym / versym_len / verneed / verneed_len /
 * nverneed first. When at least one import resolves to a versioned
 * requirement, on return:
 *   dyn->versym   holds one little-endian u16 per dynsym slot,
 *   dyn->verneed  holds the Verneed + Vernaux records, and
 *   dyn->nverneed is the number of Verneed records (distinct sonames).
 * SONAME and version strings are added to `dynstr` as a side effect.
 * When no import is versioned the scratch buffers are freed and the
 * fields stay cleared. Panics via compiler_panic on allocation
 * failure. */
static void build_versions(Linker* l, LinkImage* img, LinkDynState* dyn,
                           const ImportLists* il, ByteBuf* dynstr) {
  Heap* h = img->heap;
  VerBuild vb;
  u32 i;

  dyn->versym = NULL;
  dyn->versym_len = 0;
  dyn->verneed = NULL;
  dyn->verneed_len = 0;
  dyn->nverneed = 0;
  if (dyn->ndynsym == 0) return;

  /* versym: default 0 (local/unversioned); defined exports -> GLOBAL. */
  vb.h = h;
  vb.l = l;
  vb.img = img;
  vb.dyn = dyn;
  vb.reqs = NULL;
  vb.nreq = 0;
  vb.capreq = 0;
  vb.vs = (u8*)h->alloc(h, (size_t)dyn->ndynsym * 2u, 2);
  if (!vb.vs) compiler_panic(img->c, SRCLOC_NONE, "link: oom on versym");
  memset(vb.vs, 0, (size_t)dyn->ndynsym * 2u);
  for (i = 0; i < il->nexports; ++i) {
    u32 di = dyn->sym_dynidx[il->exports[i]];
    if (di) wr_u16_le(vb.vs + (u64)di * 2u, (u16)VER_NDX_GLOBAL);
  }
  /* Imports are processed funcs first, then datas; that order fixes the
   * first-seen numbering of the requirements. */
  for (i = 0; i < il->nfuncs; ++i) ver_process_import(&vb, il->funcs[i]);
  for (i = 0; i < il->ndatas; ++i) ver_process_import(&vb, il->datas[i]);

  if (vb.nreq == 0) {
    /* No versioned imports: emit nothing, keep the link unchanged. */
    h->free(h, vb.vs, (size_t)dyn->ndynsym * 2u);
    if (vb.reqs) h->free(h, vb.reqs, sizeof(*vb.reqs) * vb.capreq);
    return;
  }
  /* Ownership of the versym bytes moves to `dyn` from here on. */
  dyn->versym = vb.vs;
  dyn->versym_len = dyn->ndynsym * 2u;

  /* Group requirements by soname (first-seen order) into Verneed/Vernaux. */
  {
    Sym* sonames = NULL;
    u32 nson = 0, capson = 0;
    u32 r;
    /* Collect the distinct sonames, preserving first-seen order. */
    for (r = 0; r < vb.nreq; ++r) {
      u32 k;
      int seen = 0;
      for (k = 0; k < nson; ++k)
        if (sonames[k] == vb.reqs[r].soname) {
          seen = 1;
          break;
        }
      if (!seen) {
        if (VEC_GROW(h, sonames, capson, nson + 1u))
          compiler_panic(img->c, SRCLOC_NONE, "link: oom on verneed sonames");
        sonames[nson++] = vb.reqs[r].soname;
      }
    }
    {
      /* One Verneed per soname plus one Vernaux per requirement. */
      u32 total = nson * (u32)ELF_VERNEED_SIZE + vb.nreq * (u32)ELF_VERNAUX_SIZE;
      u8* vn = (u8*)h->alloc(h, total, 4);
      u8* p;
      u32 si;
      if (!vn) compiler_panic(img->c, SRCLOC_NONE, "link: oom on verneed");
      memset(vn, 0, total);
      p = vn; /* write cursor into the verneed blob */
      for (si = 0; si < nson; ++si) {
        Slice so_s = pool_slice(l->c->global, sonames[si]);
        u32 file_off = bb_append_str(dynstr, so_s.s, (u32)so_s.len);
        u8* vn_rec = p; /* start of this soname's Verneed header */
        u32 cnt = 0;    /* number of Vernaux records emitted for it */
        u8* aux;
        p += ELF_VERNEED_SIZE;
        aux = p; /* the Vernaux records follow the header directly */
        for (r = 0; r < vb.nreq; ++r) {
          Slice ver_s;
          u32 name_off;
          if (vb.reqs[r].soname != sonames[si]) continue;
          ver_s = pool_slice(l->c->global, vb.reqs[r].version);
          name_off = bb_append_str(dynstr, ver_s.s, (u32)ver_s.len);
          wr_u32_le(p + 0, elf_sysv_hash(ver_s.s, (u32)ver_s.len)); /* vna_hash */
          wr_u16_le(p + 4, 0);                                      /* vna_flags */
          wr_u16_le(p + 6, vb.reqs[r].index);                       /* vna_other */
          wr_u32_le(p + 8, name_off);                               /* vna_name */
          /* vna_next: filled after we know if another aux follows. */
          p += ELF_VERNAUX_SIZE;
          ++cnt;
        }
        /* Verneed header. vn_aux is the byte offset to the first Vernaux. */
        wr_u16_le(vn_rec + 0, 1);                         /* vn_version */
        wr_u16_le(vn_rec + 2, (u16)cnt);                  /* vn_cnt */
        wr_u32_le(vn_rec + 4, file_off);                  /* vn_file */
        wr_u32_le(vn_rec + 8, (u32)(aux - vn_rec));       /* vn_aux */
        /* vn_next is relative to this header; `p` already points at
         * where the next header will start. */
        wr_u32_le(vn_rec + 12,
                  si + 1u < nson ? (u32)(p - vn_rec) : 0u); /* vn_next */
        /* Link the Vernaux chain (each entry -> next, last -> 0). */
        {
          u8* a = aux;
          u32 j;
          for (j = 0; j < cnt; ++j) {
            wr_u32_le(a + 12, j + 1u < cnt ? (u32)ELF_VERNAUX_SIZE : 0u);
            a += ELF_VERNAUX_SIZE;
          }
        }
      }
      dyn->verneed = vn;
      dyn->verneed_len = total;
      dyn->nverneed = nson;
    }
    if (sonames) h->free(h, sonames, sizeof(*sonames) * capson);
  }
  if (vb.reqs) h->free(h, vb.reqs, sizeof(*vb.reqs) * vb.capreq);
}
    614 
    615 /* ---- .gnu.hash builder ----
    616  *
    617  * Hashed range is [first_global, ndynsym) — slot 0 (STN_UNDEF) is
    618  * unhashed. Layout matches loader expectations (musl, glibc, FreeBSD).
    619  *
    620  * Bucket count: one. That keeps the required chain ordering trivial even as
    621  * we mix executable exports and imports without sorting the dynsym table by
    622  * hash bucket. Bloom is 1 word for Phase 4 — a real implementation would
    623  * scale with hashed_count, but 1 word with shift=6 still satisfies the
    624  * loader's correctness check (false positives only cost a chain scan). */
    625 
    626 static void build_gnu_hash(Heap* h, LinkImage* img, LinkDynState* dyn,
    627                            const ByteBuf* dynstr) {
    628   u32 hashed = (dyn->ndynsym > dyn->first_global)
    629                    ? (dyn->ndynsym - dyn->first_global)
    630                    : 0u;
    631   u32 nbuckets = 1u;
    632   u32 bloom_size = 1u; /* 64-bit word */
    633   u32 bloom_shift = 6u;
    634   u32 sym_offset = dyn->first_global;
    635   u32 hdr_bytes = 16u; /* nbuckets/symoff/bloomsz/bloomshift */
    636   u32 bloom_bytes = bloom_size * 8u;
    637   u32 buckets_bytes = nbuckets * 4u;
    638   u32 chains_bytes = hashed * 4u;
    639   u32 total = hdr_bytes + bloom_bytes + buckets_bytes + chains_bytes;
    640 
    641   u8* buf = (u8*)h->alloc(h, total ? total : 1u, 4);
    642   if (!buf) compiler_panic(img->c, SRCLOC_NONE, "link: oom on .gnu.hash");
    643   memset(buf, 0, total);
    644 
    645   wr_u32_le(buf + 0, nbuckets);
    646   wr_u32_le(buf + 4, sym_offset);
    647   wr_u32_le(buf + 8, bloom_size);
    648   wr_u32_le(buf + 12, bloom_shift);
    649 
    650   /* Bloom + buckets + chains. We need each hashed symbol's hash. */
    651   if (hashed) {
    652     u32 i;
    653     u32* hashes = (u32*)h->alloc(h, sizeof(u32) * hashed, _Alignof(u32));
    654     if (!hashes)
    655       compiler_panic(img->c, SRCLOC_NONE, "link: oom on .gnu.hash hashes");
    656     for (i = 0; i < hashed; ++i) {
    657       const DynSymRec* r = &dyn->dynsym[sym_offset + i];
    658       const char* name = (const char*)dynstr->data + r->st_name;
    659       size_t n = name ? slice_from_cstr(name).len : 0;
    660       hashes[i] = gnu_hash_name(name, (u32)n);
    661     }
    662 
    663     /* Bloom filter: H[i] / H[i] >> shift */
    664     u64 bloom = 0;
    665     for (i = 0; i < hashed; ++i) {
    666       u32 h1 = hashes[i] % 64u;
    667       u32 h2 = (hashes[i] >> bloom_shift) % 64u;
    668       bloom |= ((u64)1 << h1) | ((u64)1 << h2);
    669     }
    670     wr_u64_le(buf + hdr_bytes, bloom);
    671 
    672     /* Buckets/chains: for each hashed sym, append to its bucket's
    673      * chain. The chain encodes (hash & ~1) per entry; the LSB is set
    674      * on the LAST entry in a bucket to terminate. Buckets are filled
    675      * with the first chain index that hashes there (1-based into the
    676      * dynsym, i.e. `sym_offset + i`). */
    677     u32* buckets = (u32*)(buf + hdr_bytes + bloom_bytes);
    678     u32* chains = (u32*)(buf + hdr_bytes + bloom_bytes + buckets_bytes);
    679     /* First pass: bucket = first sym index that hashes there. */
    680     for (i = 0; i < hashed; ++i) {
    681       u32 b = hashes[i] % nbuckets;
    682       if (buckets[b] == 0) buckets[b] = sym_offset + i;
    683     }
    684     /* Second pass: chain[i] = hash with LSB cleared; LSB set if next
    685      * sym is in a different bucket. Walk symbols in order; LSB on
    686      * chain[i] when sym i+1 is in a different bucket OR is the end. */
    687     for (i = 0; i < hashed; ++i) {
    688       u32 v = hashes[i] & ~1u;
    689       int last = (i + 1 == hashed) ||
    690                  ((hashes[i + 1] % nbuckets) != (hashes[i] % nbuckets));
    691       if (last) v |= 1u;
    692       chains[i] = v;
    693     }
    694     h->free(h, hashes, sizeof(u32) * hashed);
    695   }
    696 
    697   dyn->gnu_hash = buf;
    698   dyn->gnu_hash_len = total;
    699 }
    700 
    701 /* ---- .dynamic body builder ----
    702  *
    703  * Computed at layout time so the size is known before segments are
    704  * placed. Each entry is two u64s (d_tag, d_un.d_val|d_un.d_ptr).
    705  * Final entry is DT_NULL. The d_ptr fields that point at other
    706  * synthetic sections are filled with image-relative vaddrs; the emit
    707  * pass adds load-base / IMAGE_BASE only when ET_EXEC. */
    708 
    709 typedef struct DynEntry {
    710   u64 tag;
    711   u64 val; /* either d_val or d_ptr; emit just writes 8 bytes */
    712 } DynEntry;
    713 
    714 static u32 count_dynamic_entries(const LinkDynState* dyn) {
    715   /* Required: DT_STRTAB DT_STRSZ DT_SYMTAB DT_SYMENT DT_GNU_HASH
    716    *           DT_FLAGS_1 (DF_1_NOW for eager binding)
    717    *           DT_NULL terminator
    718    * Optional (only when there are .rela.dyn records):
    719    *           DT_RELA DT_RELASZ DT_RELAENT
    720    * Optional (only when there are imported functions / a PLT):
    721    *           DT_PLTGOT DT_PLTRELSZ DT_PLTREL DT_JMPREL
    722    * Plus DT_NEEDED per dependency. */
    723   u32 n = dyn->nneeded;
    724   n += 7;                        /* 5 fixed + DT_FLAGS_1 + DT_NULL */
    725   if (dyn->cap_rela_dyn) n += 3; /* DT_RELA + DT_RELASZ + DT_RELAENT */
    726   if (dyn->nrela_plt) n += 4;    /* PLT-only entries */
    727   if (dyn->nverneed) n += 3;     /* DT_VERSYM + DT_VERNEED + DT_VERNEEDNUM */
    728   return n;
    729 }
    730 
    731 /* ---- main entry ---- */
    732 
    /* layout_dyn: size, place and (mostly) populate the synthetic dynamic-link
     * sections for a PIE link, recording the result in a LinkDynState hung off
     * img->dyn.
     *
     * l    linker state; read here for emit_pie, emit_static_exe, interp_path,
     *      jit_host and the compiler handle l->c.
     * img  image being laid out; gains new segments and sections, has img->pie
     *      set, and (on the full path) has img->dyn set.
     *
     * Control flow, as visible in the body:
     *   - returns at once when l->emit_pie is 0, leaving img untouched;
     *   - calls compiler_panic for a 4-byte-pointer target, a missing arch
     *     descriptor, a missing ELF arch descriptor, missing PLT emit callbacks
     *     when a PLT is needed, and every failed allocation;
     *   - when l->emit_static_exe is set and there are no imports and no
     *     DT_NEEDED entries, sets img->pie = 1 and returns without allocating a
     *     LinkDynState;
     *   - otherwise runs steps 1-7 below and finally defines `_DYNAMIC` and
     *     `__dso_handle` at the .dynamic vaddr.
     *
     * The vaddrs written here are layout-time values; per the comments in the
     * body, the emit pass re-serializes them after shift_image_addresses. */
    733 void layout_dyn(Linker* l, LinkImage* img) {
    734   Heap* h = img->heap;
    735   LinkDynState* dyn;
    736   LinkDynState dyn_probe;
    737   ImportLists imports;
    738   ByteBuf dynstr;
    739   u64 page;
    740   const LinkArchDesc* arch;
    741   const ObjElfArchOps* elf_arch;
    742 
    743   if (!l->emit_pie) return;
    744 
    745   /* The dynamic-section layout below is ELF64-only (Elf64_Sym/Dyn/Rela wire
    746    * sizes, 8-byte GOT slots). rv32 is a static-only v1 target, so a dynamic /
    747    * PIE rv32 link is unsupported — fail with a clear diagnostic instead of
    748    * crashing on the ELF64 assumptions. Link rv32 images statically (kit ld
    749    * -no-pie, or a -T script for bare-metal section placement). */
    750   if (img->c->target.ptr_size == 4u) {
    751     compiler_panic(img->c, SRCLOC_NONE,
    752                    "link: dynamic/PIE linking is not supported for 32-bit "
    753                    "RISC-V (ELFCLASS32); link statically (kit ld -no-pie)");
    754   }
    755 
    756   arch = link_arch_desc_for(l->c);
    757   if (!arch)
    758     compiler_panic(img->c, SRCLOC_NONE, "link: layout_dyn: no arch descriptor");
    759   {
    760     const ObjFormatImpl* fmt = obj_format_lookup(KIT_OBJ_ELF);
    761     elf_arch = fmt && fmt->elf_arch ? fmt->elf_arch(l->c->target.arch) : NULL;
    762     if (!elf_arch)
    763       compiler_panic(img->c, SRCLOC_NONE,
    764                      "link: layout_dyn: no ELF arch descriptor");
    765   }
    766 
    767   /* Step 1: enumerate imports + DT_NEEDED. A PIE with no imports and no
    768    * DSO inputs is effectively static; keep ET_DYN output but do not stamp
    769    * PT_INTERP/PT_DYNAMIC or an empty .dynamic section. */
    770   memset(&dyn_probe, 0, sizeof dyn_probe);
    771   collect_imports(l, img, h, &imports);
    772   collect_needed(l, img, &dyn_probe);
          /* Early-out: no LinkDynState is allocated on this path, so img->dyn
           * keeps whatever value it had on entry. */
    773   if (l->emit_static_exe && imports.nfuncs == 0 && imports.ndatas == 0 &&
    774       dyn_probe.nneeded == 0) {
    775     img->pie = 1;
    776     free_imports(h, &imports);
    777     return;
    778   }
    779 
    780   dyn = (LinkDynState*)h->alloc(h, sizeof(*dyn), _Alignof(LinkDynState));
    781   if (!dyn) compiler_panic(img->c, SRCLOC_NONE, "link: oom on dyn state");
    782   *dyn = dyn_probe;
    783   img->dyn = dyn;
    784   img->pie = 1;
    785 
    786   /* PT_INTERP path. Default to the canonical musl loader matching the
    787    * target arch (per-arch table in src/arch/<arch>/link.c) when the caller
    788    * didn't set one. Drivers like kit-cc always override via
    789    * link_set_interp_path; this default is correctness for direct
    790    * libkit consumers.  glibc users have to set their interp
    791    * explicitly — we don't pick a default for them. */
    792   dyn->interp_path =
    793       l->interp_path
    794           ? l->interp_path
    795           : pool_intern_slice(l->c->global,
    796                               slice_from_cstr(elf_arch->default_musl_interp));
    797 
    798   /* Step 2: build .dynstr + .dynsym. .dynstr must also carry the
    799    * DT_NEEDED soname strings the .dynamic body references; intern
    800    * them after the import names so build_dynsym's de-dup also covers
    801    * any name that happens to collide with a soname. */
    802   bb_init(&dynstr, h);
    803   build_dynsym(img, dyn, &imports, &dynstr);
    804   {
    805     u32 ni;
    806     for (ni = 0; ni < dyn->nneeded; ++ni) {
    807       Slice s_s = pool_slice(l->c->global, dyn->needed[ni]);
    808       const char* s = s_s.s;
    809       size_t slen = s_s.len;
    810       if (s && slen) (void)bb_append_str(&dynstr, s, (u32)slen);
    811     }
    812   }
    813   /* Symbol versioning: assign per-import version requirements and append the
    814    * version strings ("FBSD_1.5", ...) to .dynstr. Must run before .dynstr is
    815    * finalized below; emits nothing when no import is versioned. */
    816   build_versions(l, img, dyn, &imports, &dynstr);
    817   dyn->dynstr = dynstr.data;
    818   dyn->dynstr_len = dynstr.len;
    819 
    820   /* Step 3: .gnu.hash. */
    821   build_gnu_hash(h, img, dyn, &dynstr);
    822 
    823   /* Step 4: pre-size all the synthetic sections.
    824    * .interp:      strlen + 1
    825    * .dynsym:      24 * ndynsym
    826    * .dynstr:      dynstr_len
    827    * .gnu.hash:    gnu_hash_len
    828    * .rela.dyn:    24 * (runtime GLOB_DAT + RELATIVE records)
    829    * .rela.plt:    24 * nfuncs
    830    * .plt:         32 + 16 * nfuncs   (PLT0 + per-slot)
    831    * .got.plt:     8 * (3 + nfuncs)
    832    * .dynamic:     16 * count_dynamic_entries
    833    */
    834   dyn->nplt = imports.nfuncs;
    835   dyn->nrela_plt = imports.nfuncs;
    836   dyn->rela_plt = imports.nfuncs
    837                       ? (DynRela*)h->alloc(h, sizeof(DynRela) * imports.nfuncs,
    838                                            _Alignof(DynRela))
    839                       : NULL;
    840   if (imports.nfuncs && !dyn->rela_plt)
    841     compiler_panic(img->c, SRCLOC_NONE, "link: oom on rela_plt");
    842 
    843   /* RELA dyn: GLOB_DAT (one per imported abs-relocated symbol) +
    844    * RELATIVE (one per PIE internal abs reloc against a defined sym).
    845    * Phase 5 emits these dynamically during reloc-apply; pre-count the
    846    * exact total here (img->relocs and the resolve-time `imported` flags
    847    * are already settled by the time layout_dyn runs) so the section
    848    * isn't padded with hundreds of trailing R_*_NONE records. */
    849   u32 cap_rel = 0;
    850   {
    851     u32 ri;
    852     for (ri = 0; ri < LinkRelocs_count(&img->relocs); ++ri) {
    853       const LinkRelocApply* r = LinkRelocs_at(&img->relocs, ri);
              /* NOTE(review): the target symbol is fetched before the kind and
               * section filters run, so every reloc's r->target is assumed to be
               * a valid 1-based id — confirm. */
    854       const LinkSymbol* tgt = LinkSyms_at(&img->syms, r->target - 1);
    855       const LinkSection* sec;
    856       if (r->kind != R_ABS32 && r->kind != R_ABS64) continue;
    857       if (r->link_section_id == LINK_SEC_NONE ||
    858           r->link_section_id > img->nsections)
    859         continue;
    860       sec = &img->sections[r->link_section_id - 1];
    861       if (sec->segment_id == LINK_SEG_NONE || sec->file_only) continue;
    862       if (tgt->imported) {
    863         cap_rel++; /* GLOB_DAT */
    864       } else if (tgt->defined && tgt->kind != SK_ABS) {
    865         cap_rel++; /* RELATIVE */
    866       }
    867     }
    868   }
    869   dyn->cap_rela_dyn = cap_rel;
    870   dyn->rela_dyn =
    871       dyn->cap_rela_dyn
    872           ? (DynRela*)h->alloc(h, sizeof(DynRela) * dyn->cap_rela_dyn,
    873                                _Alignof(DynRela))
    874           : NULL;
    875   if (dyn->cap_rela_dyn && !dyn->rela_dyn)
    876     compiler_panic(img->c, SRCLOC_NONE, "link: oom on rela_dyn");
    877   dyn->nrela_dyn = 0;
    878 
    879   Slice interp_s = pool_slice(l->c->global, dyn->interp_path);
    880   const char* interp_str = interp_s.s;
    881   size_t namelen = interp_s.len;
    882   u64 interp_bytes = (u64)namelen + 1u;
    883   u64 dynsym_bytes = (u64)dyn->ndynsym * ELF64_SYM_SIZE;
    884   u64 dynstr_bytes = (u64)dyn->dynstr_len;
    885   u64 gnuhash_bytes = (u64)dyn->gnu_hash_len;
    886   int has_ver = dyn->nverneed > 0;
    887   u64 versym_bytes = (u64)dyn->versym_len;
    888   u64 verneed_bytes = (u64)dyn->verneed_len;
    889   /* rela.dyn is pre-counted exactly; rela.plt is one record per PLT slot. */
    890   u64 rela_dyn_bytes = (u64)dyn->cap_rela_dyn * ELF64_RELA_SIZE;
    891   u64 rela_plt_bytes = (u64)dyn->nrela_plt * ELF64_RELA_SIZE;
    892   u64 plt_bytes =
    893       (u64)(imports.nfuncs
    894                 ? arch->plt0_size + arch->plt_entry_size * imports.nfuncs
    895                 : 0u);
    896   u64 gotplt_bytes = (u64)(imports.nfuncs ? 8u * (3u + imports.nfuncs) : 0u);
    897   dyn->ndyn_entries = count_dynamic_entries(dyn);
    898   u64 dynamic_bytes = (u64)dyn->ndyn_entries * ELF64_DYN_SIZE;
    899 
    900   /* Step 5: place segments, page-aligned after the existing image
    901    * span. New segments:
    902    *   ro_seg   (PF_R)  — .interp + .dynsym + .dynstr + .gnu.hash +
    903    *                       .rela.dyn + .rela.plt
    904    *   rx_seg   (PF_R+X)— .plt              (only when imports.nfuncs > 0)
    905    *   rw_seg   (PF_R+W)— .got.plt + .dynamic
    906    *
    907    * .dynamic lives in rw_seg because glibc's loader patches DT_*
    908    * d_un.d_ptr fields in-place at startup (elf_get_dynamic_info
    909    * adjusts STRTAB/SYMTAB/etc. by l_addr); a PF_R-only segment
    910    * causes SEGV_ACCERR. musl's loader doesn't do this rewrite, but
    911    * the RW placement is conventional and works for both.
    912    */
    913   page = 0x4000u; /* keep aligned with layout_page_size default */
    914   {
    915     /* Read the page size from layout_page_size by re-using the
    916      * configured execmem if present — duplicates the helper rather
    917      * than expose it; the value is only used for alignment. */
            /* NOTE(review): `l` was already dereferenced above, so the `l &&`
             * guard cannot be what protects against NULL here. */
    918     const KitExecMem* m = (l && l->jit_host) ? l->jit_host->execmem : NULL;
    919     if (m && m->page_size) page = (u64)m->page_size;
    920   }
    921 
    922   u64 base_vaddr = 0;
    923   u32 i;
    924   for (i = 0; i < img->nsegments; ++i) {
    925     u64 end = img->segments[i].vaddr + img->segments[i].mem_size;
    926     if (end > base_vaddr) base_vaddr = end;
    927   }
    928   base_vaddr = ALIGN_UP(base_vaddr, page);
    929 
    930   /* Pack ro section offsets (relative to ro_seg.vaddr). 8-byte
    931    * alignment for tables; 4-byte for .interp string. */
    932   u64 off = 0;
    933   u64 interp_off = off;
    934   off = ALIGN_UP(off + interp_bytes, 8u);
    935   u64 dynsym_off = off;
    936   off = ALIGN_UP(off + dynsym_bytes, 8u);
    937   u64 dynstr_off = off;
    938   off = ALIGN_UP(off + dynstr_bytes, 8u);
    939   u64 gnuhash_off = off;
    940   off = ALIGN_UP(off + gnuhash_bytes, 8u);
    941   u64 rela_dyn_off = off;
    942   off = ALIGN_UP(off + rela_dyn_bytes, 8u);
    943   u64 rela_plt_off = off;
    944   off = ALIGN_UP(off + rela_plt_bytes, 8u);
    945   /* .gnu.version + .gnu.version_r (zero-sized and skipped when no import is
    946    * versioned, so the ro segment is unchanged for unversioned links). */
    947   u64 versym_off = off;
    948   off = ALIGN_UP(off + versym_bytes, 8u);
    949   u64 verneed_off = off;
    950   off = ALIGN_UP(off + verneed_bytes, 8u);
    951   u64 ro_seg_size = off;
    952 
    953   /* When no PLT is needed, suppress the RX/.plt segment entirely. */
    954   int has_plt = imports.nfuncs > 0;
    955 
    956   /* Pack rw_seg offsets: .got.plt (when has_plt) followed by .dynamic. */
    957   u64 rw_off = 0;
    958   u64 gotplt_off = rw_off;
    959   if (has_plt) rw_off = ALIGN_UP(rw_off + gotplt_bytes, 8u);
    960   u64 dynamic_off = rw_off;
    961   rw_off = ALIGN_UP(rw_off + dynamic_bytes, 8u);
    962   u64 rw_seg_size = rw_off;
    963 
    964   u64 ro_vaddr = base_vaddr;
    965   u64 rx_vaddr = ALIGN_UP(ro_vaddr + ro_seg_size, page);
    966   u64 rw_vaddr = ALIGN_UP(rx_vaddr + (has_plt ? plt_bytes : 0u), page);
    967 
    968   /* rw_seg always exists (it carries .dynamic). */
    969   u32 nseg = 2u + (has_plt ? 1u : 0u);
    970   u32 seg_base = dyn_alloc_segments(img, nseg);
    971   u32 ro_seg_idx = seg_base + 0u;
    972   u32 rx_seg_idx = has_plt ? seg_base + 1u : 0u;
    973   u32 rw_seg_idx = seg_base + (has_plt ? 2u : 1u);
    974 
    975   LinkSegment* ro_seg = &img->segments[ro_seg_idx];
    976   memset(ro_seg, 0, sizeof(*ro_seg));
    977   ro_seg->id = (LinkSegmentId)(ro_seg_idx + 1u);
    978   ro_seg->flags = SF_ALLOC; /* PF_R */
    979   ro_seg->file_offset = ro_vaddr;
    980   ro_seg->vaddr = ro_vaddr;
    981   ro_seg->file_size = ro_seg_size;
    982   ro_seg->mem_size = ro_seg_size;
    983   ro_seg->align = (u32)page;
    984   ro_seg->nsections = 6u + (has_ver ? 2u : 0u);
    985   img->segment_bytes[ro_seg_idx] =
    986       ro_seg_size ? (u8*)h->alloc(h, (size_t)ro_seg_size, 16) : NULL;
    987   img->segment_bytes_cap[ro_seg_idx] = (size_t)ro_seg_size;
    988   if (ro_seg_size && !img->segment_bytes[ro_seg_idx])
    989     compiler_panic(img->c, SRCLOC_NONE, "link: oom on ro dyn segment");
    990   if (ro_seg_size)
    991     memset(img->segment_bytes[ro_seg_idx], 0, (size_t)ro_seg_size);
    992 
    993   if (has_plt) {
    994     LinkSegment* rx_seg = &img->segments[rx_seg_idx];
    995     memset(rx_seg, 0, sizeof(*rx_seg));
    996     rx_seg->id = (LinkSegmentId)(rx_seg_idx + 1u);
    997     rx_seg->flags = SF_ALLOC | SF_EXEC;
    998     rx_seg->file_offset = rx_vaddr;
    999     rx_seg->vaddr = rx_vaddr;
   1000     rx_seg->file_size = plt_bytes;
   1001     rx_seg->mem_size = plt_bytes;
   1002     rx_seg->align = (u32)page;
   1003     rx_seg->nsections = 1;
   1004     img->segment_bytes[rx_seg_idx] = (u8*)h->alloc(h, (size_t)plt_bytes, 16);
   1005     img->segment_bytes_cap[rx_seg_idx] = (size_t)plt_bytes;
   1006     if (!img->segment_bytes[rx_seg_idx])
   1007       compiler_panic(img->c, SRCLOC_NONE, "link: oom on .plt segment");
   1008     memset(img->segment_bytes[rx_seg_idx], 0, (size_t)plt_bytes);
   1009     /* Stash plt / got.plt vaddrs now — the PLT body emit just below
   1010      * reads them, and the post-shift fixup in shift_image_addresses
   1011      * (link_elf.c) keys on these fields too. */
   1012     dyn->plt_vaddr = rx_vaddr;
   1013     dyn->plt_size = plt_bytes;
   1014     dyn->got_plt_vaddr = rw_vaddr;
   1015     dyn->got_plt_size = gotplt_bytes;
   1016     /* PLT body emit: the descriptor owns the psABI-specific bytes. */
   1017     if (!arch->emit_plt0 || !arch->emit_plt_entry)
   1018       compiler_panic(l->c, SRCLOC_NONE, "link: PLT emit not configured");
   1019     {
   1020       u8* plt_b = img->segment_bytes[rx_seg_idx];
   1021       u32 ki;
   1022       arch->emit_plt0(plt_b, dyn->plt_vaddr, dyn->got_plt_vaddr);
   1023       for (ki = 0; ki < imports.nfuncs; ++ki) {
   1024         u64 entry_vaddr = dyn->plt_vaddr + arch->plt0_size +
   1025                           (u64)arch->plt_entry_size * (u64)ki;
   1026         u64 slot_vaddr = dyn->got_plt_vaddr + 8u * (3u + ki);
   1027         u8* p =
   1028             plt_b + arch->plt0_size + (size_t)arch->plt_entry_size * (size_t)ki;
   1029         arch->emit_plt_entry(p, entry_vaddr, slot_vaddr);
   1030       }
   1031     }
   1032   }
   1033   /* rw_seg always exists — it carries .dynamic, plus .got.plt when
   1034    * imports are present. */
   1035   {
   1036     LinkSegment* rw_seg = &img->segments[rw_seg_idx];
   1037     memset(rw_seg, 0, sizeof(*rw_seg));
   1038     rw_seg->id = (LinkSegmentId)(rw_seg_idx + 1u);
   1039     rw_seg->flags = SF_ALLOC | SF_WRITE;
   1040     rw_seg->file_offset = rw_vaddr;
   1041     rw_seg->vaddr = rw_vaddr;
   1042     rw_seg->file_size = rw_seg_size;
   1043     rw_seg->mem_size = rw_seg_size;
   1044     rw_seg->align = (u32)page;
   1045     rw_seg->nsections = has_plt ? 2u : 1u;
   1046     img->segment_bytes[rw_seg_idx] = (u8*)h->alloc(h, (size_t)rw_seg_size, 16);
   1047     img->segment_bytes_cap[rw_seg_idx] = (size_t)rw_seg_size;
   1048     if (!img->segment_bytes[rw_seg_idx])
   1049       compiler_panic(img->c, SRCLOC_NONE, "link: oom on rw dyn segment");
   1050     /* Zero-initialize. .got.plt[0] (&.dynamic) is filled later, after
   1051      * shift_image_addresses has bumped dyn->dynamic_vaddr. .dynamic
   1052      * body is built post-shift in link_emit_elf. Loader
   1053      * patches all .got.plt slots from .rela.plt before user code
   1054      * under DF_1_NOW. */
   1055     memset(img->segment_bytes[rw_seg_idx], 0, (size_t)rw_seg_size);
   1056   }
   1057   img->nsegments += nseg;
   1058 
   1059   /* Step 6: synthetic LinkSection entries. Order in img->sections
   1060    * matches the loader-friendly file order and feeds emit's
   1061    * outshdr-merge pass. */
   1062   u32 nsec = 7u + (has_plt ? 2u : 0u) + (has_ver ? 2u : 0u);
   1063   u32 sec_base = dyn_alloc_sections(img, nsec);
   1064 
   1065   /* helper: populate a fresh LinkSection for a segment-internal range */
   1066   /* Inline because the args differ enough (sem, name) per slot. */
   1067   Sym name_interp = pool_intern_slice(l->c->global, SLICE_LIT(".interp"));
   1068   Sym name_dynsym = pool_intern_slice(l->c->global, SLICE_LIT(".dynsym"));
   1069   Sym name_dynstr = pool_intern_slice(l->c->global, SLICE_LIT(".dynstr"));
   1070   Sym name_gnu_hash = pool_intern_slice(l->c->global, SLICE_LIT(".gnu.hash"));
   1071   Sym name_rela_dyn = pool_intern_slice(l->c->global, SLICE_LIT(".rela.dyn"));
   1072   Sym name_rela_plt = pool_intern_slice(l->c->global, SLICE_LIT(".rela.plt"));
   1073   Sym name_dynamic = pool_intern_slice(l->c->global, SLICE_LIT(".dynamic"));
   1074   Sym name_plt = pool_intern_slice(l->c->global, SLICE_LIT(".plt"));
   1075   Sym name_got_plt = pool_intern_slice(l->c->global, SLICE_LIT(".got.plt"));
   1076   Sym name_gnu_version = pool_intern_slice(l->c->global, SLICE_LIT(".gnu.version"));
   1077   Sym name_gnu_version_r =
   1078       pool_intern_slice(l->c->global, SLICE_LIT(".gnu.version_r"));
   1079 
   1080 #define INIT_SEC(IDX, NAME, SEG_IDX, OFF_IN_SEG, SIZE, ALIGN, FLAGS, SEM)  \
   1081   do {                                                                     \
   1082     LinkSection* ls = &img->sections[sec_base + (IDX)];                    \
   1083     memset(ls, 0, sizeof(*ls));                                            \
   1084     ls->id = (LinkSectionId)(sec_base + (IDX) + 1u);                       \
   1085     ls->input_id = LINK_INPUT_NONE;                                        \
   1086     ls->obj_section_id = OBJ_SEC_NONE;                                     \
   1087     ls->segment_id = img->segments[(SEG_IDX)].id;                          \
   1088     ls->input_offset = (OFF_IN_SEG);                                       \
   1089     ls->file_offset = img->segments[(SEG_IDX)].file_offset + (OFF_IN_SEG); \
   1090     ls->vaddr = img->segments[(SEG_IDX)].vaddr + (OFF_IN_SEG);             \
   1091     ls->size = (SIZE);                                                     \
   1092     ls->flags = (FLAGS);                                                   \
   1093     ls->align = (ALIGN);                                                   \
   1094     ls->name = (NAME);                                                     \
   1095     ls->sem = (SEM);                                                       \
   1096   } while (0)
   1097 
   1098   INIT_SEC(0, name_interp, ro_seg_idx, interp_off, interp_bytes, 1, SF_ALLOC,
   1099            SSEM_PROGBITS);
   1100   INIT_SEC(1, name_dynsym, ro_seg_idx, dynsym_off, dynsym_bytes, 8, SF_ALLOC,
   1101            SSEM_PROGBITS);
   1102   INIT_SEC(2, name_dynstr, ro_seg_idx, dynstr_off, dynstr_bytes, 1, SF_ALLOC,
   1103            SSEM_PROGBITS);
   1104   INIT_SEC(3, name_gnu_hash, ro_seg_idx, gnuhash_off, gnuhash_bytes, 8,
   1105            SF_ALLOC, SSEM_PROGBITS);
   1106   INIT_SEC(4, name_rela_dyn, ro_seg_idx, rela_dyn_off, rela_dyn_bytes, 8,
   1107            SF_ALLOC, SSEM_PROGBITS);
   1108   INIT_SEC(5, name_rela_plt, ro_seg_idx, rela_plt_off, rela_plt_bytes, 8,
   1109            SF_ALLOC, SSEM_PROGBITS);
   1110   INIT_SEC(6, name_dynamic, rw_seg_idx, dynamic_off, dynamic_bytes, 8,
   1111            SF_ALLOC | SF_WRITE, SSEM_PROGBITS);
   1112 
   1113   dyn->sec_interp = (LinkSectionId)(sec_base + 0 + 1u);
   1114   dyn->sec_dynsym = (LinkSectionId)(sec_base + 1 + 1u);
   1115   dyn->sec_dynstr = (LinkSectionId)(sec_base + 2 + 1u);
   1116   dyn->sec_gnu_hash = (LinkSectionId)(sec_base + 3 + 1u);
   1117   dyn->sec_rela_dyn = (LinkSectionId)(sec_base + 4 + 1u);
   1118   dyn->sec_rela_plt = (LinkSectionId)(sec_base + 5 + 1u);
   1119   dyn->sec_dynamic = (LinkSectionId)(sec_base + 6 + 1u);
   1120   dyn->dynamic_vaddr = img->segments[rw_seg_idx].vaddr + dynamic_off;
   1121   dyn->dynamic_size = dynamic_bytes;
   1122 
   1123   if (has_plt) {
   1124     INIT_SEC(7, name_plt, rx_seg_idx, 0, plt_bytes, 16, SF_ALLOC | SF_EXEC,
   1125              SSEM_PROGBITS);
   1126     INIT_SEC(8, name_got_plt, rw_seg_idx, gotplt_off, gotplt_bytes, 8,
   1127              SF_ALLOC | SF_WRITE, SSEM_PROGBITS);
   1128     dyn->sec_plt = (LinkSectionId)(sec_base + 7 + 1u);
   1129     dyn->sec_got_plt = (LinkSectionId)(sec_base + 8 + 1u);
   1130   }
   1131   if (has_ver) {
   1132     /* Appended after the optional PLT slots; emit sorts the section-header
   1133      * table by (segment, vaddr), so array order here is not load-bearing. The
   1134      * SSEM_PROGBITS sem just parks the bytes in the ro segment — the runtime
   1135      * reads them via DT_VERSYM/DT_VERNEED, not the section headers. */
   1136     u32 vb0 = 7u + (has_plt ? 2u : 0u);
   1137     INIT_SEC(vb0, name_gnu_version, ro_seg_idx, versym_off, versym_bytes, 2,
   1138              SF_ALLOC, SSEM_PROGBITS);
   1139     INIT_SEC(vb0 + 1u, name_gnu_version_r, ro_seg_idx, verneed_off,
   1140              verneed_bytes, 4, SF_ALLOC, SSEM_PROGBITS);
   1141     dyn->sec_gnu_version = (LinkSectionId)(sec_base + vb0 + 1u);
   1142     dyn->sec_gnu_version_r = (LinkSectionId)(sec_base + vb0 + 1u + 1u);
   1143   }
   1144 #undef INIT_SEC
   1145 
   1146   img->nsections += nsec;
   1147 
   1148   /* Step 7: copy .interp / .dynsym / .dynstr / .gnu.hash bytes into
   1149    * the ro segment. .dynamic body is built during emit (it embeds
   1150    * runtime vaddrs that PIE keeps image-relative; emit just reads
   1151    * the section ids' final vaddrs). */
   1152   u8* ro_bytes = img->segment_bytes[ro_seg_idx];
   1153 
   1154   /* .interp */
          /* Copies namelen + 1 bytes, i.e. assumes the pooled string is followed
           * by a NUL terminator — TODO confirm pool_slice guarantees that. */
   1155   if (interp_bytes && ro_bytes)
   1156     memcpy(ro_bytes + interp_off, interp_str, (size_t)interp_bytes);
   1157 
   1158   /* .dynsym: serialize DynSymRec to ELF64 wire layout. */
   1159   {
   1160     u32 si;
   1161     for (si = 0; si < dyn->ndynsym; ++si) {
   1162       u8* p = ro_bytes + dynsym_off + (u64)si * ELF64_SYM_SIZE;
   1163       const DynSymRec* r = &dyn->dynsym[si];
   1164       wr_u32_le(p + 0, r->st_name);
   1165       p[4] = r->st_info;
   1166       p[5] = r->st_other;
   1167       wr_u16_le(p + 6, r->st_shndx);
   1168       wr_u64_le(p + 8, r->st_value);
   1169       wr_u64_le(p + 16, r->st_size);
   1170     }
   1171   }
   1172 
   1173   /* .dynstr */
   1174   if (dynstr_bytes && ro_bytes && dyn->dynstr)
   1175     memcpy(ro_bytes + dynstr_off, dyn->dynstr, dyn->dynstr_len);
   1176 
   1177   /* .gnu.hash */
   1178   if (gnuhash_bytes && ro_bytes && dyn->gnu_hash)
   1179     memcpy(ro_bytes + gnuhash_off, dyn->gnu_hash, dyn->gnu_hash_len);
   1180 
   1181   /* .gnu.version + .gnu.version_r (no vaddrs inside; copied verbatim). */
   1182   if (has_ver && ro_bytes) {
   1183     if (versym_bytes && dyn->versym)
   1184       memcpy(ro_bytes + versym_off, dyn->versym, dyn->versym_len);
   1185     if (verneed_bytes && dyn->verneed)
   1186       memcpy(ro_bytes + verneed_off, dyn->verneed, dyn->verneed_len);
   1187   }
   1188 
   1189   /* .rela.plt: emit JUMP_SLOT records, one per imported function, and
   1190    * stash each import's PLT-entry vaddr in `sym_plt_vaddr` so the
   1191    * apply pass can redirect CALL26/JUMP26 against the import.  The
   1192    * record's r_offset addresses the .got.plt slot the PLT stub reads
   1193    * through; the loader patches that slot to the resolved runtime
   1194    * address before user code runs (DF_1_NOW, BIND_NOW).  Bytes are
   1195    * written here at pre-shift vaddrs; link_emit re-serializes them
   1196    * after shift_image_addresses bumps the dyn vaddrs by headers_load. */
   1197   {
   1198     u32 ki;
   1199     for (ki = 0; ki < imports.nfuncs; ++ki) {
   1200       LinkSymId lsid = imports.funcs[ki];
   1201       u32 dynidx = dyn->sym_dynidx[lsid];
   1202       u64 slot_vaddr = dyn->got_plt_vaddr + 8u * (3u + ki);
   1203       u64 plt_entry_vaddr = dyn->plt_vaddr + arch->plt0_size +
   1204                             (u64)arch->plt_entry_size * (u64)ki;
   1205       DynRela* r = &dyn->rela_plt[ki];
   1206       r->r_offset = slot_vaddr;
   1207       r->r_info = ELF64_R_INFO((u64)dynidx, elf_arch->r_jump_slot);
   1208       r->r_addend = 0;
   1209       /* Serialize into segment bytes (will be re-serialized post-shift). */
   1210       u8* p = ro_bytes + rela_plt_off + (u64)ki * ELF64_RELA_SIZE;
   1211       wr_u64_le(p + 0, r->r_offset);
   1212       wr_u64_le(p + 8, r->r_info);
   1213       wr_u64_le(p + 16, (u64)r->r_addend);
   1214       /* sym_plt_vaddr is consulted by apply_all_relocs. */
   1215       dyn->sym_plt_vaddr[lsid] = plt_entry_vaddr;
   1216     }
   1217   }
   1218 
   1219   /* .rela.dyn entries (GLOB_DAT for imports referenced via .got, and
   1220    * RELATIVE for PIE internal abs fixups) are emitted by
   1221    * apply_all_relocs as it walks every relocation.  layout_dyn
   1222    * leaves .rela.dyn empty here; the bytes are written post-shift in
   1223    * link_emit_elf. */
   1224 
   1225   /* .got.plt prelude: for BIND_NOW we leave the body zero — the
   1226    * loader patches every slot from .rela.plt before user code. Some
   1227    * loaders still inspect slot 0 (&.dynamic) at startup; provide it
   1228    * so glibc-style loaders don't fault. The loader writes the link_map
   1229    * cookie into slot 1 at load time. */
          /* NOTE(review): gp_bytes is the start of the rw segment and .got.plt
           * sits at gotplt_off == 0 there, so this writes .got.plt[0] with the
           * layout-time dynamic_vaddr. The rw_seg comment above says slot 0 is
           * filled after shift_image_addresses — presumably emit rewrites it;
           * confirm. */
   1230   if (has_plt) {
   1231     u8* gp_bytes = img->segment_bytes[rw_seg_idx];
   1232     if (gp_bytes && gotplt_bytes >= 8u) {
   1233       wr_u64_le(gp_bytes, dyn->dynamic_vaddr);
   1234       /* Slots 1, 2, and per-PLT slots stay zero until the loader
   1235        * fills them. Phase 5 would prefill the per-PLT slots with
   1236        * the address of PLT0 to support lazy binding. */
   1237     }
   1238   }
   1239 
   1240   /* The .dynamic body is built later, after segment shifts are
   1241    * applied during emit (link_elf.c). emit_dynamic_body takes the
   1242    * post-shift vaddrs of every other dyn section and writes one
   1243    * DT_* entry per index. */
   1244 
   1245   /* Synthesize linker-defined symbols that reference the .dynamic
   1246    * vaddr.  Scrt1.o on Linux loads `_DYNAMIC` via ADRP+ADD, and
   1247    * libc_nonshared.a's atexit shim takes `__dso_handle` as the
   1248    * per-image identity (we use the .dynamic vaddr — any stable
   1249    * per-image address satisfies the contract since the shim only
   1250    * passes it through to __cxa_atexit, which the program-side glibc
   1251    * just stashes). */
   1252   link_define_boundary(l, img, "_DYNAMIC", dyn->dynamic_vaddr);
   1253   link_define_boundary(l, img, "__dso_handle", dyn->dynamic_vaddr);
   1254 
   1255   free_imports(h, &imports);
   1256 }
   1257 
   1258 /* ---- cleanup ---- */
   1259 
   1260 void link_dyn_state_free(LinkImage* img) {
   1261   Heap* h = img->heap;
   1262   LinkDynState* dyn = img->dyn;
   1263   if (!dyn) return;
   1264   if (dyn->dynsym) h->free(h, dyn->dynsym, sizeof(*dyn->dynsym) * dyn->ndynsym);
   1265   if (dyn->dynstr) h->free(h, dyn->dynstr, dyn->dynstr_len);
   1266   if (dyn->gnu_hash) h->free(h, dyn->gnu_hash, dyn->gnu_hash_len);
   1267   if (dyn->versym) h->free(h, dyn->versym, dyn->versym_len);
   1268   if (dyn->verneed) h->free(h, dyn->verneed, dyn->verneed_len);
   1269   if (dyn->rela_dyn)
   1270     h->free(h, dyn->rela_dyn, sizeof(*dyn->rela_dyn) * dyn->cap_rela_dyn);
   1271   if (dyn->rela_plt)
   1272     h->free(h, dyn->rela_plt, sizeof(*dyn->rela_plt) * dyn->nrela_plt);
   1273   if (dyn->needed) h->free(h, dyn->needed, sizeof(*dyn->needed) * dyn->nneeded);
   1274   if (dyn->sym_dynidx)
   1275     h->free(h, dyn->sym_dynidx,
   1276             sizeof(*dyn->sym_dynidx) * dyn->sym_dynidx_size);
   1277   if (dyn->sym_plt_vaddr)
   1278     h->free(h, dyn->sym_plt_vaddr,
   1279             sizeof(*dyn->sym_plt_vaddr) * dyn->sym_dynidx_size);
   1280   h->free(h, dyn, sizeof(*dyn));
   1281   img->dyn = NULL;
   1282 }