kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 6aa3cb14986ed02faabb6ecb9cd19d697a95566f
parent 8ab800473391e2774d2c694e51c6bd7675c172d4
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sat,  9 May 2026 18:01:23 -0700

link: place .dynamic in a PF_R+W segment for glibc compatibility

glibc's loader patches DT_* d_un.d_ptr fields in .dynamic in-place at
startup (elf_get_dynamic_info adjusts STRTAB/SYMTAB/etc. by l_addr);
the prior PF_R-only placement caused SEGV_ACCERR before user code ran.
musl's loader doesn't do this rewrite, which masked the bug under
test-musl. Move .dynamic into rw_seg (always allocated, alongside
.got.plt when imports exist) so both loaders work.

Drops the assumption in link_emit_elf_aarch64 that .rela.dyn /
.rela.plt share a segment with .dynamic; each now resolves its
backing segment via segment_id.

test-glibc: 0/3 → 3/3.

Diffstat:
M doc/DYNLD.md | 26 +++++++++++++++++++++-----
M doc/linker-status.md | 1 +
M src/link/link_dyn.c | 76 ++++++++++++++++++++++++++++++++++++++++++++--------------------------------
3 files changed, 66 insertions(+), 37 deletions(-)

diff --git a/doc/DYNLD.md b/doc/DYNLD.md @@ -1,14 +1,22 @@ # Dynamic linking — status & remaining work Scope: producing dynamic-linked aarch64-linux ELF executables (and, -eventually, shared libs) that run against a real musl libc.so. +eventually, shared libs) that run against a real musl or glibc +libc.so. ## Status -`make test-musl` passes 6/6 (3 static + 3 dynamic). The dynamic -variant produces an ET_DYN PIE that runs end-to-end against -`/lib/ld-musl-aarch64.so.1` for `01_syscall_write`, `02_errno_touch`, -`03_printf_hello`. +`make test-musl` passes 6/6 (3 static + 3 dynamic) and +`make test-glibc` passes 3/3 (dynamic-only). Each produces an +ET_DYN PIE that runs end-to-end against the runtime loader +(`/lib/ld-musl-aarch64.so.1` or `/lib/ld-linux-aarch64.so.1`) for +`01_syscall_write`, `02_errno_touch`, `03_printf_hello`. + +`.dynamic` lives in a PF_R+W segment (alongside `.got.plt`) because +glibc's loader patches `DT_*` `d_un.d_ptr` fields in-place at startup +(`elf_get_dynamic_info` adjusts STRTAB/SYMTAB/etc. by `l_addr`); a +PF_R-only segment causes `SEGV_ACCERR`. musl's loader doesn't do +this rewrite, but RW placement is conventional and works for both. What's wired (don't re-derive — read the code if you need detail): @@ -60,6 +68,14 @@ scope for the v1 dynamic exe; the musl harness doesn't exercise them. sets; needs a sort pass before scaling. - **`--as-needed`**: today every DSO with a soname gets a DT_NEEDED. Plumb the flag through to filter on actual import use. +- **Linker-script DSO inputs**: Debian ships + `/usr/lib/aarch64-linux-gnu/libc.so` as a GNU-ld script + (`GROUP ( libc.so.6 libc_nonshared.a ld-linux-aarch64.so.1 )`). + `cfree ld` doesn't recognize a script in DSO position, so the + glibc harness hands `libc.so.6` + `libc_nonshared.a` directly. + `link_script.c` already parses the kernel.lds subset; extend it + to handle bare GROUP/INPUT scripts and wire `-l<name>` / + positional `.so` resolution to fan out the listed inputs. 
- **Versioned symbols** (`.gnu.version` / `.gnu.version_r`): musl doesn't use them; glibc does. - **Lazy binding**: would need a real `_dl_runtime_resolve` PLT0 diff --git a/doc/linker-status.md b/doc/linker-status.md @@ -21,6 +21,7 @@ live in `test/link/` — they are not duplicated in `test/elf/`. | `test-link` J | 38 | 0 | JIT in-process incl. GC subgroup, IFUNC, TLS | | `test-link` bad | 2 | 0 | `bad/30_undef_strong` (E + J) | | `test-musl` | 6 | 0 | musl 1.2.5 static + dynamic: syscall, errno, printf | +| `test-glibc` | 3 | 0 | glibc 2.36 dynamic: syscall, errno, printf | (R = roundtrip; E = link → aarch64 ELF → qemu/podman; J = JIT in-process.) diff --git a/src/link/link_dyn.c b/src/link/link_dyn.c @@ -594,15 +594,17 @@ void layout_dyn(Linker* l, LinkImage* img) { u64 dynamic_bytes = (u64)dyn->ndyn_entries * 16u; /* Step 5: place segments, page-aligned after the existing image - * span. Two new segments: + * span. New segments: * ro_seg (PF_R) — .interp + .dynsym + .dynstr + .gnu.hash + - * .rela.dyn + .rela.plt + .dynamic - * rx_seg (PF_R+X)— .plt - * rw_seg (PF_R+W)— .got.plt + * .rela.dyn + .rela.plt + * rx_seg (PF_R+X)— .plt (only when imports.nfuncs > 0) + * rw_seg (PF_R+W)— .got.plt + .dynamic * - * Ordering inside ro_seg matches the typical loader-friendly - * layout. The .dynamic shdr lives in ro_seg; PT_DYNAMIC's vaddr - * points at it. + * .dynamic lives in rw_seg because glibc's loader patches DT_* + * d_un.d_ptr fields in-place at startup (elf_get_dynamic_info + * adjusts STRTAB/SYMTAB/etc. by l_addr); a PF_R-only segment + * causes SEGV_ACCERR. musl's loader doesn't do this rewrite, but + * the RW placement is conventional and works for both. 
*/ page = 0x4000u; /* keep aligned with layout_page_size default */ { @@ -637,22 +639,29 @@ void layout_dyn(Linker* l, LinkImage* img) { off = ALIGN_UP(off + rela_dyn_bytes, 8u); u64 rela_plt_off = off; off = ALIGN_UP(off + rela_plt_bytes, 8u); - u64 dynamic_off = off; - off = ALIGN_UP(off + dynamic_bytes, 8u); u64 ro_seg_size = off; - u64 ro_vaddr = base_vaddr; - u64 rx_vaddr = ALIGN_UP(ro_vaddr + ro_seg_size, page); - u64 rw_vaddr = ALIGN_UP(rx_vaddr + plt_bytes, page); - /* When no PLT is needed, suppress the RX/.plt segment entirely. */ int has_plt = imports.nfuncs > 0; - u32 nseg = 1u + (has_plt ? 2u : 0u); + /* Pack rw_seg offsets: .got.plt (when has_plt) followed by .dynamic. */ + u64 rw_off = 0; + u64 gotplt_off = rw_off; + if (has_plt) rw_off = ALIGN_UP(rw_off + gotplt_bytes, 8u); + u64 dynamic_off = rw_off; + rw_off = ALIGN_UP(rw_off + dynamic_bytes, 8u); + u64 rw_seg_size = rw_off; + + u64 ro_vaddr = base_vaddr; + u64 rx_vaddr = ALIGN_UP(ro_vaddr + ro_seg_size, page); + u64 rw_vaddr = ALIGN_UP(rx_vaddr + (has_plt ? plt_bytes : 0u), page); + + /* rw_seg always exists (it carries .dynamic). */ + u32 nseg = 2u + (has_plt ? 1u : 0u); u32 seg_base = dyn_alloc_segments(img, nseg); u32 ro_seg_idx = seg_base + 0u; u32 rx_seg_idx = has_plt ? seg_base + 1u : 0u; - u32 rw_seg_idx = has_plt ? seg_base + 2u : 0u; + u32 rw_seg_idx = seg_base + (has_plt ? 2u : 1u); LinkSegment* ro_seg = &img->segments[ro_seg_idx]; memset(ro_seg, 0, sizeof(*ro_seg)); @@ -663,7 +672,7 @@ void layout_dyn(Linker* l, LinkImage* img) { ro_seg->file_size = ro_seg_size; ro_seg->mem_size = ro_seg_size; ro_seg->align = (u32)page; - ro_seg->nsections = 7; + ro_seg->nsections = 6; img->segment_bytes[ro_seg_idx] = ro_seg_size ? (u8*)h->alloc(h, (size_t)ro_seg_size, 16) : NULL; img->segment_bytes_cap[ro_seg_idx] = (size_t)ro_seg_size; @@ -769,27 +778,30 @@ void layout_dyn(Linker* l, LinkImage* img) { } } + } + /* rw_seg always exists — it carries .dynamic, plus .got.plt when + * imports are present. 
*/ + { LinkSegment* rw_seg = &img->segments[rw_seg_idx]; memset(rw_seg, 0, sizeof(*rw_seg)); rw_seg->id = (LinkSegmentId)(rw_seg_idx + 1u); rw_seg->flags = SF_ALLOC | SF_WRITE; rw_seg->file_offset = rw_vaddr; rw_seg->vaddr = rw_vaddr; - rw_seg->file_size = gotplt_bytes; - rw_seg->mem_size = gotplt_bytes; + rw_seg->file_size = rw_seg_size; + rw_seg->mem_size = rw_seg_size; rw_seg->align = (u32)page; - rw_seg->nsections = 1; - img->segment_bytes[rw_seg_idx] = (u8*)h->alloc(h, (size_t)gotplt_bytes, 16); - img->segment_bytes_cap[rw_seg_idx] = (size_t)gotplt_bytes; + rw_seg->nsections = has_plt ? 2u : 1u; + img->segment_bytes[rw_seg_idx] = (u8*)h->alloc(h, (size_t)rw_seg_size, 16); + img->segment_bytes_cap[rw_seg_idx] = (size_t)rw_seg_size; if (!img->segment_bytes[rw_seg_idx]) - compiler_panic(img->c, no_loc(), "link: oom on .got.plt segment"); - /* First 8 bytes hold &.dynamic at runtime; the loader fills it. - * We zero-initialize the whole region — Phase 5 will write the - * fallback PLT0 stub address into each slot to make lazy binding - * work even before BIND_NOW resolution. For BIND_NOW (DF_1_NOW) - * the loader replaces every slot before user code runs, so zero - * is a fine starting state. */ - memset(img->segment_bytes[rw_seg_idx], 0, (size_t)gotplt_bytes); + compiler_panic(img->c, no_loc(), "link: oom on rw dyn segment"); + /* Zero-initialize. .got.plt[0] (&.dynamic) is filled later, after + * shift_image_addresses has bumped dyn->dynamic_vaddr. .dynamic + * body is built post-shift in link_emit_elf_aarch64. Loader + * patches all .got.plt slots from .rela.plt before user code + * under DF_1_NOW. 
*/ + memset(img->segment_bytes[rw_seg_idx], 0, (size_t)rw_seg_size); } img->nsegments += nseg; @@ -841,7 +853,7 @@ void layout_dyn(Linker* l, LinkImage* img) { SF_ALLOC, SSEM_PROGBITS); INIT_SEC(5, name_rela_plt, ro_seg_idx, rela_plt_off, rela_plt_bytes, 8, SF_ALLOC, SSEM_PROGBITS); - INIT_SEC(6, name_dynamic, ro_seg_idx, dynamic_off, dynamic_bytes, 8, + INIT_SEC(6, name_dynamic, rw_seg_idx, dynamic_off, dynamic_bytes, 8, SF_ALLOC | SF_WRITE, SSEM_PROGBITS); dyn->sec_interp = (LinkSectionId)(sec_base + 0 + 1u); @@ -851,13 +863,13 @@ void layout_dyn(Linker* l, LinkImage* img) { dyn->sec_rela_dyn = (LinkSectionId)(sec_base + 4 + 1u); dyn->sec_rela_plt = (LinkSectionId)(sec_base + 5 + 1u); dyn->sec_dynamic = (LinkSectionId)(sec_base + 6 + 1u); - dyn->dynamic_vaddr = img->segments[ro_seg_idx].vaddr + dynamic_off; + dyn->dynamic_vaddr = img->segments[rw_seg_idx].vaddr + dynamic_off; dyn->dynamic_size = dynamic_bytes; if (has_plt) { INIT_SEC(7, name_plt, rx_seg_idx, 0, plt_bytes, 16, SF_ALLOC | SF_EXEC, SSEM_PROGBITS); - INIT_SEC(8, name_got_plt, rw_seg_idx, 0, gotplt_bytes, 8, + INIT_SEC(8, name_got_plt, rw_seg_idx, gotplt_off, gotplt_bytes, 8, SF_ALLOC | SF_WRITE, SSEM_PROGBITS); dyn->sec_plt = (LinkSectionId)(sec_base + 7 + 1u); dyn->sec_got_plt = (LinkSectionId)(sec_base + 8 + 1u);