commit 6aa3cb14986ed02faabb6ecb9cd19d697a95566f
parent 8ab800473391e2774d2c694e51c6bd7675c172d4
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sat, 9 May 2026 18:01:23 -0700
link: place .dynamic in a PF_R+W segment for glibc compatibility
glibc's loader patches DT_* d_un.d_ptr fields in .dynamic in-place at
startup (elf_get_dynamic_info adjusts STRTAB/SYMTAB/etc. by l_addr);
the prior PF_R-only placement caused SEGV_ACCERR before user code ran.
musl's loader doesn't do this rewrite, which masked the bug under
test-musl. Move .dynamic into rw_seg, which is now always allocated and
also holds .got.plt when imports exist, so both loaders work.
Drops the assumption in link_emit_elf_aarch64 that .rela.dyn /
.rela.plt share a segment with .dynamic; each now resolves its
backing segment via segment_id.
test-glibc: 0/3 → 3/3.
Diffstat:
3 files changed, 66 insertions(+), 37 deletions(-)
diff --git a/doc/DYNLD.md b/doc/DYNLD.md
@@ -1,14 +1,22 @@
# Dynamic linking — status & remaining work
Scope: producing dynamic-linked aarch64-linux ELF executables (and,
-eventually, shared libs) that run against a real musl libc.so.
+eventually, shared libs) that run against a real musl or glibc
+libc.so.
## Status
-`make test-musl` passes 6/6 (3 static + 3 dynamic). The dynamic
-variant produces an ET_DYN PIE that runs end-to-end against
-`/lib/ld-musl-aarch64.so.1` for `01_syscall_write`, `02_errno_touch`,
-`03_printf_hello`.
+`make test-musl` passes 6/6 (3 static + 3 dynamic) and
+`make test-glibc` passes 3/3 (dynamic-only). Each dynamic variant
+produces an ET_DYN PIE that runs end-to-end against its runtime loader
+(`/lib/ld-musl-aarch64.so.1` or `/lib/ld-linux-aarch64.so.1`) for
+`01_syscall_write`, `02_errno_touch`, `03_printf_hello`.
+
+`.dynamic` lives in a PF_R+W segment (alongside `.got.plt` when imports
+exist) because glibc's loader patches `DT_*` `d_un.d_ptr` fields in place
+at startup (`elf_get_dynamic_info` adjusts STRTAB/SYMTAB/etc. by `l_addr`);
+a PF_R-only segment causes `SEGV_ACCERR`. musl's loader doesn't do this
+rewrite, but RW placement is conventional and works for both loaders.
What's wired (don't re-derive — read the code if you need detail):
@@ -60,6 +68,14 @@ scope for the v1 dynamic exe; the musl harness doesn't exercise them.
sets; needs a sort pass before scaling.
- **`--as-needed`**: today every DSO with a soname gets a DT_NEEDED.
Plumb the flag through to filter on actual import use.
+- **Linker-script DSO inputs**: Debian ships
+ `/usr/lib/aarch64-linux-gnu/libc.so` as a GNU-ld script
+ (`GROUP ( libc.so.6 libc_nonshared.a ld-linux-aarch64.so.1 )`).
+ `cfree ld` doesn't recognize a linker script in DSO position, so the
+ glibc harness passes `libc.so.6` and `libc_nonshared.a` to it directly.
+ `link_script.c` already parses the kernel.lds subset; extend it
+ to handle bare GROUP/INPUT scripts and wire `-l<name>` /
+ positional `.so` resolution to fan out the listed inputs.
- **Versioned symbols** (`.gnu.version` / `.gnu.version_r`): musl
doesn't use them; glibc does.
- **Lazy binding**: would need a real `_dl_runtime_resolve` PLT0
diff --git a/doc/linker-status.md b/doc/linker-status.md
@@ -21,6 +21,7 @@ live in `test/link/` — they are not duplicated in `test/elf/`.
| `test-link` J | 38 | 0 | JIT in-process incl. GC subgroup, IFUNC, TLS |
| `test-link` bad | 2 | 0 | `bad/30_undef_strong` (E + J) |
| `test-musl` | 6 | 0 | musl 1.2.5 static + dynamic: syscall, errno, printf |
+| `test-glibc` | 3 | 0 | glibc 2.36 dynamic: syscall, errno, printf |
(R = roundtrip; E = link → aarch64 ELF → qemu/podman; J = JIT in-process.)
diff --git a/src/link/link_dyn.c b/src/link/link_dyn.c
@@ -594,15 +594,17 @@ void layout_dyn(Linker* l, LinkImage* img) {
u64 dynamic_bytes = (u64)dyn->ndyn_entries * 16u;
/* Step 5: place segments, page-aligned after the existing image
- * span. Two new segments:
+ * span. New segments:
* ro_seg (PF_R) — .interp + .dynsym + .dynstr + .gnu.hash +
- * .rela.dyn + .rela.plt + .dynamic
- * rx_seg (PF_R+X)— .plt
- * rw_seg (PF_R+W)— .got.plt
+ * .rela.dyn + .rela.plt
+ * rx_seg (PF_R+X)— .plt (only when imports.nfuncs > 0)
+ * rw_seg (PF_R+W)— .got.plt (only when imports.nfuncs > 0) + .dynamic
*
- * Ordering inside ro_seg matches the typical loader-friendly
- * layout. The .dynamic shdr lives in ro_seg; PT_DYNAMIC's vaddr
- * points at it.
+ * .dynamic lives in rw_seg because glibc's loader patches DT_*
+ * d_un.d_ptr fields in-place at startup (elf_get_dynamic_info
+ * adjusts STRTAB/SYMTAB/etc. by l_addr); a PF_R-only segment
+ * causes SEGV_ACCERR. musl's loader doesn't do this rewrite, but
+ * the RW placement is conventional and works for both.
*/
page = 0x4000u; /* keep aligned with layout_page_size default */
{
@@ -637,22 +639,29 @@ void layout_dyn(Linker* l, LinkImage* img) {
off = ALIGN_UP(off + rela_dyn_bytes, 8u);
u64 rela_plt_off = off;
off = ALIGN_UP(off + rela_plt_bytes, 8u);
- u64 dynamic_off = off;
- off = ALIGN_UP(off + dynamic_bytes, 8u);
u64 ro_seg_size = off;
- u64 ro_vaddr = base_vaddr;
- u64 rx_vaddr = ALIGN_UP(ro_vaddr + ro_seg_size, page);
- u64 rw_vaddr = ALIGN_UP(rx_vaddr + plt_bytes, page);
-
/* When no PLT is needed, suppress the RX/.plt segment entirely. */
int has_plt = imports.nfuncs > 0;
- u32 nseg = 1u + (has_plt ? 2u : 0u);
+ /* Pack rw_seg offsets: .got.plt (when has_plt) followed by .dynamic. */
+ u64 rw_off = 0;
+ u64 gotplt_off = rw_off;
+ if (has_plt) rw_off = ALIGN_UP(rw_off + gotplt_bytes, 8u);
+ u64 dynamic_off = rw_off;
+ rw_off = ALIGN_UP(rw_off + dynamic_bytes, 8u);
+ u64 rw_seg_size = rw_off;
+
+ u64 ro_vaddr = base_vaddr;
+ u64 rx_vaddr = ALIGN_UP(ro_vaddr + ro_seg_size, page);
+ u64 rw_vaddr = ALIGN_UP(rx_vaddr + (has_plt ? plt_bytes : 0u), page);
+
+ /* rw_seg always exists (it carries .dynamic). */
+ u32 nseg = 2u + (has_plt ? 1u : 0u);
u32 seg_base = dyn_alloc_segments(img, nseg);
u32 ro_seg_idx = seg_base + 0u;
u32 rx_seg_idx = has_plt ? seg_base + 1u : 0u;
- u32 rw_seg_idx = has_plt ? seg_base + 2u : 0u;
+ u32 rw_seg_idx = seg_base + (has_plt ? 2u : 1u);
LinkSegment* ro_seg = &img->segments[ro_seg_idx];
memset(ro_seg, 0, sizeof(*ro_seg));
@@ -663,7 +672,7 @@ void layout_dyn(Linker* l, LinkImage* img) {
ro_seg->file_size = ro_seg_size;
ro_seg->mem_size = ro_seg_size;
ro_seg->align = (u32)page;
- ro_seg->nsections = 7;
+ ro_seg->nsections = 6;
img->segment_bytes[ro_seg_idx] =
ro_seg_size ? (u8*)h->alloc(h, (size_t)ro_seg_size, 16) : NULL;
img->segment_bytes_cap[ro_seg_idx] = (size_t)ro_seg_size;
@@ -769,27 +778,30 @@ void layout_dyn(Linker* l, LinkImage* img) {
}
}
+ }
+ /* rw_seg always exists — it carries .dynamic, plus .got.plt when
+ * imports are present. */
+ {
LinkSegment* rw_seg = &img->segments[rw_seg_idx];
memset(rw_seg, 0, sizeof(*rw_seg));
rw_seg->id = (LinkSegmentId)(rw_seg_idx + 1u);
rw_seg->flags = SF_ALLOC | SF_WRITE;
rw_seg->file_offset = rw_vaddr;
rw_seg->vaddr = rw_vaddr;
- rw_seg->file_size = gotplt_bytes;
- rw_seg->mem_size = gotplt_bytes;
+ rw_seg->file_size = rw_seg_size;
+ rw_seg->mem_size = rw_seg_size;
rw_seg->align = (u32)page;
- rw_seg->nsections = 1;
- img->segment_bytes[rw_seg_idx] = (u8*)h->alloc(h, (size_t)gotplt_bytes, 16);
- img->segment_bytes_cap[rw_seg_idx] = (size_t)gotplt_bytes;
+ rw_seg->nsections = has_plt ? 2u : 1u;
+ img->segment_bytes[rw_seg_idx] = (u8*)h->alloc(h, (size_t)rw_seg_size, 16);
+ img->segment_bytes_cap[rw_seg_idx] = (size_t)rw_seg_size;
if (!img->segment_bytes[rw_seg_idx])
- compiler_panic(img->c, no_loc(), "link: oom on .got.plt segment");
- /* First 8 bytes hold &.dynamic at runtime; the loader fills it.
- * We zero-initialize the whole region — Phase 5 will write the
- * fallback PLT0 stub address into each slot to make lazy binding
- * work even before BIND_NOW resolution. For BIND_NOW (DF_1_NOW)
- * the loader replaces every slot before user code runs, so zero
- * is a fine starting state. */
- memset(img->segment_bytes[rw_seg_idx], 0, (size_t)gotplt_bytes);
+ compiler_panic(img->c, no_loc(), "link: oom on rw dyn segment");
+ /* Zero-initialize. .got.plt[0] (&.dynamic) is filled later, after
+ * shift_image_addresses has bumped dyn->dynamic_vaddr. The .dynamic
+ * body is built post-shift in link_emit_elf_aarch64. Under
+ * DF_1_NOW the loader patches every .got.plt slot from .rela.plt
+ * before user code runs. */
+ memset(img->segment_bytes[rw_seg_idx], 0, (size_t)rw_seg_size);
}
img->nsegments += nseg;
@@ -841,7 +853,7 @@ void layout_dyn(Linker* l, LinkImage* img) {
SF_ALLOC, SSEM_PROGBITS);
INIT_SEC(5, name_rela_plt, ro_seg_idx, rela_plt_off, rela_plt_bytes, 8,
SF_ALLOC, SSEM_PROGBITS);
- INIT_SEC(6, name_dynamic, ro_seg_idx, dynamic_off, dynamic_bytes, 8,
+ INIT_SEC(6, name_dynamic, rw_seg_idx, dynamic_off, dynamic_bytes, 8,
SF_ALLOC | SF_WRITE, SSEM_PROGBITS);
dyn->sec_interp = (LinkSectionId)(sec_base + 0 + 1u);
@@ -851,13 +863,13 @@ void layout_dyn(Linker* l, LinkImage* img) {
dyn->sec_rela_dyn = (LinkSectionId)(sec_base + 4 + 1u);
dyn->sec_rela_plt = (LinkSectionId)(sec_base + 5 + 1u);
dyn->sec_dynamic = (LinkSectionId)(sec_base + 6 + 1u);
- dyn->dynamic_vaddr = img->segments[ro_seg_idx].vaddr + dynamic_off;
+ dyn->dynamic_vaddr = img->segments[rw_seg_idx].vaddr + dynamic_off;
dyn->dynamic_size = dynamic_bytes;
if (has_plt) {
INIT_SEC(7, name_plt, rx_seg_idx, 0, plt_bytes, 16, SF_ALLOC | SF_EXEC,
SSEM_PROGBITS);
- INIT_SEC(8, name_got_plt, rw_seg_idx, 0, gotplt_bytes, 8,
+ INIT_SEC(8, name_got_plt, rw_seg_idx, gotplt_off, gotplt_bytes, 8,
SF_ALLOC | SF_WRITE, SSEM_PROGBITS);
dyn->sec_plt = (LinkSectionId)(sec_base + 7 + 1u);
dyn->sec_got_plt = (LinkSectionId)(sec_base + 8 + 1u);