commit 81f38cd7a7d2728545e735fac417faeba7354cb1
parent c5696650bb92794695f7e82f4cbffeae72f0bec4
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sat, 9 May 2026 15:58:28 -0700
musl dynamic link test
Diffstat:
3 files changed, 595 insertions(+), 44 deletions(-)
diff --git a/doc/DYNLD.md b/doc/DYNLD.md
@@ -0,0 +1,482 @@
+# Dynamic linking — implementation plan
+
+Scope: what it takes for `cfree ld` to produce a dynamic-linked
+aarch64-linux ELF executable that runs against a real musl libc.so.
+This is the gap exposed by `test/musl/run.sh`'s `dynamic` variant
+(`build/musl/<case>/dynamic/link.err`); see `doc/linker-status.md` row
+"Dynamic linking: PT_DYNAMIC, PT_INTERP, PLT, DT_NEEDED" for context.
+
+The harness today fails at the first stage of the pipeline (ELF reader
+rejects the `.so`). Behind that failure are the model, layout, emit,
+and driver gaps catalogued below.
+
+---
+
+## 1. What a working dynamic-exe link must look like
+
+End-to-end, for a single PIE exe linked against a single shared libc:
+
+```
+input clang case.c -fPIE -fpic -c -> case.o (ET_REL, PIC)
+ musl Scrt1.o crti.o crtn.o -> start files (ET_REL, PIC)
+ musl libc.so -> shared dep (ET_DYN)
+ libcfree_rt.a -> archive (ET_REL members)
+
+output case.exe ET_DYN exe (PIE)
+ PT_PHDR
+ PT_INTERP -> "/lib/ld-musl-aarch64.so.1"
+ PT_LOAD R (ehdr, phdrs, .interp, .dynsym, .dynstr, .gnu.hash,
+ .rela.dyn, .rela.plt, .note.gnu.build-id)
+ PT_LOAD RX (.text, .plt)
+ PT_LOAD R (.rodata, .eh_frame)
+ PT_LOAD RW (.data, .got, .got.plt, .data.rel.ro, .bss)
+ PT_DYNAMIC -> .dynamic
+ DT_NEEDED libc.so
+ DT_STRTAB / DT_SYMTAB / DT_GNU_HASH
+ DT_PLTGOT / DT_PLTRELSZ / DT_PLTREL DT_RELA / DT_JMPREL
+ DT_RELA / DT_RELASZ / DT_RELAENT
+ (DT_RUNPATH, DT_FLAGS_1 PIE, DT_INIT_ARRAY, ...)
+ PT_TLS -> .tdata template (already wired)
+
+dynamic relocs in the produced exe (against `libc.so` syms only):
+ R_AARCH64_GLOB_DAT on each .got slot for an imported data sym
+ R_AARCH64_JUMP_SLOT on each .got.plt slot for an imported function
+ R_AARCH64_RELATIVE on each absolute pointer in PIE-emitted data
+ (e.g. `.init_array` entries, vtable-shaped data)
+```
+
+Same pipeline, different shape, applies to `-shared` output (no
+PT_INTERP, allow_undefined=1, ET_DYN with DT_SONAME). The exe path is
+the more demanding consumer of the two — it's strictly a superset of
+the shared path.
+
+---
+
+## 2. Investigation: current pipeline state
+
+### 2.1 Driver — `driver/ld.c`
+
+Already has:
+- `-shared`, `-soname`, `-rpath`, `-rpath-link`, `-Bstatic`/`-Bdynamic`,
+ `--enable-new-dtags`, `-pie`, `-no-pie`, `-E`/`--export-dynamic`,
+ `--whole-archive`, `--start-group`/`--end-group`, `-l<name>` resolution
+ (`driver/lib_resolve.c`).
+- `cfree_link_shared` dispatch wired (`driver/ld.c:640-668`); shared
+ options are populated and passed.
+
+Missing:
+- No `-dynamic-linker` / `--dynamic-linker` flag. Unknown flags are
+ rejected (`driver/ld.c:427`), so callers can't even pass it as a
+ forward-compat no-op.
+- No `.so` recognition. Only `.a` is special-cased in argv parse
+ (`driver/ld.c:432`); everything else becomes an "object file" and
+ goes into `obj_bytes`. `driver_lib_resolve` (used by `-l`) does not
+ appear to distinguish `.so` from `.a` either — confirm and extend.
+- `-l<name>` resolution doesn't honor the current `-Bstatic`/`-Bdynamic`
+ link-mode for picking `.so` over `.a`.
+
+### 2.2 ELF reader — `src/obj/elf_read.c`
+
+`read_elf` is the single ingest path used by both `link_add_obj_bytes`
+(`src/link/link.c:128`) and archive members (`src/link/link.c:207`).
+
+What it parses:
+- `e_shoff` / shdrs only. Program headers ignored.
+- `SHT_PROGBITS`/`NOBITS`/`NOTE`/etc. → ObjBuilder sections.
+- Exactly one `SHT_SYMTAB` (the `.symtab`) into ObjSyms.
+- `SHT_RELA` / `SHT_REL` whose `sh_info` points at a kept section.
+- `SHT_GROUP`.
+
+Why it rejects `libc.so`: `elf_read.c:395` enforces
+`sh->sh_info != 0 && sh->sh_info < e_shnum` for every RELA/REL. Shared
+objects' `.rela.dyn` and `.rela.plt` carry `sh_info = 0` (a dynamic
+reloc isn't bound to one specific output section) — which is **valid
+ELF** but hits this guard. Also missing:
+
+- No `e_type` discrimination (silently accepts ET_DYN/ET_EXEC and
+ proceeds; would corrupt the global symbol pool if not for the rela
+ guard tripping first).
+- No `SHT_DYNSYM` / `SHT_DYNAMIC` / `SHT_GNU_HASH` reader.
+- No `PT_DYNAMIC` walk.
+
+### 2.3 Object model — `src/obj/obj.h`
+
+The relocation enum (`src/obj/obj.h:96-127`) has no entries for:
+- `R_AARCH64_GLOB_DAT` / `R_AARCH64_JUMP_SLOT` / `R_AARCH64_RELATIVE`
+ / `R_AARCH64_COPY` / `R_AARCH64_TLSDESC*` — the dynamic-only relocs
+ the loader processes at startup.
+- `R_AARCH64_PLT32` (4-byte PLT-relative) — typically not used on
+ AArch64 (CALL26/JUMP26 carry the PLT semantics) but the mapping
+ table in `src/obj/elf_reloc_aarch64.c` would refuse it if seen.
+
+The mapping in `src/obj/elf_reloc_aarch64.c` returns `(u32)-1` for
+unsupported types, which `read_elf` panics on (`elf_read.c:413`). So
+even with the `sh_info==0` check relaxed, GLOB_DAT in `libc.so`'s
+`.rela.dyn` would trip the next guard.
+
+There's also no notion of an **import** symbol or a **DSO input**.
+Today `LinkInput.kind ∈ {OBJ, OBJ_BYTES, ARCHIVE_BYTES}` (`link.h:10`);
+`LinkSymbol` has `defined`, `kind`, `value`, `vaddr` but no "needs PLT
+slot" / "needs GOT slot" / "lives in DSO N" fields.
+
+### 2.4 Linker resolve — `src/link/link_layout.c`
+
+- Static-only by construction. Comment at `link_layout.c:1646` and the
+ IFUNC ctor logic key off `l->emit_static_exe`; the rest of the layout
+ has no symmetric "build dynamic image" branch.
+- `resolve_undefs` (`link_layout.c:247`) panics on any undef that isn't
+ satisfied by `img->globals` or the in-process resolver
+ (`link_layout.c:300-307`). Dynamic linking needs a third path: undef
+ satisfied by an imported DSO sym, recorded but **kept undefined in
+ the static sense** so it routes through .plt/.got at apply time.
+- `section_kept` (`link_layout.c:53`) drops everything that isn't
+ ALLOC PROGBITS/NOBITS/INIT_ARRAY. Synthesized .dynsym / .dynstr /
+ .dynamic / .got.plt / .plt / .rela.dyn / .rela.plt would need to be
+ added as image-owned synthetic sections (same model as `layout_iplt`
+ uses for `.iplt`/`.igot.plt`/`.iplt.pairs`).
+- `link_image_alloc` and `LinkImage` (`link_internal.h:105-148`) carry
+ no fields for: dynamic strtab, dynsym table, hash table, PLT/GOT
+ slot tables, dynamic-reloc list, PT_INTERP path, soname, DT_NEEDED
+ list, runpath/rpath lists.
+
+### 2.5 ELF emit — `src/link/link_elf.c`
+
+- Hardcoded `e_type = ET_EXEC` (`link_elf.c:737`).
+- Hardcoded `IMAGE_BASE = 0x400000` (`link_elf.c:106`); ALL applied
+ reloc S/P values bake this in (`link_elf.c:192,195`). PIE/ET_DYN
+ output requires `IMAGE_BASE = 0` (the loader rebases) **and** any
+ pre-applied absolute reloc against an internal target must instead
+ emit an `R_AARCH64_RELATIVE` dyn-reloc against the image-relative
+ vaddr, which the loader patches.
+- `phdrs` synthesis covers PT_LOAD, PT_NOTE, PT_TLS only
+ (`link_elf.c:664-722`). No PT_PHDR, PT_INTERP, PT_DYNAMIC,
+ PT_GNU_RELRO, PT_GNU_STACK.
+- `.symtab` / `.strtab` / `.shstrtab` / `.note.gnu.build-id` are the
+ only synthesized non-input sections (`link_elf.c:540-867`).
+- `cfree_link_shared` is stubbed (`src/api/pipeline.c:413`):
+ `compiler_panic("cfree_link_shared: shared-library codegen is not yet
+ implemented in libcfree")`.
+
+### 2.6 Reloc apply — `src/link/link_reloc.c`
+
+Only static relocs are coded. There is no concept of "compute final
+address ⇒ also push a dyn-reloc record into `img->dyn_relocs[]`",
+which is the mode an absolute reloc takes when the target is imported
+or the output is PIE.
+
+---
+
+## 3. What's missing — concrete inventory
+
+Grouped by layer, in dependency order.
+
+### 3.1 ELF reader (smallest, gates everything)
+
+1. Accept `e_type ∈ {ET_REL, ET_DYN}`. Reject ET_EXEC, ET_CORE.
+2. For ET_REL, current behavior unchanged.
+3. For ET_DYN, switch into a **DSO-import** mode that produces a
+ different ObjBuilder shape (or a sibling `DsoBuilder` — see §5):
+ - Parse `.dynsym`/`.dynstr` (NOT `.symtab`).
+ - Skip `.rela.dyn`/`.rela.plt` body parsing — only `DT_SONAME` and
+ the dynsym table matter for satisfying undefs at link time.
+ - Read PT_DYNAMIC to extract DT_SONAME (so DT_NEEDED can record the
+ library's runtime name, not its filesystem path).
+4. Relax the `sh_info == 0` guard for ET_DYN (in DSO mode the rela
+ sections are skipped anyway). Keep it strict for ET_REL.
+5. Add new RelocKinds and elf_aarch64_reloc_{from,to} cases:
+ `R_AARCH64_GLOB_DAT`, `R_AARCH64_JUMP_SLOT`, `R_AARCH64_RELATIVE`,
+ `R_AARCH64_COPY`. (TLSDESC / IRELATIVE deferred — see Phase 8 in §4.)
+
+### 3.2 Object/link model — symbols and inputs
+
+1. New `LinkInputKind`: `LINK_INPUT_DSO_BYTES`. Carries a
+ `DsoBuilder*` (or extended ObjBuilder) holding: soname, dynsym
+ list, version mapping (deferred), the original bytes for
+ diagnostics.
+2. `LinkSymbol` gains `dso_id` (`LinkInputId` of defining DSO, 0 for
+ "local to this image") and a flag set: `imported`, `needs_plt`,
+ `needs_got`, `needs_copy_reloc`. `defined=1, imported=1` means
+ "satisfied by a DSO; emit a dyn-reloc instead of baking the
+ address".
+3. `resolve_undefs` extension: before the panic at
+ `link_layout.c:304`, walk DSO inputs and match by name; on hit,
+ mark the LinkSymbol imported and record a DT_NEEDED for that DSO.
+4. New image-owned tables on `LinkImage`:
+ - `dynsym[]`, `dynstr` (string builder).
+ - `gnu_hash` (computed last).
+ - `plt_slots[]` (one per imported function), `got_slots[]` (one
+ per imported data sym), `gotplt_slots[]` (matched 1:1 with
+ plt_slots).
+ - `dyn_relocs[]` — `R_AARCH64_GLOB_DAT|JUMP_SLOT|RELATIVE` records.
+ - `needed[]` — DT_NEEDED soname list.
+ - `interp_path` (PT_INTERP string).
+
+### 3.3 Layout
+
+1. New synthetic-section pass alongside `layout_iplt`. Builds:
+ `.interp`, `.dynsym`, `.dynstr`, `.gnu.hash`, `.rela.dyn`,
+ `.rela.plt`, `.got`, `.got.plt`, `.plt`, `.dynamic`, `.data.rel.ro`
+ (for relocs in initialized data, RELRO target).
+2. PT_LOAD bucket-aware placement:
+ - Read-only dyn-data (.interp, .dynsym, .dynstr, .gnu.hash,
+ .rela.dyn, .rela.plt) goes into the existing R bucket (or its
+ own R PT_LOAD before .text).
+ - .plt is RX → joins SEG_RX.
+ - .got, .got.plt, .data.rel.ro → SEG_RW.
+3. PT_DYNAMIC, PT_INTERP, PT_PHDR, PT_GNU_RELRO synthesis in
+ `link_elf.c`'s phdr build loop.
+4. Drop the hardcoded `IMAGE_BASE = 0x400000`. For ET_DYN output, set
+ `e_type = ET_DYN`, `IMAGE_BASE = 0`, `e_entry = entry vaddr`
+ (already image-relative). For ET_EXEC, leave today's value. The
+ sym-table / reloc-apply code needs to read this from the image
+ instead of the macro.
+
+### 3.4 PLT/GOT emit
+
+AArch64 PLT0 + per-symbol PLT entry per the psABI. Each PLT entry is
+16 bytes:
+
+```
+adrp x16, .got.plt + slot ; ADR_PREL_PG_HI21
+ldr x17, [x16, #lo12(slot)] ; LDST64_ABS_LO12_NC
+add x16, x16, #lo12(slot) ; ADD_ABS_LO12_NC
+br x17
+```
+
+PLT0 is the standard 32-byte stub that calls `_dl_runtime_resolve`
+through `.got.plt[1..2]`. Lazy binding works out of the box if
+`.got.plt` slots are initialized to `&PLT0`; **eager binding (BIND_NOW)
+is simpler to implement** — initialize all `.got.plt` slots from
+`.rela.plt` at startup, no PLT0 trickery needed. Recommend
+**BIND_NOW-only as v1**; add lazy later if perf demands.
+
+### 3.5 Reloc apply, dynamic edition
+
+1. PIE-aware abs reloc: when `output is PIE` AND the reloc target is
+ `defined && !imported` (an internal symbol), the reloc value is
+ **left as the image-relative vaddr** in the file, AND a
+ `R_AARCH64_RELATIVE` dyn-reloc is emitted against that file
+ offset. The loader adds the load-base.
+2. Imported abs reloc: emit `R_AARCH64_GLOB_DAT` against the .got
+ slot. The CALL26/JUMP26 to an imported function instead targets
+ that symbol's .plt entry; the .plt entry's `.got.plt` slot is the
+ `R_AARCH64_JUMP_SLOT` target.
+3. ADR_GOT_PAGE / LD64_GOT_LO12_NC already-applied path needs to
+ route imported syms through the new `.got` (today `.got` is used
+ only for IFUNC).
+
+### 3.6 ELF emit
+
+1. `e_type` selection by output kind: ET_DYN for `-shared` and for
+ `-pie` exe; ET_EXEC for `-static` exe (today's path).
+2. New phdr emitters: PT_PHDR (offset/size of the phdr table itself),
+ PT_INTERP, PT_DYNAMIC, PT_GNU_RELRO, PT_GNU_STACK (PF_R|PF_W,
+ filesz=0 — non-executable stack marker).
+3. Section header emit for `.dynsym` (`SHT_DYNSYM`), `.dynstr`
+ (`SHT_STRTAB`), `.gnu.hash` (`SHT_GNU_HASH`), `.dynamic`
+ (`SHT_DYNAMIC`), `.rela.dyn`/`.rela.plt` (`SHT_RELA` with
+ `sh_info=0` and proper `sh_link` to `.dynsym`).
+4. .dynamic body: emit DT_* entries per §1's list.
+
+### 3.7 Driver
+
+1. `-dynamic-linker PATH` / `--dynamic-linker=PATH` flag → carries
+ into a new `CfreeLinkOptions.interp_path`. (Or default per-target:
+ `/lib/ld-musl-aarch64.so.1` for aarch64-linux-musl.)
+2. `.so` recognition in argv: filename ending in `.so` (or
+ `.so.<digit>...`) routes to a new `CfreeLinkInputs.dso_bytes`
+ array, separate from `obj_bytes`.
+3. `-l<name>` honors `-Bdynamic`: `driver/lib_resolve.c` looks for
+ `lib<name>.so` first under `-Bdynamic`, then `lib<name>.a`. Today
+ it presumably does archive-only.
+4. Default: when any DSO input or `-pie` is present, output is ET_DYN
+ with a default interp; otherwise ET_EXEC (current behavior).
+
+### 3.8 Public API
+
+1. `CfreeLinkInputs` gains `dso_bytes` + `ndso_bytes` fields
+ (parallel to `obj_bytes`).
+2. `CfreeLinkOptions` gains `interp_path` and a `pie` flag (or
+ `output_kind ∈ {EXE_STATIC, EXE_PIE, SHARED}`).
+3. `cfree_link_shared` stub at `src/api/pipeline.c:413` becomes a
+ thin wrapper that dispatches into the same layout as `link_exe`
+ but with `output_kind = SHARED` (no PT_INTERP, no entry symbol
+ required, allow_undefined=1).
+
+---
+
+## 4. Implementation plan in phases
+
+Each phase is independently testable against `test/musl/run.sh`'s
+dynamic variant. Phases 1-3 are the input-side groundwork (reader,
+driver, resolve) that unblocks the rest; 4-8 are the actual link work.
+
+### Phase 1 — ELF reader: accept ET_DYN as a DSO input *(small)*
+
+Files: `src/obj/elf_read.c`, `src/obj/elf_reloc_aarch64.c`,
+`src/obj/obj.h`, `src/link/link.c`.
+
+- Add `read_elf_dso` returning a `DsoBuilder`. Callers in
+ `src/link/link.c` dispatch on `e_type`.
+- `LINK_INPUT_DSO_BYTES` enum + `link_add_dso_bytes` API.
+- New RelocKinds (GLOB_DAT, JUMP_SLOT, RELATIVE, COPY) wired through
+ `elf_aarch64_reloc_{to,from}`.
+- DSO input is *parsed but not laid out* — its dynsym is searchable
+ during `resolve_undefs`, but it contributes no sections to the
+ image.
+
+Test: harness no longer fails at "rela sh_info 0 out of range". Next
+failure surfaces.
+
+### Phase 2 — Driver: `-dynamic-linker`, `.so` inputs *(small)*
+
+Files: `driver/ld.c`, `driver/lib_resolve.c`, `include/cfree.h`.
+
+- Parse `-dynamic-linker`, plumb to `CfreeLinkOptions`.
+- Recognize `.so` / `.so.N` filenames; route to new `dso_bytes` slot.
+- `-l<name>` under `-Bdynamic` finds `lib<name>.so` first.
+
+Test: case can be invoked end-to-end with the same flags GNU ld
+takes; failure is now a missing model field, not a parse error.
+
+### Phase 3 — Resolve: imported-undef path *(medium)*
+
+Files: `src/link/link_layout.c`, `src/link/link_internal.h`,
+`src/link/link.h`.
+
+- `LinkSymbol.imported`, `dso_id`, `needs_{plt,got,copy}` flags.
+- `resolve_undefs` extension: search DSO inputs by name before the
+ panic. On hit, mark imported; record DT_NEEDED.
+- Emit-time decisions deferred — at this phase the imported syms
+ just aren't fatal anymore.
+
+Test: link reaches layout. Failure shifts to "no .plt", "abs reloc
+target has no vaddr", or similar — i.e., real layout work.
+
+### Phase 4 — Synthetic dyn-tables *(medium)*
+
+Files: new `src/link/link_dyn.c`, hooked from `link_layout.c`.
+
+- Walk LinkSymbols, partition imports into PLT (function) / GOT (data)
+ slot lists.
+- Build `.dynsym`/`.dynstr`/`.gnu.hash` from the imported set plus
+ any `--export-dynamic` exports.
+- Allocate image-relative bytes for `.plt`, `.got.plt`, `.got`,
+ `.rela.plt`, `.rela.dyn`, `.dynamic`, `.interp`. Pattern after
+ `layout_iplt` (`link_layout.c:~1640`).
+
+Test: the image has all the expected sections/segments; the loader
+can `mmap` it. It probably crashes at `_start` because reloc apply
+hasn't been updated.
+
+### Phase 5 — PLT body emit + reloc rewriting *(medium)*
+
+Files: `src/link/link_dyn.c`, `src/link/link_reloc.c`.
+
+- Emit AArch64 PLT0 + per-symbol PLT entries.
+- Rewrite CALL26/JUMP26 against imported syms to target the symbol's
+ PLT entry (S = plt_vaddr).
+- Rewrite ADR_GOT_PAGE / LD64_GOT_LO12_NC against imports to target
+ the new `.got` slot.
+- Emit `R_AARCH64_JUMP_SLOT` records into `.rela.plt`,
+ `R_AARCH64_GLOB_DAT` into `.rela.dyn`.
+
+Test: `01_syscall_write` (no libc calls) should still link and run;
+`02_errno_touch` exercises the import path for `close` and `errno`.
+
+### Phase 6 — PIE / ET_DYN emit *(medium)*
+
+Files: `src/link/link_elf.c`.
+
+- Plumb `output_kind` through to emit. Set `e_type`, `IMAGE_BASE`,
+ PT_INTERP, PT_DYNAMIC, PT_PHDR, PT_GNU_RELRO, PT_GNU_STACK.
+- For PIE: emit `R_AARCH64_RELATIVE` against any internal absolute
+ reloc (currently baked at `link_elf.c:192,622`).
+- Drop `IMAGE_BASE` macro use; read from image.
+
+Test: `03_printf_hello` end-to-end against musl libc.so.
+
+### Phase 7 — `cfree_link_shared` for real *(small after 4-6)*
+
+Files: `src/api/pipeline.c`, `src/link/`.
+
+- Replace the panic at `pipeline.c:413` with a dispatch into the
+ same machinery as link_exe, with `output_kind = SHARED`,
+ `allow_undefined=1`, no entry-symbol requirement, DT_SONAME from
+ `opts->soname`, DT_R(UN)PATH from `opts->r(un)paths`, exports
+ promoted into dynsym from `opts->exports`.
+- New harness case: build a `libfoo.so` from a single .c; link an
+ exe against it; run.
+
+### Phase 8 — TLS GD/IE/LD, IRELATIVE *(separate effort)*
+
+Out of scope for v1 dynamic exe. Required for shared-lib TLS and for
+IFUNCs in dynamic outputs. Deferred — the static-exe IFUNC plan in
+`linker-status.md §"Plan: STT_GNU_IFUNC in ELF output"` covers the
+near-term surface.
+
+---
+
+## 5. Open questions
+
+1. **DsoBuilder vs. ObjBuilder reuse.** Importing a DSO shares almost
+ nothing with parsing an ET_REL — different symbol table, no
+ sections to lay out, only a name table to satisfy undefs. A
+ sibling `DsoBuilder` keeps each type's invariants crisp; bolting a
+ `kind` field onto ObjBuilder muddles the round-trip contract
+ (`doc/DESIGN.md §5.5`). **Recommend: separate type.**
+
+2. **Lazy vs. eager binding (BIND_NOW).** Eager is dramatically
+ simpler — no `_dl_runtime_resolve` PLT0 entry, `.got.plt` is
+ initialized straight from `.rela.plt`. Cost is startup time. For
+ v1 of cfree where everything else is correctness-first, recommend
+ **DT_FLAGS_1 |= DF_1_NOW**; revisit lazy later.
+
+3. **Where the dyn-link state lives in the image.** Today
+ `LinkImage` is a single struct with one segment-bytes array per
+ segment. Synthetic dyn sections (`.plt`, `.got.plt`, etc.) need
+ the same shape. The IFUNC code (`layout_iplt` at
+ `link_layout.c:~1640`) already does this for `.iplt` / `.igot.plt`
+ / `.iplt.pairs` — same allocator, same SegmentBytes pattern. The
+ dyn pass should follow it verbatim, not invent a new lifecycle.
+
+4. **`emit_static_exe` flag stays.** It's the right name for the
+ "no PT_DYNAMIC, no PLT, classic ET_EXEC" path. Add a parallel
+ `emit_dynamic_exe` (or fold both into `output_kind`); don't
+ overload `emit_static_exe`'s meaning.
+
+5. **Versioned symbols (`.gnu.version_r`, `.gnu.version`).** musl
+ doesn't use them; glibc does. v1 ignores versions on read
+ (matching GNU ld's behavior when unversioned objects link against
+ versioned libs — the reference binds to the default version). Adding
+ write-side versioning is a follow-up that's invisible to the musl
+ harness.
+
+6. **`.eh_frame_hdr` interaction.** Listed as a near-term gap in
+ `linker-status.md`. It needs PT_GNU_EH_FRAME and is independent
+ of dynamic linking, but the dyn-link work touches the same phdr
+ synthesis code. Land `.eh_frame_hdr` first if it sequences in the
+ same window — the phdr count growth is shared.
+
+---
+
+## 6. Test plan
+
+`test/musl/run.sh dynamic` is the integration test. Per-phase
+expected progressions:
+
+| Phase | `01_syscall_write` | `02_errno_touch` | `03_printf_hello` |
+|------:|--------------------|-------------------|-------------------|
+| pre | link: rela sh_info | link: rela sh_info| link: rela sh_info|
+| 1 | link: …unsupported reloc / model | … | … |
+| 2 | link: model gap | … | … |
+| 3 | link: layout gap | … | … |
+| 4 | mmap ok / segfault | … | … |
+| 5 | run pass | run: GLOB_DAT path| run: PLT call path|
+| 6 | run pass | run pass | run pass |
+
+A unit-level harness for the synthetic-section builder (Phase 4) is
+worth adding under `test/link/dyn/` — round-trip the `.dynsym` /
+`.gnu.hash` / `.rela.{dyn,plt}` against `readelf -d -r --dyn-syms`
+output for a hand-crafted input. This is faster than waiting for a
+full musl run to surface a malformed `.dynamic`.
diff --git a/test/musl/Containerfile b/test/musl/Containerfile
@@ -10,6 +10,8 @@
FROM docker.io/arm64v8/alpine:3.20.10
# musl-dev: Scrt1.o, crt1.o, crti.o, crtn.o, libc.a + headers under /usr/include.
+# Plus the dynamic linker / libc.so used by the dynamic-link variant of the
+# harness (in musl, /lib/ld-musl-aarch64.so.1 *is* libc — same file).
# linux-headers: kernel uapi (linux/*, asm/*, asm-generic/*) — used by syscall
# definitions in the musl headers.
# Note: we deliberately do NOT pull clang's compiler-rt or libgcc — the
@@ -21,10 +23,14 @@ RUN apk add --no-cache musl-dev=1.2.5-r3 linux-headers
# is a single tar pipe.
RUN mkdir -p /sysroot/lib /sysroot/include \
&& cp /usr/lib/crt1.o /sysroot/lib/ \
+ && cp /usr/lib/Scrt1.o /sysroot/lib/ \
+ && cp /usr/lib/rcrt1.o /sysroot/lib/ \
&& cp /usr/lib/crti.o /sysroot/lib/ \
&& cp /usr/lib/crtn.o /sysroot/lib/ \
&& cp /usr/lib/libc.a /sysroot/lib/ \
&& cp /usr/lib/libssp_nonshared.a /sysroot/lib/ \
+ && cp /lib/ld-musl-aarch64.so.1 /sysroot/lib/ \
+ && ln -s ld-musl-aarch64.so.1 /sysroot/lib/libc.so \
&& cp -r /usr/include/. /sysroot/include/
# Pin the build for cache reuse and reproducibility audits.
diff --git a/test/musl/run.sh b/test/musl/run.sh
@@ -1,13 +1,25 @@
#!/usr/bin/env bash
-# test/musl/run.sh — drive cfree ld against static musl on aarch64-linux.
+# test/musl/run.sh — drive cfree ld against a real musl sysroot on
+# aarch64-linux. Each case in test/musl/cases/*.c is exercised in two
+# variants:
#
-# For each case in test/musl/cases/*.c:
-# 1. clang --target=aarch64-linux-musl --sysroot=$SYSROOT -c case.c -o case.o
-# 2. cfree ld -static -o case.exe \
-# $SYSROOT/lib/crt1.o $SYSROOT/lib/crti.o \
-# case.o \
-# $SYSROOT/lib/libc.a $SYSROOT/lib/crtn.o
-# 3. run case.exe under qemu-aarch64-static or podman aarch64.
+# static — non-PIC object + libc.a, classic static-exe link
+# cfree ld -static -o case.exe \
+# $SYSROOT/lib/crt1.o $SYSROOT/lib/crti.o \
+# case.o \
+# $SYSROOT/lib/libc.a $CFREE_RT \
+# $SYSROOT/lib/crtn.o
+#
+# dynamic — PIE object + libc.so, expects PT_INTERP /lib/ld-musl-aarch64.so.1
+# cfree ld -pie -o case.exe \
+# $SYSROOT/lib/Scrt1.o $SYSROOT/lib/crti.o \
+# case.o \
+# $SYSROOT/lib/libc.so $CFREE_RT \
+# $SYSROOT/lib/crtn.o
+# (musl ships ld-musl-aarch64.so.1 *as* libc — same file. The
+# harness intentionally has no -dynamic-linker flag yet because
+# cfree ld currently doesn't accept one; this is one of the gaps
+# we expect the dynamic variant to surface.)
#
# Each case file may carry an `expected` companion (default 0) and an
# optional `expected_stdout` file checked with substring match.
@@ -43,8 +55,10 @@ color_red() { printf '\033[31m%s\033[0m' "$1"; }
color_grn() { printf '\033[32m%s\033[0m' "$1"; }
color_yel() { printf '\033[33m%s\033[0m' "$1"; }
-PASS=0; FAIL=0
-FAIL_NAMES=()
+# Per-variant counters so the dynamic-link surface is visible in its own
+# right rather than being averaged into one total.
+PASS_static=0; FAIL_static=0; FAIL_NAMES_static=()
+PASS_dynamic=0; FAIL_dynamic=0; FAIL_NAMES_dynamic=()
# Pick a runner. Native arm64 hosts can run aarch64 ELFs directly under
# podman without binfmt; otherwise we want qemu-aarch64-static.
@@ -80,10 +94,13 @@ run_aarch64() {
RUN_RC=127
}
+# run_case <variant> <src>
+# variant ∈ {static, dynamic}
run_case() {
- local src="$1"
+ local variant="$1" src="$2"
local name="$(basename "$src" .c)"
- local work="$BUILD_DIR/$name"
+ local work="$BUILD_DIR/$name/$variant"
+ local label="$name [$variant]"
mkdir -p "$work"
local expected=0
@@ -95,41 +112,73 @@ run_case() {
expect_stdout="$(cat "$TEST_DIR/cases/${name}.stdout")"
fi
+ # ---- compile ----
+ local cc_flags=(--target=aarch64-linux-musl --sysroot="$SYSROOT"
+ -nostdinc -isystem "$SYSROOT/include" -O0)
+ case "$variant" in
+ static) cc_flags+=(-fno-PIC -fno-pie) ;;
+ dynamic) cc_flags+=(-fPIE -fpic) ;;
+ esac
+
local obj="$work/${name}.o"
- if ! clang --target=aarch64-linux-musl --sysroot="$SYSROOT" \
- -nostdinc -isystem "$SYSROOT/include" \
- -O0 -fno-PIC -fno-pie \
- -c "$src" -o "$obj" 2>"$work/cc.err"; then
- FAIL=$((FAIL+1)); FAIL_NAMES+=("$name (compile)")
- printf ' %s %s\n' "$(color_red FAIL)" "$name (compile)"
+ if ! clang "${cc_flags[@]}" -c "$src" -o "$obj" 2>"$work/cc.err"; then
+ eval "FAIL_${variant}=\$((FAIL_${variant}+1))"
+ eval "FAIL_NAMES_${variant}+=(\"\$label (compile)\")"
+ printf ' %s %s\n' "$(color_red FAIL)" "$label (compile)"
sed 's/^/ cc| /' "$work/cc.err"
return
fi
+ # ---- link ----
local exe="$work/${name}.exe"
- # Link order mirrors a typical static-musl invocation:
- # crt1.o crti.o obj libc.a libcfree_rt.a crtn.o
- # libcfree_rt provides the TF / soft-float builtins (__addtf3,
- # __extenddftf2 etc.) that musl's libc.a calls from printf's long-
- # double formatting. Our archive ingestion iterates demand-load to
- # a fixed point so a single trailing libcfree_rt.a is enough.
- local link_cmd=("$CFREE" "ld" -static -o "$exe"
- "$SYSROOT/lib/crt1.o" "$SYSROOT/lib/crti.o"
- "$obj"
- "$SYSROOT/lib/libc.a" "$CFREE_RT"
- "$SYSROOT/lib/crtn.o")
+ local link_cmd
+ case "$variant" in
+ static)
+ # Link order mirrors a typical static-musl invocation:
+ # crt1.o crti.o obj libc.a libcfree_rt.a crtn.o
+ # libcfree_rt provides the TF / soft-float builtins
+ # (__addtf3, __extenddftf2 etc.) that musl's libc.a calls
+ # from printf's long-double formatting. Archive ingestion
+ # iterates demand-load to a fixed point so one trailing
+ # libcfree_rt.a is enough.
+ link_cmd=("$CFREE" "ld" -static -o "$exe"
+ "$SYSROOT/lib/crt1.o" "$SYSROOT/lib/crti.o"
+ "$obj"
+ "$SYSROOT/lib/libc.a" "$CFREE_RT"
+ "$SYSROOT/lib/crtn.o")
+ ;;
+ dynamic)
+ # Dynamic-exe link: PIE start file, libc.so as a *shared*
+ # input (not an archive), expects cfree ld to:
+ # - accept ET_DYN ELF objects as input,
+ # - emit PT_INTERP "/lib/ld-musl-aarch64.so.1",
+ # - emit PT_DYNAMIC with DT_NEEDED libc.so,
+ # - emit a .dynsym/.dynstr/.gnu.hash + .rela.plt/.got.plt
+ # so the loader can bind imported symbols at runtime.
+ # libcfree_rt.a stays — soft-float TF helpers are still
+ # static-bound from our side. crti/crtn are unchanged.
+ link_cmd=("$CFREE" "ld" -pie -o "$exe"
+ "$SYSROOT/lib/Scrt1.o" "$SYSROOT/lib/crti.o"
+ "$obj"
+ "$SYSROOT/lib/libc.so" "$CFREE_RT"
+ "$SYSROOT/lib/crtn.o")
+ ;;
+ esac
if ! "${link_cmd[@]}" >"$work/link.out" 2>"$work/link.err"; then
- FAIL=$((FAIL+1)); FAIL_NAMES+=("$name (link)")
- printf ' %s %s\n' "$(color_red FAIL)" "$name (link)"
+ eval "FAIL_${variant}=\$((FAIL_${variant}+1))"
+ eval "FAIL_NAMES_${variant}+=(\"\$label (link)\")"
+ printf ' %s %s\n' "$(color_red FAIL)" "$label (link)"
sed 's/^/ ld| /' "$work/link.err" | head -10
return
fi
+ # ---- run ----
run_aarch64 "$exe" "$work/run.out" "$work/run.err"
if [ "$RUN_RC" -ne "$expected" ]; then
- FAIL=$((FAIL+1)); FAIL_NAMES+=("$name (run rc=$RUN_RC, want $expected)")
- printf ' %s %s (rc=%s, want %s)\n' "$(color_red FAIL)" "$name" \
+ eval "FAIL_${variant}=\$((FAIL_${variant}+1))"
+ eval "FAIL_NAMES_${variant}+=(\"\$label (run rc=\$RUN_RC, want \$expected)\")"
+ printf ' %s %s (rc=%s, want %s)\n' "$(color_red FAIL)" "$label" \
"$RUN_RC" "$expected"
[ -s "$work/run.err" ] && sed 's/^/ err| /' "$work/run.err" | head -5
[ -s "$work/run.out" ] && sed 's/^/ out| /' "$work/run.out" | head -5
@@ -138,30 +187,44 @@ run_case() {
if [ -n "$expect_stdout" ]; then
if ! grep -qF -- "$expect_stdout" "$work/run.out"; then
- FAIL=$((FAIL+1)); FAIL_NAMES+=("$name (stdout)")
- printf ' %s %s (stdout mismatch)\n' "$(color_red FAIL)" "$name"
+ eval "FAIL_${variant}=\$((FAIL_${variant}+1))"
+ eval "FAIL_NAMES_${variant}+=(\"\$label (stdout)\")"
+ printf ' %s %s (stdout mismatch)\n' "$(color_red FAIL)" "$label"
printf ' expected substring: %s\n' "$expect_stdout"
sed 's/^/ got| /' "$work/run.out" | head -5
return
fi
fi
- PASS=$((PASS+1))
- printf ' %s %s\n' "$(color_grn PASS)" "$name"
+ eval "PASS_${variant}=\$((PASS_${variant}+1))"
+ printf ' %s %s\n' "$(color_grn PASS)" "$label"
}
+shopt -s nullglob
+
printf 'Running musl static-link cases...\n'
+for src in "$TEST_DIR/cases"/*.c; do
+ run_case static "$src"
+done
-shopt -s nullglob
+printf '\nRunning musl dynamic-link cases...\n'
for src in "$TEST_DIR/cases"/*.c; do
- run_case "$src"
+ run_case dynamic "$src"
done
-printf '\nResults: %s pass, %s fail\n' "$PASS" "$FAIL"
-if [ ${#FAIL_NAMES[@]} -gt 0 ]; then
- printf 'Failed:\n'
- for n in "${FAIL_NAMES[@]}"; do printf ' %s\n' "$n"; done
+printf '\nResults:\n'
+printf ' static : %s pass, %s fail\n' "$PASS_static" "$FAIL_static"
+printf ' dynamic: %s pass, %s fail\n' "$PASS_dynamic" "$FAIL_dynamic"
+
+if [ ${#FAIL_NAMES_static[@]} -gt 0 ]; then
+ printf '\nFailed (static):\n'
+ for n in "${FAIL_NAMES_static[@]}"; do printf ' %s\n' "$n"; done
+fi
+if [ ${#FAIL_NAMES_dynamic[@]} -gt 0 ]; then
+ printf '\nFailed (dynamic):\n'
+ for n in "${FAIL_NAMES_dynamic[@]}"; do printf ' %s\n' "$n"; done
fi
-if [ $FAIL -gt 0 ]; then exit 1; fi
+total_fail=$((FAIL_static + FAIL_dynamic))
+if [ $total_fail -gt 0 ]; then exit 1; fi
exit 0