commit aef3673230d5108c76a0a58c69c64f1c2ff7fcd5
parent b42c7a606a0ae82b88c902d61b7e0bee646f9c3a
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sat, 9 May 2026 14:06:32 -0700
link: static musl on aarch64 — link, run, hello
Brings cfree ld to the point where it links real C against pinned
musl libc.a + cfree's own libcfree_rt.a and the resulting static
aarch64-linux exe runs under qemu/podman. printf("hello, musl") works
end-to-end; errno (via __errno_location) and a raw write(2) syscall
case also pass.
The bugs and gaps surfaced by static-musl, and the fixes for each:
- Reloc reader/applier missed every aarch64 branch + PC-rel instruction
reloc that clang emits at -O0..-O2 outside our previous corpus.
Added: CONDBR19 (b.cond), TSTBR14 (tbz/tbnz), LD_PREL_LO19 (literal
pool), ADR_PREL_LO21, ADR_PREL_PG_HI21_NC, ABS16, PREL16. Reader and
applier now cover every reloc kind in musl 1.2.5 aarch64 libc.a.
R_PC32 collapses onto R_REL32 in the applier (PREL32 reads back as
R_PC32; same encoding either way).
- Archive demand-load required SB_GLOBAL defs to drag a member, but
GNU ld and lld pull on weak defs too. Pulling on weak defs is required
for musl's __init_tls (weak def in __init_tls.lo, hard ref from
__libc_start_main.lo).
- layout_sections placed each input section independently in its
RX/R/RW bucket, so the .init prologue from crti.o and the matching
epilogue from crtn.o ended up separated by .text from libc.a — _init
was no longer a contiguous function and stage2 fell through main on
every call (printed three times). Now: same-named input sections in
the same bucket are placed contiguously in first-occurrence order.
- driver/ld.c chmods the output to 0755 on a successful link, matching
GNU ld / lld so the exe is directly runnable.
- New end-to-end harness test/musl/: Containerfile pins Alpine 3.20.10
+ musl 1.2.5-r3, extract.sh produces build/musl-sysroot/{lib,include}
via podman, run.sh links a graduated three-tier corpus (raw syscall,
errno, printf) with cfree ld and runs each under qemu/podman aarch64. Hooked into
test/test.mk as `make test-musl` (opt-in; needs podman).
- rt-aarch64-linux Make target builds rt/build/aarch64-linux/
libcfree_rt.a from cfree's own rt/ tree (LDBL128=1) — provides the
TF / soft-float helpers (__addtf3, __extenddftf2, __floatsitf, ...)
that musl's printf calls for long-double formatting. We deliberately
do not pull clang's compiler-rt or libgcc.
- doc/linker-status.md: refreshed; gap list now ordered as
.symtab/.strtab → build-id → eh_frame → ifunc → TLS local-exec
+ PT_TLS, then dynamic linking.
test-link still 106/106; test-elf 37/37; test-musl 3/3.
Diffstat:
18 files changed, 624 insertions(+), 150 deletions(-)
diff --git a/Makefile b/Makefile
@@ -22,7 +22,7 @@ DRIVER_DEPS = $(DRIVER_OBJS:.o=.d)
LIB_AR = build/libcfree.a
BIN = build/cfree
-.PHONY: all lib driver bin clean
+.PHONY: all lib driver bin rt rt-aarch64-linux clean
# Default: compile libcfree.a, the driver objects, and link the cfree
# binary. The link step currently fails because most libcfree functions
@@ -36,6 +36,18 @@ driver: $(DRIVER_OBJS)
bin: $(BIN)
+# rt/ is the cfree compiler-rt equivalent: __extenddftf2 etc. — the
+# soft-float / 128-bit-int helpers clang emits when targeting our
+# variants. Each variant produces rt/build/<variant>/libcfree_rt.a.
+# The aarch64-linux variant (LDBL128=1) is what static-musl tests need.
+rt-aarch64-linux: rt/build/aarch64-linux/libcfree_rt.a
+
+# This Makefile does not track the rt/ sources, so the archive rule must
+# not be a prerequisite-less file target: once the .a existed it would be
+# considered up to date forever and edits under rt/ would never be
+# rebuilt. Depending on FORCE re-enters the rt/ sub-make on every request
+# and lets *its* dependency graph decide whether anything needs rebuilding.
+rt/build/aarch64-linux/libcfree_rt.a: FORCE
+	$(MAKE) -C rt VARIANT=aarch64-linux OUT=build/aarch64-linux all
+
+# Recipe-less, prerequisite-less target: make treats it as always updated,
+# so every rule that lists it as a prerequisite always runs its recipe.
+FORCE:
+
+# `rt` alias builds whichever variants are typically wanted on the host.
+rt: rt-aarch64-linux
+
+
# Replace the archive (`ar rcs` only adds/updates), so removing a .c file
# also removes its .o from the archive on the next build.
$(LIB_AR): $(LIB_OBJS)
diff --git a/doc/linker-status.md b/doc/linker-status.md
@@ -20,34 +20,101 @@ live in `test/link/` — they are not duplicated in `test/elf/`.
| `test-link` E | 33 | 0 | qemu/podman aarch64 exec |
| `test-link` J | 33 | 0 | JIT in-process incl. GC subgroup |
| `test-link` bad | 2 | 0 | `bad/30_undef_strong` (E + J) |
+| `test-musl` | 3 | 0 | static musl 1.2.5: syscall, errno, printf |
(R = roundtrip; E = link → aarch64 ELF → qemu/podman; J = JIT in-process.)
-All test-link cases currently pass. Archive ingestion and `--gc-sections`
-are now functional; case `25_gc_sections` was split into `25a..25h`, one
-case per liveness rule (see Recently landed).
+`test-musl` links real C against pinned musl libc.a + cfree's own
+`rt/build/aarch64-linux/libcfree_rt.a` (TF / soft-float builtins) and
+runs the result under qemu/podman. Sysroot is produced by
+`test/musl/Containerfile` (Alpine 3.20 + musl 1.2.5-r3). Excluded from
+the default `make test` because it needs podman.
---
-## test-link / JIT — Apple Silicon execmem (resolved)
+## What works today
+
+`cfree ld` links real static aarch64-linux executables, including
+against musl libc.a + cfree's own `libcfree_rt.a`. printf("hello, musl")
+works end-to-end. Beyond that:
+
+- **Reloc kinds applied:** ABS{16,32,64}, PREL{16}, REL32, PC32,
+ CONDBR19, TSTBR14, LD_PREL_LO19, ADR_PREL_LO21, JUMP26 / CALL26,
+ ADR_PREL_PG_HI21{,_NC}, ADD_ABS_LO12_NC,
+ LDST{8,16,32,64,128}_ABS_LO12_NC,
+ ADR_GOT_PAGE / LD64_GOT_LO12_NC. Plus a synthetic R_ABS64 emitter
+ for GOT slot fill. **Reads every reloc kind in musl 1.2.5 aarch64
+ libc.a.**
+- **Symbol resolution:** STB_GLOBAL/WEAK/LOCAL replacement strength;
+ STV_HIDDEN; SHN_COMMON coalesce-to-largest; STT_FILE / STT_SECTION
+ pass-through. Weak archive defs satisfy unresolved refs (matches
+ GNU ld / lld; required for musl's weak `__init_tls`).
+- **Linker-synthesized symbols:** `__init_array_start/end`,
+  `__fini_array_start/end`, and general `__start_<X>`/`__stop_<X>`
+  for any section named `<X>`.
+- **Section / segment layout:** three-bucket RX / R / RW partition,
+ BSS, init/fini/preinit_array, synthetic `.got`. **Same-named input
+ sections merge by first-occurrence** — required for `_init`/`_fini`
+ to be contiguous when `.init` / `.fini` come from crti.o + crtn.o.
+ `-ffunction-sections` / `-fdata-sections` flow through naturally.
+- **Inputs:** loose `.o`, `.a` (demand + `--whole-archive`),
+ `--start-group` / `--end-group` cyclic resolution.
+- **GC:** `--gc-sections` at section granularity. Roots: entry sym,
+ init/fini/preinit_array, `SF_RETAIN` (`SHF_GNU_RETAIN`),
+ `__start_/__stop_` referents. Edges follow per-section relocs to
+ fixed point.
+- **Format fidelity:** ELF read+write byte-stable for the supported
+ subset; `EI_OSABI=GNU` flips automatically when GNU extensions are
+ present.
+- **Driver:** `cfree ld -static -o out crt1.o crti.o user.o libc.a
+ libcfree_rt.a crtn.o` works. Output is chmod 0755 on success.
+- **JIT path** runs the same resolved image in-process; MAP_JIT on
+ Apple Silicon.
-`driver/env.c` uses `MAP_JIT` for any region whose final perms include
-`PROT_EXEC`, with `pthread_jit_write_protect_np()` toggled around
-populate / protect. `src/link/link_jit.c` reserves one `JitSegMap` per
-`LinkSegment` so each segment can carry its own MAP_JIT/perms hint
-(MAP_JIT can't be partial within a single mapping). Reloc apply and
-`cfree_jit_lookup` translate image-vaddr → runtime via
-`vaddr_to_runtime(img, segs, vaddr)`. `flush_icache` runs per code
-segment after the protect flip.
-
-Host requires the `com.apple.security.cs.allow-jit` entitlement when
-MAP_JIT is in play. For ad-hoc dev:
-`codesign -s - --entitlements jit.plist <bin>`.
+---
-The intermittent JIT hangs previously documented as "Blocker 0" were
-W^X violations under the hardened runtime; they're gone after the
-MAP_JIT switch. Remaining J-path failures are real feature gaps
-(table above), not stability issues.
+## Gaps before this can replace GNU ld / lld
+
+Each row below would break a typical real-world Linux invocation. Roughly
+ordered by how often the gap actually bites.
+
+| Gap | What breaks | Effort |
+|-----|-------------|--------|
+| **`.symtab` / `.strtab` in the exe** | `nm`, `objdump -t`, `gdb` see no symbols; backtraces unsymbolicated. The E-path symbol verifier in `run.sh` is already wired and activates the moment this lands. | small |
+| **`.note.gnu.build-id`, `.eh_frame`, `.debug_*` in the exe** | No build-id for deterministic-build tooling. No `.eh_frame` → C++ EH and `backtrace()` broken. No DWARF → `gdb` blind. | medium (eh_frame + debug); small (build-id) |
+| **STT_GNU_IFUNC trampoline** | Reader knows the kind; linker doesn't synthesize the indirection. Anything using `__attribute__((ifunc))` (much of glibc) fails. | medium |
+| **TLS local-exec apply + PT_TLS** | `R_AARCH64_TLSLE_*` are read but `link_reloc_apply` panics; no `PT_TLS` emitted. Not needed for musl-aarch64 (errno routes through `__errno_location()` and there are no `.tdata`/`.tbss` in libc.a) but any user TU using `__thread` would hit this. | medium |
+| **TLSGD / TLSIE / TLSLD relocs** | Read but not applied. Needed for `-fpic` TLS or shared-lib TLS — moot until dynamic linking lands. | medium |
+| **Dynamic linking: PT_DYNAMIC, PT_INTERP, PLT, DT_NEEDED** | Cannot link against any `.so`. Static-only. | large |
+| **PIE / ET_DYN executables** | Driver accepts `-pie` but the writer always emits ET_EXEC at fixed `IMAGE_BASE`. Tied to dynamic-linking work. | medium (depends on dynamic) |
+| **Linker scripts** | `link_set_script` panics with "not yet implemented". Parser exists in `cfree_link_script_parse` but isn't wired into `link_resolve`. | medium |
+| **COMDAT-group atomicity in `--gc-sections`** | C++ inline / weak-template instantiations under `SHF_GROUP` could lose group members. C-only inputs don't exercise it. | small |
+| **`crt1.o`/`crti.o`/`crtn.o` auto-link** | Driver doesn't auto-include a C runtime; the user passes `crt1.o crti.o ... crtn.o` explicitly. Cosmetic. | small (driver-only) |
+
+**Bottom line:** for static aarch64-linux executables, `cfree ld` is
+already a working linker — including against real musl. The next
+priorities, roughly in order:
+
+1. **`.symtab` / `.strtab` in the exe** — unblocks `nm`, `objdump -t`,
+   `gdb`, and the E-path symbol verifier already wired in `run.sh`.
+2. **`.note.gnu.build-id`** — small standalone change; deterministic-
+ build tooling, package managers, and crash-report symbol-server
+ lookups all key off the build-id.
+3. **`.eh_frame`** — required for C++ exceptions, `_Unwind_*`, and
+ `backtrace()` past the innermost frame.
+4. **STT_GNU_IFUNC trampoline** — reader already recognizes it; need
+ the call-site indirection so glibc-style `__attribute__((ifunc))`
+ resolvers work.
+5. **TLS local-exec apply + PT_TLS** — handle `R_AARCH64_TLSLE_*` and
+ emit a PT_TLS segment so user TUs with `__thread` /
+ `_Thread_local` link (`.tdata` / `.tbss` flowing through layout).
+ Not needed for musl libc.a itself (it routes errno through
+ `__errno_location()`), but needed for user code that declares
+ thread-locals.
+
+After those the next big lift is full dynamic linking (PT_DYNAMIC +
+PLT + PT_INTERP + DT_NEEDED), which also unlocks PIE output and TLS
+GD / IE / LD modes.
---
@@ -64,95 +131,12 @@ MAP_JIT switch. Remaining J-path failures are real feature gaps
lines carry per-case ms timings; a totals line prints per-path wall
time.
-On arm64-host podman, `--platform linux/arm64` triggers a per-invocation
-manifest lookup (~30s). The runner only adds it when the host isn't
-already arm64; this kept the E path at ~200ms/case.
-
----
-
-## Remaining todos (rough priority)
-
-### Linker
-
-1. **COMDAT group atomicity for `--gc-sections`** — `SHF_GROUP`
- members are currently treated independently. C-only inputs don't
- exercise this; C++ inline / weak-template inputs would. When the
- first such case lands, extend the GC pass: when any member of a
- group becomes live, all members of the same `ObjGroup` follow.
-2. **Symtab in cfree-link-exe** — the executable writer emits PHDRs
- only (no `.symtab` / `.strtab`). The E-path readelf-based
- `gc_absent` / `gc_present` verifier in `test/link/run.sh` is
- already wired; once a symtab is emitted the checks will activate
- without further harness changes.
-3. **`-ffunction-sections` / `-fdata-sections` from cfree's own
- compiler.** `arch.h:254` notes that `text_section_id` is
- per-function so the model is in place. Self-hosted GC tests
- would compose `cfree cc -ffunction-sections … -c` with `cfree ld
- --gc-sections`. Not yet exercised.
-
-## Recently landed
-
-- **`--gc-sections`** (full, except COMDAT atomicity). Section
- granularity is the input `(input_idx, ObjSecId)` pair. Roots:
- the entry symbol's section, every `SSEM_INIT_ARRAY` /
- `FINI_ARRAY` / `PREINIT_ARRAY`, and `SF_RETAIN` (`SHF_GNU_RETAIN`,
- i.e. `__attribute__((retain))`). Edges follow per-section relocs;
- references to `__start_<X>` / `__stop_<X>` additionally promote
- every section named `<X>`. After layout, defs in dropped sections
- are cleared (`defined = 0`) so `cfree_jit_lookup` returns NULL.
- Implementation in `link_layout.c:gc_compute` /
- `gc_drop_dead_globals`; pass runs after `resolve_symbols`, before
- `layout_sections`.
-- **`__start_<X>` / `__stop_<X>` boundary synthesis.** Generalized
- from the init/fini-only `emit_array_boundaries`. Any undef sym
- whose name parses as `__start_<id>` / `__stop_<id>` resolves to
- the low / high vaddr of every output `LinkSection` whose source
- was named `<id>`. Cases: `25h_gc_start_stop`.
-- **`SHF_GNU_RETAIN`.** New `SF_RETAIN` flag in `obj.h`; decoded in
- `elf_read.c`, encoded in `elf_emit.c`; emitter also bumps
- `EI_OSABI` to `ELFOSABI_GNU` when any section carries it
- (matching clang's behavior for byte-stable roundtrip).
-- **`cfree ld --gc-sections` / `--no-gc-sections`** in `driver/ld.c`.
-- **Test corpus expansion.** `25_gc_sections` split into
- `25a_gc_basic` … `25h_gc_start_stop`, one rule per case. New
- per-case `cflags` marker lets a case ask for
- `-ffunction-sections` / `-fdata-sections` without affecting other
- cases. New `gc_present` marker (mirror of `gc_absent`); `jit-runner`
- gained `--check-present SYM`. E-path readelf-based
- `gc_absent`/`gc_present` checks are wired but skip silently when
- the exe has no symtab.
-- **Static GOT** for `R_AARCH64_ADR_GOT_PAGE` /
- `R_AARCH64_LD64_GOT_LO12_NC`: `layout_got` collects unique
- GOT-needing symbols, appends a synthetic `.got` segment carrying
- one 8-byte slot per symbol, redirects the GOT-page/LO12 reloc
- target to the slot, and emits a per-slot `R_ABS64` reloc that
- fills the slot with the symbol's resolved runtime vaddr at apply
- time. Weak undef stays at `vaddr=0` so the slot reads `NULL`.
- Fixes cases `14_weak_present`, `16_weak_undef`.
-- **`vaddr_to_runtime` / `vaddr_to_write` end-of-segment lookup**:
- one-past-end vaddrs (e.g. `__fini_array_end` when `.fini_array`
- is the last section in its segment) now resolve. Fixes cases
- `21_fini_array`, `22_init_fini_both`, `23_init_order` on the J
- path.
-- **Archive ingestion** (`link_add_archive_bytes` +
- `link_ingest_archives`): demand-load and `--whole-archive` both
- iterate to a fixed point. Fixes `26_archive_demand`,
- `27_archive_whole`.
-- **JIT runner** already invokes `.init_array` (in
- `cfree_jit_from_image`), `cfree_jit_run_dtors` for `.fini_array`,
- and `test_post_fini` after `test_main`. `--check-absent SYM` and
- `--check-present SYM` cover the gc-sections verification.
-
---
## Build hygiene (still load-bearing)
- `Makefile` uses `-MMD -MP` so header edits force dependents to rebuild.
-- `ar rcs` is preceded by `rm -f $(LIB_AR)` so deleted .c files don't
- leave stale .o entries in the archive.
-- `cfree-roundtrip`, `link-exe-runner`, `jit-runner` are Make targets
- with `$(LIB_AR)` as a prerequisite — `run.sh` *locates* them, never
- *builds* them.
If a test result looks impossible given the source, suspect staleness
-first (`make clean && make lib && make test-link`).
+first (`make clean && make lib && make test-link`). If that then works,
+investigate the source of staleness and fix the Makefile.
diff --git a/driver/ld.c b/driver/ld.c
@@ -2,6 +2,7 @@
#include "lib_resolve.h"
#include <stdint.h>
+#include <sys/stat.h>
/* `cfree ld` — link object/archive inputs into an executable or shared
* library. The driver loads each input via env.file_io, optionally parses a
@@ -686,6 +687,12 @@ static int ld_run_link(LdOptions* o)
out:
if (writer) cfree_writer_close(writer);
+ /* Match GNU ld / lld: a successful link chmods the output to 0755
+ * so the file is directly runnable. Done after closing the writer
+ * so the bits are stable on disk. */
+ if (rc == 0 && o->output_path) {
+ (void)chmod(o->output_path, 0755);
+ }
if (script && compiler) cfree_link_script_free(compiler, script);
if (compiler) cfree_compiler_free(compiler);
release_file(&script_lf);
diff --git a/src/api/pipeline.c b/src/api/pipeline.c
@@ -960,10 +960,17 @@ static const char* reloc_kind_name(u16 kind)
case R_ARM_B26: return "R_ARM_B26";
case R_AARCH64_CALL26: return "R_AARCH64_CALL26";
case R_AARCH64_JUMP26: return "R_AARCH64_JUMP26";
+ case R_AARCH64_CONDBR19: return "R_AARCH64_CONDBR19";
+ case R_AARCH64_TSTBR14: return "R_AARCH64_TSTBR14";
+ case R_AARCH64_LD_PREL_LO19: return "R_AARCH64_LD_PREL_LO19";
+ case R_AARCH64_ADR_PREL_LO21: return "R_AARCH64_ADR_PREL_LO21";
case R_AARCH64_ADR_GOT_PAGE: return "R_AARCH64_ADR_GOT_PAGE";
case R_AARCH64_LD64_GOT_LO12_NC: return "R_AARCH64_LD64_GOT_LO12_NC";
case R_AARCH64_ADR_PREL_PG_HI21: return "R_AARCH64_ADR_PREL_PG_HI21";
+ case R_AARCH64_ADR_PREL_PG_HI21_NC:return "R_AARCH64_ADR_PREL_PG_HI21_NC";
case R_AARCH64_ADD_ABS_LO12_NC: return "R_AARCH64_ADD_ABS_LO12_NC";
+ case R_AARCH64_ABS16: return "R_AARCH64_ABS16";
+ case R_AARCH64_PREL16: return "R_AARCH64_PREL16";
case R_AARCH64_LDST8_ABS_LO12_NC: return "R_AARCH64_LDST8_ABS_LO12_NC";
case R_AARCH64_LDST16_ABS_LO12_NC: return "R_AARCH64_LDST16_ABS_LO12_NC";
case R_AARCH64_LDST32_ABS_LO12_NC: return "R_AARCH64_LDST32_ABS_LO12_NC";
diff --git a/src/link/link_layout.c b/src/link/link_layout.c
@@ -630,12 +630,37 @@ typedef struct SecRef {
LinkSectionId link_sec_id;
} SecRef;
+/* Within a bucket, input sections sharing a name are placed contiguously
+ * — the standard "merge sections by name" rule. Without this the .init
+ * prologue from crti.o and the matching epilogue from crtn.o (both in
+ * a .init section) get separated by intervening .text, and `_init` is
+ * no longer a contiguous function. Placement walk:
+ *
+ *   1. Build a flat list of (input_idx, obj_sec_id) for kept+live
+ *      sections.
+ *   2. For each bucket, do a stable group-by-name pass over the list:
+ *      take the first ungrouped section in input order, claim every
+ *      later same-name+same-bucket section in input order, lay out the
+ *      whole group adjacent. Then move to the next ungrouped section.
+ *   3. Different-name groups appear in first-occurrence order.
+ *
+ * O(N²) on section count, fine for our N.
+ *
+ * PlaceEntry is one row of that flat list: one kept+live input section
+ * plus the two keys (name, bucket) used to group it. */
+typedef struct PlaceEntry {
+    u32       input_idx;   /* index into l->inputs of the owning input */
+    ObjSecId  obj_sec_id;  /* section id within that input's object */
+    Sym       name;        /* copy of the section's name; groups are matched by comparing this */
+    SegBucket bucket;      /* result of bucket_for(s->flags) for the section */
+    u8        placed;      /* 0 until the section has been laid out, then 1 */
+    u8        pad[3];      /* explicit padding; presumably rounds the struct to a 4-byte multiple — not read or written by layout_sections */
+} PlaceEntry;
+
+
static void layout_sections(Linker* l, LinkImage* img, const GcLive* g)
{
Heap* h = img->heap;
- /* First pass: count kept sections (filtered by GC liveness). */
- u32 ii, j;
- u32 total_kept = 0;
+ u32 ii, j;
+ u32 total_kept = 0;
+
+ /* Pass 0: count kept sections (filtered by GC liveness). */
for (ii = 0; ii < l->ninputs; ++ii) {
ObjBuilder* ob = l->inputs[ii].obj;
for (j = 1; j < obj_section_count(ob); ++j) {
@@ -651,6 +676,30 @@ static void layout_sections(Linker* l, LinkImage* img, const GcLive* g)
if (total_kept && !img->sections)
compiler_panic(img->c, no_loc(), "link: oom on sections");
+ /* Pass 1: collect kept sections into a flat list. */
+ PlaceEntry* entries = total_kept
+ ? (PlaceEntry*)h->alloc(h, sizeof(*entries) * total_kept,
+ _Alignof(PlaceEntry))
+ : NULL;
+ if (total_kept && !entries)
+ compiler_panic(img->c, no_loc(), "link: oom on placement entries");
+ {
+ u32 e = 0;
+ for (ii = 0; ii < l->ninputs; ++ii) {
+ ObjBuilder* ob = l->inputs[ii].obj;
+ for (j = 1; j < obj_section_count(ob); ++j) {
+ const Section* s = obj_section_get(ob, j);
+ if (!s || !section_kept(s) || !gc_live_get(g, ii, j)) continue;
+ entries[e].input_idx = ii;
+ entries[e].obj_sec_id = j;
+ entries[e].name = s->name;
+ entries[e].bucket = bucket_for(s->flags);
+ entries[e].placed = 0;
+ ++e;
+ }
+ }
+ }
+
/* Three segment buckets; tracks per-bucket size during scan and
* per-section file_offset within the bucket. */
u64 seg_size[SEG_NBUCKETS] = {0};
@@ -660,36 +709,35 @@ static void layout_sections(Linker* l, LinkImage* img, const GcLive* g)
* trailing nobits per bucket — only SEG_RW gets BSS in practice. */
u64 seg_bss_extra[SEG_NBUCKETS] = {0};
- /* Walk inputs in stable order and append to buckets. */
- for (ii = 0; ii < l->ninputs; ++ii) {
- ObjBuilder* ob = l->inputs[ii].obj;
- InputMap* m = &img->input_maps[ii];
- for (j = 1; j < obj_section_count(ob); ++j) {
- const Section* s = obj_section_get(ob, j);
- SegBucket bucket;
- u32 align;
+ /* Pass 2: place sections, grouped by name within each bucket and
+ * in first-occurrence order across groups. */
+ for (u32 i = 0; i < total_kept; ++i) {
+ if (entries[i].placed) continue;
+
+ Sym group_name = entries[i].name;
+ SegBucket bucket = entries[i].bucket;
+
+ /* Walk the remaining list in input order; each match in the
+ * same bucket+name lays out adjacent. */
+ for (u32 k = i; k < total_kept; ++k) {
+ PlaceEntry* pe = &entries[k];
+ if (pe->placed) continue;
+ if (pe->bucket != bucket || pe->name != group_name) continue;
+
+ ObjBuilder* ob = l->inputs[pe->input_idx].obj;
+ InputMap* m = &img->input_maps[pe->input_idx];
+ const Section* s = obj_section_get(ob, pe->obj_sec_id);
+ u32 align = s->align ? s->align : 1u;
u64 ofs;
- LinkSection* ls;
- LinkSectionId lsid;
+ LinkSection* ls;
+ LinkSectionId lsid;
- if (!s || !section_kept(s) || !gc_live_get(g, ii, j)) continue;
- bucket = bucket_for(s->flags);
- align = s->align ? s->align : 1u;
-
- /* Bump bucket size up to alignment, then place. BSS
- * (NOBITS) only contributes to mem_size; everything
- * preceding it in the bucket has already accumulated
- * file_size. */
if (s->sem == SSEM_NOBITS) {
- /* Place after current file_size + any prior bss. */
u64 cursor = seg_size[bucket] + seg_bss_extra[bucket];
cursor = align_up_u64(cursor, align);
seg_bss_extra[bucket] = cursor + (u64)s->bss_size - seg_size[bucket];
ofs = cursor;
} else {
- /* If we'd be appending NOBITS bytes ahead of progbits,
- * promote to file bytes (rare with our bucket policy
- * but defensive). */
seg_size[bucket] += seg_bss_extra[bucket];
seg_bss_extra[bucket] = 0;
ofs = align_up_u64(seg_size[bucket], align);
@@ -703,22 +751,23 @@ static void layout_sections(Linker* l, LinkImage* img, const GcLive* g)
ls = &img->sections[img->nsections++];
memset(ls, 0, sizeof(*ls));
ls->id = lsid;
- ls->input_id = l->inputs[ii].id;
- ls->obj_section_id = j;
- ls->segment_id = LINK_SEG_NONE; /* filled below */
- ls->input_offset = ofs; /* offset within segment */
- ls->file_offset = ofs; /* image-relative; segment offset added later */
- ls->vaddr = ofs; /* image-relative; segment vaddr added later */
+ ls->input_id = l->inputs[pe->input_idx].id;
+ ls->obj_section_id = pe->obj_sec_id;
+ ls->segment_id = LINK_SEG_NONE;
+ ls->input_offset = ofs;
+ ls->file_offset = ofs;
+ ls->vaddr = ofs;
ls->size = (s->sem == SSEM_NOBITS) ? s->bss_size : s->bytes.total;
ls->flags = s->flags;
ls->align = align;
- /* Stash the bucket in the section's segment_id slot
- * temporarily — fixed up after segments are created. */
ls->segment_id = (LinkSegmentId)(bucket + 1u); /* 1..3 sentinel */
- m->section[j] = lsid;
+ m->section[pe->obj_sec_id] = lsid;
+ pe->placed = 1;
}
}
+ if (entries) h->free(h, entries, sizeof(*entries) * total_kept);
+
/* Materialize one LinkSegment per non-empty bucket, then assign
* absolute (image-relative) vaddr/file_offset to each segment and
* fix up section.{vaddr,file_offset,segment_id}. */
@@ -1101,8 +1150,13 @@ static u8 reloc_width(RelocKind k)
return 4;
case R_ABS64: case R_REL64: case R_PC64:
return 8;
+ case R_AARCH64_ABS16: case R_AARCH64_PREL16:
+ return 2;
case R_AARCH64_JUMP26: case R_AARCH64_CALL26:
+ case R_AARCH64_CONDBR19: case R_AARCH64_TSTBR14:
+ case R_AARCH64_LD_PREL_LO19: case R_AARCH64_ADR_PREL_LO21:
case R_AARCH64_ADR_PREL_PG_HI21:
+ case R_AARCH64_ADR_PREL_PG_HI21_NC:
case R_AARCH64_ADD_ABS_LO12_NC:
case R_AARCH64_LDST8_ABS_LO12_NC:
case R_AARCH64_LDST16_ABS_LO12_NC:
@@ -1478,9 +1532,12 @@ static void scan_presence(Linker* l, SymHash* defined, SymHash* undefs)
}
}
-/* True if `mem` defines an SB_GLOBAL symbol that's listed in `wanted`
- * and not already in `defined`. Standard demand-load: weak defs do not
- * trigger archive pull. */
+/* True if `mem` defines a non-undef SB_GLOBAL or SB_WEAK symbol that's
+ * in `wanted` and not already in `defined`. Both GNU ld and lld pull
+ * archive members on weak defs against an unresolved undef — the
+ * "weak doesn't drag" rule applies to weak *references*, not weak
+ * definitions. (musl's __init_tls is a weak def and must be pulled
+ * to satisfy __libc_start_main's hard ref.) */
static int member_satisfies(LinkArchiveMember* mem,
const SymHash* defined, const SymHash* wanted)
{
@@ -1492,7 +1549,7 @@ static int member_satisfies(LinkArchiveMember* mem,
const ObjSym* s = e.sym;
if (s->name == 0) continue;
if (s->kind == SK_UNDEF) continue;
- if (s->bind != SB_GLOBAL) continue;
+ if (s->bind != SB_GLOBAL && s->bind != SB_WEAK) continue;
if (symhash_get(wanted, s->name) == LINK_SYM_NONE) continue;
if (symhash_get(defined, s->name) != LINK_SYM_NONE) continue;
hit = 1;
diff --git a/src/link/link_reloc.c b/src/link/link_reloc.c
@@ -14,6 +14,12 @@
static SrcLoc no_loc(void) { SrcLoc l = {0,0,0}; return l; }
+/* Store the 16-bit value `v` into p[0..1], least-significant byte first
+ * (little-endian), independent of host byte order. */
+static void wr_u16_le(u8* p, u16 v)
+{
+    const u8 lo = (u8)(v & 0xffu);
+    const u8 hi = (u8)((v >> 8) & 0xffu);
+
+    p[0] = lo;
+    p[1] = hi;
+}
+
static void wr_u32_le(u8* p, u32 v)
{
p[0] = (u8)(v & 0xffu);
@@ -48,11 +54,79 @@ void link_reloc_apply(Compiler* c, RelocKind k, u8* P_bytes,
wr_u64_le(P_bytes, v);
return;
}
- case R_REL32: {
+ case R_REL32:
+ case R_PC32: {
+ /* AArch64 ELF: PREL32 maps to either of these; both encode a
+ * 32-bit signed PC-relative displacement. The cfree-canonical
+ * distinction (section-relative vs PC-relative) collapses on
+ * AArch64 because the linker resolves to absolute vaddrs. */
i64 v = (i64)S + A - (i64)P;
wr_u32_le(P_bytes, (u32)((u64)v & 0xffffffffu));
return;
}
+ case R_AARCH64_ABS16: {
+ u64 v = S + (u64)A;
+ wr_u16_le(P_bytes, (u16)(v & 0xffffu));
+ return;
+ }
+ case R_AARCH64_PREL16: {
+ i64 v = (i64)S + A - (i64)P;
+ wr_u16_le(P_bytes, (u16)((u64)v & 0xffffu));
+ return;
+ }
+ case R_AARCH64_CONDBR19:
+ case R_AARCH64_LD_PREL_LO19: {
+ /* B.cond / CB(N)Z / LDR (literal) — imm19 in 4-byte units,
+ * signed, at bits [23:5]. Range: ±1MiB. */
+ i64 disp = (i64)S + A - (i64)P;
+ u32 instr;
+ u32 imm19;
+ if (disp & 3)
+ compiler_panic(c, no_loc(),
+ "link: imm19 reloc misaligned displacement");
+ if (disp < -(i64)(1 << 20) || disp >= (i64)(1 << 20))
+ compiler_panic(c, no_loc(),
+ "link: imm19 reloc out of range (need ±1MiB)");
+ imm19 = (u32)((disp >> 2) & 0x7ffffu);
+ instr = rd_u32_le(P_bytes);
+ instr = (instr & ~(0x7ffffu << 5)) | (imm19 << 5);
+ wr_u32_le(P_bytes, instr);
+ return;
+ }
+ case R_AARCH64_TSTBR14: {
+ /* TBZ/TBNZ — imm14 in 4-byte units, signed, at bits [18:5].
+ * Range: ±32KiB. */
+ i64 disp = (i64)S + A - (i64)P;
+ u32 instr;
+ u32 imm14;
+ if (disp & 3)
+ compiler_panic(c, no_loc(),
+ "link: TSTBR14 misaligned displacement");
+ if (disp < -(i64)(1 << 15) || disp >= (i64)(1 << 15))
+ compiler_panic(c, no_loc(),
+ "link: TSTBR14 out of range (need ±32KiB)");
+ imm14 = (u32)((disp >> 2) & 0x3fffu);
+ instr = rd_u32_le(P_bytes);
+ instr = (instr & ~(0x3fffu << 5)) | (imm14 << 5);
+ wr_u32_le(P_bytes, instr);
+ return;
+ }
+ case R_AARCH64_ADR_PREL_LO21: {
+ /* ADR — byte-granularity imm21, encoded as immlo[30:29] +
+ * immhi[23:5]. No 12-bit shift (unlike ADRP). Range: ±1MiB. */
+ i64 disp = (i64)S + A - (i64)P;
+ u32 instr;
+ u32 immlo, immhi;
+ if (disp < -(i64)(1 << 20) || disp >= (i64)(1 << 20))
+ compiler_panic(c, no_loc(),
+ "link: ADR_PREL_LO21 out of range (need ±1MiB)");
+ immlo = (u32)(disp & 0x3u);
+ immhi = (u32)((disp >> 2) & 0x7ffffu);
+ instr = rd_u32_le(P_bytes);
+ instr = (instr & 0x9f00001fu) | (immlo << 29) | (immhi << 5);
+ wr_u32_le(P_bytes, instr);
+ return;
+ }
case R_AARCH64_JUMP26:
case R_AARCH64_CALL26: {
/* B/BL imm26 — branch displacement in 4-byte units, signed.
@@ -74,17 +148,21 @@ void link_reloc_apply(Compiler* c, RelocKind k, u8* P_bytes,
return;
}
case R_AARCH64_ADR_GOT_PAGE:
- case R_AARCH64_ADR_PREL_PG_HI21: {
+ case R_AARCH64_ADR_PREL_PG_HI21:
+ case R_AARCH64_ADR_PREL_PG_HI21_NC: {
/* ADRP — page-relative imm21, encoded as immlo[30:29] +
* immhi[23:5]. Effective immediate is (S+A) page minus P page,
- * shifted right by 12, sign-extended to 33 bits. */
+ * shifted right by 12, sign-extended to 33 bits. _NC variant
+ * skips the range check (compiler asserts it can't overflow,
+ * e.g. when paired with explicit page bracketing). */
i64 page_s = ((i64)S + A) & ~(i64)0xfff;
i64 page_p = (i64)P & ~(i64)0xfff;
i64 disp = page_s - page_p;
i64 imm21 = disp >> 12;
u32 instr;
u32 immlo, immhi;
- if (imm21 < -(i64)(1 << 20) || imm21 >= (i64)(1 << 20))
+ if (k != R_AARCH64_ADR_PREL_PG_HI21_NC &&
+ (imm21 < -(i64)(1 << 20) || imm21 >= (i64)(1 << 20)))
compiler_panic(c, no_loc(),
"link: ADR_PREL_PG_HI21 out of range (need ±4GiB)");
immlo = (u32)(imm21 & 0x3u);
diff --git a/src/obj/elf.h b/src/obj/elf.h
@@ -126,11 +126,18 @@
#define ELF_R_AARCH64_NONE 0
#define ELF_R_AARCH64_ABS64 257
#define ELF_R_AARCH64_ABS32 258
+#define ELF_R_AARCH64_ABS16 259
#define ELF_R_AARCH64_PREL64 260
#define ELF_R_AARCH64_PREL32 261
+#define ELF_R_AARCH64_PREL16 262
+#define ELF_R_AARCH64_LD_PREL_LO19 273
+#define ELF_R_AARCH64_ADR_PREL_LO21 274
#define ELF_R_AARCH64_ADR_PREL_PG_HI21 275
+#define ELF_R_AARCH64_ADR_PREL_PG_HI21_NC 276
#define ELF_R_AARCH64_ADD_ABS_LO12_NC 277
#define ELF_R_AARCH64_LDST8_ABS_LO12_NC 278
+#define ELF_R_AARCH64_TSTBR14 279
+#define ELF_R_AARCH64_CONDBR19 280
#define ELF_R_AARCH64_JUMP26 282
#define ELF_R_AARCH64_CALL26 283
#define ELF_R_AARCH64_LDST16_ABS_LO12_NC 284
diff --git a/src/obj/elf_reloc_aarch64.c b/src/obj/elf_reloc_aarch64.c
@@ -23,8 +23,15 @@ u32 elf_aarch64_reloc_to(u32 kind /* RelocKind */)
case R_REL32: return ELF_R_AARCH64_PREL32;
case R_AARCH64_JUMP26: return ELF_R_AARCH64_JUMP26;
case R_AARCH64_CALL26: return ELF_R_AARCH64_CALL26;
+ case R_AARCH64_CONDBR19: return ELF_R_AARCH64_CONDBR19;
+ case R_AARCH64_TSTBR14: return ELF_R_AARCH64_TSTBR14;
+ case R_AARCH64_LD_PREL_LO19: return ELF_R_AARCH64_LD_PREL_LO19;
+ case R_AARCH64_ADR_PREL_LO21: return ELF_R_AARCH64_ADR_PREL_LO21;
case R_AARCH64_ADR_PREL_PG_HI21: return ELF_R_AARCH64_ADR_PREL_PG_HI21;
+ case R_AARCH64_ADR_PREL_PG_HI21_NC: return ELF_R_AARCH64_ADR_PREL_PG_HI21_NC;
case R_AARCH64_ADD_ABS_LO12_NC: return ELF_R_AARCH64_ADD_ABS_LO12_NC;
+ case R_AARCH64_ABS16: return ELF_R_AARCH64_ABS16;
+ case R_AARCH64_PREL16: return ELF_R_AARCH64_PREL16;
case R_AARCH64_LDST8_ABS_LO12_NC: return ELF_R_AARCH64_LDST8_ABS_LO12_NC;
case R_AARCH64_LDST16_ABS_LO12_NC: return ELF_R_AARCH64_LDST16_ABS_LO12_NC;
case R_AARCH64_LDST32_ABS_LO12_NC: return ELF_R_AARCH64_LDST32_ABS_LO12_NC;
@@ -57,8 +64,15 @@ u32 elf_aarch64_reloc_from(u32 elf_type)
case ELF_R_AARCH64_PREL32: return R_PC32;
case ELF_R_AARCH64_JUMP26: return R_AARCH64_JUMP26;
case ELF_R_AARCH64_CALL26: return R_AARCH64_CALL26;
+ case ELF_R_AARCH64_CONDBR19: return R_AARCH64_CONDBR19;
+ case ELF_R_AARCH64_TSTBR14: return R_AARCH64_TSTBR14;
+ case ELF_R_AARCH64_LD_PREL_LO19: return R_AARCH64_LD_PREL_LO19;
+ case ELF_R_AARCH64_ADR_PREL_LO21: return R_AARCH64_ADR_PREL_LO21;
case ELF_R_AARCH64_ADR_PREL_PG_HI21: return R_AARCH64_ADR_PREL_PG_HI21;
+ case ELF_R_AARCH64_ADR_PREL_PG_HI21_NC: return R_AARCH64_ADR_PREL_PG_HI21_NC;
case ELF_R_AARCH64_ADD_ABS_LO12_NC: return R_AARCH64_ADD_ABS_LO12_NC;
+ case ELF_R_AARCH64_ABS16: return R_AARCH64_ABS16;
+ case ELF_R_AARCH64_PREL16: return R_AARCH64_PREL16;
case ELF_R_AARCH64_LDST8_ABS_LO12_NC: return R_AARCH64_LDST8_ABS_LO12_NC;
case ELF_R_AARCH64_LDST16_ABS_LO12_NC: return R_AARCH64_LDST16_ABS_LO12_NC;
case ELF_R_AARCH64_LDST32_ABS_LO12_NC: return R_AARCH64_LDST32_ABS_LO12_NC;
diff --git a/src/obj/obj.h b/src/obj/obj.h
@@ -102,10 +102,14 @@ typedef enum RelocKind {
R_AARCH64_ADR_GOT_PAGE, R_AARCH64_LD64_GOT_LO12_NC,
R_ARM_CALL, R_ARM_MOVW, R_ARM_MOVT, R_ARM_B26,
R_AARCH64_JUMP26, R_AARCH64_CALL26,
- R_AARCH64_ADR_PREL_PG_HI21, R_AARCH64_ADD_ABS_LO12_NC,
+ R_AARCH64_CONDBR19, R_AARCH64_TSTBR14,
+ R_AARCH64_LD_PREL_LO19, R_AARCH64_ADR_PREL_LO21,
+ R_AARCH64_ADR_PREL_PG_HI21, R_AARCH64_ADR_PREL_PG_HI21_NC,
+ R_AARCH64_ADD_ABS_LO12_NC,
R_AARCH64_LDST8_ABS_LO12_NC, R_AARCH64_LDST16_ABS_LO12_NC,
R_AARCH64_LDST32_ABS_LO12_NC, R_AARCH64_LDST64_ABS_LO12_NC,
R_AARCH64_LDST128_ABS_LO12_NC,
+ R_AARCH64_ABS16, R_AARCH64_PREL16,
/* AArch64 TLS Local-Exec model. */
R_AARCH64_TLSLE_ADD_TPREL_HI12,
R_AARCH64_TLSLE_ADD_TPREL_LO12,
diff --git a/test/musl/Containerfile b/test/musl/Containerfile
@@ -0,0 +1,34 @@
+# test/musl/Containerfile — produces a static musl aarch64 sysroot
+# tarball on stdout. Pinned to Alpine 3.20.10 + musl 1.2.5.
+#
+# Usage (driven by test/musl/extract.sh):
+# podman build --platform linux/arm64 -f Containerfile -t cfree-musl-sysroot .
+# podman run --rm cfree-musl-sysroot > sysroot.tar
+#
+# The image's ENTRYPOINT writes a tar of /sysroot to stdout. The extract
+# script unpacks it into build/musl-sysroot/ on the host.
+FROM docker.io/arm64v8/alpine:3.20.10
+
+# musl-dev: Scrt1.o, crt1.o, crti.o, crtn.o, libc.a + headers under /usr/include.
+# linux-headers: kernel uapi (linux/*, asm/*, asm-generic/*) — used by syscall
+# definitions in the musl headers.
+# Note: we deliberately do NOT pull clang's compiler-rt or libgcc — the
+# soft-float / TF / 128-bit-int helpers (__extenddftf2 etc.) come from
+# our own rt/ build (rt/build/aarch64-linux/libcfree_rt.a).
+RUN apk add --no-cache musl-dev=1.2.5-r3 linux-headers
+
+# Stage the artifacts the linker needs into one tree so the host extract
+# is a single tar pipe.
+RUN mkdir -p /sysroot/lib /sysroot/include \
+ && cp /usr/lib/crt1.o /sysroot/lib/ \
+ && cp /usr/lib/crti.o /sysroot/lib/ \
+ && cp /usr/lib/crtn.o /sysroot/lib/ \
+ && cp /usr/lib/libc.a /sysroot/lib/ \
+ && cp /usr/lib/libssp_nonshared.a /sysroot/lib/ \
+ && cp -r /usr/include/. /sysroot/include/
+
+# Record what was installed: extract.sh keys its cache on this file, and it aids reproducibility audits.
+RUN echo "alpine 3.20.10 musl 1.2.5-r3" > /sysroot/PROVENANCE \
+ && uname -m >> /sysroot/PROVENANCE
+
+ENTRYPOINT ["sh", "-c", "tar -C /sysroot -cf - ."]
diff --git a/test/musl/cases/01_syscall_write.c b/test/musl/cases/01_syscall_write.c
@@ -0,0 +1,22 @@
+/* Tier 1: pure syscall, no libc functions. main() returns 0. We invoke
+ * the write(2) syscall by inline asm so the only things musl provides are
+ * crt1's _start / _start_c and __libc_start_main with its own libc.a
+ * dependencies; no other archive members should be pulled in. */
+
+#include <unistd.h>
+
+static const char msg[] = "hello-syscall\n";
+
+int main(void)
+{
+ /* sys_write(1, msg, sizeof(msg) - 1) via raw svc #0 */
+ register long x8 __asm__("x8") = 64; /* SYS_write */
+ register long x0 __asm__("x0") = 1; /* fd */
+ register long x1 __asm__("x1") = (long)msg;
+ register long x2 __asm__("x2") = sizeof(msg) - 1;
+ __asm__ volatile("svc #0"
+ : "+r"(x0)
+ : "r"(x8), "r"(x1), "r"(x2)
+ : "memory");
+ return 0;
+}
diff --git a/test/musl/cases/01_syscall_write.stdout b/test/musl/cases/01_syscall_write.stdout
@@ -0,0 +1 @@
+hello-syscall
diff --git a/test/musl/cases/02_errno_touch.c b/test/musl/cases/02_errno_touch.c
@@ -0,0 +1,13 @@
+/* Tier 2: touch errno. musl defines errno as (*__errno_location()), which
+ * points into the calling thread's control block, so this exercises the
+ * thread-pointer setup done at startup (__init_tls). We deliberately call a
+ * function that fails (close(-1) → EBADF) and read errno after. */
+
+#include <errno.h>
+#include <unistd.h>
+
+int main(void)
+{
+ (void)close(-1);
+ return errno == EBADF ? 0 : 1;
+}
diff --git a/test/musl/cases/03_printf_hello.c b/test/musl/cases/03_printf_hello.c
@@ -0,0 +1,10 @@
+/* Tier 3: full stdio. printf pulls in FILE buffering, locale,
+ * vfprintf, write_iov, errno, lock primitives, the works. If this
+ * runs, cfree ld is meaningfully usable for static musl programs. */
+
+#include <stdio.h>
+
+int main(void)
+{
+ return printf("hello, musl\n") < 0 ? 1 : 0;
+}
diff --git a/test/musl/cases/03_printf_hello.stdout b/test/musl/cases/03_printf_hello.stdout
@@ -0,0 +1 @@
+hello, musl
diff --git a/test/musl/extract.sh b/test/musl/extract.sh
@@ -0,0 +1,46 @@
+#!/usr/bin/env bash
+# test/musl/extract.sh — build the musl sysroot image (test/musl/Containerfile)
+# and unpack /sysroot from it into build/musl-sysroot/. Cached after the first
+# successful run (keyed on PROVENANCE); pass `-f`/`--force` to force a rebuild.
+#
+# Output layout:
+# build/musl-sysroot/
+# lib/ crt1.o crti.o crtn.o libc.a libssp_nonshared.a
+# include/ musl + linux-headers tree
+# PROVENANCE
+set -euo pipefail  # pipefail: a failed `podman run` must not hide behind tar's exit status
+
+ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
+SYSROOT="$ROOT/build/musl-sysroot"
+TAG="cfree-musl-sysroot"
+FORCE=0
+
+while [ $# -gt 0 ]; do
+    case "$1" in
+        -f|--force) FORCE=1; shift ;;
+        *) echo "unknown arg: $1" >&2; exit 2 ;;
+    esac
+done
+
+if [ -f "$SYSROOT/PROVENANCE" ] && [ $FORCE -eq 0 ]; then
+    echo "musl sysroot already present at $SYSROOT (use -f to rebuild)"
+    exit 0
+fi
+
+if ! command -v podman >/dev/null 2>&1; then
+    echo "extract.sh: podman is required" >&2
+    exit 1
+fi
+
+cd "$ROOT/test/musl"
+echo "Building $TAG (Alpine aarch64 + musl-dev)..."
+podman build --platform linux/arm64 -f Containerfile -t "$TAG" . >/dev/null
+
+rm -rf "$SYSROOT" && mkdir -p "$SYSROOT"
+
+echo "Extracting sysroot to $SYSROOT..."
+podman run --rm --platform linux/arm64 "$TAG" | tar -C "$SYSROOT" -xf - \
+    || { rm -rf "$SYSROOT"; exit 1; }  # leave no partial tree for the cache check to accept
+
+echo "Done. Provenance:"
+cat "$SYSROOT/PROVENANCE"
diff --git a/test/musl/run.sh b/test/musl/run.sh
@@ -0,0 +1,167 @@
+#!/usr/bin/env bash
+# test/musl/run.sh — drive cfree ld against static musl on aarch64-linux.
+#
+# For each case in test/musl/cases/*.c:
+# 1. clang --target=aarch64-linux-musl --sysroot=$SYSROOT -c case.c -o case.o
+# 2. cfree ld -static -o case.exe \
+# $SYSROOT/lib/crt1.o $SYSROOT/lib/crti.o \
+# case.o \
+# $SYSROOT/lib/libc.a libcfree_rt.a $SYSROOT/lib/crtn.o
+# 3. run case.exe under qemu-aarch64-static or podman aarch64.
+#
+# Each case may carry a `<name>.expected` companion holding the wanted exit
+# code (default 0) and an optional `<name>.stdout` file checked by substring match.
+#
+# Designed to fail fast and clearly: the *first* failure surface (compile
+# / link / run / output) is the gap to fix next. Run with
+# CFREE_MUSL_KEEP=1 to leave intermediates in build/musl/<case>/.
+set -u
+
+ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
+TEST_DIR="$ROOT/test/musl"
+BUILD_DIR="$ROOT/build/musl"
+SYSROOT="$ROOT/build/musl-sysroot"
+CFREE="$ROOT/build/cfree"
+CFREE_RT="$ROOT/rt/build/aarch64-linux/libcfree_rt.a"
+
+if [ ! -d "$SYSROOT" ]; then
+ echo "musl sysroot missing — run test/musl/extract.sh first" >&2
+ exit 2
+fi
+if [ ! -x "$CFREE" ]; then
+ echo "cfree driver missing at $CFREE — run 'make' first" >&2
+ exit 2
+fi
+if [ ! -f "$CFREE_RT" ]; then
+ echo "cfree rt missing at $CFREE_RT — run 'make rt-aarch64-linux'" >&2
+ exit 2
+fi
+
+mkdir -p "$BUILD_DIR"
+
+color_red() { printf '\033[31m%s\033[0m' "$1"; }
+color_grn() { printf '\033[32m%s\033[0m' "$1"; }
+color_yel() { printf '\033[33m%s\033[0m' "$1"; }
+
+PASS=0; FAIL=0
+FAIL_NAMES=()
+
+# Pick a runner. qemu-aarch64(-static) wins when installed; otherwise fall back
+# to podman, which runs aarch64 ELFs without binfmt on native arm64 hosts.
+arch_raw="$(uname -m 2>/dev/null || true)"
+is_aarch64=0
+{ [ "$arch_raw" = "aarch64" ] || [ "$arch_raw" = "arm64" ]; } && is_aarch64=1
+
+QEMU_BIN="$(command -v qemu-aarch64-static 2>/dev/null || command -v qemu-aarch64 2>/dev/null || true)"
+have_qemu=0; [ -n "$QEMU_BIN" ] && have_qemu=1
+have_podman=0; command -v podman >/dev/null 2>&1 && have_podman=1
+
+# clang must understand --target=aarch64-linux-musl. Recent clang ships
+# linux-musl as a target alias of linux-gnu for our purposes (we override
+# every system path via --sysroot).
+if ! clang --target=aarch64-linux-musl -c -x c - -o /dev/null < /dev/null 2>/dev/null; then
+ echo "clang does not accept --target=aarch64-linux-musl" >&2
+ exit 2
+fi
+
+run_aarch64() {  # run_aarch64 EXE OUT ERR: run EXE, stdout→OUT, stderr→ERR, exit status in RUN_RC
+    local exe="$1" out="$2" err="$3"
+    if [ $have_qemu -eq 1 ]; then
+        "$QEMU_BIN" "$exe" >"$out" 2>"$err"; RUN_RC=$?; return
+    fi
+    if [ $have_podman -eq 1 ]; then
+        local dir base platform_flag=()  # expanded via ${a[@]+…}: bash < 4.4 + set -u rejects an empty "${a[@]}"
+        dir="$(cd "$(dirname "$exe")" && pwd)"; base="$(basename "$exe")"
+        [ $is_aarch64 -eq 0 ] && platform_flag=(--platform linux/arm64)  # stays empty on arm64 hosts
+        podman run --rm ${platform_flag[@]+"${platform_flag[@]}"} --net=none \
+            -v "$dir":/work:Z -w /work alpine:latest "./$base" >"$out" 2>"$err"
+        RUN_RC=$?; return
+    fi
+    RUN_RC=127  # no runner available
+}
+
+run_case() {  # run_case SRC: compile, link and run one case; updates PASS / FAIL / FAIL_NAMES
+    local src="$1"
+    local name="$(basename "$src" .c)"
+    local work="$BUILD_DIR/$name"
+    mkdir -p "$work"
+
+    local expected=0  # wanted exit code, overridden by <name>.expected
+    [ -f "$TEST_DIR/cases/${name}.expected" ] && \
+        expected="$(tr -d '[:space:]' < "$TEST_DIR/cases/${name}.expected")"
+
+    local expect_stdout=""  # empty means "do not check stdout"
+    if [ -f "$TEST_DIR/cases/${name}.stdout" ]; then
+        expect_stdout="$(cat "$TEST_DIR/cases/${name}.stdout")"
+    fi
+
+    local obj="$work/${name}.o"
+    if ! clang --target=aarch64-linux-musl --sysroot="$SYSROOT" \
+            -nostdinc -isystem "$SYSROOT/include" \
+            -O0 -fno-PIC -fno-pie \
+            -c "$src" -o "$obj" 2>"$work/cc.err"; then
+        FAIL=$((FAIL+1)); FAIL_NAMES+=("$name (compile)")
+        printf ' %s %s\n' "$(color_red FAIL)" "$name (compile)"
+        sed 's/^/ cc| /' "$work/cc.err"
+        return
+    fi
+
+    local exe="$work/${name}.exe"
+    # Link order mirrors a typical static-musl invocation:
+    # crt1.o crti.o obj libc.a libcfree_rt.a crtn.o
+    # libcfree_rt provides the TF / soft-float builtins (__addtf3,
+    # __extenddftf2 etc.) that musl's libc.a calls from printf's long-
+    # double formatting. Our archive ingestion iterates demand-load to
+    # a fixed point so a single trailing libcfree_rt.a is enough.
+    local link_cmd=("$CFREE" "ld" -static -o "$exe"
+        "$SYSROOT/lib/crt1.o" "$SYSROOT/lib/crti.o"
+        "$obj"
+        "$SYSROOT/lib/libc.a" "$CFREE_RT"
+        "$SYSROOT/lib/crtn.o")
+
+    if ! "${link_cmd[@]}" >"$work/link.out" 2>"$work/link.err"; then
+        FAIL=$((FAIL+1)); FAIL_NAMES+=("$name (link)")
+        printf ' %s %s\n' "$(color_red FAIL)" "$name (link)"
+        sed 's/^/ ld| /' "$work/link.err" | head -10
+        return
+    fi
+
+    run_aarch64 "$exe" "$work/run.out" "$work/run.err"
+    if [ "$RUN_RC" -ne "$expected" ]; then
+        FAIL=$((FAIL+1)); FAIL_NAMES+=("$name (run rc=$RUN_RC, want $expected)")
+        printf ' %s %s (rc=%s, want %s)\n' "$(color_red FAIL)" "$name" \
+            "$RUN_RC" "$expected"
+        [ -s "$work/run.err" ] && sed 's/^/ err| /' "$work/run.err" | head -5
+        [ -s "$work/run.out" ] && sed 's/^/ out| /' "$work/run.out" | head -5
+        return
+    fi
+
+    if [ -n "$expect_stdout" ]; then
+        if [[ "$(<"$work/run.out")" != *"$expect_stdout"* ]]; then  # whole-text substring; grep -F would OR the lines of a multi-line expectation
+            FAIL=$((FAIL+1)); FAIL_NAMES+=("$name (stdout)")
+            printf ' %s %s (stdout mismatch)\n' "$(color_red FAIL)" "$name"
+            printf ' expected substring: %s\n' "$expect_stdout"
+            sed 's/^/ got| /' "$work/run.out" | head -5
+            return
+        fi
+    fi
+
+    PASS=$((PASS+1))
+    printf ' %s %s\n' "$(color_grn PASS)" "$name"
+}
+
+printf 'Running musl static-link cases...\n'
+
+shopt -s nullglob
+for src in "$TEST_DIR/cases"/*.c; do
+ run_case "$src"
+done
+
+printf '\nResults: %s pass, %s fail\n' "$PASS" "$FAIL"
+if [ ${#FAIL_NAMES[@]} -gt 0 ]; then
+ printf 'Failed:\n'
+ for n in "${FAIL_NAMES[@]}"; do printf ' %s\n' "$n"; done
+fi
+
+if [ $FAIL -gt 0 ]; then exit 1; fi
+exit 0
diff --git a/test/test.mk b/test/test.mk
@@ -19,7 +19,7 @@
# four paths per case (D direct-JIT, R roundtrip, E exec, J jit-via-file).
# Depends only on libcfree.a; reuses test/link harness binaries.
-.PHONY: test test-lex test-pp test-pp-err test-elf test-ar test-ar-driver test-link test-cg test-lib-deps
+.PHONY: test test-lex test-pp test-pp-err test-elf test-ar test-ar-driver test-link test-cg test-musl test-lib-deps
test: test-lex test-pp test-pp-err test-elf test-ar test-ar-driver test-link test-cg test-lib-deps
@@ -86,6 +86,16 @@ test-link: lib $(ROUNDTRIP_BIN) $(LINK_EXE_RUNNER) $(JIT_RUNNER)
test-cg: lib $(ROUNDTRIP_BIN) $(LINK_EXE_RUNNER) $(JIT_RUNNER)
bash test/cg/run.sh
+# test-musl: end-to-end static-musl link/run on aarch64. Pulls a pinned
+# musl sysroot (test/musl/extract.sh — uses podman against Alpine 3.20),
+# builds rt/build/aarch64-linux/libcfree_rt.a for the soft-float / TF
+# builtins, and runs `cfree ld` against the real musl libc.a. Excluded
+# from the default `test` target because it needs podman and ~30s on
+# first run; opt-in via `make test-musl`.
+test-musl: bin rt-aarch64-linux
+ @bash test/musl/extract.sh
+ @bash test/musl/run.sh
+
# Fail if libcfree.a depends on any external symbol not in the allowlist.
# Drift in either direction (new dep, or stale entry) is a failure.
LIB_DEPS_ACTUAL = build/libcfree.deps.txt