commit 6a675ba495996db615fb007194c6c42ed0bafb7c
parent 1d81b481da21ff373bfe7d861167245307bb7f74
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sun, 10 May 2026 13:29:50 -0700
link/rv64: TLS LE — clamp SF_TLS PT_LOAD memsz to filesz
The PT_LOAD covering the TLS segment extended p_memsz past p_filesz to
cover .tbss, but SEG_TLS perms are SF_ALLOC|SF_TLS only (no PF_W), so
qemu-riscv64-static rejected images whose TLS consists only of .tbss
with "PT_LOAD with non-writable bss". That accounts for the two
remaining Phase 4 failures (n02_tls_store_le, n07_tls_bss_zero_init).
The .tbss extension belongs to PT_TLS, which the loader uses to size
each thread's TLS block; matching GNU ld, the PT_LOAD now keeps
memsz == filesz.
Drive-by: the run.sh fallback builds for cfree-roundtrip and
link-exe-runner were missing -Isrc and -Itest respectively, so a
deleted runner binary couldn't be regenerated.
rv64 cg: 773 pass, 0 fail, 768 skip (was 769/4/768).
Diffstat:
3 files changed, 29 insertions(+), 25 deletions(-)
diff --git a/doc/rv64-status.md b/doc/rv64-status.md
@@ -19,13 +19,11 @@ E links and runs under qemu-riscv64 via podman.
| Path | Pass | Fail | Skip |
|----------------------------|-----:|-----:|-----:|
| R (roundtrip) | 386 | 0 | 0 |
-| E (qemu exec) | 383 | 4 | ~ |
+| E (qemu exec) | 387 | 0 | ~ |
| D / J (native JIT) | 0 | 0 | 772 |
Skips are valid: D and J require host == rv64. With
-`CFREE_TEST_ALLOW_SKIP=1`, the suite reports **769 pass, 4 fail, 768 skip**.
-
-The 4 fails are 2 cases × 2 opt levels — see the Phase 4 checklist below.
+`CFREE_TEST_ALLOW_SKIP=1`, the suite reports **773 pass, 0 fail, 768 skip**.
---
@@ -73,24 +71,21 @@ stack args** so a single `void*` walk works for any number of args.
- ✅ Stack-arg reads in `rv_param` use `caller_stack_base = 16 + 64` for
variadic functions to skip past the save area
-## Phase 4 — TLS LE ⬜
-
-Local-Exec model. `n01_tls_load_le` and `n08_tls_addend_offset` work
-(tdata read paths), so the LUI+ADD+ADDI / TPREL_HI20+LO12 sequence is
-correct in isolation. Failing cases all touch `.tbss`:
-
-- ⬜ `n02_tls_store_le` (store 42 to .tbss var, load back → got 0xff)
-- ⬜ `n07_tls_bss_zero_init` (read uninitialized .tbss var → got 0xff)
-
-Likely a linker / loader interaction (cfree-ld’s rv64 .tbss layout vs.
-`start.c`'s `tls_init` for `__riscv`). Investigation steps:
-
-- ⬜ Dump emitted relocs on n02; verify TPREL HI20/LO12 against
- cfree-ld's resolved tprel offset
-- ⬜ Check that cfree-ld emits `__tbss_size` correctly for rv64 outputs
-- ⬜ Compare runtime `tp` value to `g_tls_block` base
-- ⬜ Confirm `.tbss` follows `.tdata` contiguously in the TLS image so
- start.c's `dst[td_n + i] = 0` lands at the right offset
+## Phase 4 — TLS LE ✅
+
+Local-Exec model. The LUI+ADD+ADDI / TPREL_HI20+LO12 sequence and
+`__tbss_size` / TLS-image layout were already correct; the remaining
+failures (`n02_tls_store_le`, `n07_tls_bss_zero_init`) were rejected by
+qemu-riscv64-static with the diagnostic "PT_LOAD with non-writable bss"
+before any test code ran. Cases with no `.tdata` (only `.tbss`)
+produced an SF_TLS PT_LOAD whose `p_memsz` extended past `p_filesz=0`
+into the `.tbss` span without PF_W.
+
+- ✅ ELF PT_LOAD over an SF_TLS segment now keeps `p_memsz == p_filesz`;
+ the `.tbss` extension is described exclusively by PT_TLS, which the
+ loader uses to size each thread's TLS block (see `src/link/link_elf.c`
+ segment phdr loop). Matches what GNU ld emits — `.tbss` consumes no
+ PT_LOAD memory.
## Phase 5 — test-parse on rv64 ⬜
diff --git a/src/link/link_elf.c b/src/link/link_elf.c
@@ -1055,7 +1055,14 @@ void link_emit_elf(LinkImage* img, Writer* w) {
p->p_vaddr = img_base + seg->vaddr; /* post-shift */
p->p_paddr = p->p_vaddr;
p->p_filesz = seg->file_size;
- p->p_memsz = seg->mem_size;
+ /* TLS .tbss is per-thread template space, not a loadable bss
+ * region — PT_TLS already records the full memsz (incl. .tbss)
+ * for the loader's per-thread allocation, so the matching
+ * PT_LOAD must not extend memsz past filesz. qemu-riscv64
+ * rejects PT_LOADs with memsz>filesz on non-writable mappings
+ * ("PT_LOAD with non-writable bss"), and the SEG_TLS perms are
+ * SF_ALLOC|SF_TLS only. */
+ p->p_memsz = (seg->flags & SF_TLS) ? seg->file_size : seg->mem_size;
p->p_align = seg->align ? seg->align : PAGE_SIZE;
}
/* PT_NOTE for build-id. Scripted images skip the build-id entirely. */
diff --git a/test/cg/run.sh b/test/cg/run.sh
@@ -209,7 +209,8 @@ fi
# cfree-roundtrip — for path R. test/elf/run.sh builds this; skip path R if
# we can't find or build it.
if [ ! -x "$ROUNDTRIP_BIN" ]; then
- if $CC -I"$ROOT/include" "$ROOT/test/elf/cfree-roundtrip.c" "$LIB_AR" \
+ if $CC -I"$ROOT/include" -I"$ROOT/src" \
+ "$ROOT/test/elf/cfree-roundtrip.c" "$LIB_AR" \
-o "$ROUNDTRIP_BIN" 2>"$BUILD_DIR/cfree-roundtrip.err"; then
have_roundtrip=1
printf ' %s cfree-roundtrip\n' "$(color_grn built)"
@@ -223,7 +224,8 @@ fi
# link-exe-runner — for path E.
if [ ! -x "$LINK_EXE_RUNNER" ]; then
- if $CC -I"$ROOT/include" "$LINK_TEST_DIR/harness/link_exe_runner.c" \
+ if $CC -I"$ROOT/include" -I"$ROOT/test" \
+ "$LINK_TEST_DIR/harness/link_exe_runner.c" \
"$LIB_AR" -o "$LINK_EXE_RUNNER" 2>"$BUILD_DIR/link-exe-runner.err"; then
have_exe_runner=1
printf ' %s link-exe-runner\n' "$(color_grn built)"