commit aa82a0f76746048f6891d6f4395c28849d5f410b
parent f487b8ca2037417036958009646a2482cd3d172c
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Tue, 5 May 2026 08:21:04 -0700
tcc: strip /work/in[/tcc-lib]/ prefix from STT_FILE under BOOTSTRAP
Closes the last byte-identity gap between seed and podman boot4
outputs. tcc emitted the unmodified argv path into each .o's
STT_FILE symbol, so podman's /work/in/start.S and seed's flat
start.S produced byte-different symbol and string tables in crt1.o /
libc.a / libtcc1.a.
simple-patches/tcc-0.9.26/strip-file-prefix wraps the put_elf_sym
call in tccgen.c's tcc_debug_start so under #if BOOTSTRAP the
emitted name has the bootstrap-internal mount prefix stripped
(longest-first: /work/in/tcc-lib/, then /work/in/). Gated on the
existing -D BOOTSTRAP=1 from stage1-flatten.sh, so the strip bakes
into tcc.flat.c and applies uniformly to cc.scm-built tcc0 and
every self-hosted tccN.
seed-accept-boot34.sh now requires byte-identity for tcc3, hello,
crt1.o, libc.a, and libtcc1.a (was: only tcc3 + hello). All five
pass under WITH_BOOT4=1 on aarch64.
Diffstat:
5 files changed, 41 insertions(+), 26 deletions(-)
diff --git a/docs/OS-TODO.md b/docs/OS-TODO.md
@@ -163,6 +163,18 @@ scheme1 spawning the boot2-built catm via the .scm prelude.
tier2-gate ≈ 22 s; seed-accept (boot0/1/2) ≈ 2 s; boot3 acceptance
≈ 5 min wall (was multi-hour under TCG).
+- **STT_FILE prefix strip — landed.** tcc emitted the unmodified
+ argv path into each `.o`'s `STT_FILE` symbol, so podman-mounted
+ `/work/in/start.S` and seed-staged flat `start.S` produced
+ byte-different objects. `simple-patches/tcc-0.9.26/strip-file-prefix`
+ drops the bootstrap-internal prefixes (`/work/in/tcc-lib/`,
+ `/work/in/`) under `#if BOOTSTRAP` before the symbol is emitted.
+ Patch is gated on the existing `-D BOOTSTRAP=1` from
+ `stage1-flatten.sh` so it bakes into `tcc.flat.c` and applies to
+ cc.scm-built tcc0 plus every tccN it self-hosts. With this in,
+ `seed-accept-boot34.sh` checks `tcc3`, `hello`, `crt1.o`, `libc.a`,
+ and `libtcc1.a` for byte-identity vs the podman path; all pass.
+
## Open
- **Port boot5 to the seed driver.** boot5 compiles ~500 musl TUs, each
@@ -182,24 +194,6 @@ scheme1 spawning the boot2-built catm via the .scm prelude.
the boot4-gen-runscm path just doesn't use it. Likely the
cheaper of the two and worth trying first.
-- **tcc-emitted source paths embed in .o files.** Boot4's intermediate
- artifacts (`crt1.o`, `libc.a`, `libtcc1.a`) differ from the podman
- path by exactly the length of the embedded source-filename string,
- so the seed-vs-podman byte-identity check is narrowed to tcc3 +
- hello (the linker drops those strings). Two ways to close the gap:
- - **Make tcc emit relative-only paths.** A `-DTCC_EMBED_BASENAME` or
- similar guard in tcc.flat.c so the relocation/STT_FILE entry uses
- `basename(input)` regardless of how it was passed. Either as a
- define applied at boot3 build time, or a small upstream-style
- patch carried in `simple-patches/`.
- - **Make the podman path use basenames too.** `cd /work/in &&
- tcc -c start.S` instead of `tcc -c /work/in/start.S`. Smaller diff
- but pushes the constraint into the boot4-gen-runscm + boot4.sh
- podman branch rather than the compiler itself.
- The first is the more principled fix because it makes any future
- bootN-on-seed comparison path-agnostic; the second is the lower-
- risk one if we want to land it without touching tcc.
-
- **NULL-page hardening.** Slot 0 is unmapped so a NULL deref faults to
the kernel as a user sync; the kernel currently panics rather than
delivering a SIGSEGV-equivalent. Acceptable per OS.md (default-action
diff --git a/scripts/seed-accept-boot34.sh b/scripts/seed-accept-boot34.sh
@@ -59,13 +59,12 @@ echo "[seed-accept-boot34] boot4: DRIVER=seed scripts/boot4.sh $ARCH"
DRIVER=seed scripts/boot4.sh $ARCH
fail=0
-# tcc3 and hello are the user-facing executables and must match the
-# podman path byte-for-byte. crt1.o / libc.a / libtcc1.a embed source
-# filenames in their relocations (".S" string), and the seed harness
-# stages files at flat basenames (start.S) while podman mounts them at
-# /work/in/start.S. The size delta is exactly that string — the code
-# is identical. Skip strict byte-identity on those.
-for f in tcc3 hello; do
+# All boot4 outputs — including the intermediate crt1.o / libc.a /
+# libtcc1.a — must match podman byte-for-byte. The strip-file-prefix
+# tcc patch (simple-patches/tcc-0.9.26/) drops the /work/in/[tcc-lib/]
+# mount prefix from STT_FILE entries, so seed's flat-basename staging
+# and podman's /work/in/ mounts produce byte-identical .o files.
+for f in tcc3 hello crt1.o libc.a libtcc1.a; do
if ! cmp -s build/$ARCH/boot4/$f "$REF/$f.podman"; then
s_seed=$(wc -c < build/$ARCH/boot4/$f)
s_ref=$(wc -c < "$REF/$f.podman")
@@ -74,4 +73,4 @@ for f in tcc3 hello; do
fi
done
[ $fail -eq 0 ] || exit 4
-echo "[seed-accept-boot34] boot4 PASS — tcc3/hello byte-identical vs podman (libc.a/libtcc1.a/crt1.o differ only in embedded source paths)"
+echo "[seed-accept-boot34] boot4 PASS — tcc3/hello/crt1.o/libc.a/libtcc1.a byte-identical vs podman"
diff --git a/scripts/simple-patches/tcc-0.9.26/strip-file-prefix.after b/scripts/simple-patches/tcc-0.9.26/strip-file-prefix.after
@@ -0,0 +1,16 @@
+ /* an elf symbol of type STT_FILE must be put so that STB_LOCAL
+ symbols can be safely used. Under BOOTSTRAP, strip the well-known
+ container mount prefix so seed-staged inputs (flat basenames) and
+ podman-staged inputs (under /work/in/[tcc-lib/]) yield byte-
+ identical .o files. Longest match first; both prefixes are
+ bootstrap-internal layout constants. */
+ {
+ const char *fn = file->filename;
+#if BOOTSTRAP
+ if (!strncmp(fn, "/work/in/tcc-lib/", 17)) fn += 17;
+ else if (!strncmp(fn, "/work/in/", 9)) fn += 9;
+#endif
+ put_elf_sym(symtab_section, 0, 0,
+ ELFW(ST_INFO)(STB_LOCAL, STT_FILE), 0,
+ SHN_ABS, fn);
+ }
diff --git a/scripts/simple-patches/tcc-0.9.26/strip-file-prefix.before b/scripts/simple-patches/tcc-0.9.26/strip-file-prefix.before
@@ -0,0 +1,5 @@
+ /* an elf symbol of type STT_FILE must be put so that STB_LOCAL
+ symbols can be safely used */
+ put_elf_sym(symtab_section, 0, 0,
+ ELFW(ST_INFO)(STB_LOCAL, STT_FILE), 0,
+ SHN_ABS, file->filename);
diff --git a/scripts/stage1-flatten.sh b/scripts/stage1-flatten.sh
@@ -137,6 +137,7 @@ apply_our_patch longjmp-stub "$SRC/libtcc.c"
apply_our_patch set-environment-stub "$SRC/tcc.c"
apply_our_patch getclock-ms-stub "$SRC/tcc.c"
apply_our_patch getcwd-stub "$SRC/tccgen.c"
+apply_our_patch strip-file-prefix "$SRC/tccgen.c"
apply_our_patch ldexp-stub "$SRC/tccpp.c"
apply_our_patch date-time-stub "$SRC/tccpp.c"
apply_our_patch lex-char-unsigned "$SRC/tccpp.c"