boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 4bed1bcb908551c4ee63174c504b0897bea2ead9
parent d411ecb83f52cf356983e9a3a877e54ace7849ae
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sat,  2 May 2026 08:37:06 -0700

tcc-cc: ship mem* runtime so -nostdlib fixtures link

tcc emits calls to memcpy/memmove/memset for struct copies and bulk
zero-init past its inline thresholds, but its ARM64 libtcc1 (lib-arm64.o)
doesn't define them — upstream expects libc to, and the tcc-cc suite
links -nostdlib. Add a tiny tcc-cc/mem.c, compiled with tcc-boot2 itself
and linked alongside start.o for every fixture.

Clears all 15 fixtures in the mem* cluster: 148 -> 163 passed (out of 178).

Diffstat:
M Makefile | 17 ++++++++++++++++-
M docs/TCC-TODO.md | 51 +++++++++++++++++++++++----------------------------
M scripts/boot-run-tests.sh | 8 +++++++-
A tcc-cc/mem.c | 35 +++++++++++++++++++++++++++++++++++
4 files changed, 81 insertions(+), 30 deletions(-)

diff --git a/Makefile b/Makefile @@ -323,11 +323,26 @@ $(TCC_GCC_BIN): $(TCC_FLAT) build/$(TCC_GCC_ARCH)/vendor/mes-libc/libc.flat.c \ HOST_CC ?= cc TCC_CC_ARCH := aarch64 TCC_CC_START := build/$(TCC_CC_ARCH)/tcc-cc/start.o +TCC_CC_MEM := build/$(TCC_CC_ARCH)/tcc-cc/mem.o +TCC_CC_TCC_INCLUDE := build/tcc/ARM64/tcc-0.9.26-1147-gee75a10c/include $(TCC_CC_START): tcc-cc/$(TCC_CC_ARCH)/start.S mkdir -p $(@D) $(HOST_CC) -target aarch64-linux-gnu -c -o $@ -x assembler $< +# Tiny mem* runtime: tcc emits calls to memcpy/memmove/memset for +# struct copies and bulk zero-init past its inline thresholds, but +# its ARM64 libtcc1 (lib-arm64.o) doesn't define them — upstream +# expects libc to. The tcc-cc suite links -nostdlib, so we compile +# this fallback with tcc-boot2 itself and link it alongside start.o. +$(TCC_CC_MEM): tcc-cc/mem.c \ + build/$(TCC_CC_ARCH)/tcc-boot2/tcc-boot2 \ + build/$(TCC_CC_ARCH)/.image + mkdir -p $(@D) + $(call PODMAN,$(TCC_CC_ARCH)) \ + build/$(TCC_CC_ARCH)/tcc-boot2/tcc-boot2 \ + -nostdlib -I $(TCC_CC_TCC_INCLUDE) -c -o $@ $< + # --- Native tools (opt-in dev-loop helpers) ------------------------------- NATIVE_TOOLS := build/native-tools/M1 build/native-tools/hex2 build/native-tools/m1pp @@ -399,7 +414,7 @@ TEST_CC_LIBC_DEPS := $(TEST_CC_DEPS) \ TEST_TCC_CC_DEPS := build/$(TCC_CC_ARCH)/.image \ build/$(TCC_CC_ARCH)/tcc-boot2/tcc-boot2 \ - $(TCC_CC_START) + $(TCC_CC_START) $(TCC_CC_MEM) test: ifeq ($(SUITE),) diff --git a/docs/TCC-TODO.md b/docs/TCC-TODO.md @@ -74,17 +74,24 @@ make test SUITE=tcc-cc Result: ```text -148 passed, 30 failed +163 passed, 15 failed ``` -(178 fixtures total. The big jump from the previous `14 passed, 162 -failed` snapshot came from one cc.scm fix: cg-assign treated `=` as -scalar (8-byte load+store) for every type, so any struct/union -assignment of size > 8 bytes silently dropped fields at offset ≥ 8. 
-`SValue` is 64 bytes and `vswap()` does three struct copies, so every -vswap was a partial no-op and the dominant `vtop[-1].r < VT_CONST` -cluster all turned green. The fix routes struct/union `=` through a -new `cg-assign-struct` that emits a memcpy (see +(178 fixtures total. The 148→163 jump came from adding a tiny +`tcc-cc/mem.c` runtime providing `memcpy`/`memmove`/`memset`, +compiled with tcc-boot2 and linked alongside `start.o` for every +fixture. tcc emits calls to those for struct copies and bulk +zero-init past its inline thresholds, and its ARM64 `libtcc1` +(`lib-arm64.o`) does not define them — upstream expects libc to, +but the suite links `-nostdlib`. That cleared all 15 fixtures in +the `mem*` cluster in one shot. The earlier 14→148 jump was a +cc.scm fix: cg-assign treated `=` as scalar (8-byte load+store) +for every type, so any struct/union assignment of size > 8 bytes +silently dropped fields at offset ≥ 8. `SValue` is 64 bytes and +`vswap()` does three struct copies, so every vswap was a partial +no-op and the dominant `vtop[-1].r < VT_CONST` cluster all turned +green. The fix routes struct/union `=` through a new +`cg-assign-struct` that emits a memcpy (see `tests/cc/333-struct-assign-big.c`, plus `334-struct-assign-rval-rhs.c` for the comma-operator rval-of-struct rhs path).) 
@@ -107,12 +114,12 @@ Failure groups from per-fixture `tcc.log` files: | group | count | examples | |------:|------:|----------| | `assert fail: 0`, then segfault | 14 | `001-kitchen-sink`, `003-compound`, `013-call`, `019-static`, `027-void-call`, `071-fnptr-call`, `082-union-basic`, `117-compound-literal`, `118-const-expr`, `127-string-escapes`, `129-extern-libp1pp`, `131-vararg-mixed`, `200-lex-char-type`, `250-stringize-punct` | -| `tcc: error: undefined symbol 'memmove'` | 10 | `084-struct-assign`, `109-typedef-anon`, `111-struct-ret-1word` … `116-struct-ret-vararg`, `333-struct-assign-big`, `334-struct-assign-rval-rhs` | -| `tcc: error: undefined symbol 'memset'` | 5 | `032-local-struct-desig`, `096-fwd-struct`, `099-init-zero-tail`, `108-typedef-fnptr`, `125-anon-union` | | compile succeeds, generated program exits wrong | 1 | `220-const-promote` | -29 of 30 failures still happen before the generated fixture binary -runs. +14 of 15 failures still happen before the generated fixture binary +runs. The previous `mem*` undefined-symbol cluster (15 fixtures) is +gone — see `tcc-cc/mem.c` for the runtime, wired up via +`build/<arch>/tcc-cc/mem.o` in the Makefile. One failure is not cc.scm miscompilation — it reproduces on the gcc-built control (see Host Baseline below): @@ -123,12 +130,6 @@ This is an upstream tcc bug and would need a `simple-patches/` patch to fix. It caps the achievable cc.scm-built result at `177 passed, 1 failed` until tcc itself is patched. -The `memmove`/`memset` cluster is a libc gap, not a cc.scm bug: -tcc emits calls to `memmove`/`memset` for struct copies and bulk -zero-init that exceed its inline thresholds, and the linked -mes-libc TU does not export those symbols. Adding them to the -flattened libc would clear all 15 fixtures in one shot. - Working hypothesis for the remaining `assert fail: 0` cluster: our compiler is still miscompiling tcc itself in narrower spots. 
In this suite, `tcc-boot2` is a tcc binary produced by `cc.scm`; the @@ -143,7 +144,7 @@ fixtures: ```text gcc-built ARM64 tcc.flat.c (libc + tcc hdrs): 177 passed, 1 failed -cc.scm-built ARM64 tcc-boot2: 148 passed, 30 failed +cc.scm-built ARM64 tcc-boot2: 163 passed, 15 failed ``` The gcc-built control's only remaining failure (`200-lex-char-type`) @@ -154,10 +155,9 @@ flattened tcc against `libc.flat.c` + libtcc1 + a tiny mes-libc string runtime, and passes `-I tcc/include` so the bundled `<stdarg.h>` resolves under `-nostdlib`. Run with `scripts/run-gcc-libc-flat-tcc.sh`. This proves the fixtures and the -flattened tcc source are coherent end-to-end, so the remaining 29 +flattened tcc source are coherent end-to-end, so the remaining 14 cc.scm-only failures are evidence that our compiler is still -miscompiling tcc in some places (or, for the `mem*` cluster, that -our libc is missing symbols tcc emits). +miscompiling tcc in some places. ## Host Baseline @@ -246,11 +246,6 @@ Start with the earliest minimal failures in each remaining group: that cleared the vtop cluster. - `220-const-promote`: only remaining "compile succeeds, exits wrong" case — closest to "isolated codegen miscompile." -- `mem*` cluster (`032-local-struct-desig`, `084-struct-assign`, - `111-struct-ret-1word`, …): not a cc.scm bug — tcc emits calls - to `memset`/`memmove` for struct init/copy past inline thresholds, - and the linked mes-libc TU does not export those symbols. Adding - them clears all 15 fixtures in one shot. Keep using `make test SUITE=cc ARCH=aarch64 NAMES=...` as the control path for fixture semantics, and `make test SUITE=tcc-cc NAMES=...` as diff --git a/scripts/boot-run-tests.sh b/scripts/boot-run-tests.sh @@ -516,6 +516,7 @@ run_tcc_cc_suite() { tcc=build/$ARCH/tcc-boot2/tcc-boot2 start=build/$ARCH/tcc-cc/start.o + mem=build/$ARCH/tcc-cc/mem.o tcc_include=build/tcc/ARM64/tcc-0.9.26-1147-gee75a10c/include if [ ! 
-x "$tcc" ]; then echo " FAIL [$ARCH] tcc-cc" @@ -527,6 +528,11 @@ run_tcc_cc_suite() { echo " missing $start -- run 'make test SUITE=tcc-cc ARCH=$ARCH'" >&2 return fi + if [ ! -e "$mem" ]; then + echo " FAIL [$ARCH] tcc-cc" + echo " missing $mem -- run 'make test SUITE=tcc-cc ARCH=$ARCH'" >&2 + return + fi if ! "$tcc" -version 2>/dev/null | grep 'AArch64' >/dev/null; then echo " FAIL [$ARCH] tcc-cc" echo " $tcc is not an AArch64-targeted tcc; rebuild with TCC_TARGET=ARM64" >&2 @@ -554,7 +560,7 @@ run_tcc_cc_suite() { mkdir -p "$(dirname "$elf")" "$workdir" tcc_log=$workdir/tcc.log - if ! "$tcc" -nostdlib -I "$tcc_include" "$start" "$src" -o "$elf" \ + if ! "$tcc" -nostdlib -I "$tcc_include" "$start" "$mem" "$src" -o "$elf" \ >"$tcc_log" 2>&1; then fail "$label" "tcc compile/link failed:" "$tcc_log" continue diff --git a/tcc-cc/mem.c b/tcc-cc/mem.c @@ -0,0 +1,35 @@ +/* Tiny runtime providing the mem* helpers tcc emits calls to for + struct copies and bulk zero-init past its inline thresholds. + tcc's own libtcc1 (lib-arm64.o) does not define these — upstream + assumes they come from libc, but the tcc-cc suite links with + -nostdlib so we ship them here. */ + +typedef unsigned long size_t; + +void *memcpy(void *dst, const void *src, size_t n) { + unsigned char *d = (unsigned char *)dst; + const unsigned char *s = (const unsigned char *)src; + while (n--) *d++ = *s++; + return dst; +} + +void *memmove(void *dst, const void *src, size_t n) { + unsigned char *d = (unsigned char *)dst; + const unsigned char *s = (const unsigned char *)src; + if (d == s || n == 0) return dst; + if (d < s) { + while (n--) *d++ = *s++; + } else { + d += n; + s += n; + while (n--) *--d = *--s; + } + return dst; +} + +void *memset(void *dst, int c, size_t n) { + unsigned char *d = (unsigned char *)dst; + unsigned char b = (unsigned char)c; + while (n--) *d++ = b; + return dst; +}