commit 8e2de46571802726e1a2f0c3d9d7e6d422bb53be
parent e0f434c481d6d9dd7ab2571034a998084a058fe8
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sun, 3 May 2026 23:39:49 -0700
tcc-tcc: route tcc-cc / tcc-libc through the second-stage tcc
cc.scm builds tcc-boot2; tcc-boot2 builds tcc-tcc; the tcc-cc and
tcc-libc test suites now run their fixtures through tcc-tcc instead
of tcc-boot2. Every fixture is now a self-host check: a regression
in cc.scm's emitted code surfaces first when tcc-boot2 builds
tcc-tcc, then again when tcc-tcc runs the fixtures.
The initial attempt at the second stage segfaulted on tcc.flat.c's
expr_cond `type = bt1 == 6 ? type1 : type2;` shape. Bisecting to
that line exposed a cc.scm codegen bug, not a tcc one:
- cg-load was lowering struct/union lvalues by loading 8 bytes into
a register and spilling them as a fresh frame slot. Anything
sizeof > 8 silently truncated when used in expression context
(notably as a ternary arm).
- cg-ifelse-merge then copied that truncated value through a
register into its own 8-byte merge slot regardless of result type.
Fix:
- cg-load leaves struct/union lvalues as lvalues; surrounding
consumers (cg-ifelse-merge, cg-assign-struct, cg-call) already
handle aggregate lvals correctly.
- cg-ifelse-merge sizes the merge slot to the aggregate result and
memcpys each arm's lvalue into it.
Regression locked by tests/cc/336-struct-assign-ternary.
Build wiring:
- scripts/boot-build-tcc-tcc.sh: tcc-boot2 compiles tcc.flat.c +
lib-arm64.c (TFmode soft-float helpers) and links against the
tcc-libc runtime objects to produce build/$ARCH/tcc-tcc/tcc-tcc.
- Makefile: new tcc-tcc target; TEST_TCC_CC_DEPS / TEST_TCC_LIBC_DEPS
point at tcc-tcc instead of tcc-boot2.
- boot-run-tests.sh: run_tcc_cc_suite / run_tcc_libc_suite invoke
tcc-tcc.
Also included is a fix for a separate mes-libc malloc bug that surfaced
while debugging: Linux's raw brk(2) doesn't return -1 on failure (it
returns the unchanged break), so the existing `brk(...) == -1` check
let malloc hand out pointers past the actual break. malloc is patched
to compare against the requested address instead. This did not fix the
tcc segfault directly, but it removes a real silent OOM hazard.
Results:
cc 180/0 (was 179/0, +336)
cc-libc 17/0
tcc-cc 183/1 (was 178/1; only upstream 200-lex-char-type left)
tcc-libc 17/0
Diffstat:
9 files changed, 217 insertions(+), 27 deletions(-)
diff --git a/Makefile b/Makefile
@@ -20,6 +20,9 @@
# make tcc-boot2 cc.scm + P1pp pipeline → tcc-boot2 ELF
# make tcc-gcc same flatten output, built with stock gcc
# (sanity check; ARCH=aarch64 only)
+# make tcc-tcc second-stage tcc: tcc-boot2 compiles
+# tcc.flat.c into a self-built tcc; the
+# tcc-cc / tcc-libc suites use this
# make test every suite, every arch
# make test SUITE=m1pp m1pp suite, every arch
# make test SUITE=p1 ARCH=amd64 p1 suite, one arch
@@ -86,7 +89,7 @@ PODMAN = podman run --rm --pull=never --platform $(PLATFORM_$(1)) \
# --- Targets --------------------------------------------------------------
.PHONY: all m1pp hex2pp scheme1 cc test image tools tables \
- tools-native cloc clean help tcc-boot2 tcc-flat tcc-gcc
+ tools-native cloc clean help tcc-boot2 tcc-flat tcc-gcc tcc-tcc
all: m1pp hex2pp
@@ -385,6 +388,15 @@ TCC_LIBC_SYS_STUBS := $(TCC_LIBC_DIR)/sys_stubs.o
TCC_LIBC_MEM := $(TCC_LIBC_DIR)/mem.o
TCC_LIBC_LIBC := $(TCC_LIBC_DIR)/libc.o
+# tcc-tcc: second-stage tcc. tcc-boot2 (cc.scm-built) compiles
+# tcc.flat.c and links it against the tcc-libc runtime objects to
+# produce a self-built tcc. The tcc-cc and tcc-libc suites run their
+# fixtures through tcc-tcc, not tcc-boot2 — so a regression in
+# cc.scm's emitted code surfaces as a tcc-tcc misbehavior on a
+# fixture, and the test set spans tcc compiling itself.
+TCC_TCC_ARCH := aarch64
+TCC_TCC_BIN := build/$(TCC_TCC_ARCH)/tcc-tcc/tcc-tcc
+
$(TCC_CC_START): tcc-cc/$(TCC_CC_ARCH)/start.S
mkdir -p $(@D)
$(HOST_CC) -target aarch64-linux-gnu -c -o $@ -x assembler $<
@@ -445,6 +457,24 @@ $(TCC_LIBC_LIBC): build/$(TCC_LIBC_ARCH)/vendor/mes-libc/libc.flat.c \
-include tcc-libc/va_list_shim.h \
-c -o $@ $<
+# --- tcc-tcc: second-stage tcc -------------------------------------------
+#
+# Build inputs come straight from the tcc-libc setup: same start /
+# sys_stubs / mem / libc objects. lib-arm64.o (TFmode soft-float
+# helpers) is materialized inside the build script alongside the link
+# step, since nothing else needs it.
+tcc-tcc: $(TCC_TCC_BIN)
+
+$(TCC_TCC_BIN): scripts/boot-build-tcc-tcc.sh \
+ $(TCC_FLAT) tcc-libc/va_list_shim.h \
+ build/$(TCC_TCC_ARCH)/tcc-boot2/tcc-boot2 \
+ $(TCC_LIBC_START) $(TCC_LIBC_SYS_STUBS) \
+ $(TCC_LIBC_MEM) $(TCC_LIBC_LIBC) \
+ build/$(TCC_TCC_ARCH)/.image
+ mkdir -p $(@D)
+ $(call PODMAN,$(TCC_TCC_ARCH)) \
+ sh scripts/boot-build-tcc-tcc.sh $@
+
# --- Native tools (opt-in dev-loop helpers) -------------------------------
NATIVE_TOOLS := build/native-tools/M1 build/native-tools/hex2 \
@@ -531,11 +561,10 @@ TEST_CC_LIBC_DEPS := $(TEST_CC_DEPS) \
P1/entry-libc.P1pp P1/elf-end.P1pp
TEST_TCC_CC_DEPS := build/$(TCC_CC_ARCH)/.image \
- build/$(TCC_CC_ARCH)/tcc-boot2/tcc-boot2 \
- $(TCC_CC_START) $(TCC_CC_MEM)
+ $(TCC_TCC_BIN) $(TCC_CC_START) $(TCC_CC_MEM)
TEST_TCC_LIBC_DEPS := build/$(TCC_LIBC_ARCH)/.image \
- build/$(TCC_LIBC_ARCH)/tcc-boot2/tcc-boot2 \
+ $(TCC_TCC_BIN) \
$(TCC_LIBC_START) $(TCC_LIBC_SYS_STUBS) $(TCC_LIBC_MEM) $(TCC_LIBC_LIBC)
test:
diff --git a/cc/cc.scm b/cc/cc.scm
@@ -3456,6 +3456,15 @@
;; and route through cg-decay-array for a single source of truth.
((eq? (ctype-kind ty) 'arr)
(cg-push cg p) (cg-decay-array cg))
+ ;; Struct/union lvalues stay as lvalues — there is no
+ ;; register-sized rvalue form for an aggregate, and the
+ ;; existing 8-byte spill path silently truncated anything
+ ;; wider (the bug that broke `c = cond ? a : b` for
+ ;; sizeof(struct) > 8). Surrounding expression machinery
+ ;; (cg-ifelse-merge / cg-assign-struct / cg-call) consumes
+ ;; aggregate operands as lvalues already.
+ ((or (eq? (ctype-kind ty) 'struct) (eq? (ctype-kind ty) 'union))
+ (cg-push cg p))
((and (eq? (opnd-kind p) 'frame)
(%cg-indirect? cg (opnd-ext p)))
;; Indirect frame-lval: slot holds the address. Stage the
@@ -3872,25 +3881,51 @@
;; For `&&` / `||` callers both arms are pre-cast to %t-i32 by the
;; parser, so the merge is a no-op on type.
(define (cg-ifelse-merge cg then-thunk else-thunk)
- (let* ((cond-op (cg-pop cg))
- (slot (cg-alloc-slot cg 8 8)))
+ (let* ((cond-op (cg-pop cg))) ; merge-slot allocation is deferred until the then-arm's result type is known
(%cg-load-opnd-into cg cond-op 't0)
(%cg-emit-many cg (list "%ifelse_nez(t0, {\n"))
(then-thunk)
- (let* ((p (cg-pop cg))
- (rty1 (opnd-type p)))
- (%cg-load-opnd-into cg p 'a0)
- (%cg-emit-st-slot cg 'a0 slot)
+ (let* ((p (cg-pop cg)) ; operand left on the cg stack by the then-arm
+ (rty1 (opnd-type p))
+ (rk1 (ctype-kind rty1))
+ ;; Struct/union arms can't ride the canonical 8-byte word
+ ;; slot — the arm's bytes have to land in a slot sized to
+ ;; the struct, and each arm memcpys its lvalue in. tcc's
+ ;; expr_cond does this exact `type = bt1 == 6 ? type1 : type2`
+ ;; pattern across CType structs, so without this case
+ ;; cc.scm-compiled tcc-boot2 self-corrupts.
+ (aggr? (or (eq? rk1 'struct) (eq? rk1 'union))) ; decided from the then-arm only; NOTE(review): assumes the else-arm has the same aggregate type — confirm the parser enforces this
+ (slot (cond (aggr?
+ (cg-alloc-slot cg
+ (align-up (ctype-size rty1) 8) ; size argument: the then-arm's ctype-size passed through align-up with 8
+ (max 8 (ctype-align rty1)))) ; alignment argument: never below 8
+ (else
+ (cg-alloc-slot cg 8 8))))) ; scalar arms keep the original 8-byte, 8-aligned word slot
+ (%cg-merge-write-arm cg p slot aggr?) ; then-arm result goes into the shared slot
(%cg-emit-many cg (list "}, {\n"))
(else-thunk)
(let* ((q (cg-pop cg))
(rty2 (opnd-type q)))
- (%cg-load-opnd-into cg q 'a0)
- (%cg-emit-st-slot cg 'a0 slot)
+ (%cg-merge-write-arm cg q slot aggr?) ; else-arm writes the same slot, reusing the then-arm's aggr? decision
(%cg-emit-many cg (list "})\n"))
+ ;; Aggregate result is pushed as a frame lval so cg-copy-struct
+ ;; (which asserts src must be lval) accepts it; %cg-emit-addr-of
+ ;; falls through the `lval? #t` guard (slot is direct, not
+ ;; indirect) and returns the slot's address either way.
(cg-push cg (%opnd 'frame
(%cg-merge-arith-type rty1 rty2)
- slot #f))))))
+ slot ; NOTE(review): the type argument above still comes from %cg-merge-arith-type even for aggregate arms — confirm it yields the struct/union type unchanged
+ aggr?)))))) ; presumably the operand's lval? field: #t only for aggregate results, #f (as before) for scalars
+
+(define (%cg-merge-write-arm cg op slot aggr?) ; write one ternary arm's result OP into the merge SLOT; cg-ifelse-merge calls this once per arm
+ (cond
+ (aggr? ; aggregate arm: byte-copy OP into SLOT instead of going through a register value
+ (%cg-emit-addr-of cg op 't0) ; presumably leaves OP's address in t0 (copy source) — confirm against %cg-emit-addr-of
+ (%cg-emit-lea-slot cg "t2" (%cg-slot-expr cg slot)) ; presumably t2 <- address of SLOT (copy destination); NOTE(review): register is the string "t2" here but the symbol 't2 on the next line — confirm both forms are accepted
+ (%cg-emit-byte-copy cg 't2 't0 't1 (ctype-size (opnd-type op)))) ; byte count is OP's own ctype-size; t1 is presumably scratch — confirm
+ (else ; scalar arm: load OP into a0 and store it to the slot
+ (%cg-load-opnd-into cg op 'a0)
+ (%cg-emit-st-slot cg 'a0 slot))))
;; Usual arithmetic conversion over two ctypes (C11 §6.3.1.8):
;; integer-promote each (sub-int → int), then pick the wider with
diff --git a/docs/TCC-TODO.md b/docs/TCC-TODO.md
@@ -35,19 +35,23 @@ make test SUITE=tcc-cc
## `tcc-cc` Suite
-`tcc-cc` runs the plain `tests/cc` fixtures through `tcc-boot2`
-instead of through `cc.scm` directly. The Makefile builds an
-ARM64-targeted `tcc-boot2`, builds the tiny aarch64 `_start` object
-with the host assembler, then the runner does:
+`tcc-cc` runs the plain `tests/cc` fixtures through `tcc-tcc`
+(second-stage tcc) instead of through `cc.scm` directly. The Makefile
+chain is `cc.scm` → `tcc-boot2` → `tcc-tcc`; the runner then does:
```sh
-build/aarch64/tcc-boot2/tcc-boot2 \
+build/aarch64/tcc-tcc/tcc-tcc \
-nostdlib build/aarch64/tcc-cc/start.o build/aarch64/tcc-cc/mem.o \
tests/cc/NAME.c -o build/aarch64/tests/tcc-cc/NAME
./build/aarch64/tests/tcc-cc/NAME
```
+Routing fixtures through `tcc-tcc` (rather than `tcc-boot2` directly)
+turns every fixture into a self-host check: a regression in
+`cc.scm`'s emitted code surfaces first when `tcc-boot2` builds
+`tcc-tcc`, then again when `tcc-tcc` runs the fixtures.
+
`mem.o` is the compiler-builtin mem* runtime — `memcpy/memmove/memset`
that tcc emits direct calls to for struct copies and bulk init, plus
`memcmp` for fixtures that reach it via bare `extern int memcmp(...)`.
@@ -89,6 +93,7 @@ The path from earlier results to here:
| 175/3 | cc.scm migration to M1pp + hex2++ pipeline (dotted local labels, `.scope`/`.endscope`, `.align` directives, bare-hex string emission) cleared the entire `assert fail: 0@12051` cluster (14 fixtures) plus a hex2pp.P1 BSS-overlap fix that unblocked the tcc-boot2 link itself for inputs >1 MiB |
| 176/2 | ternary-arms common-type fix in `cg-ifelse-merge` cleared `220-const-promote` (was: arm 1's type leaked through as the result type, truncating wider arm 2 to 32-bit; tcc's `gen_opic` sign-extension idiom hit this) |
| 178/1 | reframed mem* as compiler builtins supplied by the build process: renamed libp1pp's `libp1pp__memcpy` / `_memcmp` / `_memset` to plain `memcpy` / `memcmp` / `memset` and added `memmove`; dropped mes-libc's `string/memcpy.c` / `memmove.c` / `memset.c` / `memcmp.c` from `unified-libc.c` so the symbols are not duplicated; added `memcmp` to `tcc-cc/mem.c` and linked it into the gcc-built tcc-gcc binary; updated and renamed the regression fixture (`129-extern-libp1pp` → `129-extern-mem-builtins`) to extern the plain names. Cleared the fixture on every path (cc, cc-libc, tcc-cc, tcc-gcc). |
+| 183/1 | added `tcc-tcc` (second-stage tcc) and routed `tcc-cc` / `tcc-libc` through it. cc.scm's `cg-load` was 8-byte-spilling struct lvalues — anything `sizeof > 8` got truncated when used in expression context (e.g. as a ternary arm). Fixed `cg-load` to leave aggregates as lvalues and updated `cg-ifelse-merge` to memcpy aggregate arms into a struct-sized merge slot; without this, tcc-boot2 (cc.scm-built) self-corrupted whenever it had to compile `type = bt1 == 6 ? type1 : type2;`. Regression locked by `tests/cc/336-struct-assign-ternary`. |
## Host Baseline
diff --git a/scripts/boot-build-tcc-tcc.sh b/scripts/boot-build-tcc-tcc.sh
@@ -0,0 +1,45 @@
+#!/bin/sh
+## boot-build-tcc-tcc.sh — second-stage tcc.
+##
+## tcc-boot2 (the cc.scm-built tcc) compiles tcc.flat.c into a fresh
+## tcc binary, linked against the same libc.o / mem.o / sys_stubs.o /
+## start.o the tcc-libc suite uses, plus tcc's own lib-arm64.c
+## soft-float TFmode helpers (libgcc-equivalent on aarch64). The result
+## — tcc-tcc — is the "twice-compiled" tcc: stage 1 was cc.scm
+## compiling tcc; stage 2 is tcc compiling tcc. The two binaries are not
+## expected to be bit-identical, only functionally equivalent; the
+## tcc-cc / tcc-libc suites use tcc-tcc as their reference compiler.
+##
+## Env: ARCH=aarch64 (only arch wired today; any other value is rejected)
+## Usage: boot-build-tcc-tcc.sh <out>
+
+set -eu
+[ "${ARCH:-}" = aarch64 ] || { echo "$0: ARCH must be aarch64 (got '${ARCH:-}')" >&2; exit 2; }
+[ "$#" -eq 1 ] || { echo "usage: ARCH=<arch> $0 <out>" >&2; exit 2; }
+
+OUT=$1
+
+TCC_BOOT2=build/$ARCH/tcc-boot2/tcc-boot2
+TCC_INC=build/tcc/ARM64/tcc-0.9.26-1147-gee75a10c/include # ARM64 tcc tree is hard-coded below, hence the ARCH guard above
+LIBARM64_C=build/tcc/ARM64/tcc-0.9.26-1147-gee75a10c/lib/lib-arm64.c
+TCC_FLAT=build/tcc/ARM64/tcc.flat.c
+LIBC_O=build/$ARCH/tcc-libc/libc.o
+MEM_O=build/$ARCH/tcc-libc/mem.o
+SYS_O=build/$ARCH/tcc-libc/sys_stubs.o
+START_O=build/$ARCH/tcc-libc/start.o
+SHIM=tcc-libc/va_list_shim.h
+WORK=$(dirname "$OUT")
+
+mkdir -p "$WORK"
+
+# lib-arm64.o: TFmode soft-float helpers (__addtf3 / __extenddftf2 / …).
+# tcc.flat.c references these for long double arithmetic; without them
+# the final link fails with undefined symbols.
+"$TCC_BOOT2" -nostdlib -I "$TCC_INC" \
+ -D HAVE_CONFIG_H=1 -D TCC_TARGET_ARM64=1 -D TCC_TARGET_ARM=1 \
+ -c -o "$WORK/lib-arm64.o" "$LIBARM64_C"
+
+# Compile + link tcc-tcc in one tcc-boot2 invocation.
+"$TCC_BOOT2" -nostdlib -I "$TCC_INC" -include "$SHIM" \
+ "$START_O" "$SYS_O" "$MEM_O" "$LIBC_O" "$WORK/lib-arm64.o" \
+ "$TCC_FLAT" -o "$OUT"
diff --git a/scripts/boot-run-tests.sh b/scripts/boot-run-tests.sh
@@ -595,10 +595,10 @@ run_cc_ext_suite() {
## --- tcc-cc suite -------------------------------------------------------
##
-## Runs the plain tests/cc fixtures through the tcc-boot2 binary. The
-## Makefile builds tcc-boot2 with TCC_TARGET=ARM64 and supplies a tiny
-## aarch64 _start object at build/aarch64/tcc-cc/start.o, so this suite
-## can link and execute non-libc C tests directly on the target arch.
+## Runs the plain tests/cc fixtures through tcc-tcc — the second-stage
+## tcc, built by tcc-boot2 (which itself was built by cc.scm). The
+## Makefile target tcc-tcc supplies the binary; start.o / mem.o come
+## from the tcc-cc tree (cross-asm and tcc-boot2-built respectively).
run_tcc_cc_suite() {
if [ "$ARCH" != "aarch64" ]; then
echo " FAIL [$ARCH] tcc-cc"
@@ -606,7 +606,7 @@ run_tcc_cc_suite() {
return
fi
- tcc=build/$ARCH/tcc-boot2/tcc-boot2
+ tcc=build/$ARCH/tcc-tcc/tcc-tcc
start=build/$ARCH/tcc-cc/start.o
mem=build/$ARCH/tcc-cc/mem.o
tcc_include=build/tcc/ARM64/tcc-0.9.26-1147-gee75a10c/include
@@ -671,9 +671,10 @@ run_tcc_cc_suite() {
## --- tcc-libc suite -----------------------------------------------------
##
-## End-to-end "tcc as a real compiler" check: tcc-boot2 (built by cc.scm)
-## compiles the vendored mes-libc into libc.o, then for each tests/cc-libc
-## fixture, tcc-boot2 compiles + links the fixture against
+## End-to-end "tcc as a real compiler" check, run through tcc-tcc — the
+## twice-compiled tcc (cc.scm built tcc-boot2; tcc-boot2 built tcc-tcc).
+## tcc-boot2 already compiled mes-libc into libc.o; for each tests/cc-libc
+## fixture, tcc-tcc compiles + links the fixture against
## start.o aarch64 entry stub: __libc_init then main then exit
## sys_stubs.o Linux aarch64 svc-based sys_* implementations
## mem.o mem* compiler-builtin runtime (memcpy/memmove/memset/memcmp)
@@ -686,7 +687,7 @@ run_tcc_libc_suite() {
return
fi
- tcc=build/$ARCH/tcc-boot2/tcc-boot2
+ tcc=build/$ARCH/tcc-tcc/tcc-tcc
start=build/$ARCH/tcc-libc/start.o
sys_stubs=build/$ARCH/tcc-libc/sys_stubs.o
mem=build/$ARCH/tcc-libc/mem.o
diff --git a/scripts/libc-flatten.sh b/scripts/libc-flatten.sh
@@ -102,6 +102,10 @@ apply_simple_patch \
"$PATCHES/malloc-max-align.before" \
"$PATCHES/malloc-max-align.after"
apply_simple_patch \
+ "$STAGE/linux/malloc.c" \
+ "$PATCHES/malloc-brk-check.before" \
+ "$PATCHES/malloc-brk-check.after"
+apply_simple_patch \
"$STAGE/string/strstr.c" \
"$PATCHES/strstr-drop-mman.before" \
"$PATCHES/strstr-drop-mman.after"
diff --git a/tests/cc/336-struct-assign-ternary.c b/tests/cc/336-struct-assign-ternary.c
@@ -0,0 +1,56 @@
+/* Struct = ternary, mirroring the shape tcc.flat.c expr_cond uses
+ * pervasively (CType locals + else-if chain). cc.scm has to lower
+ * each `type = bt1 == 6 ? type1 : type2;` as a struct copy whose
+ * size matches the struct (16 bytes here, including alignment pad
+ * for the void* member). Until this works, cc.scm-compiled
+ * tcc-boot2 self-corrupts when handed input that exercises that
+ * exact path in its embedded codegen.
+ */
+
+struct CType { int t; void *ref; };
+
+int main(void)
+{
+ int dummy_a, dummy_b; /* never read; only their distinct addresses are compared */
+ struct CType type1; type1.t = 6; type1.ref = &dummy_a;
+ struct CType type2; type2.t = 7; type2.ref = &dummy_b;
+ struct CType type;
+
+ int bt1 = 6, bt2 = 0; /* case 1: first branch taken, ternary selects type1 */
+ if (bt1 == 6 || bt2 == 6) {
+ type = bt1 == 6 ? type1 : type2;
+ } else if (bt1 == 7 || bt2 == 7) {
+ type = bt1 == 7 ? type1 : type2;
+ } else {
+ type.t = -1;
+ type.ref = (void *)0;
+ }
+ if (type.t != 6) return 1; /* each nonzero exit status identifies the check that failed */
+ if (type.ref != &dummy_a) return 2; /* the member past the first 8 bytes must be copied too */
+
+ bt1 = 0; bt2 = 6; /* case 2: first branch taken via bt2, ternary selects type2 */
+ if (bt1 == 6 || bt2 == 6) {
+ type = bt1 == 6 ? type1 : type2;
+ } else if (bt1 == 7 || bt2 == 7) {
+ type = bt1 == 7 ? type1 : type2;
+ } else {
+ type.t = -1;
+ type.ref = (void *)0;
+ }
+ if (type.t != 7) return 11;
+ if (type.ref != &dummy_b) return 12;
+
+ bt1 = 7; bt2 = 0; /* case 3: else-if branch taken, ternary selects type1 */
+ if (bt1 == 6 || bt2 == 6) {
+ type = bt1 == 6 ? type1 : type2;
+ } else if (bt1 == 7 || bt2 == 7) {
+ type = bt1 == 7 ? type1 : type2;
+ } else {
+ type.t = -1;
+ type.ref = (void *)0;
+ }
+ if (type.t != 6) return 21;
+ if (type.ref != &dummy_a) return 22;
+
+ return 0;
+}
diff --git a/vendor/mes-libc/patches/malloc-brk-check.after b/vendor/mes-libc/patches/malloc-brk-check.after
@@ -0,0 +1,10 @@
+ /* Linux's raw brk(2) doesn't return -1 on failure; it returns the
+ * unchanged break (i.e. < requested). Compare against the requested
+ * address so a refused growth surfaces as malloc returning NULL
+ * instead of silently handing out a pointer past the break. */
+ long _new = (long) (__brk + size);
+ if (brk (__brk + size) < _new)
+ return 0;
+ char *p = __brk;
+ __brk = __brk + size;
+ return p;
diff --git a/vendor/mes-libc/patches/malloc-brk-check.before b/vendor/mes-libc/patches/malloc-brk-check.before
@@ -0,0 +1,5 @@
+ if (brk (__brk + size) == -1)
+ return 0;
+ char *p = __brk;
+ __brk = __brk + size;
+ return p;