boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 8e2de46571802726e1a2f0c3d9d7e6d422bb53be
parent e0f434c481d6d9dd7ab2571034a998084a058fe8
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sun,  3 May 2026 23:39:49 -0700

tcc-tcc: route tcc-cc / tcc-libc through the second-stage tcc

cc.scm builds tcc-boot2; tcc-boot2 builds tcc-tcc; the tcc-cc and
tcc-libc test suites now run their fixtures through tcc-tcc instead
of tcc-boot2. Every fixture is now a self-host check: a regression
in cc.scm's emitted code surfaces first when tcc-boot2 builds
tcc-tcc, then again when tcc-tcc runs the fixtures.

Initial attempt at the second stage segfaulted on tcc.flat.c's
expr_cond `type = bt1 == 6 ? type1 : type2;` shape. The bisect to
that line turned out to be a cc.scm codegen bug, not a tcc one:

  - cg-load was lowering struct/union lvalues by loading 8 bytes into
    a register and spilling them as a fresh frame slot. Anything
    sizeof > 8 silently truncated when used in expression context
    (notably as a ternary arm).
  - cg-ifelse-merge then memcpy'd that truncated slot to its own
    8-byte merge slot regardless of result type.

Fix:
  - cg-load leaves struct/union lvalues as lvalues; surrounding
    consumers (cg-ifelse-merge, cg-assign-struct, cg-call) already
    handle aggregate lvals correctly.
  - cg-ifelse-merge sizes the merge slot to the aggregate result and
    memcpys each arm's lvalue into it.

Regression locked by tests/cc/336-struct-assign-ternary.

Build wiring:
  - scripts/boot-build-tcc-tcc.sh: tcc-boot2 compiles tcc.flat.c +
    lib-arm64.c (TFmode soft-float helpers) and links against the
    tcc-libc runtime objects to produce build/$ARCH/tcc-tcc/tcc-tcc.
  - Makefile: new tcc-tcc target; TEST_TCC_CC_DEPS / TEST_TCC_LIBC_DEPS
    point at tcc-tcc instead of tcc-boot2.
  - boot-run-tests.sh: run_tcc_cc_suite / run_tcc_libc_suite invoke
    tcc-tcc.

Also a separate mes-libc malloc fix surfaced while debugging: Linux's
raw brk(2) doesn't return -1 on failure (it returns the unchanged
break), so the existing `brk(...) == -1` check let malloc hand out
pointers past the actual break. Patched to compare against the
requested address instead. Didn't fix the tcc segfault directly but
prevents a real silent OOM hazard.

Results:
  cc        180/0  (was 179/0, +336)
  cc-libc    17/0
  tcc-cc    183/1  (was 178/1; only upstream 200-lex-char-type left)
  tcc-libc   17/0

Diffstat:
M Makefile | 37 +++++++++++++++++++++++++++++++++----
M cc/cc.scm | 53 ++++++++++++++++++++++++++++++++++++++++++++---------
M docs/TCC-TODO.md | 15 ++++++++++-----
A scripts/boot-build-tcc-tcc.sh | 45 +++++++++++++++++++++++++++++++++++++++++++++
M scripts/boot-run-tests.sh | 19 ++++++++++---------
M scripts/libc-flatten.sh | 4 ++++
A tests/cc/336-struct-assign-ternary.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A vendor/mes-libc/patches/malloc-brk-check.after | 10 ++++++++++
A vendor/mes-libc/patches/malloc-brk-check.before | 5 +++++
9 files changed, 217 insertions(+), 27 deletions(-)

diff --git a/Makefile b/Makefile @@ -20,6 +20,9 @@ # make tcc-boot2 cc.scm + P1pp pipeline → tcc-boot2 ELF # make tcc-gcc same flatten output, built with stock gcc # (sanity check; ARCH=aarch64 only) +# make tcc-tcc second-stage tcc: tcc-boot2 compiles +# tcc.flat.c into a self-built tcc; the +# tcc-cc / tcc-libc suites use this # make test every suite, every arch # make test SUITE=m1pp m1pp suite, every arch # make test SUITE=p1 ARCH=amd64 p1 suite, one arch @@ -86,7 +89,7 @@ PODMAN = podman run --rm --pull=never --platform $(PLATFORM_$(1)) \ # --- Targets -------------------------------------------------------------- .PHONY: all m1pp hex2pp scheme1 cc test image tools tables \ - tools-native cloc clean help tcc-boot2 tcc-flat tcc-gcc + tools-native cloc clean help tcc-boot2 tcc-flat tcc-gcc tcc-tcc all: m1pp hex2pp @@ -385,6 +388,15 @@ TCC_LIBC_SYS_STUBS := $(TCC_LIBC_DIR)/sys_stubs.o TCC_LIBC_MEM := $(TCC_LIBC_DIR)/mem.o TCC_LIBC_LIBC := $(TCC_LIBC_DIR)/libc.o +# tcc-tcc: second-stage tcc. tcc-boot2 (cc.scm-built) compiles +# tcc.flat.c and links it against the tcc-libc runtime objects to +# produce a self-built tcc. The tcc-cc and tcc-libc suites run their +# fixtures through tcc-tcc, not tcc-boot2 — so a regression in +# cc.scm's emitted code surfaces as a tcc-tcc misbehavior on a +# fixture, and the test set spans tcc compiling itself. +TCC_TCC_ARCH := aarch64 +TCC_TCC_BIN := build/$(TCC_TCC_ARCH)/tcc-tcc/tcc-tcc + $(TCC_CC_START): tcc-cc/$(TCC_CC_ARCH)/start.S mkdir -p $(@D) $(HOST_CC) -target aarch64-linux-gnu -c -o $@ -x assembler $< @@ -445,6 +457,24 @@ $(TCC_LIBC_LIBC): build/$(TCC_LIBC_ARCH)/vendor/mes-libc/libc.flat.c \ -include tcc-libc/va_list_shim.h \ -c -o $@ $< +# --- tcc-tcc: second-stage tcc ------------------------------------------- +# +# Build inputs come straight from the tcc-libc setup: same start / +# sys_stubs / mem / libc objects. 
lib-arm64.o (TFmode soft-float +# helpers) is materialized inside the build script alongside the link +# step, since nothing else needs it. +tcc-tcc: $(TCC_TCC_BIN) + +$(TCC_TCC_BIN): scripts/boot-build-tcc-tcc.sh \ + $(TCC_FLAT) tcc-libc/va_list_shim.h \ + build/$(TCC_TCC_ARCH)/tcc-boot2/tcc-boot2 \ + $(TCC_LIBC_START) $(TCC_LIBC_SYS_STUBS) \ + $(TCC_LIBC_MEM) $(TCC_LIBC_LIBC) \ + build/$(TCC_TCC_ARCH)/.image + mkdir -p $(@D) + $(call PODMAN,$(TCC_TCC_ARCH)) \ + sh scripts/boot-build-tcc-tcc.sh $@ + # --- Native tools (opt-in dev-loop helpers) ------------------------------- NATIVE_TOOLS := build/native-tools/M1 build/native-tools/hex2 \ @@ -531,11 +561,10 @@ TEST_CC_LIBC_DEPS := $(TEST_CC_DEPS) \ P1/entry-libc.P1pp P1/elf-end.P1pp TEST_TCC_CC_DEPS := build/$(TCC_CC_ARCH)/.image \ - build/$(TCC_CC_ARCH)/tcc-boot2/tcc-boot2 \ - $(TCC_CC_START) $(TCC_CC_MEM) + $(TCC_TCC_BIN) $(TCC_CC_START) $(TCC_CC_MEM) TEST_TCC_LIBC_DEPS := build/$(TCC_LIBC_ARCH)/.image \ - build/$(TCC_LIBC_ARCH)/tcc-boot2/tcc-boot2 \ + $(TCC_TCC_BIN) \ $(TCC_LIBC_START) $(TCC_LIBC_SYS_STUBS) $(TCC_LIBC_MEM) $(TCC_LIBC_LIBC) test: diff --git a/cc/cc.scm b/cc/cc.scm @@ -3456,6 +3456,15 @@ ;; and route through cg-decay-array for a single source of truth. ((eq? (ctype-kind ty) 'arr) (cg-push cg p) (cg-decay-array cg)) + ;; Struct/union lvalues stay as lvalues — there is no + ;; register-sized rvalue form for an aggregate, and the + ;; existing 8-byte spill path silently truncated anything + ;; wider (the bug that broke `c = cond ? a : b` for + ;; sizeof(struct) > 8). Surrounding expression machinery + ;; (cg-ifelse-merge / cg-assign-struct / cg-call) consumes + ;; aggregate operands as lvalues already. + ((or (eq? (ctype-kind ty) 'struct) (eq? (ctype-kind ty) 'union)) + (cg-push cg p)) ((and (eq? (opnd-kind p) 'frame) (%cg-indirect? cg (opnd-ext p))) ;; Indirect frame-lval: slot holds the address. 
Stage the @@ -3872,25 +3881,51 @@ ;; For `&&` / `||` callers both arms are pre-cast to %t-i32 by the ;; parser, so the merge is a no-op on type. (define (cg-ifelse-merge cg then-thunk else-thunk) - (let* ((cond-op (cg-pop cg)) - (slot (cg-alloc-slot cg 8 8))) + (let* ((cond-op (cg-pop cg))) (%cg-load-opnd-into cg cond-op 't0) (%cg-emit-many cg (list "%ifelse_nez(t0, {\n")) (then-thunk) - (let* ((p (cg-pop cg)) - (rty1 (opnd-type p))) - (%cg-load-opnd-into cg p 'a0) - (%cg-emit-st-slot cg 'a0 slot) + (let* ((p (cg-pop cg)) + (rty1 (opnd-type p)) + (rk1 (ctype-kind rty1)) + ;; Struct/union arms can't ride the canonical 8-byte word + ;; slot — the arm's bytes have to land in a slot sized to + ;; the struct, and each arm memcpys its lvalue in. tcc's + ;; expr_cond does this exact `type = bt1 == 6 ? type1 : type2` + ;; pattern across CType structs, so without this case + ;; cc.scm-compiled tcc-boot2 self-corrupts. + (aggr? (or (eq? rk1 'struct) (eq? rk1 'union))) + (slot (cond (aggr? + (cg-alloc-slot cg + (align-up (ctype-size rty1) 8) + (max 8 (ctype-align rty1)))) + (else + (cg-alloc-slot cg 8 8))))) + (%cg-merge-write-arm cg p slot aggr?) (%cg-emit-many cg (list "}, {\n")) (else-thunk) (let* ((q (cg-pop cg)) (rty2 (opnd-type q))) - (%cg-load-opnd-into cg q 'a0) - (%cg-emit-st-slot cg 'a0 slot) + (%cg-merge-write-arm cg q slot aggr?) (%cg-emit-many cg (list "})\n")) + ;; Aggregate result is pushed as a frame lval so cg-copy-struct + ;; (which asserts src must be lval) accepts it; %cg-emit-addr-of + ;; falls through the `lval? #t` guard (slot is direct, not + ;; indirect) and returns the slot's address either way. (cg-push cg (%opnd 'frame (%cg-merge-arith-type rty1 rty2) - slot #f)))))) + slot + aggr?)))))) + +(define (%cg-merge-write-arm cg op slot aggr?) + (cond + (aggr? 
+ (%cg-emit-addr-of cg op 't0) + (%cg-emit-lea-slot cg "t2" (%cg-slot-expr cg slot)) + (%cg-emit-byte-copy cg 't2 't0 't1 (ctype-size (opnd-type op)))) + (else + (%cg-load-opnd-into cg op 'a0) + (%cg-emit-st-slot cg 'a0 slot)))) ;; Usual arithmetic conversion over two ctypes (C11 §6.3.1.8): ;; integer-promote each (sub-int → int), then pick the wider with diff --git a/docs/TCC-TODO.md b/docs/TCC-TODO.md @@ -35,19 +35,23 @@ make test SUITE=tcc-cc ## `tcc-cc` Suite -`tcc-cc` runs the plain `tests/cc` fixtures through `tcc-boot2` -instead of through `cc.scm` directly. The Makefile builds an -ARM64-targeted `tcc-boot2`, builds the tiny aarch64 `_start` object -with the host assembler, then the runner does: +`tcc-cc` runs the plain `tests/cc` fixtures through `tcc-tcc` +(second-stage tcc) instead of through `cc.scm` directly. The Makefile +chain is `cc.scm` → `tcc-boot2` → `tcc-tcc`; the runner then does: ```sh -build/aarch64/tcc-boot2/tcc-boot2 \ +build/aarch64/tcc-tcc/tcc-tcc \ -nostdlib build/aarch64/tcc-cc/start.o build/aarch64/tcc-cc/mem.o \ tests/cc/NAME.c -o build/aarch64/tests/tcc-cc/NAME ./build/aarch64/tests/tcc-cc/NAME ``` +Routing fixtures through `tcc-tcc` (rather than `tcc-boot2` directly) +turns every fixture into a self-host check: a regression in +`cc.scm`'s emitted code surfaces first when `tcc-boot2` builds +`tcc-tcc`, then again when `tcc-tcc` runs the fixtures. + `mem.o` is the compiler-builtin mem* runtime — `memcpy/memmove/memset` that tcc emits direct calls to for struct copies and bulk init, plus `memcmp` for fixtures that reach it via bare `extern int memcmp(...)`. 
@@ -89,6 +93,7 @@ The path from earlier results to here: | 175/3 | cc.scm migration to M1pp + hex2++ pipeline (dotted local labels, `.scope`/`.endscope`, `.align` directives, bare-hex string emission) cleared the entire `assert fail: 0@12051` cluster (14 fixtures) plus a hex2pp.P1 BSS-overlap fix that unblocked the tcc-boot2 link itself for inputs >1 MiB | | 176/2 | ternary-arms common-type fix in `cg-ifelse-merge` cleared `220-const-promote` (was: arm 1's type leaked through as the result type, truncating wider arm 2 to 32-bit; tcc's `gen_opic` sign-extension idiom hit this) | | 178/1 | reframed mem* as compiler builtins supplied by the build process: renamed libp1pp's `libp1pp__memcpy` / `_memcmp` / `_memset` to plain `memcpy` / `memcmp` / `memset` and added `memmove`; dropped mes-libc's `string/memcpy.c` / `memmove.c` / `memset.c` / `memcmp.c` from `unified-libc.c` so the symbols are not duplicated; added `memcmp` to `tcc-cc/mem.c` and linked it into the gcc-built tcc-gcc binary; updated and renamed the regression fixture (`129-extern-libp1pp` → `129-extern-mem-builtins`) to extern the plain names. Cleared the fixture on every path (cc, cc-libc, tcc-cc, tcc-gcc). | +| 183/1 | added `tcc-tcc` (second-stage tcc) and routed `tcc-cc` / `tcc-libc` through it. cc.scm's `cg-load` was 8-byte-spilling struct lvalues — anything `sizeof > 8` got truncated when used in expression context (e.g. as a ternary arm). Fixed `cg-load` to leave aggregates as lvalues and updated `cg-ifelse-merge` to memcpy aggregate arms into a struct-sized merge slot; without this, tcc-boot2 (cc.scm-built) self-corrupted whenever it had to compile `type = bt1 == 6 ? type1 : type2;`. Regression locked by `tests/cc/336-struct-assign-ternary`. | ## Host Baseline diff --git a/scripts/boot-build-tcc-tcc.sh b/scripts/boot-build-tcc-tcc.sh @@ -0,0 +1,45 @@ +#!/bin/sh +## boot-build-tcc-tcc.sh — second-stage tcc. 
+## +## tcc-boot2 (the cc.scm-built tcc) compiles tcc.flat.c into a fresh +## tcc binary, linked against the same libc.o / mem.o / sys_stubs.o / +## start.o the tcc-libc suite uses, plus tcc's own lib-arm64.c +## soft-float TFmode helpers (libgcc-equivalent on aarch64). The result +## — tcc-tcc — is the "twice-compiled" tcc: stage 1 was cc.scm +## compiling tcc; stage 2 is tcc compiling tcc. Both are bit-distinct +## from each other but functionally equivalent; the tcc-cc / tcc-libc +## suites use tcc-tcc as their reference compiler. +## +## Env: ARCH=aarch64 (only arch wired today) +## Usage: boot-build-tcc-tcc.sh <out> + +set -eu +: "${ARCH:?ARCH must be set}" +[ "$#" -eq 1 ] || { echo "usage: ARCH=<arch> $0 <out>" >&2; exit 2; } + +OUT=$1 + +TCC_BOOT2=build/$ARCH/tcc-boot2/tcc-boot2 +TCC_INC=build/tcc/ARM64/tcc-0.9.26-1147-gee75a10c/include +LIBARM64_C=build/tcc/ARM64/tcc-0.9.26-1147-gee75a10c/lib/lib-arm64.c +TCC_FLAT=build/tcc/ARM64/tcc.flat.c +LIBC_O=build/$ARCH/tcc-libc/libc.o +MEM_O=build/$ARCH/tcc-libc/mem.o +SYS_O=build/$ARCH/tcc-libc/sys_stubs.o +START_O=build/$ARCH/tcc-libc/start.o +SHIM=tcc-libc/va_list_shim.h +WORK=$(dirname "$OUT") + +mkdir -p "$WORK" + +# lib-arm64.o: TFmode soft-float helpers (__addtf3 / __extenddftf2 / …). +# tcc.flat.c references these for long double arithmetic; without them +# the final link fails with undefined symbols. +"$TCC_BOOT2" -nostdlib -I "$TCC_INC" \ + -D HAVE_CONFIG_H=1 -D TCC_TARGET_ARM64=1 -D TCC_TARGET_ARM=1 \ + -c -o "$WORK/lib-arm64.o" "$LIBARM64_C" + +# Compile + link tcc-tcc in one tcc-boot2 invocation. +"$TCC_BOOT2" -nostdlib -I "$TCC_INC" -include "$SHIM" \ + "$START_O" "$SYS_O" "$MEM_O" "$LIBC_O" "$WORK/lib-arm64.o" \ + "$TCC_FLAT" -o "$OUT" diff --git a/scripts/boot-run-tests.sh b/scripts/boot-run-tests.sh @@ -595,10 +595,10 @@ run_cc_ext_suite() { ## --- tcc-cc suite ------------------------------------------------------- ## -## Runs the plain tests/cc fixtures through the tcc-boot2 binary. 
The -## Makefile builds tcc-boot2 with TCC_TARGET=ARM64 and supplies a tiny -## aarch64 _start object at build/aarch64/tcc-cc/start.o, so this suite -## can link and execute non-libc C tests directly on the target arch. +## Runs the plain tests/cc fixtures through tcc-tcc — the second-stage +## tcc, built by tcc-boot2 (which itself was built by cc.scm). The +## Makefile target tcc-tcc supplies the binary; start.o / mem.o come +## from the tcc-cc tree (cross-asm and tcc-boot2-built respectively). run_tcc_cc_suite() { if [ "$ARCH" != "aarch64" ]; then echo " FAIL [$ARCH] tcc-cc" @@ -606,7 +606,7 @@ run_tcc_cc_suite() { return fi - tcc=build/$ARCH/tcc-boot2/tcc-boot2 + tcc=build/$ARCH/tcc-tcc/tcc-tcc start=build/$ARCH/tcc-cc/start.o mem=build/$ARCH/tcc-cc/mem.o tcc_include=build/tcc/ARM64/tcc-0.9.26-1147-gee75a10c/include @@ -671,9 +671,10 @@ run_tcc_cc_suite() { ## --- tcc-libc suite ----------------------------------------------------- ## -## End-to-end "tcc as a real compiler" check: tcc-boot2 (built by cc.scm) -## compiles the vendored mes-libc into libc.o, then for each tests/cc-libc -## fixture, tcc-boot2 compiles + links the fixture against +## End-to-end "tcc as a real compiler" check, run through tcc-tcc — the +## twice-compiled tcc (cc.scm built tcc-boot2; tcc-boot2 built tcc-tcc). 
+## tcc-boot2 already compiled mes-libc into libc.o; for each tests/cc-libc +## fixture, tcc-tcc compiles + links the fixture against ## start.o aarch64 entry stub: __libc_init then main then exit ## sys_stubs.o Linux aarch64 svc-based sys_* implementations ## mem.o mem* compiler-builtin runtime (memcpy/memmove/memset/memcmp) @@ -686,7 +687,7 @@ run_tcc_libc_suite() { return fi - tcc=build/$ARCH/tcc-boot2/tcc-boot2 + tcc=build/$ARCH/tcc-tcc/tcc-tcc start=build/$ARCH/tcc-libc/start.o sys_stubs=build/$ARCH/tcc-libc/sys_stubs.o mem=build/$ARCH/tcc-libc/mem.o diff --git a/scripts/libc-flatten.sh b/scripts/libc-flatten.sh @@ -102,6 +102,10 @@ apply_simple_patch \ "$PATCHES/malloc-max-align.before" \ "$PATCHES/malloc-max-align.after" apply_simple_patch \ + "$STAGE/linux/malloc.c" \ + "$PATCHES/malloc-brk-check.before" \ + "$PATCHES/malloc-brk-check.after" +apply_simple_patch \ "$STAGE/string/strstr.c" \ "$PATCHES/strstr-drop-mman.before" \ "$PATCHES/strstr-drop-mman.after" diff --git a/tests/cc/336-struct-assign-ternary.c b/tests/cc/336-struct-assign-ternary.c @@ -0,0 +1,56 @@ +/* Struct = ternary, mirroring the shape tcc.flat.c expr_cond uses + * pervasively (CType locals + else-if chain). cc.scm has to lower + * each `type = bt1 == 6 ? type1 : type2;` as a struct copy whose + * size matches the struct (16 bytes here, including alignment pad + * for the void* member). Until this works, cc.scm-compiled + * tcc-boot2 self-corrupts when handed input that exercises that + * exact path in its embedded codegen. + */ + +struct CType { int t; void *ref; }; + +int main(void) +{ + int dummy_a, dummy_b; + struct CType type1; type1.t = 6; type1.ref = &dummy_a; + struct CType type2; type2.t = 7; type2.ref = &dummy_b; + struct CType type; + + int bt1 = 6, bt2 = 0; + if (bt1 == 6 || bt2 == 6) { + type = bt1 == 6 ? type1 : type2; + } else if (bt1 == 7 || bt2 == 7) { + type = bt1 == 7 ? 
type1 : type2; + } else { + type.t = -1; + type.ref = (void *)0; + } + if (type.t != 6) return 1; + if (type.ref != &dummy_a) return 2; + + bt1 = 0; bt2 = 6; + if (bt1 == 6 || bt2 == 6) { + type = bt1 == 6 ? type1 : type2; + } else if (bt1 == 7 || bt2 == 7) { + type = bt1 == 7 ? type1 : type2; + } else { + type.t = -1; + type.ref = (void *)0; + } + if (type.t != 7) return 11; + if (type.ref != &dummy_b) return 12; + + bt1 = 7; bt2 = 0; + if (bt1 == 6 || bt2 == 6) { + type = bt1 == 6 ? type1 : type2; + } else if (bt1 == 7 || bt2 == 7) { + type = bt1 == 7 ? type1 : type2; + } else { + type.t = -1; + type.ref = (void *)0; + } + if (type.t != 6) return 21; + if (type.ref != &dummy_a) return 22; + + return 0; +} diff --git a/vendor/mes-libc/patches/malloc-brk-check.after b/vendor/mes-libc/patches/malloc-brk-check.after @@ -0,0 +1,10 @@ + /* Linux's raw brk(2) doesn't return -1 on failure; it returns the + * unchanged break (i.e. < requested). Compare against the requested + * address so a refused growth surfaces as malloc returning NULL + * instead of silently handing out a pointer past the break. */ + long _new = (long) (__brk + size); + if (brk (__brk + size) < _new) + return 0; + char *p = __brk; + __brk = __brk + size; + return p; diff --git a/vendor/mes-libc/patches/malloc-brk-check.before b/vendor/mes-libc/patches/malloc-brk-check.before @@ -0,0 +1,5 @@ + if (brk (__brk + size) == -1) + return 0; + char *p = __brk; + __brk = __brk + size; + return p;