boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit e0f434c481d6d9dd7ab2571034a998084a058fe8
parent 39fab2ed7106d643242b35b0fb553b5f659eb5d9
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sun,  3 May 2026 22:53:04 -0700

tcc-libc: tcc-boot2 builds mes-libc, runs cc-libc fixtures

End-to-end exercise of tcc-boot2 as a real compiler. The new suite
has tcc-boot2 (built by cc.scm) compile vendor/mes-libc/libc.flat.c
into libc.o, then for each tests/cc-libc fixture link
start.o + sys_stubs.o + mem.o + libc.o + fixture.c into a runnable
ELF and execute it natively in the aarch64 container.

New harness pieces under tcc-libc/:
  - aarch64/start.S: entry stub, calls __libc_init then main
  - aarch64/sys_stubs.S: Linux aarch64 svc-based sys_* (libp1pp shape)
  - va_list_shim.h: -include header that aliases the host-flatten's
    __builtin_va_list / __builtin_va_* tokens onto tcc's native
    va_list / va_* macros (tcc has no __builtin_va_list keyword)
mem.o is reused from tcc-cc/mem.c. start.o and sys_stubs.o are
cross-assembled by the host (tcc-boot2's aarch64 codegen has no
inline-asm support).

17 passed, 0 failed.

Diffstat:
M Makefile | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
M scripts/boot-run-tests.sh | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
M scripts/run-tests.sh | 7 +++++--
A tcc-libc/aarch64/start.S | 16 ++++++++++++++++
A tcc-libc/aarch64/sys_stubs.S | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A tcc-libc/va_list_shim.h | 25 +++++++++++++++++++++++++
6 files changed, 257 insertions(+), 5 deletions(-)

diff --git a/Makefile b/Makefile @@ -25,6 +25,8 @@ # make test SUITE=p1 ARCH=amd64 p1 suite, one arch # make test SUITE=scheme1 scheme1 .scm fixtures, every arch # make test SUITE=tcc-cc tcc-boot2 compiles tests/cc, aarch64 +# make test SUITE=tcc-libc tcc-boot2 builds mes-libc and runs +# tests/cc-libc against it, aarch64 # make test SUITE=cc-ext vendored c-testsuite (broad coverage, # opt-in: not part of `make test`) # make image build the per-arch container image @@ -371,6 +373,18 @@ TCC_CC_START := build/$(TCC_CC_ARCH)/tcc-cc/start.o TCC_CC_MEM := build/$(TCC_CC_ARCH)/tcc-cc/mem.o TCC_CC_TCC_INCLUDE := build/tcc/ARM64/tcc-0.9.26-1147-gee75a10c/include +# tcc-libc suite supports: tcc-boot2 (built by cc.scm) compiles +# mes-libc into libc.o, then for each tests/cc-libc fixture, links +# fixture + start.o + sys_stubs.o + mem.o + libc.o into a runnable +# ELF. End-to-end exercise of "tcc as a real compiler" against the +# same libc the cc.scm + libc.P1pp pipeline uses. +TCC_LIBC_ARCH := aarch64 +TCC_LIBC_DIR := build/$(TCC_LIBC_ARCH)/tcc-libc +TCC_LIBC_START := $(TCC_LIBC_DIR)/start.o +TCC_LIBC_SYS_STUBS := $(TCC_LIBC_DIR)/sys_stubs.o +TCC_LIBC_MEM := $(TCC_LIBC_DIR)/mem.o +TCC_LIBC_LIBC := $(TCC_LIBC_DIR)/libc.o + $(TCC_CC_START): tcc-cc/$(TCC_CC_ARCH)/start.S mkdir -p $(@D) $(HOST_CC) -target aarch64-linux-gnu -c -o $@ -x assembler $< @@ -390,6 +404,47 @@ $(TCC_CC_MEM): tcc-cc/mem.c \ build/$(TCC_CC_ARCH)/tcc-boot2/tcc-boot2 \ -nostdlib -I $(TCC_CC_TCC_INCLUDE) -c -o $@ $< +# --- tcc-libc test harness inputs ---------------------------------------- +# +# start.o threads __libc_init in front of main and exits with main's +# return value. sys_stubs.o implements the libp1pp-shaped sys_* +# wrappers via raw aarch64 svc; both are produced by the host +# cross-toolchain (no asm support in tcc-boot2's aarch64 codegen). 
+$(TCC_LIBC_START): tcc-libc/$(TCC_LIBC_ARCH)/start.S + mkdir -p $(@D) + $(HOST_CC) -target aarch64-linux-gnu -c -o $@ -x assembler $< + +$(TCC_LIBC_SYS_STUBS): tcc-libc/$(TCC_LIBC_ARCH)/sys_stubs.S + mkdir -p $(@D) + $(HOST_CC) -target aarch64-linux-gnu -c -o $@ -x assembler $< + +# tcc-libc reuses tcc-cc/mem.c for the compiler-builtin mem* runtime, +# but rebuilds it under build/$(TCC_LIBC_ARCH)/tcc-libc/ to keep the +# suite's outputs cleanly separated from the tcc-cc tree. +$(TCC_LIBC_MEM): tcc-cc/mem.c \ + build/$(TCC_LIBC_ARCH)/tcc-boot2/tcc-boot2 \ + build/$(TCC_LIBC_ARCH)/.image + mkdir -p $(@D) + $(call PODMAN,$(TCC_LIBC_ARCH)) \ + build/$(TCC_LIBC_ARCH)/tcc-boot2/tcc-boot2 \ + -nostdlib -I $(TCC_CC_TCC_INCLUDE) -c -o $@ $< + +# libc.o: tcc-boot2 compiles the same flatten output cc.scm consumes. +# `-include tcc-libc/va_list_shim.h` aliases gcc's __builtin_va_* +# names onto tcc's native va_* macros (tcc has no notion of a +# __builtin_va_list keyword); the shim is the only piece glueing +# the host-preprocessed flatten to tcc-boot2's frontend. 
+$(TCC_LIBC_LIBC): build/$(TCC_LIBC_ARCH)/vendor/mes-libc/libc.flat.c \ + tcc-libc/va_list_shim.h \ + build/$(TCC_LIBC_ARCH)/tcc-boot2/tcc-boot2 \ + build/$(TCC_LIBC_ARCH)/.image + mkdir -p $(@D) + $(call PODMAN,$(TCC_LIBC_ARCH)) \ + build/$(TCC_LIBC_ARCH)/tcc-boot2/tcc-boot2 \ + -nostdlib -I $(TCC_CC_TCC_INCLUDE) \ + -include tcc-libc/va_list_shim.h \ + -c -o $@ $< + # --- Native tools (opt-in dev-loop helpers) ------------------------------- NATIVE_TOOLS := build/native-tools/M1 build/native-tools/hex2 \ @@ -479,6 +534,10 @@ TEST_TCC_CC_DEPS := build/$(TCC_CC_ARCH)/.image \ build/$(TCC_CC_ARCH)/tcc-boot2/tcc-boot2 \ $(TCC_CC_START) $(TCC_CC_MEM) +TEST_TCC_LIBC_DEPS := build/$(TCC_LIBC_ARCH)/.image \ + build/$(TCC_LIBC_ARCH)/tcc-boot2/tcc-boot2 \ + $(TCC_LIBC_START) $(TCC_LIBC_SYS_STUBS) $(TCC_LIBC_MEM) $(TCC_LIBC_LIBC) + test: ifeq ($(SUITE),) @$(MAKE) --no-print-directory test SUITE=m1pp @@ -517,6 +576,12 @@ else ifeq ($(SUITE),tcc-cc) fi @$(MAKE) --no-print-directory ARCH=$(TCC_CC_ARCH) $(TEST_TCC_CC_DEPS) sh scripts/run-tests.sh --suite=tcc-cc --arch=$(TCC_CC_ARCH) $(NAMES) +else ifeq ($(SUITE),tcc-libc) + @if [ -n "$(ARCH_FILTER)" ] && [ "$(ARCH_FILTER)" != "$(TCC_LIBC_ARCH)" ]; then \ + echo "tcc-libc currently supports ARCH=$(TCC_LIBC_ARCH) only" >&2; exit 2; \ + fi + @$(MAKE) --no-print-directory ARCH=$(TCC_LIBC_ARCH) $(TEST_TCC_LIBC_DEPS) + sh scripts/run-tests.sh --suite=tcc-libc --arch=$(TCC_LIBC_ARCH) $(NAMES) else - @echo "unknown SUITE='$(SUITE)' (m1pp | p1 | scheme1 | cc-util | cc-lex | cc-pp | cc-cg | cc | cc-libc | cc-ext | tcc-cc)" >&2; exit 2 + @echo "unknown SUITE='$(SUITE)' (m1pp | p1 | scheme1 | cc-util | cc-lex | cc-pp | cc-cg | cc | cc-libc | cc-ext | tcc-cc | tcc-libc)" >&2; exit 2 endif diff --git a/scripts/boot-run-tests.sh b/scripts/boot-run-tests.sh @@ -14,7 +14,7 @@ ## host preflights lint and passes the explicit kept list down. 
## ## Env: ARCH=aarch64|amd64|riscv64 -## Usage: boot-run-tests.sh --suite=<m1pp|p1|scheme1|cc-util|cc-lex|cc-pp|cc-cg|cc|cc-libc|cc-ext|tcc-cc> [name ...] +## Usage: boot-run-tests.sh --suite=<m1pp|p1|scheme1|cc-util|cc-lex|cc-pp|cc-cg|cc|cc-libc|cc-ext|tcc-cc|tcc-libc> [name ...] set -eu @@ -35,7 +35,7 @@ while [ "$#" -gt 0 ]; do done case "$SUITE" in - m1pp|p1|scheme1|cc-util|cc-lex|cc-pp|cc-cg|cc|cc-libc|cc-ext|tcc-cc) ;; + m1pp|p1|scheme1|cc-util|cc-lex|cc-pp|cc-cg|cc|cc-libc|cc-ext|tcc-cc|tcc-libc) ;; "") echo "$0: --suite required" >&2; exit 2 ;; *) echo "$0: unknown suite '$SUITE'" >&2; exit 2 ;; esac @@ -669,6 +669,81 @@ run_tcc_cc_suite() { done } +## --- tcc-libc suite ----------------------------------------------------- +## +## End-to-end "tcc as a real compiler" check: tcc-boot2 (built by cc.scm) +## compiles the vendored mes-libc into libc.o, then for each tests/cc-libc +## fixture, tcc-boot2 compiles + links the fixture against +## start.o aarch64 entry stub: __libc_init then main then exit +## sys_stubs.o Linux aarch64 svc-based sys_* implementations +## mem.o mem* compiler-builtin runtime (memcpy/memmove/memset/memcmp) +## libc.o tcc-boot2-built mes-libc +## and runs the resulting ELF natively in the aarch64 container. +run_tcc_libc_suite() { + if [ "$ARCH" != "aarch64" ]; then + echo " FAIL [$ARCH] tcc-libc" + echo " tcc-libc currently supports ARCH=aarch64 only" >&2 + return + fi + + tcc=build/$ARCH/tcc-boot2/tcc-boot2 + start=build/$ARCH/tcc-libc/start.o + sys_stubs=build/$ARCH/tcc-libc/sys_stubs.o + mem=build/$ARCH/tcc-libc/mem.o + libc=build/$ARCH/tcc-libc/libc.o + tcc_include=build/tcc/ARM64/tcc-0.9.26-1147-gee75a10c/include + for f in "$tcc" "$start" "$sys_stubs" "$mem" "$libc"; do + if [ ! -e "$f" ]; then + echo " FAIL [$ARCH] tcc-libc" + echo " missing $f -- run 'make test SUITE=tcc-libc ARCH=$ARCH'" >&2 + return + fi + done + if ! 
"$tcc" -version 2>/dev/null | grep 'AArch64' >/dev/null; then + echo " FAIL [$ARCH] tcc-libc" + echo " $tcc is not an AArch64-targeted tcc; rebuild with TCC_TARGET=ARM64" >&2 + return + fi + + [ -n "$NAMES" ] || NAMES=$(discover tests/cc-libc c) + for name in $NAMES; do + src=tests/cc-libc/$name.c + [ -e "$src" ] || { echo " SKIP $name (no .c)"; continue; } + if [ -e tests/cc-libc/$name.expected ]; then + expout=$(cat tests/cc-libc/$name.expected) + else + expout= + fi + if [ -e tests/cc-libc/$name.expected-exit ]; then + expexit=$(cat tests/cc-libc/$name.expected-exit) + else + expexit=0 + fi + + elf=build/$ARCH/tests/tcc-libc/$name + workdir=build/$ARCH/.work/tests/tcc-libc/$name + label="[$ARCH] tcc-libc/$name" + mkdir -p "$(dirname "$elf")" "$workdir" + + tcc_log=$workdir/tcc.log + if ! "$tcc" -nostdlib -I "$tcc_include" \ + "$start" "$sys_stubs" "$mem" "$libc" "$src" -o "$elf" \ + >"$tcc_log" 2>&1; then + fail "$label" "tcc compile/link failed:" "$tcc_log" + continue + fi + + tmp=$(mktemp) + if "./$elf" >"$tmp" 2>&1; then + act_exit=0 + else + act_exit=$? + fi + act_out=$(cat "$tmp"); rm -f "$tmp" + _cc_check "$label" "$expout" "$expexit" "$act_out" "$act_exit" + done +} + case "$SUITE" in m1pp) run_m1pp_suite ;; p1) run_p1_suite ;; @@ -681,4 +756,5 @@ case "$SUITE" in cc-libc) run_cc_libc_suite ;; cc-ext) run_cc_ext_suite ;; tcc-cc) run_tcc_cc_suite ;; + tcc-libc) run_tcc_libc_suite ;; esac diff --git a/scripts/run-tests.sh b/scripts/run-tests.sh @@ -34,6 +34,9 @@ ## compile/assemble/runtime error ## counts as FAIL. ## tcc-cc tests/cc/<name>.c — tcc-boot2 -> ELF -> run. +## tcc-libc tests/cc-libc/<name>.c — tcc-boot2 builds mes-libc into +## libc.o, then compiles + links +## each fixture against it -> run. ## ## All three arches by default; --arch restricts to one. 
## @@ -59,8 +62,8 @@ while [ "$#" -gt 0 ]; do done case "$SUITE" in - m1pp|p1|scheme1|cc-util|cc-lex|cc-pp|cc-cg|cc|cc-libc|cc-ext|tcc-cc) ;; - "") echo "$0: --suite required (m1pp | p1 | scheme1 | cc-util | cc-lex | cc-pp | cc-cg | cc | cc-libc | cc-ext | tcc-cc)" >&2; exit 2 ;; + m1pp|p1|scheme1|cc-util|cc-lex|cc-pp|cc-cg|cc|cc-libc|cc-ext|tcc-cc|tcc-libc) ;; + "") echo "$0: --suite required (m1pp | p1 | scheme1 | cc-util | cc-lex | cc-pp | cc-cg | cc | cc-libc | cc-ext | tcc-cc | tcc-libc)" >&2; exit 2 ;; *) echo "$0: unknown suite '$SUITE'" >&2; exit 2 ;; esac diff --git a/tcc-libc/aarch64/start.S b/tcc-libc/aarch64/start.S @@ -0,0 +1,16 @@ +/* tcc-libc entry stub — same role as P1/entry-libc.P1pp's p1_main: + * call __libc_init(argc, argv) so `environ` is set, then main(argc, + * argv), then exit with main's return value. Linux aarch64 brings + * argc at [sp] and argv at sp+8 on entry. */ + + .globl _start +_start: + ldr x0, [sp] /* argc */ + add x1, sp, #8 /* argv */ + stp x0, x1, [sp, #-16]! /* save across __libc_init call */ + bl __libc_init + ldp x0, x1, [sp], #16 + bl main + /* main's return is in x0 — feed it to exit(2). */ + mov x8, #93 /* NR_exit */ + svc #0 diff --git a/tcc-libc/aarch64/sys_stubs.S b/tcc-libc/aarch64/sys_stubs.S @@ -0,0 +1,67 @@ +/* Linux aarch64 syscall stubs matching the sys_* labels libp1pp + * provides (see P1/P1pp.P1pp). boot2-syscall.c declares them as + * extern long sys_<name>(...) and the mes-libc layers (read/write/ + * open/close/lseek/sbrk/unlink/_exit) call them. The cc.scm + libp1pp + * pipeline resolves these labels against P1pp.P1pp's wrappers; the + * tcc-libc suite links against this object instead since tcc-built + * binaries don't catm libp1pp. + * + * Linux aarch64 syscall ABI: nr in x8, args in x0-x5, return in x0. + * + * sys_open/sys_unlink shuffle args because Linux's openat/unlinkat + * take an AT_FDCWD prefix that the libp1pp-compatible wrappers + * don't surface to callers. 
+ */ + + .globl sys_read, sys_write, sys_close, sys_open + .globl sys_lseek, sys_brk, sys_unlink, sys_exit + +sys_read: + mov x8, #63 + svc #0 + ret + +sys_write: + mov x8, #64 + svc #0 + ret + +sys_close: + mov x8, #57 + svc #0 + ret + +sys_open: + /* (path, flags, mode) -> openat(AT_FDCWD, path, flags, mode) */ + mov x3, x2 /* mode */ + mov x2, x1 /* flags */ + mov x1, x0 /* path */ + mov x0, #-100 /* AT_FDCWD */ + mov x8, #56 + svc #0 + ret + +sys_lseek: + mov x8, #62 + svc #0 + ret + +sys_brk: + mov x8, #214 + svc #0 + ret + +sys_unlink: + /* (path) -> unlinkat(AT_FDCWD, path, 0) */ + mov x1, x0 /* path */ + mov x0, #-100 /* AT_FDCWD */ + mov x2, #0 /* flags */ + mov x8, #35 + svc #0 + ret + +sys_exit: + mov x8, #93 + svc #0 + /* unreachable */ + b . diff --git a/tcc-libc/va_list_shim.h b/tcc-libc/va_list_shim.h @@ -0,0 +1,25 @@ +/* tcc-libc va_list shim — pre-included when tcc-boot2 compiles + * libc.flat.c (or any other host-preprocessed TU under our boot2 + * stdarg.h shim). The flatten step routes `va_list` through + * `__builtin_va_list`, but stock tcc's frontend does not recognize + * that token as a type — tcc's <stdarg.h> defines `va_list` as + * `__va_list_struct[1]`. Make `__builtin_va_list` an alias for the + * same array type so libc.flat.c's + * + * typedef __builtin_va_list va_list; + * + * collapses to a (legal) duplicate typedef of the existing + * tcc-stdlib `va_list`. The `__builtin_va_*` macros in the flatten + * are direct tcc intrinsics; they do not need a shim. */ +#include <stdarg.h> +typedef va_list __builtin_va_list; + +/* Likewise alias the __builtin_va_* call sites the flatten leaves + * behind onto tcc's macro-driven va_* implementations. tcc has its + * own native intrinsics underneath va_start / va_arg / va_end (see + * tcc/include/stdarg.h); these aliases just reach them by the + * gcc-conformant builtin spelling. 
*/ +#define __builtin_va_start(ap, last) va_start(ap, last) +#define __builtin_va_end(ap) va_end(ap) +#define __builtin_va_arg(ap, type) va_arg(ap, type) +#define __builtin_va_copy(dst, src) va_copy(dst, src)