commit e0f434c481d6d9dd7ab2571034a998084a058fe8
parent 39fab2ed7106d643242b35b0fb553b5f659eb5d9
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sun, 3 May 2026 22:53:04 -0700
tcc-libc: tcc-boot2 builds mes-libc, runs cc-libc fixtures
End-to-end exercise of tcc-boot2 as a real compiler. The new suite
has tcc-boot2 (built by cc.scm) compile vendor/mes-libc/libc.flat.c
into libc.o, then for each tests/cc-libc fixture link
start.o + sys_stubs.o + mem.o + libc.o + fixture.c into a runnable
ELF and execute it natively in the aarch64 container.
New harness pieces under tcc-libc/:
- aarch64/start.S: entry stub, calls __libc_init then main
- aarch64/sys_stubs.S: Linux aarch64 svc-based sys_* (libp1pp shape)
- va_list_shim.h: -include header that aliases the host-flatten's
__builtin_va_list / __builtin_va_* tokens onto tcc's native
va_list / va_* macros (tcc has no __builtin_va_list keyword)
mem.o is reused from tcc-cc/mem.c. start.o and sys_stubs.o are
cross-assembled by the host (tcc-boot2's aarch64 codegen has no
inline-asm support).
17 passed, 0 failed.
Diffstat:
6 files changed, 257 insertions(+), 5 deletions(-)
diff --git a/Makefile b/Makefile
@@ -25,6 +25,8 @@
# make test SUITE=p1 ARCH=amd64 p1 suite, one arch
# make test SUITE=scheme1 scheme1 .scm fixtures, every arch
# make test SUITE=tcc-cc tcc-boot2 compiles tests/cc, aarch64
+# make test SUITE=tcc-libc tcc-boot2 builds mes-libc and runs
+# tests/cc-libc against it, aarch64
# make test SUITE=cc-ext vendored c-testsuite (broad coverage,
# opt-in: not part of `make test`)
# make image build the per-arch container image
@@ -371,6 +373,18 @@ TCC_CC_START := build/$(TCC_CC_ARCH)/tcc-cc/start.o
TCC_CC_MEM := build/$(TCC_CC_ARCH)/tcc-cc/mem.o
TCC_CC_TCC_INCLUDE := build/tcc/ARM64/tcc-0.9.26-1147-gee75a10c/include
+# tcc-libc suite supports: tcc-boot2 (built by cc.scm) compiles
+# mes-libc into libc.o, then for each tests/cc-libc fixture, links
+# fixture + start.o + sys_stubs.o + mem.o + libc.o into a runnable
+# ELF. End-to-end exercise of "tcc as a real compiler" against the
+# same libc the cc.scm + libc.P1pp pipeline uses.
+TCC_LIBC_ARCH := aarch64
+TCC_LIBC_DIR := build/$(TCC_LIBC_ARCH)/tcc-libc
+TCC_LIBC_START := $(TCC_LIBC_DIR)/start.o
+TCC_LIBC_SYS_STUBS := $(TCC_LIBC_DIR)/sys_stubs.o
+TCC_LIBC_MEM := $(TCC_LIBC_DIR)/mem.o
+TCC_LIBC_LIBC := $(TCC_LIBC_DIR)/libc.o
+
$(TCC_CC_START): tcc-cc/$(TCC_CC_ARCH)/start.S
mkdir -p $(@D)
$(HOST_CC) -target aarch64-linux-gnu -c -o $@ -x assembler $<
@@ -390,6 +404,47 @@ $(TCC_CC_MEM): tcc-cc/mem.c \
build/$(TCC_CC_ARCH)/tcc-boot2/tcc-boot2 \
-nostdlib -I $(TCC_CC_TCC_INCLUDE) -c -o $@ $<
+# --- tcc-libc test harness inputs ----------------------------------------
+#
+# start.o threads __libc_init in front of main and exits with main's
+# return value. sys_stubs.o implements the libp1pp-shaped sys_*
+# wrappers via raw aarch64 svc; both are produced by the host
+# cross-toolchain (no asm support in tcc-boot2's aarch64 codegen).
+$(TCC_LIBC_START): tcc-libc/$(TCC_LIBC_ARCH)/start.S
+ mkdir -p $(@D)
+ $(HOST_CC) -target aarch64-linux-gnu -c -o $@ -x assembler $<
+
+$(TCC_LIBC_SYS_STUBS): tcc-libc/$(TCC_LIBC_ARCH)/sys_stubs.S
+ mkdir -p $(@D)
+ $(HOST_CC) -target aarch64-linux-gnu -c -o $@ -x assembler $<
+
+# tcc-libc reuses tcc-cc/mem.c for the compiler-builtin mem* runtime,
+# but rebuilds it under build/$(TCC_LIBC_ARCH)/tcc-libc/ to keep the
+# suite's outputs cleanly separated from the tcc-cc tree.
+$(TCC_LIBC_MEM): tcc-cc/mem.c \
+ build/$(TCC_LIBC_ARCH)/tcc-boot2/tcc-boot2 \
+ build/$(TCC_LIBC_ARCH)/.image
+ mkdir -p $(@D)
+ $(call PODMAN,$(TCC_LIBC_ARCH)) \
+ build/$(TCC_LIBC_ARCH)/tcc-boot2/tcc-boot2 \
+ -nostdlib -I $(TCC_CC_TCC_INCLUDE) -c -o $@ $<
+
+# libc.o: tcc-boot2 compiles the same flatten output cc.scm consumes.
+# `-include tcc-libc/va_list_shim.h` aliases gcc's __builtin_va_*
+# names onto tcc's native va_* macros (tcc has no notion of a
+# __builtin_va_list keyword); the shim is the only piece glueing
+# the host-preprocessed flatten to tcc-boot2's frontend.
+$(TCC_LIBC_LIBC): build/$(TCC_LIBC_ARCH)/vendor/mes-libc/libc.flat.c \
+ tcc-libc/va_list_shim.h \
+ build/$(TCC_LIBC_ARCH)/tcc-boot2/tcc-boot2 \
+ build/$(TCC_LIBC_ARCH)/.image
+ mkdir -p $(@D)
+ $(call PODMAN,$(TCC_LIBC_ARCH)) \
+ build/$(TCC_LIBC_ARCH)/tcc-boot2/tcc-boot2 \
+ -nostdlib -I $(TCC_CC_TCC_INCLUDE) \
+ -include tcc-libc/va_list_shim.h \
+ -c -o $@ $<
+
# --- Native tools (opt-in dev-loop helpers) -------------------------------
NATIVE_TOOLS := build/native-tools/M1 build/native-tools/hex2 \
@@ -479,6 +534,10 @@ TEST_TCC_CC_DEPS := build/$(TCC_CC_ARCH)/.image \
build/$(TCC_CC_ARCH)/tcc-boot2/tcc-boot2 \
$(TCC_CC_START) $(TCC_CC_MEM)
+TEST_TCC_LIBC_DEPS := build/$(TCC_LIBC_ARCH)/.image \
+ build/$(TCC_LIBC_ARCH)/tcc-boot2/tcc-boot2 \
+ $(TCC_LIBC_START) $(TCC_LIBC_SYS_STUBS) $(TCC_LIBC_MEM) $(TCC_LIBC_LIBC)
+
test:
ifeq ($(SUITE),)
@$(MAKE) --no-print-directory test SUITE=m1pp
@@ -517,6 +576,12 @@ else ifeq ($(SUITE),tcc-cc)
fi
@$(MAKE) --no-print-directory ARCH=$(TCC_CC_ARCH) $(TEST_TCC_CC_DEPS)
sh scripts/run-tests.sh --suite=tcc-cc --arch=$(TCC_CC_ARCH) $(NAMES)
+else ifeq ($(SUITE),tcc-libc)
+ @if [ -n "$(ARCH_FILTER)" ] && [ "$(ARCH_FILTER)" != "$(TCC_LIBC_ARCH)" ]; then \
+ echo "tcc-libc currently supports ARCH=$(TCC_LIBC_ARCH) only" >&2; exit 2; \
+ fi
+ @$(MAKE) --no-print-directory ARCH=$(TCC_LIBC_ARCH) $(TEST_TCC_LIBC_DEPS)
+ sh scripts/run-tests.sh --suite=tcc-libc --arch=$(TCC_LIBC_ARCH) $(NAMES)
else
- @echo "unknown SUITE='$(SUITE)' (m1pp | p1 | scheme1 | cc-util | cc-lex | cc-pp | cc-cg | cc | cc-libc | cc-ext | tcc-cc)" >&2; exit 2
+ @echo "unknown SUITE='$(SUITE)' (m1pp | p1 | scheme1 | cc-util | cc-lex | cc-pp | cc-cg | cc | cc-libc | cc-ext | tcc-cc | tcc-libc)" >&2; exit 2
endif
diff --git a/scripts/boot-run-tests.sh b/scripts/boot-run-tests.sh
@@ -14,7 +14,7 @@
## host preflights lint and passes the explicit kept list down.
##
## Env: ARCH=aarch64|amd64|riscv64
-## Usage: boot-run-tests.sh --suite=<m1pp|p1|scheme1|cc-util|cc-lex|cc-pp|cc-cg|cc|cc-libc|cc-ext|tcc-cc> [name ...]
+## Usage: boot-run-tests.sh --suite=<m1pp|p1|scheme1|cc-util|cc-lex|cc-pp|cc-cg|cc|cc-libc|cc-ext|tcc-cc|tcc-libc> [name ...]
set -eu
@@ -35,7 +35,7 @@ while [ "$#" -gt 0 ]; do
done
case "$SUITE" in
- m1pp|p1|scheme1|cc-util|cc-lex|cc-pp|cc-cg|cc|cc-libc|cc-ext|tcc-cc) ;;
+ m1pp|p1|scheme1|cc-util|cc-lex|cc-pp|cc-cg|cc|cc-libc|cc-ext|tcc-cc|tcc-libc) ;;
"") echo "$0: --suite required" >&2; exit 2 ;;
*) echo "$0: unknown suite '$SUITE'" >&2; exit 2 ;;
esac
@@ -669,6 +669,81 @@ run_tcc_cc_suite() {
done
}
+## --- tcc-libc suite -----------------------------------------------------
+##
+## End-to-end "tcc as a real compiler" check: tcc-boot2 (built by cc.scm)
+## compiles the vendored mes-libc into libc.o, then for each tests/cc-libc
+## fixture, tcc-boot2 compiles + links the fixture against
+## start.o aarch64 entry stub: __libc_init then main then exit
+## sys_stubs.o Linux aarch64 svc-based sys_* implementations
+## mem.o mem* compiler-builtin runtime (memcpy/memmove/memset/memcmp)
+## libc.o tcc-boot2-built mes-libc
+## and runs the resulting ELF natively in the aarch64 container.
+run_tcc_libc_suite() {
+ if [ "$ARCH" != "aarch64" ]; then
+ echo " FAIL [$ARCH] tcc-libc"
+ echo " tcc-libc currently supports ARCH=aarch64 only" >&2
+ return
+ fi
+
+ tcc=build/$ARCH/tcc-boot2/tcc-boot2
+ start=build/$ARCH/tcc-libc/start.o
+ sys_stubs=build/$ARCH/tcc-libc/sys_stubs.o
+ mem=build/$ARCH/tcc-libc/mem.o
+ libc=build/$ARCH/tcc-libc/libc.o
+ tcc_include=build/tcc/ARM64/tcc-0.9.26-1147-gee75a10c/include
+ for f in "$tcc" "$start" "$sys_stubs" "$mem" "$libc"; do
+ if [ ! -e "$f" ]; then
+ echo " FAIL [$ARCH] tcc-libc"
+ echo " missing $f -- run 'make test SUITE=tcc-libc ARCH=$ARCH'" >&2
+ return
+ fi
+ done
+ if ! "$tcc" -version 2>/dev/null | grep 'AArch64' >/dev/null; then
+ echo " FAIL [$ARCH] tcc-libc"
+ echo " $tcc is not an AArch64-targeted tcc; rebuild with TCC_TARGET=ARM64" >&2
+ return
+ fi
+
+ [ -n "$NAMES" ] || NAMES=$(discover tests/cc-libc c)
+ for name in $NAMES; do
+ src=tests/cc-libc/$name.c
+ [ -e "$src" ] || { echo " SKIP $name (no .c)"; continue; }
+ if [ -e tests/cc-libc/$name.expected ]; then
+ expout=$(cat tests/cc-libc/$name.expected)
+ else
+ expout=
+ fi
+ if [ -e tests/cc-libc/$name.expected-exit ]; then
+ expexit=$(cat tests/cc-libc/$name.expected-exit)
+ else
+ expexit=0
+ fi
+
+ elf=build/$ARCH/tests/tcc-libc/$name
+ workdir=build/$ARCH/.work/tests/tcc-libc/$name
+ label="[$ARCH] tcc-libc/$name"
+ mkdir -p "$(dirname "$elf")" "$workdir"
+
+ tcc_log=$workdir/tcc.log
+ if ! "$tcc" -nostdlib -I "$tcc_include" \
+ "$start" "$sys_stubs" "$mem" "$libc" "$src" -o "$elf" \
+ >"$tcc_log" 2>&1; then
+ fail "$label" "tcc compile/link failed:" "$tcc_log"
+ continue
+ fi
+
+ tmp=$(mktemp)
+ if "./$elf" >"$tmp" 2>&1; then
+ act_exit=0
+ else
+ act_exit=$?
+ fi
+ act_out=$(cat "$tmp"); rm -f "$tmp"
+ _cc_check "$label" "$expout" "$expexit" "$act_out" "$act_exit"
+ done
+}
+
case "$SUITE" in
m1pp) run_m1pp_suite ;;
p1) run_p1_suite ;;
@@ -681,4 +756,5 @@ case "$SUITE" in
cc-libc) run_cc_libc_suite ;;
cc-ext) run_cc_ext_suite ;;
tcc-cc) run_tcc_cc_suite ;;
+ tcc-libc) run_tcc_libc_suite ;;
esac
diff --git a/scripts/run-tests.sh b/scripts/run-tests.sh
@@ -34,6 +34,9 @@
## compile/assemble/runtime error
## counts as FAIL.
## tcc-cc tests/cc/<name>.c — tcc-boot2 -> ELF -> run.
+## tcc-libc tests/cc-libc/<name>.c — tcc-boot2 builds mes-libc into
+## libc.o, then compiles + links
+## each fixture against it -> run.
##
## All three arches by default; --arch restricts to one.
##
@@ -59,8 +62,8 @@ while [ "$#" -gt 0 ]; do
done
case "$SUITE" in
- m1pp|p1|scheme1|cc-util|cc-lex|cc-pp|cc-cg|cc|cc-libc|cc-ext|tcc-cc) ;;
- "") echo "$0: --suite required (m1pp | p1 | scheme1 | cc-util | cc-lex | cc-pp | cc-cg | cc | cc-libc | cc-ext | tcc-cc)" >&2; exit 2 ;;
+ m1pp|p1|scheme1|cc-util|cc-lex|cc-pp|cc-cg|cc|cc-libc|cc-ext|tcc-cc|tcc-libc) ;;
+ "") echo "$0: --suite required (m1pp | p1 | scheme1 | cc-util | cc-lex | cc-pp | cc-cg | cc | cc-libc | cc-ext | tcc-cc | tcc-libc)" >&2; exit 2 ;;
*) echo "$0: unknown suite '$SUITE'" >&2; exit 2 ;;
esac
diff --git a/tcc-libc/aarch64/start.S b/tcc-libc/aarch64/start.S
@@ -0,0 +1,16 @@
+/* tcc-libc entry stub — same role as P1/entry-libc.P1pp's p1_main:
+ * call __libc_init(argc, argv) so `environ` is set, then main(argc,
+ * argv), then exit with main's return value. Linux aarch64 brings
+ * argc at [sp] and argv at sp+8 on entry. */
+
+ .globl _start
+_start:
+ ldr x0, [sp] /* argc */
+ add x1, sp, #8 /* argv */
+ stp x0, x1, [sp, #-16]! /* save across __libc_init call */
+ bl __libc_init
+ ldp x0, x1, [sp], #16
+ bl main
+ /* main's return is in x0 — feed it to exit(2). */
+ mov x8, #93 /* NR_exit */
+ svc #0
diff --git a/tcc-libc/aarch64/sys_stubs.S b/tcc-libc/aarch64/sys_stubs.S
@@ -0,0 +1,67 @@
+/* Linux aarch64 syscall stubs matching the sys_* labels libp1pp
+ * provides (see P1/P1pp.P1pp). boot2-syscall.c declares them as
+ * extern long sys_<name>(...) and the mes-libc layers (read/write/
+ * open/close/lseek/sbrk/unlink/_exit) call them. The cc.scm + libp1pp
+ * pipeline resolves these labels against P1pp.P1pp's wrappers; the
+ * tcc-libc suite links against this object instead since tcc-built
+ * binaries don't catm libp1pp.
+ *
+ * Linux aarch64 syscall ABI: nr in x8, args in x0-x5, return in x0.
+ *
+ * sys_open/sys_unlink shuffle args because Linux's openat/unlinkat
+ * take an AT_FDCWD prefix that the libp1pp-compatible wrappers
+ * don't surface to callers.
+ */
+
+ .globl sys_read, sys_write, sys_close, sys_open
+ .globl sys_lseek, sys_brk, sys_unlink, sys_exit
+
+sys_read:
+ mov x8, #63
+ svc #0
+ ret
+
+sys_write:
+ mov x8, #64
+ svc #0
+ ret
+
+sys_close:
+ mov x8, #57
+ svc #0
+ ret
+
+sys_open:
+ /* (path, flags, mode) -> openat(AT_FDCWD, path, flags, mode) */
+ mov x3, x2 /* mode */
+ mov x2, x1 /* flags */
+ mov x1, x0 /* path */
+ mov x0, #-100 /* AT_FDCWD */
+ mov x8, #56
+ svc #0
+ ret
+
+sys_lseek:
+ mov x8, #62
+ svc #0
+ ret
+
+sys_brk:
+ mov x8, #214
+ svc #0
+ ret
+
+sys_unlink:
+ /* (path) -> unlinkat(AT_FDCWD, path, 0) */
+ mov x1, x0 /* path */
+ mov x0, #-100 /* AT_FDCWD */
+ mov x2, #0 /* flags */
+ mov x8, #35
+ svc #0
+ ret
+
+sys_exit:
+ mov x8, #93
+ svc #0
+ /* unreachable */
+ b .
diff --git a/tcc-libc/va_list_shim.h b/tcc-libc/va_list_shim.h
@@ -0,0 +1,25 @@
+/* tcc-libc va_list shim — pre-included when tcc-boot2 compiles
+ * libc.flat.c (or any other host-preprocessed TU under our boot2
+ * stdarg.h shim). The flatten step routes `va_list` through
+ * `__builtin_va_list`, but stock tcc's frontend does not recognize
+ * that token as a type — tcc's <stdarg.h> defines `va_list` as
+ * `__va_list_struct[1]`. Make `__builtin_va_list` an alias for the
+ * same array type so libc.flat.c's
+ *
+ * typedef __builtin_va_list va_list;
+ *
+ * collapses to a (legal) duplicate typedef of the existing
+ * tcc-stdlib `va_list`. The `__builtin_va_*` macros in the flatten
+ * are direct tcc intrinsics; they do not need a shim. */
+#include <stdarg.h>
+typedef va_list __builtin_va_list;
+
+/* Likewise alias the __builtin_va_* call sites the flatten leaves
+ * behind onto tcc's macro-driven va_* implementations. tcc has its
+ * own native intrinsics underneath va_start / va_arg / va_end (see
+ * tcc/include/stdarg.h); these aliases just reach them by the
+ * gcc-conformant builtin spelling. */
+#define __builtin_va_start(ap, last) va_start(ap, last)
+#define __builtin_va_end(ap) va_end(ap)
+#define __builtin_va_arg(ap, type) va_arg(ap, type)
+#define __builtin_va_copy(dst, src) va_copy(dst, src)