commit 4891b5e0b39db967260da5742589a3e6bfab01f5
parent cade790ded0af7fc59a2318afa8f63f3dd226be3
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Wed, 29 Apr 2026 20:21:56 -0700
tcc-gcc: stock-gcc sanity-check harness
Sanity-check sibling of tcc-boot2: compiles the same tcc.flat.c +
libc.flat.c through stock gcc + libgcc inside an Alpine container,
with a hand-rolled _start (mirroring P1/entry-libc.P1pp) and sys_*
syscall stubs (matching libp1pp.P1pp's labelled entries). If
tcc-gcc -version runs and cc.scm-built tcc-boot2 doesn't, the bug
is downstream of the C source — i.e. in cc.scm or P1.
Wired as `make tcc-gcc` (aarch64-only today; bring up other arches
by adding tcc-gcc/<arch>/{start.S,sys_stubs.c}). Image is
boot2-alpine-gcc:<arch>, built once via scripts/Containerfile.alpine-gcc.
Diffstat:
5 files changed, 181 insertions(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
@@ -18,6 +18,8 @@
# make cc catm the cc compiler source for ARCH
# make tcc-flat flatten upstream tcc.c into one TU
# make tcc-boot2 cc.scm + P1pp pipeline → tcc-boot2 ELF
+# make tcc-gcc same flatten output, built with stock gcc
+# (sanity check; ARCH=aarch64 only)
# make test every suite, every arch
# make test SUITE=m1pp m1pp suite, every arch
# make test SUITE=p1 ARCH=amd64 p1 suite, one arch
@@ -68,7 +70,7 @@ PODMAN = podman run --rm --pull=never --platform $(PLATFORM_$(1)) \
# --- Targets --------------------------------------------------------------
.PHONY: all m1pp pokem scheme1 cc test image tools tables \
- tools-native cloc clean help tcc-boot2 tcc-flat
+ tools-native cloc clean help tcc-boot2 tcc-flat tcc-gcc
all: m1pp pokem
@@ -281,6 +283,41 @@ $(TCC_BOOT2_BINS): build/%/tcc-boot2/tcc-boot2: \
P1/entry-libc.P1pp build/$*/vendor/mes-libc/libc.P1pp \
$< P1/elf-end.P1pp
+# --- tcc-gcc: same flatten, stock gcc -------------------------------------
+#
+# Sanity-check sibling of tcc-boot2. Compiles the *same* tcc.flat.c +
+# libc.flat.c through stock gcc + libgcc inside an Alpine image, with
+# our hand-rolled _start / sys_* shim (tcc-gcc/<arch>/) replacing
+# musl's crt0 + libc. If tcc-gcc runs and tcc-boot2 doesn't, the bug
+# is downstream of the C source — i.e. in cc.scm or the P1 pipeline.
+#
+# aarch64 only today; bring up other arches by adding a tcc-gcc/<arch>/
+# variant of start.S + sys_stubs.c. The cc.scm bug we're chasing is
+# arch-agnostic, so one validation arch is enough.
+# The one arch the harness is wired for; paths and the image tag derive from it.
+TCC_GCC_ARCH := aarch64
+TCC_GCC_PLATFORM := $(PLATFORM_$(TCC_GCC_ARCH))
+TCC_GCC_BIN := build/$(TCC_GCC_ARCH)/tcc-gcc/tcc-gcc
+# Stamp file touched after a successful image build (not the image itself).
+TCC_GCC_IMAGE := build/$(TCC_GCC_ARCH)/.image-alpine-gcc
+# Hand-rolled _start and sys_* shim sources for the selected arch.
+TCC_GCC_HARNESS := tcc-gcc/$(TCC_GCC_ARCH)/start.S tcc-gcc/$(TCC_GCC_ARCH)/sys_stubs.c
+
+# Phony entry point: `make tcc-gcc` builds the harness binary.
+tcc-gcc: $(TCC_GCC_BIN)
+
+# Build the boot2-alpine-gcc:<arch> image from scripts/Containerfile.alpine-gcc
+# (build context: scripts/), then touch the stamp file so make re-runs the
+# image build only when the stamp is missing or the Containerfile is newer.
+$(TCC_GCC_IMAGE): scripts/Containerfile.alpine-gcc
+ mkdir -p $(@D)
+ podman build --platform $(TCC_GCC_PLATFORM) \
+ -t boot2-alpine-gcc:$(TCC_GCC_ARCH) \
+ -f scripts/Containerfile.alpine-gcc scripts/
+ @touch $@
+
+# Link the harness binary by running scripts/build-tcc-gcc.sh inside the
+# boot2-alpine-gcc:<arch> container, with the repo mounted at /work (also the
+# working directory, so the relative paths passed below resolve). The image
+# stamp is a prerequisite because the run uses --pull=never: the image has to
+# exist locally already.
+$(TCC_GCC_BIN): $(TCC_FLAT) build/$(TCC_GCC_ARCH)/vendor/mes-libc/libc.flat.c \
+ $(TCC_GCC_HARNESS) scripts/build-tcc-gcc.sh $(TCC_GCC_IMAGE)
+ mkdir -p $(@D)
+ podman run --rm --pull=never --platform $(TCC_GCC_PLATFORM) \
+ -e ARCH=$(TCC_GCC_ARCH) \
+ -v $(CURDIR):/work -w /work boot2-alpine-gcc:$(TCC_GCC_ARCH) \
+ sh scripts/build-tcc-gcc.sh $@ $(TCC_FLAT) \
+ build/$(TCC_GCC_ARCH)/vendor/mes-libc/libc.flat.c
+
# --- Native tools (opt-in dev-loop helpers) -------------------------------
NATIVE_TOOLS := build/native-tools/M1 build/native-tools/hex2 build/native-tools/m1pp
diff --git a/scripts/Containerfile.alpine-gcc b/scripts/Containerfile.alpine-gcc
@@ -0,0 +1,18 @@
+## Alpine with gcc + musl-dev + binutils baked in, used by the
+## `make tcc-gcc` sanity-check target (Makefile).
+##
+## tcc-gcc compiles the same tcc.flat.c + libc.flat.c our cc.scm path
+## consumes, but with stock gcc + libgcc soft-float helpers + a tiny
+## hand-rolled _start / sys_* shim (tcc-gcc/<arch>/). It's a known-good
+## reference: if our cc.scm-built tcc-boot2 misbehaves and tcc-gcc
+## doesn't, the bug is in our codegen, not the source.
+##
+## Built per --platform; tag as boot2-alpine-gcc:<arch>. We don't pin
+## a digest here because alpine:3.20 is only used by this opt-in
+## harness — re-pin if the harness becomes load-bearing.
+
+## Base image: referenced by tag only, no digest pin (rationale above).
+FROM docker.io/library/alpine:3.20
+
+## Toolchain packages for scripts/build-tcc-gcc.sh.
+RUN apk add --no-cache gcc musl-dev binutils
+
+## Default to a shell; `make tcc-gcc` passes its own command to podman run.
+CMD ["/bin/sh"]
diff --git a/scripts/build-tcc-gcc.sh b/scripts/build-tcc-gcc.sh
@@ -0,0 +1,40 @@
+#!/bin/sh
+## build-tcc-gcc.sh — link tcc.flat.c + libc.flat.c with stock gcc.
+##
+## Sanity-check sibling of the cc.scm path. Inputs are the *same*
+## flatten outputs the cc.scm pipeline consumes; harness sources
+## (tcc-gcc/<arch>/{start.S,sys_stubs.c}) provide a minimal _start and
+## syscall stubs so we don't need musl's crt0 or its libc. If
+## tcc-gcc -version works and our cc.scm-built tcc-boot2 doesn't, the
+## bug is downstream of the C source.
+##
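+## Runs inside the boot2-alpine-gcc:<arch> image. -nodefaultlibs and
+## -nostartfiles drop musl's libc and crt0; they also drop libgcc, so
+## the explicit -lgcc at the end of the link line is what supplies the
+## long-double soft-float helpers (__addtf3 etc., needed on aarch64 musl).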
+##
+## Env: ARCH=aarch64 (only arch wired today; start.S is per-arch)
+## Usage: build-tcc-gcc.sh <out> <tcc.flat.c> <libc.flat.c>
+
+set -eu
+
+: "${ARCH:?ARCH must be set}"
+[ "$#" -eq 3 ] || { echo "usage: ARCH=<arch> $0 <out> <tcc.flat.c> <libc.flat.c>" >&2; exit 2; }
+
+OUT=$1
+TCC_FLAT=$2
+LIBC_FLAT=$3
+
+HARNESS=tcc-gcc/$ARCH
+[ -d "$HARNESS" ] || { echo "no harness for ARCH=$ARCH at $HARNESS" >&2; exit 1; }
+
+mkdir -p "$(dirname "$OUT")"
+
+gcc -static -nodefaultlibs -nostartfiles -fno-stack-protector \
+ -Wno-implicit-function-declaration \
+ -Wno-builtin-declaration-mismatch \
+ -Wno-incompatible-pointer-types \
+ -Wno-int-conversion \
+ -e _start \
+ "$HARNESS/start.S" "$HARNESS/sys_stubs.c" \
+ "$TCC_FLAT" "$LIBC_FLAT" \
+ -lgcc -o "$OUT"
diff --git a/tcc-gcc/aarch64/start.S b/tcc-gcc/aarch64/start.S
@@ -0,0 +1,22 @@
+/* aarch64 _start: mirror P1/entry-libc.P1pp.
+ *
+ * On entry sp points at [argc][argv0]…[NULL][envp0]…[NULL][auxv]. Convert
+ * to (x0=argc, x1=argv) and run __libc_init(argc, argv) → main(argc, argv)
+ * → exit(rc). If exit returns, spin.
+ */
+ .text
+ .globl _start
+_start:
+ // Keep argc/argv in callee-saved registers so they survive the call
+ // to __libc_init and can be handed to main a second time.
+ ldr x19, [sp] // argc (callee-saved x19)
+ add x20, sp, #8 // argv (callee-saved x20)
+
+ // __libc_init(argc, argv)
+ mov x0, x19
+ mov x1, x20
+ bl __libc_init
+
+ // main(argc, argv)
+ mov x0, x19
+ mov x1, x20
+ bl main
+
+ // x0 still holds main's return value; it is passed straight to exit.
+ bl exit
+ // Only reached if exit returns: spin forever.
+1: b 1b
diff --git a/tcc-gcc/aarch64/sys_stubs.c b/tcc-gcc/aarch64/sys_stubs.c
@@ -0,0 +1,63 @@
+/* aarch64 syscall stubs matching P1pp.P1pp's sys_* entry points.
+ * Same C ABI as P1pp's labelled entries — our libc.flat.c calls these
+ * with argument shapes copied straight from boot2-syscall.c.
+ *
+ * Linux aarch64 syscall ABI: nr in x8, args in x0..x5, return in x0.
+ */
+
+/* Issue a system call that takes no arguments.
+ * nr is loaded into x8; whatever the kernel leaves in x0 is returned. */
+static inline long _syscall0(long nr) {
+    register long num __asm__("x8") = nr;
+    register long ret __asm__("x0");
+    __asm__ volatile ("svc #0"
+                      : "=r"(ret)
+                      : "r"(num)
+                      : "memory");
+    return ret;
+}
+/* Issue a one-argument system call.
+ * nr goes in x8 and a in x0; x0 is read back afterwards as the result. */
+static inline long _syscall1(long nr, long a) {
+    register long num __asm__("x8") = nr;
+    register long arg0 __asm__("x0") = a;   /* in: first arg, out: result */
+    __asm__ volatile ("svc #0"
+                      : "+r"(arg0)
+                      : "r"(num)
+                      : "memory");
+    return arg0;
+}
+/* Issue a two-argument system call.
+ * nr goes in x8, (a, b) in x0..x1; x0 is read back as the result. */
+static inline long _syscall2(long nr, long a, long b) {
+    register long num __asm__("x8") = nr;
+    register long arg0 __asm__("x0") = a;   /* in: first arg, out: result */
+    register long arg1 __asm__("x1") = b;
+    __asm__ volatile ("svc #0"
+                      : "+r"(arg0)
+                      : "r"(num), "r"(arg1)
+                      : "memory");
+    return arg0;
+}
+/* Issue a three-argument system call.
+ * nr goes in x8, (a, b, c) in x0..x2; x0 is read back as the result. */
+static inline long _syscall3(long nr, long a, long b, long c) {
+    register long num __asm__("x8") = nr;
+    register long arg0 __asm__("x0") = a;   /* in: first arg, out: result */
+    register long arg1 __asm__("x1") = b;
+    register long arg2 __asm__("x2") = c;
+    __asm__ volatile ("svc #0"
+                      : "+r"(arg0)
+                      : "r"(num), "r"(arg1), "r"(arg2)
+                      : "memory");
+    return arg0;
+}
+/* Issue a four-argument system call.
+ * nr goes in x8, (a, b, c, d) in x0..x3; x0 is read back as the result. */
+static inline long _syscall4(long nr, long a, long b, long c, long d) {
+    register long num __asm__("x8") = nr;
+    register long arg0 __asm__("x0") = a;   /* in: first arg, out: result */
+    register long arg1 __asm__("x1") = b;
+    register long arg2 __asm__("x2") = c;
+    register long arg3 __asm__("x3") = d;
+    __asm__ volatile ("svc #0"
+                      : "+r"(arg0)
+                      : "r"(num), "r"(arg1), "r"(arg2), "r"(arg3)
+                      : "memory");
+    return arg0;
+}
+
+#define NR_read 63
+#define NR_write 64
+#define NR_close 57
+#define NR_openat 56
+#define NR_lseek 62
+#define NR_brk 214
+#define NR_unlinkat 35
+#define NR_exit 93
+#define AT_FDCWD (-100)
+
+/* Forward (fd, buf, n) to syscall NR_read and return the raw x0 result. */
+long sys_read(long fd, long buf, long n)
+{
+    return _syscall3(NR_read, fd, buf, n);
+}
+/* Forward (fd, buf, n) to syscall NR_write and return the raw x0 result. */
+long sys_write(long fd, long buf, long n)
+{
+    return _syscall3(NR_write, fd, buf, n);
+}
+/* Forward fd to syscall NR_close and return the raw x0 result. */
+long sys_close(long fd)
+{
+    return _syscall1(NR_close, fd);
+}
+/* open is issued as NR_openat with AT_FDCWD as the directory fd,
+ * followed by (path, flags, mode). Returns the raw x0 result. */
+long sys_open(long path, long flags, long mode)
+{
+    return _syscall4(NR_openat, AT_FDCWD, path, flags, mode);
+}
+/* Forward (fd, off, whence) to syscall NR_lseek; returns the raw x0 result. */
+long sys_lseek(long fd, long off, long whence)
+{
+    return _syscall3(NR_lseek, fd, off, whence);
+}
+/* Forward addr to syscall NR_brk and return the raw x0 result. */
+long sys_brk(long addr)
+{
+    return _syscall1(NR_brk, addr);
+}
+/* unlink is issued as NR_unlinkat with AT_FDCWD as the directory fd
+ * and flags 0. Returns the raw x0 result. */
+long sys_unlink(long path)
+{
+    return _syscall3(NR_unlinkat, AT_FDCWD, path, 0);
+}
+/* Issue NR_exit with the given status. If the syscall ever returns,
+ * spin instead of falling off the end of the function. */
+long sys_exit(long code)
+{
+    _syscall1(NR_exit, code);
+    for (;;) {
+    }
+}