boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 4891b5e0b39db967260da5742589a3e6bfab01f5
parent cade790ded0af7fc59a2318afa8f63f3dd226be3
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Wed, 29 Apr 2026 20:21:56 -0700

tcc-gcc: stock-gcc sanity-check harness

Sanity-check sibling of tcc-boot2: compiles the same tcc.flat.c +
libc.flat.c through stock gcc + libgcc inside an Alpine container,
with a hand-rolled _start (mirroring P1/entry-libc.P1pp) and sys_*
syscall stubs (matching libp1pp.P1pp's labelled entries). If
tcc-gcc -version runs and cc.scm-built tcc-boot2 doesn't, the bug
is downstream of the C source — i.e. in cc.scm or P1.

Wired as `make tcc-gcc` (aarch64-only today; bring up other arches
by adding tcc-gcc/<arch>/{start.S,sys_stubs.c}). Image is
boot2-alpine-gcc:<arch>, built once via scripts/Containerfile.alpine-gcc.

Diffstat:
MMakefile | 39++++++++++++++++++++++++++++++++++++++-
Ascripts/Containerfile.alpine-gcc | 18++++++++++++++++++
Ascripts/build-tcc-gcc.sh | 40++++++++++++++++++++++++++++++++++++++++
Atcc-gcc/aarch64/start.S | 22++++++++++++++++++++++
Atcc-gcc/aarch64/sys_stubs.c | 63+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 181 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile @@ -18,6 +18,8 @@ # make cc catm the cc compiler source for ARCH # make tcc-flat flatten upstream tcc.c into one TU # make tcc-boot2 cc.scm + P1pp pipeline → tcc-boot2 ELF +# make tcc-gcc same flatten output, built with stock gcc +# (sanity check; ARCH=aarch64 only) # make test every suite, every arch # make test SUITE=m1pp m1pp suite, every arch # make test SUITE=p1 ARCH=amd64 p1 suite, one arch @@ -68,7 +70,7 @@ PODMAN = podman run --rm --pull=never --platform $(PLATFORM_$(1)) \ # --- Targets -------------------------------------------------------------- .PHONY: all m1pp pokem scheme1 cc test image tools tables \ - tools-native cloc clean help tcc-boot2 tcc-flat + tools-native cloc clean help tcc-boot2 tcc-flat tcc-gcc all: m1pp pokem @@ -281,6 +283,41 @@ $(TCC_BOOT2_BINS): build/%/tcc-boot2/tcc-boot2: \ P1/entry-libc.P1pp build/$*/vendor/mes-libc/libc.P1pp \ $< P1/elf-end.P1pp +# --- tcc-gcc: same flatten, stock gcc ------------------------------------- +# +# Sanity-check sibling of tcc-boot2. Compiles the *same* tcc.flat.c + +# libc.flat.c through stock gcc + libgcc inside an Alpine image, with +# our hand-rolled _start / sys_* shim (tcc-gcc/<arch>/) replacing +# musl's crt0 + libc. If tcc-gcc runs and tcc-boot2 doesn't, the bug +# is downstream of the C source — i.e. in cc.scm or the P1 pipeline. +# +# aarch64 only today; bring up other arches by adding a tcc-gcc/<arch>/ +# variant of start.S + sys_stubs.c. The cc.scm bug we're chasing is +# arch-agnostic, so one validation arch is enough. 
+TCC_GCC_ARCH := aarch64 +TCC_GCC_PLATFORM := $(PLATFORM_$(TCC_GCC_ARCH)) +TCC_GCC_BIN := build/$(TCC_GCC_ARCH)/tcc-gcc/tcc-gcc +TCC_GCC_IMAGE := build/$(TCC_GCC_ARCH)/.image-alpine-gcc +TCC_GCC_HARNESS := tcc-gcc/$(TCC_GCC_ARCH)/start.S tcc-gcc/$(TCC_GCC_ARCH)/sys_stubs.c + +tcc-gcc: $(TCC_GCC_BIN) + +$(TCC_GCC_IMAGE): scripts/Containerfile.alpine-gcc + mkdir -p $(@D) + podman build --platform $(TCC_GCC_PLATFORM) \ + -t boot2-alpine-gcc:$(TCC_GCC_ARCH) \ + -f scripts/Containerfile.alpine-gcc scripts/ + @touch $@ + +$(TCC_GCC_BIN): $(TCC_FLAT) build/$(TCC_GCC_ARCH)/vendor/mes-libc/libc.flat.c \ + $(TCC_GCC_HARNESS) scripts/build-tcc-gcc.sh $(TCC_GCC_IMAGE) + mkdir -p $(@D) + podman run --rm --pull=never --platform $(TCC_GCC_PLATFORM) \ + -e ARCH=$(TCC_GCC_ARCH) \ + -v $(CURDIR):/work -w /work boot2-alpine-gcc:$(TCC_GCC_ARCH) \ + sh scripts/build-tcc-gcc.sh $@ $(TCC_FLAT) \ + build/$(TCC_GCC_ARCH)/vendor/mes-libc/libc.flat.c + # --- Native tools (opt-in dev-loop helpers) ------------------------------- NATIVE_TOOLS := build/native-tools/M1 build/native-tools/hex2 build/native-tools/m1pp diff --git a/scripts/Containerfile.alpine-gcc b/scripts/Containerfile.alpine-gcc @@ -0,0 +1,18 @@ +## Alpine with gcc + musl-dev + binutils baked in, used by the +## `make tcc-gcc` sanity-check target (Makefile). +## +## tcc-gcc compiles the same tcc.flat.c + libc.flat.c our cc.scm path +## consumes, but with stock gcc + libgcc soft-float helpers + a tiny +## hand-rolled _start / sys_* shim (tcc-gcc/<arch>/). It's a known-good +## reference: if our cc.scm-built tcc-boot2 misbehaves and tcc-gcc +## doesn't, the bug is in our codegen, not the source. +## +## Built per --platform; tag as boot2-alpine-gcc:<arch>. We don't pin +## a digest here because alpine:3.20 is only used by this opt-in +## harness — re-pin if the harness becomes load-bearing. 
+ +FROM docker.io/library/alpine:3.20 + +RUN apk add --no-cache gcc musl-dev binutils + +CMD ["/bin/sh"] diff --git a/scripts/build-tcc-gcc.sh b/scripts/build-tcc-gcc.sh @@ -0,0 +1,40 @@ +#!/bin/sh +## build-tcc-gcc.sh — link tcc.flat.c + libc.flat.c with stock gcc. +## +## Sanity-check sibling of the cc.scm path. Inputs are the *same* +## flatten outputs the cc.scm pipeline consumes; harness sources +## (tcc-gcc/<arch>/{start.S,sys_stubs.c}) provide a minimal _start and +## syscall stubs so we don't need musl's crt0 or its libc. If +## tcc-gcc -version works and our cc.scm-built tcc-boot2 doesn't, the +## bug is downstream of the C source. +## +## Runs inside the boot2-alpine-gcc:<arch> image. -nodefaultlibs (not +## -nostdlib) keeps libgcc available for long-double soft-float +## helpers (__addtf3 etc., needed on aarch64 musl). +## +## Env: ARCH=aarch64 (only arch wired today; start.S is per-arch) +## Usage: build-tcc-gcc.sh <out> <tcc.flat.c> <libc.flat.c> + +set -eu + +: "${ARCH:?ARCH must be set}" +[ "$#" -eq 3 ] || { echo "usage: ARCH=<arch> $0 <out> <tcc.flat.c> <libc.flat.c>" >&2; exit 2; } + +OUT=$1 +TCC_FLAT=$2 +LIBC_FLAT=$3 + +HARNESS=tcc-gcc/$ARCH +[ -d "$HARNESS" ] || { echo "no harness for ARCH=$ARCH at $HARNESS" >&2; exit 1; } + +mkdir -p "$(dirname "$OUT")" + +gcc -static -nodefaultlibs -nostartfiles -fno-stack-protector \ + -Wno-implicit-function-declaration \ + -Wno-builtin-declaration-mismatch \ + -Wno-incompatible-pointer-types \ + -Wno-int-conversion \ + -e _start \ + "$HARNESS/start.S" "$HARNESS/sys_stubs.c" \ + "$TCC_FLAT" "$LIBC_FLAT" \ + -lgcc -o "$OUT" diff --git a/tcc-gcc/aarch64/start.S b/tcc-gcc/aarch64/start.S @@ -0,0 +1,22 @@ +/* aarch64 _start: mirror P1/entry-libc.P1pp. + * + * On entry sp points at [argc][argv0]…[NULL][envp0]…[NULL][auxv]. Convert + * to (x0=argc, x1=argv) and run __libc_init(argc, argv) → main(argc, argv) + * → exit(rc). If exit returns, spin. 
+ */ + .text + .globl _start +_start: + ldr x19, [sp] // argc (callee-saved x19) + add x20, sp, #8 // argv (callee-saved x20) + + mov x0, x19 + mov x1, x20 + bl __libc_init + + mov x0, x19 + mov x1, x20 + bl main + + bl exit +1: b 1b diff --git a/tcc-gcc/aarch64/sys_stubs.c b/tcc-gcc/aarch64/sys_stubs.c @@ -0,0 +1,63 @@ +/* aarch64 syscall stubs matching P1pp.P1pp's sys_* entry points. + * Same C ABI as P1pp's labelled entries — our libc.flat.c calls these + * with argument shapes copied straight from boot2-syscall.c. + * + * Linux aarch64 syscall ABI: nr in x8, args in x0..x5, return in x0. + */ + +static inline long _syscall0(long nr) { + register long x8 __asm__("x8") = nr; + register long x0 __asm__("x0"); + __asm__ volatile ("svc #0" : "=r"(x0) : "r"(x8) : "memory"); + return x0; +} +static inline long _syscall1(long nr, long a) { + register long x8 __asm__("x8") = nr; + register long x0 __asm__("x0") = a; + __asm__ volatile ("svc #0" : "+r"(x0) : "r"(x8) : "memory"); + return x0; +} +static inline long _syscall2(long nr, long a, long b) { + register long x8 __asm__("x8") = nr; + register long x0 __asm__("x0") = a; + register long x1 __asm__("x1") = b; + __asm__ volatile ("svc #0" : "+r"(x0) : "r"(x8), "r"(x1) : "memory"); + return x0; +} +static inline long _syscall3(long nr, long a, long b, long c) { + register long x8 __asm__("x8") = nr; + register long x0 __asm__("x0") = a; + register long x1 __asm__("x1") = b; + register long x2 __asm__("x2") = c; + __asm__ volatile ("svc #0" : "+r"(x0) : "r"(x8), "r"(x1), "r"(x2) : "memory"); + return x0; +} +static inline long _syscall4(long nr, long a, long b, long c, long d) { + register long x8 __asm__("x8") = nr; + register long x0 __asm__("x0") = a; + register long x1 __asm__("x1") = b; + register long x2 __asm__("x2") = c; + register long x3 __asm__("x3") = d; + __asm__ volatile ("svc #0" : "+r"(x0) + : "r"(x8), "r"(x1), "r"(x2), "r"(x3) : "memory"); + return x0; +} + +#define NR_read 63 +#define NR_write 64 
+#define NR_close 57 +#define NR_openat 56 +#define NR_lseek 62 +#define NR_brk 214 +#define NR_unlinkat 35 +#define NR_exit 93 +#define AT_FDCWD (-100) + +long sys_read (long fd, long buf, long n) { return _syscall3(NR_read, fd, buf, n); } +long sys_write (long fd, long buf, long n) { return _syscall3(NR_write, fd, buf, n); } +long sys_close (long fd) { return _syscall1(NR_close, fd); } +long sys_open (long path, long flags, long mode) { return _syscall4(NR_openat, AT_FDCWD, path, flags, mode); } +long sys_lseek (long fd, long off, long whence) { return _syscall3(NR_lseek, fd, off, whence); } +long sys_brk (long addr) { return _syscall1(NR_brk, addr); } +long sys_unlink(long path) { return _syscall3(NR_unlinkat, AT_FDCWD, path, 0); } +long sys_exit (long code) { _syscall1(NR_exit, code); for(;;); }