commit 55fea366063ece9682c6d9fd1650774194042bb0
parent a5acc123df1992ba01918c00751edb1f252fdd1d
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sun, 26 Apr 2026 03:33:41 -0700
scripts: build-tcc-source.sh — flatten tcc-0.9.26 and validate self-host
Produces build/cc-bootstrap/<arch>/tcc.flat.c — a single C source bytestream
suitable for our scheme1-hosted compiler to ingest. Mirrors the live-bootstrap
tcc-mes invocation (steps/tcc-0.9.26/pass1.kaem) minus the actual compile step.
Stages (`--arch X86_64 --self-host` runs them all):
1. unpack tcc-0.9.26.tar.gz (the tcc-0.9.26-1147-gee75a10c snapshot) from lb-work/distfiles
2. apply live-bootstrap simple-patches (file-open reorder in tcctools.c)
3. host `cc -E -nostdinc` with the Mes-bundled minimal libc headers and
the same -D set MesCC sees (BOOTSTRAP=1, HAVE_LONG_LONG, ONE_SOURCE=1,
CONFIG_TCC_*, TCC_TARGET_X86_64=1, inline=, plus __linux__=1 and
__<mes-arch>__=1 which mescc injects internally)
4. (--verify) host cc compiles tcc.flat.c clean → tcc.flat.o
5. (--self-host) inside linux/amd64 alpine + gcc + musl-dev:
- build tcc-host (static, no-pie) from tcc.flat.c + a one-line
errno-shim.c (musl exposes errno via __errno_location, mes
headers declare it as `extern int errno;`)
- tcc-host -version → "tcc version 0.9.26 (x86_64 Linux)"
- tcc-host recompiles tcc.flat.c into tcc-self.o (~485 KB)
Sizes: tcc.flat.c 608 KB / 18896 lines, tcc-host 704 KB static.
This is the host-side artifact producer. Our scheme1-hosted cc consumes
tcc.flat.c verbatim. Wiring it into the live-bootstrap pass1.kaem flow
(replacing tcc-mes) is the next integration step.
Diffstat:
1 file changed, 214 insertions(+), 0 deletions(-)
diff --git a/scripts/build-tcc-source.sh b/scripts/build-tcc-source.sh
@@ -0,0 +1,214 @@
+#!/bin/sh
+## scripts/build-tcc-source.sh — produce a single flattened tcc.c that our
+## scheme1-hosted C compiler can consume.
+##
+## Mirrors the live-bootstrap tcc-mes invocation (steps/tcc-0.9.26/pass1.kaem)
+## minus the actual compile step. The output is one C source bytestream with
+## all #include "X.c" project files inlined and all standard headers expanded
+## from the Mes-bundled minimal libc headers. No mes/mescc binary is involved
+## — only the host preprocessor.
+##
+## Stages:
+## 1. unpack tcc-0.9.26.tar.gz (extracts to tcc-0.9.26-1147-gee75a10c/)
+## 2. apply live-bootstrap simple-patches (file-open reordering in tcctools.c)
+## 3. host-cc -E with the tcc-mes defines + -nostdinc + Mes header tree,
+## emitting build/cc-bootstrap/<arch>/tcc.flat.c
+## 4. (--verify) compile tcc.flat.c with host cc; 5. (--self-host) see below
+##
+## Usage:
+## scripts/build-tcc-source.sh [--arch <arch>] [--verify] [--self-host]
+##
+## --arch: X86_64 | I386 | RISCV64. Default X86_64 (live-bootstrap
+## reference path).
+## --verify: compile tcc.flat.c with host cc to confirm the source is
+## valid C. On macOS, produces a Mach-O .o (won't run, but
+## proves syntactic correctness).
+## --self-host: full chain in a Linux x86_64 container (alpine + gcc):
+## 1. build tcc-host (a real tcc binary) from tcc.flat.c
+## 2. run tcc-host -version
+## 3. use tcc-host to recompile tcc.flat.c (self-host check)
+## Requires podman and pulls alpine:latest on first run.
+## Implies --verify.
+
+set -eu
+
+# --- arg parse --------------------------------------------------------
+ARCH=X86_64 VERIFY=0 SELF_HOST=0   # defaults; --self-host also switches VERIFY on
+while [ $# -gt 0 ]; do
+  case "$1" in
+    --arch) # without this guard a trailing --arch dies on the unset $2 under `set -u`
+      [ $# -ge 2 ] || { echo "--arch requires a value" >&2; exit 2; }
+      ARCH=$2; shift 2 ;;
+    --verify) VERIFY=1; shift ;;
+    --self-host) SELF_HOST=1; VERIFY=1; shift ;;
+    -h|--help) sed -n 's/^## \{0,1\}//p' "$0"; exit 0 ;;  # print every ##-prefixed line of this script
+    *) echo "unknown arg: $1" >&2; exit 2 ;;
+  esac
+done
+
+# Map ARCH to (mes-arch, have-long-long). MES_ARCH names the per-arch Mes header
+# directory and HAVE_LL becomes the HAVE_LONG_LONG define; other values exit 2.
+if   [ "$ARCH" = X86_64 ];  then MES_ARCH=x86_64;  HAVE_LL=1
+elif [ "$ARCH" = I386 ];    then MES_ARCH=x86;     HAVE_LL=0
+elif [ "$ARCH" = RISCV64 ]; then MES_ARCH=riscv64; HAVE_LL=1
+elif [ "$ARCH" = AARCH64 ]; then echo "AARCH64 not in live-bootstrap; tcc.c lacks an arm64-gen.c we can use here" >&2; exit 2
+else echo "unknown ARCH: $ARCH" >&2; exit 2
+fi
+
+# --- paths ------------------------------------------------------------
+ROOT=$(cd "$(dirname "$0")/.." && pwd)
+WORK=$ROOT/build/cc-bootstrap/$ARCH
+DISTFILES=$ROOT/../lb-work/distfiles
+LB_PATCHES=$ROOT/../live-bootstrap/steps/tcc-0.9.26/simple-patches
+MES_INCLUDE=$ROOT/../mes/include
+MES_INCLUDE_LINUX=$MES_INCLUDE/linux/$MES_ARCH
+
+TCC_TAR=$DISTFILES/tcc-0.9.26.tar.gz
+TCC_PKG=tcc-0.9.26-1147-gee75a10c
+
+[ -r "$TCC_TAR" ] || { echo "missing $TCC_TAR" >&2; exit 1; }
+[ -d "$LB_PATCHES" ] || { echo "missing $LB_PATCHES" >&2; exit 1; }
+[ -d "$MES_INCLUDE" ] || { echo "missing $MES_INCLUDE" >&2; exit 1; }
+[ -d "$MES_INCLUDE_LINUX" ] || { echo "missing $MES_INCLUDE_LINUX" >&2; exit 1; }
+
+# --- (1) unpack -------------------------------------------------------
+# Any tree left by an earlier run is discarded so patching starts from pristine sources.
+SRC=$WORK/$TCC_PKG
+mkdir -p "$WORK"
+rm -rf "$SRC"
+tar -xzf "$TCC_TAR" -C "$WORK"
+
+# --- (2) simple-patches ----------------------------------------------
+# apply_simple_patch <target> <before-file> <after-file>
+#   Inline stand-in for simple-patch: the first verbatim occurrence of the
+#   before-file text inside <target> is replaced by the after-file text and
+#   <target> is rewritten in place. The script aborts when a file is
+#   unreadable or the text is not found. Both patches edit tcctools.c; the
+#   pair (remove-fileopen, addback-fileopen) moves a fopen() block earlier.
+apply_simple_patch() {
+  target=$1 before=$2 after=$3
+  if [ ! -r "$target" ]; then echo "patch target missing: $target" >&2; exit 1; fi
+  if [ ! -r "$before" ]; then echo "patch before missing: $before" >&2; exit 1; fi
+  if [ ! -r "$after" ]; then echo "patch after missing: $after" >&2; exit 1; fi
+  # awk keeps the whole target in memory, locates the block with index()
+  # (a literal, non-regex search) and prints prefix + replacement + suffix.
+  awk -v BFILE="$before" -v AFILE="$after" '
+    # Return the whole of path as one string, every line terminated by "\n".
+    function slurp(path,    text, row) {
+      while ((getline row < path) > 0) text = text row "\n"
+      close(path)
+      return text
+    }
+    BEGIN { needle = slurp(BFILE); replacement = slurp(AFILE) }
+    { haystack = haystack $0 "\n" }
+    END {
+      at = index(haystack, needle)
+      if (at == 0) { print "patch did not match" > "/dev/stderr"; exit 1 }
+      printf "%s%s%s",
+        substr(haystack, 1, at - 1), replacement, substr(haystack, at + length(needle))
+    }
+  ' "$target" > "$target.new"
+  mv "$target.new" "$target"
+}
+
+apply_simple_patch \
+ "$SRC/tcctools.c" \
+ "$LB_PATCHES/remove-fileopen.before" \
+ "$LB_PATCHES/remove-fileopen.after"
+
+apply_simple_patch \
+ "$SRC/tcctools.c" \
+ "$LB_PATCHES/addback-fileopen.before" \
+ "$LB_PATCHES/addback-fileopen.after"
+
+# pass1.kaem creates two empty config.h files via `catm <out>` (line 27-28):
+# one satisfies tcc.h's `#include "config.h"`, the other the `<mes/config.h>`
+# that Mes's stdio.h reaches for. Both stay empty, as in the live-bootstrap build.
+OVERLAY=$WORK/mes-overlay
+mkdir -p "$OVERLAY/mes"
+for empty_header in "$SRC/config.h" "$OVERLAY/mes/config.h"; do : > "$empty_header"; done
+
+# --- (3) flatten via host preprocessor --------------------------------
+# HOST_CC may be overridden from the environment (default: cc).
+# -E preprocesses only; -P suppresses linemarkers so the output is plain C.
+HOST_CC=${HOST_CC:-cc}
+FLAT=$WORK/tcc.flat.c
+
+# Defines mirror the tcc-mes invocation in pass1.kaem. Paths in the
+# CONFIG_* defines don't matter for our purposes (we won't run the
+# resulting binary as-is) but must be syntactically valid string literals.
+# The argument list is built up in "$@" (empty once option parsing has
+# consumed the command line) so every group of flags can carry a comment.
+set -- -E -P -nostdinc
+# Include search order: tcc sources, empty-config overlay, Mes arch headers, Mes headers.
+set -- "$@" -I "$SRC" -I "$WORK/mes-overlay"
+set -- "$@" -I "$MES_INCLUDE_LINUX" -I "$MES_INCLUDE"
+# Platform macros the host preprocessor would not define for us under these names.
+set -- "$@" -D __linux__=1 -D "__${MES_ARCH}__=1"
+set -- "$@" -D BOOTSTRAP=1 -D "HAVE_LONG_LONG=$HAVE_LL" -D inline=
+# String-valued defines (placeholder paths, see above).
+set -- "$@" -D 'CONFIG_TCCDIR="/lib/tcc"' -D 'CONFIG_SYSROOT="/"'
+set -- "$@" -D 'CONFIG_TCC_CRTPREFIX="/lib"' -D 'CONFIG_TCC_ELFINTERP="/mes/loader"'
+set -- "$@" -D 'CONFIG_TCC_SYSINCLUDEPATHS="/include/mes"' -D 'TCC_LIBGCC="/lib/libc.a"'
+# Numeric switches, the version string, single-source mode and the target selector.
+set -- "$@" -D CONFIG_TCC_LIBTCC1_MES=0 -D CONFIG_TCCBOOT=1 -D CONFIG_TCC_STATIC=1
+set -- "$@" -D CONFIG_USE_LIBGCC=1 -D 'TCC_VERSION="0.9.26"' -D ONE_SOURCE=1
+set -- "$@" -D "TCC_TARGET_${ARCH}=1"
+
+# Never leave a truncated tcc.flat.c behind: if the preprocessor fails,
+# delete the partial output and exit with the preprocessor's status.
+"$HOST_CC" "$@" "$SRC/tcc.c" > "$FLAT" || { rc=$?; rm -f "$FLAT"; exit "$rc"; }
+
+# BSD/macOS wc pads its counts with spaces; arithmetic expansion strips
+# the padding so the summary line reads the same on every host.
+LINES=$(( $(wc -l < "$FLAT") ))
+BYTES=$(( $(wc -c < "$FLAT") ))
+echo "produced $FLAT ($LINES lines, $BYTES bytes)"
+
+# --- (4) optional verify ---------------------------------------------
+if [ "$VERIFY" = 1 ]; then
+  HOST_OBJ=$WORK/tcc.flat.o
+  HOST_LOG=$WORK/host-cc.log   # the compiler's stderr is captured here
+  if ! "$HOST_CC" -c -w -o "$HOST_OBJ" "$FLAT" 2>"$HOST_LOG"; then
+    echo "host cc: tcc.flat.c FAILED to compile; see $HOST_LOG" >&2
+    echo "first 30 errors:" >&2
+    head -n 30 "$HOST_LOG" >&2
+    exit 1
+  fi
+  echo "host cc: tcc.flat.c compiles cleanly to $HOST_OBJ"
+fi
+
+# --- (5) optional self-host validation in a Linux x86_64 container ----
+# tcc.c expects errno as a global int (mes-libc convention) but musl
+# exposes errno via __errno_location(). Provide a tiny shim so linking
+# against musl works.
+if [ "$SELF_HOST" -eq 1 ]; then
+  [ "$ARCH" = X86_64 ] || {
+    echo "--self-host only supported for X86_64 (the live-bootstrap path)" >&2; exit 2
+  }
+  command -v podman >/dev/null 2>&1 || {
+    echo "--self-host requires podman" >&2; exit 2
+  }
+  SHIM=$WORK/errno-shim.c
+  printf 'int errno;\n' > "$SHIM"
+
+  echo "--- self-host: building tcc-host with alpine gcc ---"
+  # The repo is mounted at /work and REL reaches the container through the
+  # environment, so the inner script stays single-quoted (no \$ escaping).
+  # `set -e` runs first and apk keeps its stderr: a failed install stops here, visibly.
+  podman run --rm --platform linux/amd64 \
+    -e REL="build/cc-bootstrap/$ARCH" \
+    -v "$ROOT":/work -w /work alpine:latest sh -c '
+      set -e
+      apk add --no-cache gcc musl-dev >/dev/null
+      gcc -w -static -no-pie -o "$REL/tcc-host" "$REL/tcc.flat.c" "$REL/errno-shim.c"
+      echo
+      echo "--- tcc-host -version ---"
+      "$REL/tcc-host" -version
+      echo
+      echo "--- self-compile: tcc-host compiling tcc.flat.c ---"
+      "$REL/tcc-host" -c -o "$REL/tcc-self.o" "$REL/tcc.flat.c"
+      ls -la "$REL/tcc-self.o"
+      echo "self-compile: OK"
+    '
+fi