boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 1be0f6600f387c486fff3eba3a2d76393e96bbdb
parent 49d093effdd28f9e16f08f184e9d4cd99a5b8a6d
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Mon,  4 May 2026 18:24:05 -0700

boot{3,4}: inline tcc <stdarg.h> bridge into flat.c, drop tcc-include/mes-include staging

stage1-flatten.sh now publishes the post-patch tcc <stdarg.h> as
build/tcc/stdarg-bridge.h. Both flatten scripts prepend it (guarded by
#ifndef CCSCM so cc.scm — which has __builtin_va_* as native frontend
keywords — skips it) into tcc.flat.c / libc.flat.c. The in-container
tcc compiles in boot3 no longer need -I /work/in/tcc-include or
-include /work/in/tcc-include/stdarg.h.

boot-hello.c uses forward declarations instead of <stdio.h>/<string.h>/
<stdlib.h>, so mes-include is no longer staged into boot3 either.
Together this drops ~400KB of boot3 staging (the two include trees) and
reduces every in-container tcc invocation to plain `tcc -nostdlib …`.

The same bridge replaces boot4's three per-arch musl-shim-*.h files,
which were a hand-trimmed equivalent of the same per-arch va_list
typedef + __builtin_va_* → tcc __va_* mapping.

Diffstat:
M Makefile | 7 ++++++-
M scripts/boot-hello.c | 14 +++++++++-----
M scripts/boot3.sh | 29 ++++++++++++++---------------
M scripts/boot4-calibrate.sh | 19 ++++++++-----------
D scripts/boot4-musl-shim-aarch64.h | 25 -------------------------
D scripts/boot4-musl-shim-amd64.h | 43 -------------------------------------------
D scripts/boot4-musl-shim-riscv64.h | 40 ----------------------------------------
M scripts/boot4.sh | 67 ++++++++++++++++++++++++++++++++++++-------------------------------
M scripts/libc-flatten.sh | 22 +++++++++++++++++++++-
M scripts/stage1-flatten.sh | 27 ++++++++++++++++++++++++++-
10 files changed, 120 insertions(+), 173 deletions(-)

diff --git a/Makefile b/Makefile @@ -303,13 +303,18 @@ tcc-boot2: $(OUT_DIR)/tcc-boot2/tcc-boot2 $(TCC_FLAT): scripts/stage1-flatten.sh sh scripts/stage1-flatten.sh --arch $(TCC_TARGET) +# stage1-flatten.sh writes this as a side effect — the post-patch tcc +# <stdarg.h>, used as a cross-arch bridge prepended into both .flat.c +# files (see comments in stage1-flatten.sh / libc-flatten.sh). +build/tcc/stdarg-bridge.h: $(TCC_FLAT) + # Catalog of inputs the host preprocessor reads when flattening libc. LIBC_VENDOR_SRCS := $(shell find vendor/mes-libc -type f \( -name '*.c' -o -name '*.h' \) 2>/dev/null) \ $(wildcard vendor/mes-libc/patches/*.before) \ $(wildcard vendor/mes-libc/patches/*.after) $(LIBC_FLATS): build/%/vendor/mes-libc/libc.flat.c: \ - scripts/libc-flatten.sh $(LIBC_VENDOR_SRCS) + scripts/libc-flatten.sh build/tcc/stdarg-bridge.h $(LIBC_VENDOR_SRCS) sh scripts/libc-flatten.sh --arch $* # libc and tcc.flat are both compiled with --lib= so they omit the diff --git a/scripts/boot-hello.c b/scripts/boot-hello.c @@ -1,8 +1,12 @@ -#include <stdio.h> -#include <string.h> -#include <stdlib.h> - -extern char *strdup(char const *); +/* Forward declarations only — keeps boot3 staging free of any mes-libc + * header tree (formerly /work/in/mes-include) just to satisfy three + * stdlib includes. boot4 also uses this source and its musl libc.a + * provides the same symbols at link time, so prototypes here only + * have to match the call ABI. */ +extern int printf (const char *, ...); +extern char *strdup (const char *); +extern unsigned long strlen (const char *); +extern void free (void *); int main(int argc, char **argv) { printf("hello from tcc-built libc; argc=%d\n", argc); diff --git a/scripts/boot3.sh b/scripts/boot3.sh @@ -151,7 +151,7 @@ fi # tcc.flat.c + the unpacked $TCC_DIR/{include,lib} tree are produced # together by stage1-flatten.sh; libc.flat.c by libc-flatten.sh. Both # run on the host (cc -E), no container — auto-invoke if missing. -if [ ! 
-e "$TCC_FLAT" ] || [ ! -d "$TCC_DIR/include" ] || [ ! -e "$TCC_DIR/lib/$LIB_HELPER_SRC" ]; then +if [ ! -e "$TCC_FLAT" ] || [ ! -d "$TCC_DIR/include" ] || [ ! -e "$TCC_DIR/lib/$LIB_HELPER_SRC" ] || [ ! -e build/tcc/stdarg-bridge.h ]; then echo "[boot3 $ARCH] flatten tcc.flat.c (host)" scripts/stage1-flatten.sh --arch "$TCC_TARGET" fi @@ -165,7 +165,7 @@ done # ── reset staging, copy inputs explicitly ───────────────────────────── rm -rf "$STAGE" -mkdir -p "$STAGE/in" "$STAGE/in/tcc-include" "$STAGE/in/mes-include" "$STAGE/in/tcc-lib" "$STAGE/out" "$OUT" +mkdir -p "$STAGE/in" "$STAGE/in/tcc-lib" "$STAGE/out" "$OUT" rm -f "$OUT/tcc0" "$OUT/tcc1" "$OUT/tcc2" \ "$OUT/start.o" "$OUT/sys_stubs.o" "$OUT/mem.o" "$OUT/libc.o" @@ -202,15 +202,14 @@ for f in $LIBTCC1_C_SRCS $LIBTCC1_ASM_SRCS; do cp "$TCC_DIR/lib/$f" "$STAGE/in/tcc-lib/$f" done -# Flattened TUs +# Flattened TUs. The patched tcc <stdarg.h> bridge is already prepended +# (under #ifndef CCSCM) into both .flat.c files by the flatten scripts, +# so the in-container compiles need no -I /work/in/tcc-include or +# -include …/stdarg.h. hello.c uses forward declarations (no system +# headers), so mes-include is no longer staged either. cp "$TCC_FLAT" "$STAGE/in/tcc.flat.c" cp "$LIBC_FLAT" "$STAGE/in/libc.flat.c" -# tcc include tree (small, < 200KB) — copied wholesale so tcc0's -# -I resolves stdarg.h etc. Recursive cp keeps directory layout. -cp -R "$TCC_DIR/include/." "$STAGE/in/tcc-include/" -cp -R vendor/mes-libc/include/. 
"$STAGE/in/mes-include/" - cp scripts/boot-hello.c "$STAGE/in/hello.c" # Every arch's tcc-boot2 has CONFIG_TCC_ASM and assembles .S inputs @@ -238,9 +237,9 @@ emit_helpers () { workdir=$2 echo "$cc -nostdlib -c -o $workdir/start.o /work/in/start.S" echo "$cc -nostdlib -c -o $workdir/sys_stubs.o /work/in/sys_stubs.S" - echo "$cc -nostdlib -I /work/in/tcc-include -c -o $workdir/mem.o /work/in/mem.c" - echo "$cc -nostdlib -I /work/in/tcc-include -include /work/in/tcc-include/stdarg.h -c -o $workdir/libc.o /work/in/libc.flat.c" - echo "$cc -nostdlib -I /work/in/tcc-include $LIB_HELPER_DEFINES -c -o $workdir/$LIB_HELPER_OBJ /work/in/tcc-lib/$LIB_HELPER_SRC" + echo "$cc -nostdlib -c -o $workdir/mem.o /work/in/mem.c" + echo "$cc -nostdlib -c -o $workdir/libc.o /work/in/libc.flat.c" + echo "$cc -nostdlib $LIB_HELPER_DEFINES -c -o $workdir/$LIB_HELPER_OBJ /work/in/tcc-lib/$LIB_HELPER_SRC" } emit_archive () { cc=$1 @@ -251,7 +250,7 @@ emit_archive () { echo "mkdir -p $workdir/libtcc1-obj" for src in $LIBTCC1_C_SRCS; do obj=$workdir/libtcc1-obj/${src%.c}.o - echo "$cc -nostdlib -I /work/in/tcc-include $LIBTCC1_C_DEFS -c -o $obj /work/in/tcc-lib/$src" + echo "$cc -nostdlib $LIBTCC1_C_DEFS -c -o $obj /work/in/tcc-lib/$src" libtcc1_objs="$libtcc1_objs $obj" done for src in $LIBTCC1_ASM_SRCS; do @@ -265,7 +264,7 @@ emit_link_tcc () { cc=$1 workdir=$2 out=$3 - echo "$cc -nostdlib -I /work/in/tcc-include -include /work/in/tcc-include/stdarg.h $workdir/crt1.o /work/in/tcc.flat.c $workdir/libc.a $workdir/libtcc1.a $workdir/libc.a -o $out" + echo "$cc -nostdlib $workdir/crt1.o /work/in/tcc.flat.c $workdir/libc.a $workdir/libtcc1.a $workdir/libc.a -o $out" } RUN_SCRIPT=$STAGE/in/run.sh @@ -287,7 +286,7 @@ RUN_SCRIPT=$STAGE/in/run.sh emit_helpers /work/out/tcc0 /tmp/stage1 echo echo '# Stage C: tcc0 -> tcc1 (link with raw .o files; no archive yet)' - echo "/work/out/tcc0 -nostdlib -I /work/in/tcc-include -include /work/in/tcc-include/stdarg.h /tmp/stage1/start.o /tmp/stage1/sys_stubs.o 
/tmp/stage1/mem.o /tmp/stage1/libc.o /tmp/stage1/$LIB_HELPER_OBJ /work/in/tcc.flat.c -o /work/out/tcc1" + echo "/work/out/tcc0 -nostdlib /tmp/stage1/start.o /tmp/stage1/sys_stubs.o /tmp/stage1/mem.o /tmp/stage1/libc.o /tmp/stage1/$LIB_HELPER_OBJ /work/in/tcc.flat.c -o /work/out/tcc1" echo 'chmod +x /work/out/tcc1' echo echo '# Stage D: tcc1 rebuilds helpers + archive, links tcc2' @@ -307,7 +306,7 @@ RUN_SCRIPT=$STAGE/in/run.sh echo '# (tcc2 and tcc3 are byte-identical by the fixed-point check, so' echo '# rebuilding with tcc3 would only repeat the cycle.)' echo 'cp /tmp/stage3/crt1.o /tmp/stage3/libc.a /tmp/stage3/libtcc1.a /work/out/' - echo '/work/out/tcc2 -nostdlib -I /work/in/tcc-include -I /work/in/mes-include /work/out/crt1.o /work/in/hello.c /work/out/libc.a /work/out/libtcc1.a /work/out/libc.a -o /work/out/hello' + echo '/work/out/tcc2 -nostdlib /work/out/crt1.o /work/in/hello.c /work/out/libc.a /work/out/libtcc1.a /work/out/libc.a -o /work/out/hello' echo 'chmod +x /work/out/hello' echo 'echo "--- run ---"' echo '/work/out/hello a b c' diff --git a/scripts/boot4-calibrate.sh b/scripts/boot4-calibrate.sh @@ -29,9 +29,9 @@ usage() { echo "usage: $0 <amd64|aarch64|riscv64>" >&2; exit 2; } ARCH=$1 case "$ARCH" in - amd64) PLATFORM=linux/amd64; TCC_TARGET=X86_64; MUSL_ARCH=x86_64 ;; - aarch64) PLATFORM=linux/arm64; TCC_TARGET=ARM64; MUSL_ARCH=aarch64 ;; - riscv64) PLATFORM=linux/riscv64; TCC_TARGET=RISCV64; MUSL_ARCH=riscv64 ;; + amd64) PLATFORM=linux/amd64; MUSL_ARCH=x86_64 ;; + aarch64) PLATFORM=linux/arm64; MUSL_ARCH=aarch64 ;; + riscv64) PLATFORM=linux/riscv64; MUSL_ARCH=riscv64 ;; *) usage ;; esac @@ -41,22 +41,20 @@ cd "$ROOT" IMAGE=boot2-scratch:$ARCH BOOT3=build/$ARCH/boot3 STAGE=build/$ARCH/.boot4-calibrate -TCC_DIR=build/tcc/$TCC_TARGET/tcc-0.9.26-1147-gee75a10c MUSL_TARBALL=vendor/upstream/musl-1.2.5.tar.gz MUSL_OVERRIDES=vendor/upstream/musl-1.2.5-overrides MUSL_DELETES=vendor/upstream/musl-1.2.5-deletes.txt 
MUSL_GENERATED=vendor/upstream/musl-1.2.5-generated/$MUSL_ARCH -SHIM_FILE=scripts/boot4-musl-shim-$ARCH.h +BRIDGE_FILE=build/tcc/stdarg-bridge.h SKIP_OUT=vendor/upstream/musl-1.2.5-skip-$ARCH.txt [ -x "$BOOT3/tcc3" ] || { echo "missing $BOOT3/tcc3 (run scripts/boot3.sh $ARCH)" >&2; exit 1; } [ -e "$BOOT3/libtcc1.a" ] || { echo "missing $BOOT3/libtcc1.a" >&2; exit 1; } -[ -d "$TCC_DIR/include" ] || { echo "missing $TCC_DIR/include" >&2; exit 1; } [ -e "$MUSL_TARBALL" ] || { echo "missing $MUSL_TARBALL" >&2; exit 1; } [ -d "$MUSL_OVERRIDES" ] || { echo "missing $MUSL_OVERRIDES" >&2; exit 1; } [ -e "$MUSL_DELETES" ] || { echo "missing $MUSL_DELETES" >&2; exit 1; } [ -d "$MUSL_GENERATED" ] || { echo "missing $MUSL_GENERATED (run scripts/musl-vendor.sh)" >&2; exit 1; } -[ -e "$SHIM_FILE" ] || { echo "missing $SHIM_FILE" >&2; exit 1; } +[ -e "$BRIDGE_FILE" ] || { echo "missing $BRIDGE_FILE (run scripts/stage1-flatten.sh)" >&2; exit 1; } if ! podman image exists "$IMAGE"; then podman build --platform "$PLATFORM" -t "$IMAGE" \ @@ -64,18 +62,17 @@ if ! podman image exists "$IMAGE"; then fi rm -rf "$STAGE" -mkdir -p "$STAGE/in/tcc-include" "$STAGE/out" +mkdir -p "$STAGE/in" "$STAGE/out" cp "$BOOT3/tcc3" "$STAGE/in/tcc" cp "$BOOT3/libtcc1.a" "$STAGE/in/libtcc1.a" -cp -R "$TCC_DIR/include/." "$STAGE/in/tcc-include/" tar xzf "$MUSL_TARBALL" -C "$STAGE/in/" MUSL_DIR=$STAGE/in/musl-1.2.5 cp -R "$MUSL_OVERRIDES/." 
"$MUSL_DIR/" while read -r p; do [ -n "$p" ] && rm -rf "$MUSL_DIR/$p" done < "$MUSL_DELETES" -cp "$SHIM_FILE" "$STAGE/in/musl-shim.h" +cp "$BRIDGE_FILE" "$STAGE/in/tcc-stdarg-bridge.h" cp "$MUSL_GENERATED/alltypes.h" "$STAGE/in/musl-alltypes.h" cp "$MUSL_GENERATED/syscall.h" "$STAGE/in/musl-syscall.h" @@ -105,7 +102,7 @@ CFLAGS_BASE="-std=c99 -nostdinc -ffreestanding -fno-strict-aliasing -O2 -fomit-frame-pointer -Werror=implicit-function-declaration -Werror=implicit-int -Werror=pointer-sign -Werror=pointer-arith" -CFLAGS_C="$CFLAGS_BASE -include $IN/musl-shim.h" +CFLAGS_C="$CFLAGS_BASE -include $IN/tcc-stdarg-bridge.h" CFLAGS_ASM="$CFLAGS_BASE" SRC_TOP="src/aio src/conf src/crypt src/ctype src/dirent diff --git a/scripts/boot4-musl-shim-aarch64.h b/scripts/boot4-musl-shim-aarch64.h @@ -1,25 +0,0 @@ -/* boot4 va_list shim for compiling musl with tcc 0.9.26 on aarch64. - * - * Mirrors tcc 0.9.26's <stdarg.h> aarch64 block: __va_list_struct is - * the AAPCS register-save area (stack ptr, GR/VR tops, GR/VR offsets); - * va_list is an array-of-1 of that struct so it decays to pointer at - * use sites. tcc routes va_start/va_arg through the __va_start / - * __va_arg frontend intrinsics directly (no decomposition into - * size/align/type-tag like the x86_64 path). - * - * Untested — see docs/MUSL.md "multi-arch status". */ - -typedef struct { - void *__stack; - void *__gr_top; - void *__vr_top; - int __gr_offs; - int __vr_offs; -} __va_list_struct; - -typedef __va_list_struct __builtin_va_list[1]; - -#define __builtin_va_start(ap, last) __va_start(ap, last) -#define __builtin_va_arg(ap, type) __va_arg(ap, type) -#define __builtin_va_copy(dest, src) ((dest)[0] = (src)[0]) -#define __builtin_va_end(ap) diff --git a/scripts/boot4-musl-shim-amd64.h b/scripts/boot4-musl-shim-amd64.h @@ -1,43 +0,0 @@ -/* boot4 va_list shim for compiling musl with tcc 0.9.26. 
- * - * musl's stdarg.h and bits/alltypes.h spell varargs the GCC way: - * - * typedef __builtin_va_list va_list; - * #define va_start(v,l) __builtin_va_start(v,l) - * ... - * - * tcc 0.9.26 has no `__builtin_va_list` typename; its own stdarg.h - * names the same shape `__va_list_struct[1]` and routes va_start/va_arg - * through tcc intrinsics __va_start, __va_arg, __builtin_frame_address, - * and __builtin_va_arg_types. musl is compiled with -nostdinc so tcc's - * stdarg.h is unreachable on its own; this header is `-include`d on - * every musl translation unit to bridge the two notations. - * - * Layout matches tcc/lib/va_list.c — those four fields are the SysV - * x86_64 ABI register-save struct that __va_start initializes and - * __va_arg walks. boot3's libtcc1.a provides __va_start and __va_arg - * at link time. */ - -typedef struct { - unsigned int gp_offset; - unsigned int fp_offset; - union { - unsigned int overflow_offset; - char *overflow_arg_area; - }; - char *reg_save_area; -} __va_list_struct; - -typedef __va_list_struct __builtin_va_list[1]; - -void __va_start(__va_list_struct *ap, void *fp); -void *__va_arg(__va_list_struct *ap, int arg_type, int size, int align); - -#define __builtin_va_start(ap, last) __va_start(ap, __builtin_frame_address(0)) -#define __builtin_va_arg(ap, type) \ - (*(type *)(__va_arg(ap, \ - __builtin_va_arg_types(type), \ - sizeof(type), \ - __alignof__(type)))) -#define __builtin_va_copy(dest, src) (*(dest) = *(src)) -#define __builtin_va_end(ap) diff --git a/scripts/boot4-musl-shim-riscv64.h b/scripts/boot4-musl-shim-riscv64.h @@ -1,40 +0,0 @@ -/* boot4 va_list shim for compiling musl with tcc 0.9.26 on riscv64. - * - * musl's stdarg.h and bits/alltypes.h spell varargs the GCC way: - * - * typedef __builtin_va_list va_list; - * #define va_start(v,l) __builtin_va_start(v,l) - * ... - * - * tcc 0.9.26's riscv64 stdarg.h spells va_list as `char *` and - * implements va_arg / va_copy / va_end as plain pointer-arithmetic - * macros. 
Mirror that here, since musl is compiled with -nostdinc - * (tcc's stdarg.h is unreachable on its own); this header is - * `-include`d on every musl translation unit. - * - * The va_arg expansion is the riscv64 lp64/lp64d branch from - * tcc/include/stdarg.h: 8-byte slots, args ≤ 16 bytes by-value, args - * > 16 bytes by-pointer. __builtin_va_start is recognized internally - * by tcc's frontend; we only need to bridge the typedef and the - * remaining macros. - * - * Untested-but-promising — may need adjustment once first failures - * appear in printf/scanf/syslog/openat-style varargs callers. */ - -typedef char *__builtin_va_list; - -#define __va_reg_size 8 /* __riscv_xlen / 8, fixed for rv64 */ - -#define _tcc_align(addr,type) \ - (((unsigned long)(addr) + __alignof__(type) - 1) & -(__alignof__(type))) - -#define __builtin_va_arg(ap, type) \ - (*(sizeof(type) > (2 * __va_reg_size) \ - ? *(type **)((ap += __va_reg_size) - __va_reg_size) \ - : (ap = (__builtin_va_list)(_tcc_align(ap, type) + \ - (sizeof(type) + __va_reg_size - 1) & -__va_reg_size), \ - (type *)(ap - ((sizeof(type) + __va_reg_size - 1) & \ - -__va_reg_size))))) - -#define __builtin_va_copy(dest, src) ((dest) = (src)) -#define __builtin_va_end(ap) ((void)(ap)) diff --git a/scripts/boot4.sh b/scripts/boot4.sh @@ -24,8 +24,11 @@ ## — list of upstream files removed by the ## same patch set (one path per line, ## relative to musl-1.2.5/). 
-## scripts/boot4-musl-shim-$ARCH.h -## — per-arch __builtin_va_list bridge +## build/tcc/stdarg-bridge.h +## — per-arch __builtin_va_list bridge, +## generated by scripts/stage1-flatten.sh +## (shared with boot3; one file, three +## arches gated by #ifdef inside) ## ## ─── Outputs ───────────────────────────────────────────────────────── ## build/$ARCH/boot4/libc.a @@ -61,7 +64,7 @@ MUSL_OVERRIDES=vendor/upstream/musl-1.2.5-overrides MUSL_DELETES=vendor/upstream/musl-1.2.5-deletes.txt MUSL_GENERATED=vendor/upstream/musl-1.2.5-generated/$MUSL_ARCH MUSL_SKIP=vendor/upstream/musl-1.2.5-skip-$ARCH.txt -SHIM_FILE=scripts/boot4-musl-shim-$ARCH.h +BRIDGE_FILE=build/tcc/stdarg-bridge.h # ── prerequisites ───────────────────────────────────────────────────── [ -x "$BOOT3/tcc3" ] || { echo "[boot4 $ARCH] missing $BOOT3/tcc3 (run scripts/boot3.sh $ARCH)" >&2; exit 1; } @@ -71,7 +74,7 @@ SHIM_FILE=scripts/boot4-musl-shim-$ARCH.h [ -e "$MUSL_DELETES" ] || { echo "[boot4 $ARCH] missing $MUSL_DELETES" >&2; exit 1; } [ -d "$MUSL_GENERATED" ] || { echo "[boot4 $ARCH] missing $MUSL_GENERATED (run scripts/musl-vendor.sh)" >&2; exit 1; } [ -e "$MUSL_SKIP" ] || { echo "[boot4 $ARCH] missing $MUSL_SKIP (run scripts/boot4-calibrate.sh $ARCH)" >&2; exit 1; } -[ -e "$SHIM_FILE" ] || { echo "[boot4 $ARCH] missing $SHIM_FILE" >&2; exit 1; } +[ -e "$BRIDGE_FILE" ] || { echo "[boot4 $ARCH] missing $BRIDGE_FILE (run scripts/stage1-flatten.sh)" >&2; exit 1; } if ! podman image exists "$IMAGE"; then echo "[boot4 $ARCH] building $IMAGE" @@ -106,12 +109,14 @@ while read -r p; do [ -n "$p" ] && rm -rf "$MUSL_DIR/$p" done < "$MUSL_DELETES" -cp "$SHIM_FILE" "$STAGE/in/musl-shim.h" +cp "$BRIDGE_FILE" "$STAGE/in/tcc-stdarg-bridge.h" # Pre-generated alltypes.h + syscall.h for $MUSL_ARCH; replace the # in-container awk that ran mkalltypes.sed and the SYS_ rewrite. Source # of truth is scripts/musl-vendor.sh (regenerates these files). 
cp "$MUSL_GENERATED/alltypes.h" "$STAGE/in/musl-alltypes.h" cp "$MUSL_GENERATED/syscall.h" "$STAGE/in/musl-syscall.h" +# version.h is pre-staged so the container body has no `>` redirection. +echo '#define VERSION "1.2.5-tcc-boot4"' > "$STAGE/in/musl-version.h" cp scripts/boot-hello.c "$STAGE/in/hello.c" @@ -193,11 +198,13 @@ n_skip=$(wc -l < "$MUSL_SKIP") echo "[boot4 $ARCH] keep=$n_src skip=$n_skip (calibrated)" # ── emit flat container build script ────────────────────────────────── -# Generates a straight-line shell program: cd, mkdir, cp, then one tcc +# Generates a straight-line shell program: mkdir, cp, then one tcc # invocation per source, then ar, then link+run hello. No control flow -# beyond sequential exec; suitable for a kaem-class shell. -CFLAGS_BASE="-std=c99 -nostdinc -ffreestanding -fno-strict-aliasing -D_XOPEN_SOURCE=700 -I./arch/$MUSL_ARCH -I./arch/generic -Iobj/src/internal -I./src/include -I./src/internal -Iobj/include -I./include -O2 -fomit-frame-pointer -Werror=implicit-function-declaration -Werror=implicit-int -Werror=pointer-sign -Werror=pointer-arith" -CFLAGS_C="$CFLAGS_BASE -include /work/in/musl-shim.h" +# beyond sequential exec, no shell redirection, no `cd`; suitable for a +# kaem-class shell. All paths are absolute. +CWORK=/tmp/musl-1.2.5 +CFLAGS_BASE="-std=c99 -nostdinc -ffreestanding -fno-strict-aliasing -D_XOPEN_SOURCE=700 -I$CWORK/arch/$MUSL_ARCH -I$CWORK/arch/generic -I$CWORK/obj/src/internal -I$CWORK/src/include -I$CWORK/src/internal -I$CWORK/obj/include -I$CWORK/include -O2 -fomit-frame-pointer -Werror=implicit-function-declaration -Werror=implicit-int -Werror=pointer-sign -Werror=pointer-arith" +CFLAGS_C="$CFLAGS_BASE -include /work/in/tcc-stdarg-bridge.h" CFLAGS_ASM="$CFLAGS_BASE" CRTFLAGS_C="$CFLAGS_C -fno-stack-protector -DCRT" CRTFLAGS_ASM="$CFLAGS_ASM -fno-stack-protector -DCRT" @@ -206,12 +213,12 @@ CRTFLAGS_ASM="$CFLAGS_ASM -fno-stack-protector -DCRT" # container if/then/else). 
if [ -f "$MUSL_DIR/crt/$MUSL_ARCH/crti.s" ]; then CRT_LINES_TXT=$(printf '%s\n' \ - "/work/in/tcc $CRTFLAGS_ASM -c crt/$MUSL_ARCH/crti.s -o obj/crt/crti.o" \ - "/work/in/tcc $CRTFLAGS_ASM -c crt/$MUSL_ARCH/crtn.s -o obj/crt/crtn.o") + "/work/in/tcc $CRTFLAGS_ASM -c $CWORK/crt/$MUSL_ARCH/crti.s -o $CWORK/obj/crt/crti.o" \ + "/work/in/tcc $CRTFLAGS_ASM -c $CWORK/crt/$MUSL_ARCH/crtn.s -o $CWORK/obj/crt/crtn.o") else CRT_LINES_TXT=$(printf '%s\n' \ - "/work/in/tcc $CRTFLAGS_C -c crt/crti.c -o obj/crt/crti.o" \ - "/work/in/tcc $CRTFLAGS_C -c crt/crtn.c -o obj/crt/crtn.o") + "/work/in/tcc $CRTFLAGS_C -c $CWORK/crt/crti.c -o $CWORK/obj/crt/crti.o" \ + "/work/in/tcc $CRTFLAGS_C -c $CWORK/crt/crtn.c -o $CWORK/obj/crt/crtn.o") fi RUN_SCRIPT=$STAGE/in/run.sh @@ -220,21 +227,19 @@ RUN_SCRIPT=$STAGE/in/run.sh echo 'set -eu' echo echo '# stage A: working tree in tmpfs' - echo 'cd /tmp' - echo 'cp -R /work/in/musl-1.2.5 .' - echo 'cd musl-1.2.5' + echo "cp -R /work/in/musl-1.2.5 $CWORK" echo echo '# stage B: pre-generated headers + version stamp' - echo 'mkdir -p obj/include/bits obj/src/internal obj/lib obj/crt lib' - echo 'cp /work/in/musl-alltypes.h obj/include/bits/alltypes.h' - echo 'cp /work/in/musl-syscall.h obj/include/bits/syscall.h' - echo "echo '#define VERSION \"1.2.5-tcc-boot4\"' > obj/src/internal/version.h" + echo "mkdir -p $CWORK/obj/include/bits $CWORK/obj/src/internal $CWORK/obj/lib $CWORK/obj/crt $CWORK/lib" + echo "cp /work/in/musl-alltypes.h $CWORK/obj/include/bits/alltypes.h" + echo "cp /work/in/musl-syscall.h $CWORK/obj/include/bits/syscall.h" + echo "cp /work/in/musl-version.h $CWORK/obj/src/internal/version.h" echo echo '# per-source obj directories' - while read -r d; do echo "mkdir -p $d"; done < "$STAGE/_host/build-objdirs.txt" + while read -r d; do echo "mkdir -p $CWORK/$d"; done < "$STAGE/_host/build-objdirs.txt" echo echo "# stage C: compile sources ($n_src after calibration)" - awk -v CC=/work/in/tcc -v CF="$CFLAGS_C" -v AF="$CFLAGS_ASM" ' + awk -v 
CC=/work/in/tcc -v CF="$CFLAGS_C" -v AF="$CFLAGS_ASM" -v PFX="$CWORK/" ' { src = $0 obj = "obj/" src @@ -242,27 +247,27 @@ RUN_SCRIPT=$STAGE/in/run.sh if (src ~ /\.c$/) flags = CF else if (src ~ /\.[sS]$/) flags = AF else flags = CF - print CC " " flags " -c " src " -o " obj + print CC " " flags " -c " PFX src " -o " PFX obj } ' "$STAGE/_host/build-srcs.txt" echo echo '# stage D: CRT objects' - echo "/work/in/tcc $CRTFLAGS_C -fPIC -c crt/Scrt1.c -o obj/crt/Scrt1.o" - echo "/work/in/tcc $CRTFLAGS_C -c crt/crt1.c -o obj/crt/crt1.o" - echo "/work/in/tcc $CRTFLAGS_C -fPIC -c crt/rcrt1.c -o obj/crt/rcrt1.o" + echo "/work/in/tcc $CRTFLAGS_C -fPIC -c $CWORK/crt/Scrt1.c -o $CWORK/obj/crt/Scrt1.o" + echo "/work/in/tcc $CRTFLAGS_C -c $CWORK/crt/crt1.c -o $CWORK/obj/crt/crt1.o" + echo "/work/in/tcc $CRTFLAGS_C -fPIC -c $CWORK/crt/rcrt1.c -o $CWORK/obj/crt/rcrt1.o" printf '%s\n' "$CRT_LINES_TXT" - echo 'cp obj/crt/Scrt1.o obj/crt/crt1.o obj/crt/rcrt1.o obj/crt/crti.o obj/crt/crtn.o lib/' + echo "cp $CWORK/obj/crt/Scrt1.o $CWORK/obj/crt/crt1.o $CWORK/obj/crt/rcrt1.o $CWORK/obj/crt/crti.o $CWORK/obj/crt/crtn.o $CWORK/lib/" echo echo '# stage E: archive libc.a' - printf '/work/in/tcc -ar rcs lib/libc.a' - awk '{ obj = "obj/" $0; sub(/\.[^.]*$/, ".o", obj); printf " %s", obj }' "$STAGE/_host/build-srcs.txt" + printf '/work/in/tcc -ar rcs %s/lib/libc.a' "$CWORK" + awk -v PFX="$CWORK/" '{ obj = "obj/" $0; sub(/\.[^.]*$/, ".o", obj); printf " %s%s", PFX, obj }' "$STAGE/_host/build-srcs.txt" echo echo echo '# publish artifacts to /work/out' - echo 'cp lib/libc.a lib/crt1.o lib/crti.o lib/crtn.o /work/out/' + echo "cp $CWORK/lib/libc.a $CWORK/lib/crt1.o $CWORK/lib/crti.o $CWORK/lib/crtn.o /work/out/" echo echo '# stage F: link + run hello' - echo "/work/in/tcc -static -nostdinc -nostdlib -include /work/in/musl-shim.h -I./include -I./arch/$MUSL_ARCH -I./arch/generic -Iobj/include lib/crt1.o /work/in/hello.c -L./lib -lc -L/work/in -ltcc1 -L./lib -lc -o /work/out/hello" + echo 
"/work/in/tcc -static -nostdinc -nostdlib -include /work/in/tcc-stdarg-bridge.h -I$CWORK/include -I$CWORK/arch/$MUSL_ARCH -I$CWORK/arch/generic -I$CWORK/obj/include $CWORK/lib/crt1.o /work/in/hello.c -L$CWORK/lib -lc -L/work/in -ltcc1 -L$CWORK/lib -lc -o /work/out/hello" echo 'echo "--- run ---"' echo '/work/out/hello a b c' } > "$RUN_SCRIPT" diff --git a/scripts/libc-flatten.sh b/scripts/libc-flatten.sh @@ -161,6 +161,14 @@ apply_simple_patch \ # --- (3) flatten via host preprocessor -------------------------------- HOST_CC=${HOST_CC:-cc} +# Bridge file: post-patch tcc <stdarg.h>. Written by stage1-flatten.sh, +# which boot3.sh / Makefile run first. Required so we can prepend the +# per-arch va_list typedef + __builtin_va_* → tcc __va_* mapping into +# libc.flat.c, eliminating the need for `-I /work/in/tcc-include +# -include /work/in/tcc-include/stdarg.h` on every in-container compile. +BRIDGE=$ROOT/build/tcc/stdarg-bridge.h +[ -e "$BRIDGE" ] || { echo "missing $BRIDGE — run scripts/stage1-flatten.sh first" >&2; exit 1; } + # -I order matters: vendor/boot2-include first so our stdarg.h shim # (routes va_* through __builtin_va_*; see comment in that file) wins # over mes's. Then $STAGE/include for everything else — <signal.h>, @@ -179,7 +187,19 @@ HOST_CC=${HOST_CC:-cc} -D __${MES_ARCH}__=1 \ -D __riscv_xlen=64 \ -D inline= \ - "$STAGE/unified-libc.c" > "$FLAT" + "$STAGE/unified-libc.c" > "$FLAT.body" + +# Prepend the bridge, guarded by !CCSCM (cc.scm predefines CCSCM and +# handles __builtin_va_* natively, so it must skip this block). Under +# tcc, the per-arch #ifdefs inside the bridge resolve and provide the +# va_list typedef + __builtin_va_* → tcc native __va_* macros. 
+{ + echo '#ifndef CCSCM' + cat "$BRIDGE" + echo '#endif' + cat "$FLAT.body" +} > "$FLAT" +rm -f "$FLAT.body" BYTES=$(wc -c < "$FLAT") echo "produced $FLAT ($BYTES bytes)" diff --git a/scripts/stage1-flatten.sh b/scripts/stage1-flatten.sh @@ -270,7 +270,32 @@ FLAT=$WORK/tcc.flat.c -D "TCC_VERSION=\"0.9.26\"" \ -D ONE_SOURCE=1 \ -D TCC_TARGET_${TCC_TARGET_DEFINE}=1 \ - "$SRC/tcc.c" > "$FLAT" + "$SRC/tcc.c" > "$FLAT.body" + +# Publish the post-patch tcc <stdarg.h> as a shared bridge file. +# libc-flatten.sh prepends the same bridge to libc.flat.c, so the boot3 +# container compile no longer needs `-I /work/in/tcc-include +# -include /work/in/tcc-include/stdarg.h`. The patched stdarg.h is +# byte-identical across X86_64 / ARM64 / RISCV64 (per-arch logic lives +# inside its #ifdefs), so a cross-arch shared path is fine — whichever +# arch's stage1-flatten.sh runs last wins, idempotently. +BRIDGE=$ROOT/build/tcc/stdarg-bridge.h +mkdir -p "$ROOT/build/tcc" +cp "$SRC/include/stdarg.h" "$BRIDGE" + +# Prepend the bridge into tcc.flat.c, guarded by !CCSCM so cc.scm +# (which has __builtin_va_list / __builtin_va_* as native frontend +# keywords and predefines CCSCM) skips the whole block. Under tcc, +# the per-arch #ifdef branches inside the bridge resolve and define +# the va_list typedef + __builtin_va_* → tcc native __va_* macros +# that flat.c needs. +{ + echo '#ifndef CCSCM' + cat "$BRIDGE" + echo '#endif' + cat "$FLAT.body" +} > "$FLAT" +rm -f "$FLAT.body" BYTES=$(wc -c < "$FLAT") echo "produced $FLAT ($BYTES bytes)"