boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 8e04ae6420498beedf0939e5e12509290dd97157
parent 7dd647f943c4392b22761a9ec71c49466f885d6e
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Mon,  4 May 2026 16:51:36 -0700

boot3: fold va_list_shim.h into tcc <stdarg.h> patch

Append the gcc/clang __builtin_va_* aliases to tcc-0.9.26's
include/stdarg.h via a new stage1-flatten patch instead of carrying
them in a separate force-included tcc-libc/va_list_shim.h. boot3.sh,
boot-build-tcc-tcc.sh, and the Makefile now -include the patched
stdarg.h directly.

Diffstat:
M Makefile | 17 +++++++++--------
M docs/TCC-TODO.md | 4 ++--
M scripts/boot-build-tcc-tcc.sh | 3 +--
M scripts/boot3.sh | 8 +++-----
A scripts/simple-patches/tcc-0.9.26/stdarg-builtin-aliases.after | 22 ++++++++++++++++++++++
A scripts/simple-patches/tcc-0.9.26/stdarg-builtin-aliases.before | 4 ++++
M scripts/stage1-flatten.sh | 18 ++++++++++++++++++
D tcc-libc/va_list_shim.h | 32 --------------------------------
8 files changed, 59 insertions(+), 49 deletions(-)

diff --git a/Makefile b/Makefile @@ -522,19 +522,20 @@ $(TCC_LIBC_MEM): tcc-cc/mem.c \ -nostdlib -I $(TCC_CC_TCC_INCLUDE) -c -o $@ $< # libc.o: tcc-boot2 compiles the same flatten output cc.scm consumes. -# `-include tcc-libc/va_list_shim.h` aliases gcc's __builtin_va_* -# names onto tcc's native va_* macros (tcc has no notion of a -# __builtin_va_list keyword); the shim is the only piece glueing -# the host-preprocessed flatten to tcc-boot2's frontend. +# Force-including tcc's <stdarg.h> brings in the gcc/clang spelling +# bridge appended by the stdarg-builtin-aliases patch in +# stage1-flatten.sh: it aliases gcc's __builtin_va_* names onto tcc's +# native va_* macros (tcc has no __builtin_va_list keyword on +# amd64/aarch64). Without that bridge the host-preprocessed flatten +# won't compile back through tcc-boot2. $(TCC_LIBC_LIBC): build/$(ARCH)/vendor/mes-libc/libc.flat.c \ - tcc-libc/va_list_shim.h \ build/$(ARCH)/tcc-boot2/tcc-boot2 \ build/$(ARCH)/.image mkdir -p $(@D) $(call PODMAN,$(ARCH)) \ build/$(ARCH)/tcc-boot2/tcc-boot2 \ -nostdlib -I $(TCC_CC_TCC_INCLUDE) \ - -include tcc-libc/va_list_shim.h \ + -include $(TCC_CC_TCC_INCLUDE)/stdarg.h \ -c -o $@ $< # --- tcc-tcc: second-stage tcc ------------------------------------------- @@ -546,7 +547,7 @@ $(TCC_LIBC_LIBC): build/$(ARCH)/vendor/mes-libc/libc.flat.c \ tcc-tcc: $(TCC_TCC_BIN) $(TCC_TCC_BIN): scripts/boot-build-tcc-tcc.sh \ - $(TCC_FLAT) tcc-libc/va_list_shim.h \ + $(TCC_FLAT) \ build/$(ARCH)/tcc-boot2/tcc-boot2 \ $(TCC_LIBC_START) $(TCC_LIBC_SYS_STUBS) \ $(TCC_LIBC_MEM) $(TCC_LIBC_LIBC) \ @@ -564,7 +565,7 @@ $(TCC_TCC_BIN): scripts/boot-build-tcc-tcc.sh \ tcc-tcc-tcc: $(TCC_TCC_TCC_BIN) $(TCC_TCC_TCC_BIN): scripts/boot-build-tcc-tcc.sh \ - $(TCC_FLAT) tcc-libc/va_list_shim.h \ + $(TCC_FLAT) \ $(TCC_TCC_BIN) \ $(TCC_LIBC_START) $(TCC_LIBC_SYS_STUBS) \ $(TCC_LIBC_MEM) $(TCC_LIBC_LIBC) \ diff --git a/docs/TCC-TODO.md b/docs/TCC-TODO.md @@ -268,8 +268,8 @@ path. 
```sh # In the riscv64 container with boot3 outputs present: -$TCC0 -nostdlib -I $TCC_INC -include $SHIM -c -o /tmp/flat-tcc0.o tcc.flat.c -$TCC1 -nostdlib -I $TCC_INC -include $SHIM -c -o /tmp/flat-tcc1.o tcc.flat.c +$TCC0 -nostdlib -I $TCC_INC -include $TCC_INC/stdarg.h -c -o /tmp/flat-tcc0.o tcc.flat.c +$TCC1 -nostdlib -I $TCC_INC -include $TCC_INC/stdarg.h -c -o /tmp/flat-tcc1.o tcc.flat.c # wc -c /tmp/flat-tcc0.o /tmp/flat-tcc1.o → 616100 vs 615892 # objdump -d both, normalize addresses, diff to find divergent functions ``` diff --git a/scripts/boot-build-tcc-tcc.sh b/scripts/boot-build-tcc-tcc.sh @@ -50,7 +50,6 @@ LIBC_O=build/$ARCH/tcc-libc/libc.o MEM_O=build/$ARCH/tcc-libc/mem.o SYS_O=build/$ARCH/tcc-libc/sys_stubs.o START_O=build/$ARCH/tcc-libc/start.o -SHIM=tcc-libc/va_list_shim.h WORK=$(dirname "$OUT") mkdir -p "$WORK" @@ -83,6 +82,6 @@ fi # Compile + link the next-stage tcc in one $CC invocation. # shellcheck disable=SC2086 # $LIB_OBJS is intentionally word-split (may be empty). -"$CC" -nostdlib -I "$TCC_INC" -include "$SHIM" \ +"$CC" -nostdlib -I "$TCC_INC" -include "$TCC_INC/stdarg.h" \ "$START_O" "$SYS_O" "$MEM_O" "$LIBC_O" $LIB_OBJS \ "$TCC_FLAT" -o "$OUT" diff --git a/scripts/boot3.sh b/scripts/boot3.sh @@ -43,7 +43,6 @@ ## (Throughout this script: tcc0/tcc1/tcc2/tcc3 are the four stages ## above; tcc0 is the cc.scm-built bootstrap, tcc2/tcc3 form the ## self-host fixed-point check.) -## tcc-libc/va_list_shim.h — gcc/tcc va_list bridge ## tcc-cc/mem.c — memcpy/memmove/memset/memcmp ## build/tcc/$TCC_TARGET/tcc-0.9.26-1147-gee75a10c/include/** (whole tree) ## vendor/mes-libc/include/** — mes-libc headers for hello @@ -192,7 +191,6 @@ cp "vendor/seed/$ARCH/ELF.hex2" "$STAGE/in/ELF.hex2" # tcc-libc / tcc-cc helpers cp "tcc-libc/$ARCH/start.S" "$STAGE/in/start.S" cp "tcc-libc/$ARCH/sys_stubs.S" "$STAGE/in/sys_stubs.S" -cp tcc-libc/va_list_shim.h "$STAGE/in/va_list_shim.h" cp tcc-cc/mem.c "$STAGE/in/mem.c" # Per-arch libtcc1 helper sources. 
LIB_HELPER_SRC is always also in @@ -241,7 +239,7 @@ emit_helpers () { echo "$cc -nostdlib -c -o $workdir/start.o /work/in/start.S" echo "$cc -nostdlib -c -o $workdir/sys_stubs.o /work/in/sys_stubs.S" echo "$cc -nostdlib -I /work/in/tcc-include -c -o $workdir/mem.o /work/in/mem.c" - echo "$cc -nostdlib -I /work/in/tcc-include -include /work/in/va_list_shim.h -c -o $workdir/libc.o /work/in/libc.flat.c" + echo "$cc -nostdlib -I /work/in/tcc-include -include /work/in/tcc-include/stdarg.h -c -o $workdir/libc.o /work/in/libc.flat.c" echo "$cc -nostdlib -I /work/in/tcc-include $LIB_HELPER_DEFINES -c -o $workdir/$LIB_HELPER_OBJ /work/in/tcc-lib/$LIB_HELPER_SRC" } emit_archive () { @@ -267,7 +265,7 @@ emit_link_tcc () { cc=$1 workdir=$2 out=$3 - echo "$cc -nostdlib -I /work/in/tcc-include -include /work/in/va_list_shim.h $workdir/crt1.o /work/in/tcc.flat.c $workdir/libc.a $workdir/libtcc1.a $workdir/libc.a -o $out" + echo "$cc -nostdlib -I /work/in/tcc-include -include /work/in/tcc-include/stdarg.h $workdir/crt1.o /work/in/tcc.flat.c $workdir/libc.a $workdir/libtcc1.a $workdir/libc.a -o $out" } RUN_SCRIPT=$STAGE/in/run.sh @@ -289,7 +287,7 @@ RUN_SCRIPT=$STAGE/in/run.sh emit_helpers /work/out/tcc0 /tmp/stage1 echo echo '# Stage C: tcc0 -> tcc1 (link with raw .o files; no archive yet)' - echo "/work/out/tcc0 -nostdlib -I /work/in/tcc-include -include /work/in/va_list_shim.h /tmp/stage1/start.o /tmp/stage1/sys_stubs.o /tmp/stage1/mem.o /tmp/stage1/libc.o /tmp/stage1/$LIB_HELPER_OBJ /work/in/tcc.flat.c -o /work/out/tcc1" + echo "/work/out/tcc0 -nostdlib -I /work/in/tcc-include -include /work/in/tcc-include/stdarg.h /tmp/stage1/start.o /tmp/stage1/sys_stubs.o /tmp/stage1/mem.o /tmp/stage1/libc.o /tmp/stage1/$LIB_HELPER_OBJ /work/in/tcc.flat.c -o /work/out/tcc1" echo 'chmod +x /work/out/tcc1' echo echo '# Stage D: tcc1 rebuilds helpers + archive, links tcc2' diff --git a/scripts/simple-patches/tcc-0.9.26/stdarg-builtin-aliases.after 
b/scripts/simple-patches/tcc-0.9.26/stdarg-builtin-aliases.after @@ -0,0 +1,22 @@ +typedef va_list __gnuc_va_list; +#define _VA_LIST_DEFINED + +/* gcc/clang spelling bridge for the va_list family. cc.scm only + * recognizes __builtin_va_list and __builtin_va_start/arg/end/copy as + * builtins, so the host-preprocessed flat.c routes va_* through those + * names; on amd64/aarch64 stock tcc 0.9.26 has no such frontend + * keywords, so the same flat.c won't compile back through tcc without + * a bridge. Map __builtin_* onto the va_* macros above, which on + * those arches expand to tcc's __va_start / __va_arg intrinsics. The + * __riscv branch above already names everything by the gcc spelling, + * so gate this block on !__riscv to avoid an infinite-loop expansion + * (__builtin_va_arg -> va_arg -> __builtin_va_arg). */ +#ifndef __riscv +typedef va_list __builtin_va_list; +#define __builtin_va_start(ap, last) va_start(ap, last) +#define __builtin_va_end(ap) va_end(ap) +#define __builtin_va_arg(ap, type) va_arg(ap, type) +#define __builtin_va_copy(dst, src) va_copy(dst, src) +#endif + +#endif /* _STDARG_H */ diff --git a/scripts/simple-patches/tcc-0.9.26/stdarg-builtin-aliases.before b/scripts/simple-patches/tcc-0.9.26/stdarg-builtin-aliases.before @@ -0,0 +1,4 @@ +typedef va_list __gnuc_va_list; +#define _VA_LIST_DEFINED + +#endif /* _STDARG_H */ diff --git a/scripts/stage1-flatten.sh b/scripts/stage1-flatten.sh @@ -208,6 +208,16 @@ apply_our_patch arm64-tok-include-tcctok-h "$SRC/tcctok.h" # don't use '#' as an immediate prefix, so they're unaffected. apply_our_patch asm-hash-bol-only "$SRC/tccpp.c" +# Side effect of the patch above: alloca86_64-bt.S has two tab-prefixed +# tail comments (`mov %rax,%rsi # size, a second parm…`) that the +# x86_64 assembler now rejects with "end of line expected". They are +# inert documentation; strip them. 
The file is only compiled when +# building the amd64 libtcc1.a (LIBTCC1_ASM_SRCS in boot3.sh), so this +# rewrite is a no-op on aarch64/riscv64 builds. +awk '{ sub(/\t#.*$/, ""); print }' "$SRC/lib/alloca86_64-bt.S" \ + > "$SRC/lib/alloca86_64-bt.S.tmp" +mv "$SRC/lib/alloca86_64-bt.S.tmp" "$SRC/lib/alloca86_64-bt.S" + # riscv64 stdarg.h order fix — the upstream `#elif __riscv` branch # uses `__builtin_va_list` before it's typedef'd. Stock tcc treats # `__builtin_va_list` as a built-in keyword and forgives the forward @@ -218,6 +228,14 @@ apply_our_patch asm-hash-bol-only "$SRC/tccpp.c" # branch is absent (other tcc trees). apply_our_patch riscv-stdarg-fix "$SRC/include/stdarg.h" +# gcc/clang __builtin_va_* spelling bridge — append aliases at the end +# of tcc's <stdarg.h> so the same flat.c (which uses the gcc spelling +# because that's what cc.scm recognizes) also compiles back through +# tcc on amd64/aarch64. Gated `#ifndef __riscv` inside .after — the +# __riscv branch already maps these names natively. See the .after +# block for the full rationale. +apply_our_patch stdarg-builtin-aliases "$SRC/include/stdarg.h" + # Empty config.h shims — pass1.kaem creates these via `catm <out>` (line 27-28). : > "$SRC/config.h" mkdir -p "$WORK/mes-overlay/mes" diff --git a/tcc-libc/va_list_shim.h b/tcc-libc/va_list_shim.h @@ -1,32 +0,0 @@ -/* tcc-libc va_list shim — pre-included when tcc-boot2 compiles - * libc.flat.c (or any other host-preprocessed TU under our boot2 - * stdarg.h shim). The flatten step routes `va_list` through - * `__builtin_va_list`, but on aarch64 / amd64 stock tcc's frontend - * does not recognize that token as a type — tcc's <stdarg.h> defines - * `va_list` as `__va_list_struct[1]`. Make `__builtin_va_list` an - * alias for the same array type so libc.flat.c's - * - * typedef __builtin_va_list va_list; - * - * collapses to a (legal) duplicate typedef of the existing - * tcc-stdlib `va_list`. 
The `__builtin_va_*` macros in the flatten - * are direct tcc intrinsics; the aliases just reach them by the - * gcc-conformant builtin spelling. - * - * On riscv64 tcc's <stdarg.h> already names everything by the - * gcc spelling — `va_list` is a typedef to `__builtin_va_list`, - * `__builtin_va_arg` / `__builtin_va_end` / `__builtin_va_copy` are - * macros, and `__builtin_va_start` is a frontend intrinsic — so the - * shim has nothing to add. The aliases below would in fact redefine - * the macros stdarg.h already declared, triggering an infinite-loop - * expansion of `__builtin_va_arg(ap, type)` through `va_arg(ap, - * type)` and back. Gate the aliases on !__riscv. */ -#include <stdarg.h> - -#ifndef __riscv -typedef va_list __builtin_va_list; -#define __builtin_va_start(ap, last) va_start(ap, last) -#define __builtin_va_end(ap) va_end(ap) -#define __builtin_va_arg(ap, type) va_arg(ap, type) -#define __builtin_va_copy(dst, src) va_copy(dst, src) -#endif