commit 8e04ae6420498beedf0939e5e12509290dd97157
parent 7dd647f943c4392b22761a9ec71c49466f885d6e
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Mon, 4 May 2026 16:51:36 -0700
boot3: fold va_list_shim.h into tcc <stdarg.h> patch
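Append the gcc/clang __builtin_va_* aliases to tcc-0.9.26's
include/stdarg.h via a new stage1-flatten patch instead of carrying
them in a separate force-included tcc-libc/va_list_shim.h. boot3.sh,
boot-build-tcc-tcc.sh, and the Makefile now -include the patched
stdarg.h directly.
Also strip the tab-prefixed `#` tail comments from
lib/alloca86_64-bt.S in stage1-flatten.sh; the x86_64 assembler
rejects them once the asm-hash-bol-only patch is applied.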
Diffstat:
8 files changed, 59 insertions(+), 49 deletions(-)
diff --git a/Makefile b/Makefile
@@ -522,19 +522,20 @@ $(TCC_LIBC_MEM): tcc-cc/mem.c \
-nostdlib -I $(TCC_CC_TCC_INCLUDE) -c -o $@ $<
# libc.o: tcc-boot2 compiles the same flatten output cc.scm consumes.
-# `-include tcc-libc/va_list_shim.h` aliases gcc's __builtin_va_*
-# names onto tcc's native va_* macros (tcc has no notion of a
-# __builtin_va_list keyword); the shim is the only piece glueing
-# the host-preprocessed flatten to tcc-boot2's frontend.
+# Force-including tcc's <stdarg.h> brings in the gcc/clang spelling
+# bridge appended by the stdarg-builtin-aliases patch in
+# stage1-flatten.sh: it aliases gcc's __builtin_va_* names onto tcc's
+# native va_* macros (tcc has no __builtin_va_list keyword on
+# amd64/aarch64). Without that bridge the host-preprocessed flatten
+# won't compile back through tcc-boot2.
$(TCC_LIBC_LIBC): build/$(ARCH)/vendor/mes-libc/libc.flat.c \
- tcc-libc/va_list_shim.h \
build/$(ARCH)/tcc-boot2/tcc-boot2 \
build/$(ARCH)/.image
mkdir -p $(@D)
$(call PODMAN,$(ARCH)) \
build/$(ARCH)/tcc-boot2/tcc-boot2 \
-nostdlib -I $(TCC_CC_TCC_INCLUDE) \
- -include tcc-libc/va_list_shim.h \
+ -include $(TCC_CC_TCC_INCLUDE)/stdarg.h \
-c -o $@ $<
# --- tcc-tcc: second-stage tcc -------------------------------------------
@@ -546,7 +547,7 @@ $(TCC_LIBC_LIBC): build/$(ARCH)/vendor/mes-libc/libc.flat.c \
tcc-tcc: $(TCC_TCC_BIN)
$(TCC_TCC_BIN): scripts/boot-build-tcc-tcc.sh \
- $(TCC_FLAT) tcc-libc/va_list_shim.h \
+ $(TCC_FLAT) \
build/$(ARCH)/tcc-boot2/tcc-boot2 \
$(TCC_LIBC_START) $(TCC_LIBC_SYS_STUBS) \
$(TCC_LIBC_MEM) $(TCC_LIBC_LIBC) \
@@ -564,7 +565,7 @@ $(TCC_TCC_BIN): scripts/boot-build-tcc-tcc.sh \
tcc-tcc-tcc: $(TCC_TCC_TCC_BIN)
$(TCC_TCC_TCC_BIN): scripts/boot-build-tcc-tcc.sh \
- $(TCC_FLAT) tcc-libc/va_list_shim.h \
+ $(TCC_FLAT) \
$(TCC_TCC_BIN) \
$(TCC_LIBC_START) $(TCC_LIBC_SYS_STUBS) \
$(TCC_LIBC_MEM) $(TCC_LIBC_LIBC) \
diff --git a/docs/TCC-TODO.md b/docs/TCC-TODO.md
@@ -268,8 +268,8 @@ path.
```sh
# In the riscv64 container with boot3 outputs present:
-$TCC0 -nostdlib -I $TCC_INC -include $SHIM -c -o /tmp/flat-tcc0.o tcc.flat.c
-$TCC1 -nostdlib -I $TCC_INC -include $SHIM -c -o /tmp/flat-tcc1.o tcc.flat.c
+$TCC0 -nostdlib -I $TCC_INC -include $TCC_INC/stdarg.h -c -o /tmp/flat-tcc0.o tcc.flat.c
+$TCC1 -nostdlib -I $TCC_INC -include $TCC_INC/stdarg.h -c -o /tmp/flat-tcc1.o tcc.flat.c
# wc -c /tmp/flat-tcc0.o /tmp/flat-tcc1.o → 616100 vs 615892
# objdump -d both, normalize addresses, diff to find divergent functions
```
diff --git a/scripts/boot-build-tcc-tcc.sh b/scripts/boot-build-tcc-tcc.sh
@@ -50,7 +50,6 @@ LIBC_O=build/$ARCH/tcc-libc/libc.o
MEM_O=build/$ARCH/tcc-libc/mem.o
SYS_O=build/$ARCH/tcc-libc/sys_stubs.o
START_O=build/$ARCH/tcc-libc/start.o
-SHIM=tcc-libc/va_list_shim.h
WORK=$(dirname "$OUT")
mkdir -p "$WORK"
@@ -83,6 +82,6 @@ fi
# Compile + link the next-stage tcc in one $CC invocation.
# shellcheck disable=SC2086 # $LIB_OBJS is intentionally word-split (may be empty).
-"$CC" -nostdlib -I "$TCC_INC" -include "$SHIM" \
+"$CC" -nostdlib -I "$TCC_INC" -include "$TCC_INC/stdarg.h" \
"$START_O" "$SYS_O" "$MEM_O" "$LIBC_O" $LIB_OBJS \
"$TCC_FLAT" -o "$OUT"
diff --git a/scripts/boot3.sh b/scripts/boot3.sh
@@ -43,7 +43,6 @@
## (Throughout this script: tcc0/tcc1/tcc2/tcc3 are the four stages
## above; tcc0 is the cc.scm-built bootstrap, tcc2/tcc3 form the
## self-host fixed-point check.)
-## tcc-libc/va_list_shim.h — gcc/tcc va_list bridge
## tcc-cc/mem.c — memcpy/memmove/memset/memcmp
## build/tcc/$TCC_TARGET/tcc-0.9.26-1147-gee75a10c/include/** (whole tree)
## vendor/mes-libc/include/** — mes-libc headers for hello
@@ -192,7 +191,6 @@ cp "vendor/seed/$ARCH/ELF.hex2" "$STAGE/in/ELF.hex2"
# tcc-libc / tcc-cc helpers
cp "tcc-libc/$ARCH/start.S" "$STAGE/in/start.S"
cp "tcc-libc/$ARCH/sys_stubs.S" "$STAGE/in/sys_stubs.S"
-cp tcc-libc/va_list_shim.h "$STAGE/in/va_list_shim.h"
cp tcc-cc/mem.c "$STAGE/in/mem.c"
# Per-arch libtcc1 helper sources. LIB_HELPER_SRC is always also in
@@ -241,7 +239,7 @@ emit_helpers () {
echo "$cc -nostdlib -c -o $workdir/start.o /work/in/start.S"
echo "$cc -nostdlib -c -o $workdir/sys_stubs.o /work/in/sys_stubs.S"
echo "$cc -nostdlib -I /work/in/tcc-include -c -o $workdir/mem.o /work/in/mem.c"
- echo "$cc -nostdlib -I /work/in/tcc-include -include /work/in/va_list_shim.h -c -o $workdir/libc.o /work/in/libc.flat.c"
+ echo "$cc -nostdlib -I /work/in/tcc-include -include /work/in/tcc-include/stdarg.h -c -o $workdir/libc.o /work/in/libc.flat.c"
echo "$cc -nostdlib -I /work/in/tcc-include $LIB_HELPER_DEFINES -c -o $workdir/$LIB_HELPER_OBJ /work/in/tcc-lib/$LIB_HELPER_SRC"
}
emit_archive () {
@@ -267,7 +265,7 @@ emit_link_tcc () {
cc=$1
workdir=$2
out=$3
- echo "$cc -nostdlib -I /work/in/tcc-include -include /work/in/va_list_shim.h $workdir/crt1.o /work/in/tcc.flat.c $workdir/libc.a $workdir/libtcc1.a $workdir/libc.a -o $out"
+ echo "$cc -nostdlib -I /work/in/tcc-include -include /work/in/tcc-include/stdarg.h $workdir/crt1.o /work/in/tcc.flat.c $workdir/libc.a $workdir/libtcc1.a $workdir/libc.a -o $out"
}
RUN_SCRIPT=$STAGE/in/run.sh
@@ -289,7 +287,7 @@ RUN_SCRIPT=$STAGE/in/run.sh
emit_helpers /work/out/tcc0 /tmp/stage1
echo
echo '# Stage C: tcc0 -> tcc1 (link with raw .o files; no archive yet)'
- echo "/work/out/tcc0 -nostdlib -I /work/in/tcc-include -include /work/in/va_list_shim.h /tmp/stage1/start.o /tmp/stage1/sys_stubs.o /tmp/stage1/mem.o /tmp/stage1/libc.o /tmp/stage1/$LIB_HELPER_OBJ /work/in/tcc.flat.c -o /work/out/tcc1"
+ echo "/work/out/tcc0 -nostdlib -I /work/in/tcc-include -include /work/in/tcc-include/stdarg.h /tmp/stage1/start.o /tmp/stage1/sys_stubs.o /tmp/stage1/mem.o /tmp/stage1/libc.o /tmp/stage1/$LIB_HELPER_OBJ /work/in/tcc.flat.c -o /work/out/tcc1"
echo 'chmod +x /work/out/tcc1'
echo
echo '# Stage D: tcc1 rebuilds helpers + archive, links tcc2'
diff --git a/scripts/simple-patches/tcc-0.9.26/stdarg-builtin-aliases.after b/scripts/simple-patches/tcc-0.9.26/stdarg-builtin-aliases.after
@@ -0,0 +1,22 @@
+typedef va_list __gnuc_va_list;
+#define _VA_LIST_DEFINED
+
+/* gcc/clang spelling bridge for the va_list family. cc.scm only
+ * recognizes __builtin_va_list and __builtin_va_start/arg/end/copy as
+ * builtins, so the host-preprocessed flat.c routes va_* through those
+ * names; on amd64/aarch64 stock tcc 0.9.26 has no such frontend
+ * keywords, so the same flat.c won't compile back through tcc without
+ * a bridge. Map __builtin_* onto the va_* macros above, which on
+ * those arches expand to tcc's __va_start / __va_arg intrinsics. The
+ * __riscv branch above already names everything by the gcc spelling,
+ * so gate this block on !__riscv to avoid a circular expansion
+ * (__builtin_va_arg -> va_arg -> __builtin_va_arg). */
+#ifndef __riscv
+typedef va_list __builtin_va_list;
+#define __builtin_va_start(ap, last) va_start(ap, last)
+#define __builtin_va_end(ap) va_end(ap)
+#define __builtin_va_arg(ap, type) va_arg(ap, type)
+#define __builtin_va_copy(dst, src) va_copy(dst, src)
+#endif
+
+#endif /* _STDARG_H */
diff --git a/scripts/simple-patches/tcc-0.9.26/stdarg-builtin-aliases.before b/scripts/simple-patches/tcc-0.9.26/stdarg-builtin-aliases.before
@@ -0,0 +1,4 @@
+typedef va_list __gnuc_va_list;
+#define _VA_LIST_DEFINED
+
+#endif /* _STDARG_H */
diff --git a/scripts/stage1-flatten.sh b/scripts/stage1-flatten.sh
@@ -208,6 +208,16 @@ apply_our_patch arm64-tok-include-tcctok-h "$SRC/tcctok.h"
# don't use '#' as an immediate prefix, so they're unaffected.
apply_our_patch asm-hash-bol-only "$SRC/tccpp.c"
+# Side effect of the patch above: alloca86_64-bt.S has two tab-prefixed
+# tail comments (`mov %rax,%rsi # size, a second parm…`) that the
+# x86_64 assembler now rejects with "end of line expected". They are
+# inert documentation; strip them. The file is only compiled when
+# building the amd64 libtcc1.a (LIBTCC1_ASM_SRCS in boot3.sh), so this
+# rewrite has no effect on aarch64/riscv64 builds.
+awk '{ sub(/\t#.*$/, ""); print }' "$SRC/lib/alloca86_64-bt.S" \
+ > "$SRC/lib/alloca86_64-bt.S.tmp"
+mv "$SRC/lib/alloca86_64-bt.S.tmp" "$SRC/lib/alloca86_64-bt.S"
+
# riscv64 stdarg.h order fix — the upstream `#elif __riscv` branch
# uses `__builtin_va_list` before it's typedef'd. Stock tcc treats
# `__builtin_va_list` as a built-in keyword and forgives the forward
@@ -218,6 +228,14 @@ apply_our_patch asm-hash-bol-only "$SRC/tccpp.c"
# branch is absent (other tcc trees).
apply_our_patch riscv-stdarg-fix "$SRC/include/stdarg.h"
+# gcc/clang __builtin_va_* spelling bridge — append aliases at the end
+# of tcc's <stdarg.h> so the same flat.c (which uses the gcc spelling
+# because that's what cc.scm recognizes) also compiles back through
+# tcc on amd64/aarch64. Gated `#ifndef __riscv` inside .after — the
+# __riscv branch already maps these names natively. See the .after
+# block for the full rationale.
+apply_our_patch stdarg-builtin-aliases "$SRC/include/stdarg.h"
+
# Empty config.h shims — pass1.kaem creates these via `catm <out>` (line 27-28).
: > "$SRC/config.h"
mkdir -p "$WORK/mes-overlay/mes"
diff --git a/tcc-libc/va_list_shim.h b/tcc-libc/va_list_shim.h
@@ -1,32 +0,0 @@
-/* tcc-libc va_list shim — pre-included when tcc-boot2 compiles
- * libc.flat.c (or any other host-preprocessed TU under our boot2
- * stdarg.h shim). The flatten step routes `va_list` through
- * `__builtin_va_list`, but on aarch64 / amd64 stock tcc's frontend
- * does not recognize that token as a type — tcc's <stdarg.h> defines
- * `va_list` as `__va_list_struct[1]`. Make `__builtin_va_list` an
- * alias for the same array type so libc.flat.c's
- *
- * typedef __builtin_va_list va_list;
- *
- * collapses to a (legal) duplicate typedef of the existing
- * tcc-stdlib `va_list`. The `__builtin_va_*` macros in the flatten
- * are direct tcc intrinsics; the aliases just reach them by the
- * gcc-conformant builtin spelling.
- *
- * On riscv64 tcc's <stdarg.h> already names everything by the
- * gcc spelling — `va_list` is a typedef to `__builtin_va_list`,
- * `__builtin_va_arg` / `__builtin_va_end` / `__builtin_va_copy` are
- * macros, and `__builtin_va_start` is a frontend intrinsic — so the
- * shim has nothing to add. The aliases below would in fact redefine
- * the macros stdarg.h already declared, triggering an infinite-loop
- * expansion of `__builtin_va_arg(ap, type)` through `va_arg(ap,
- * type)` and back. Gate the aliases on !__riscv. */
-#include <stdarg.h>
-
-#ifndef __riscv
-typedef va_list __builtin_va_list;
-#define __builtin_va_start(ap, last) va_start(ap, last)
-#define __builtin_va_end(ap) va_end(ap)
-#define __builtin_va_arg(ap, type) va_arg(ap, type)
-#define __builtin_va_copy(dst, src) va_copy(dst, src)
-#endif