boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit cade790ded0af7fc59a2318afa8f63f3dd226be3
parent 3c5314adf20422fa3616120e78e0d1d23ce95d46
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Wed, 29 Apr 2026 20:21:43 -0700

va_list: route through __builtin_va_list across cc.scm + mes-libc

Previously the flattened source landed `typedef char *va_list;` and
inline `(ap) = (char*)(&fmt) + sizeof(void*)` / `(ap) = 0` macros —
the tcc/mes stage1 convention, which assumes x86-style stack-passed
varargs. cc.scm tolerated this because P1 spills all args to slots,
but stock gcc/clang reject it under their struct-shaped va_list ABI.

Three coordinated changes let a single source compile cleanly under
both:

- cc.scm parse-decl-spec recognizes __builtin_va_list as a builtin
  type alias for char* (cg-va-{start,arg,end} only need an 8-byte
  pointer slot).
- New vendor/boot2-include/stdarg.h shadows mes/include/stdarg.h
  with `typedef __builtin_va_list va_list;` and macros routed
  through __builtin_va_*.
- libc-flatten.sh adds -I vendor/boot2-include ahead of mes's tree
  so the shim wins for both the libc-flatten and the tcc-flatten
  paths (stage1-flatten gets the same -I in a separate commit).
  The previous in-source patch (vendor/mes-libc/patches/stdarg-builtin)
  is now redundant — deleted, along with the original stdarg.h.

Validated with stock gcc + libgcc on aarch64 musl: `tcc-gcc -version`
prints "tcc version 0.9.26 (x86_64 Linux)" and exits 0 with no source
patches, and cc.scm-built tcc-boot2 still reaches the same crash
site (no behavioural change in the cc.scm path).

Diffstat:
Mcc/cc.scm | 13++++++++++++-
Mscripts/libc-flatten.sh | 30+++++++++++-------------------
Avendor/boot2-include/stdarg.h | 39+++++++++++++++++++++++++++++++++++++++
Dvendor/mes-libc/include/stdarg.h | 94-------------------------------------------------------------------------------
Dvendor/mes-libc/patches/stdarg-builtin.after | 12------------
Dvendor/mes-libc/patches/stdarg-builtin.before | 7-------
6 files changed, 62 insertions(+), 133 deletions(-)

diff --git a/cc/cc.scm b/cc/cc.scm @@ -2821,7 +2821,7 @@ ;; and same-size casts round-trip the bytes; widening int→fp casts ;; leave the int bit-pattern in the wider slot; binops use integer ;; ALU ops. tcc.flat.c contains real fp code paths (parse_number, -;; ieee_finite, …) that the bootstrap tcc-lispcc never executes when +;; ieee_finite, …) that the bootstrap tcc-boot2 never executes when ;; compiling float-free programs, so producing valid-but-semantically- ;; wrong P1pp here is sufficient. Kept as a named no-op so the call ;; sites stay grep-able if a future bootstrap target needs real FP. @@ -4359,6 +4359,17 @@ (loop sto sn lg (parse-aggregate-spec ps 'union) #t)) ((at-kw? ps 'enum) (loop sto sn lg (parse-enum-spec ps) #t)) + ;; __builtin_va_list — gcc/clang builtin type. We don't model + ;; it as a struct; for our P1 ABI a va_list is just a char* + ;; into the stack save area (cg-va-start/arg/end work over an + ;; 8-byte slot). Letting __builtin_va_list mean `char *` here + ;; lets a single header source — `typedef __builtin_va_list + ;; va_list;` — compile cleanly under both cc.scm and stock + ;; gcc/clang (where it's their native struct). + ((and (not b) (eq? (tok-kind t) 'IDENT) + (bv= (tok-value t) "__builtin_va_list")) + (advance ps) + (loop sto sn lg (%ctype 'ptr 8 8 %t-i8) #t)) ((and (not b) (eq? (tok-kind t) 'IDENT) (let ((sm (scope-lookup ps (tok-value t)))) (and sm (eq? (sym-kind sm) 'typedef)))) diff --git a/scripts/libc-flatten.sh b/scripts/libc-flatten.sh @@ -1,5 +1,5 @@ #!/bin/sh -## libc-flatten.sh — flatten the vendored mes-libc + lispcc-syscall.c +## libc-flatten.sh — flatten the vendored mes-libc + boot2-syscall.c ## into a single libc.flat.c using the host preprocessor. 
Mirrors ## stage1-flatten.sh; runs on the host, no container — hence the ## non-`boot-` name (the convention in scripts/ is that boot-*.sh @@ -14,7 +14,7 @@ ## Stage 4 (cc.scm libc.flat.c → libc.P1pp) is a separate Makefile rule ## that reuses scripts/boot-build-cc.sh inside the per-arch container. ## -## ARCH selects the lispcc target (aarch64/amd64/riscv64). MES_ARCH is +## ARCH selects the boot2 target (aarch64/amd64/riscv64). MES_ARCH is ## the mes header tree we hand the host preprocessor; mes ships ## x86_64/riscv64 only, so aarch64 builds borrow riscv64's headers (the ## resulting libc.flat.c references no SYS_* / kernel-stat fields, so @@ -121,28 +121,20 @@ apply_simple_patch \ "$STAGE/mes/ntoab.c" \ "$PATCHES/ntoab-inline-defined.before" \ "$PATCHES/ntoab-inline-defined.after" -# Route mes's va_start/va_arg/va_end macros through cc.scm's -# __builtin_va_* (parser-recognized in cc/cc.scm parse-builtin-va-*). -# The original macros lower as raw C expressions; cc.scm's general -# expression path mishandles them subtly (cc-libc/05 prints the format -# char instead of the spilled int). The builtin path is the same one -# tests/cc/131-vararg-mixed exercises end-to-end for printf-shaped -# two-frame va_list forwarding. -apply_simple_patch \ - "$STAGE/include/stdarg.h" \ - "$PATCHES/stdarg-builtin.before" \ - "$PATCHES/stdarg-builtin.after" - # --- (3) flatten via host preprocessor -------------------------------- HOST_CC=${HOST_CC:-cc} -# -I order matters: include first so <signal.h>, <stdio.h> etc. hit -# the canonical mes/include versions; arch/<…> resolves through the -# include/arch symlink to include/linux/$MES_ARCH. Putting the per-arch -# directory ahead of include/ makes <signal.h> resolve to the partial -# arch-specific snippet (no stack_t typedef etc) and the build breaks. +# -I order matters: vendor/boot2-include first so our stdarg.h shim +# (routes va_* through __builtin_va_*; see comment in that file) wins +# over mes's. 
Then $STAGE/include for everything else — <signal.h>, +# <stdio.h>, etc. hit the canonical mes/include versions; arch/<…> +# resolves through the include/arch symlink to include/linux/$MES_ARCH. +# Putting the per-arch directory ahead of include/ makes <signal.h> +# resolve to the partial arch-specific snippet (no stack_t typedef etc) +# and the build breaks. "$HOST_CC" -E -P \ -nostdinc \ + -I "$ROOT/vendor/boot2-include" \ -I "$STAGE/include" \ -I "$STAGE" \ -D HAVE_CONFIG_H=0 \ diff --git a/vendor/boot2-include/stdarg.h b/vendor/boot2-include/stdarg.h @@ -0,0 +1,39 @@ +/* boot2 stdarg.h — shadows mes/include/stdarg.h for both flatten + * paths (scripts/{stage1,libc}-flatten.sh both have -I on the + * containing dir ahead of mes's include tree). Routes va_* through + * __builtin_va_*, so tcc.flat.c and libc.flat.c compile cleanly + * under both our cc.scm (which recognizes __builtin_va_list and + * __builtin_va_start/arg/end) and stock gcc/clang (where they're + * native). + * + * Mes's stdarg.h has a similar __builtin-routed branch but only + * activates under __riscv. We can't set -D __riscv at flatten time + * without also flipping setjmp.h and tcc-internal arch logic, so we + * shadow the whole header instead. + */ +#ifndef __MES_STDARG_H +#define __MES_STDARG_H 1 + +typedef __builtin_va_list va_list; + +#define va_start(v, l) __builtin_va_start((v), (l)) +#define va_end(v) __builtin_va_end((v)) +#define va_arg(v, t) __builtin_va_arg((v), t) +#define va_arg8(ap, type) va_arg((ap), type) +#define va_copy(d, s) __builtin_va_copy((d), (s)) + +/* mes/include/stdarg.h forward-declares the v* family here (instead + * of in <stdio.h>); tcc.c calls vsnprintf without ever including + * <stdio.h>, so dropping mes's stdarg.h in favor of this shim must + * still leak these prototypes. FILE and size_t come from a prior + * include in mes-libc TUs; tcc.c works because it includes + * <sys/types.h> for size_t and uses (FILE*) implicitly. 
*/ +int vexec (char const *file_name, va_list ap); +int vfprintf (FILE *stream, char const *template, va_list ap); +int vfscanf (FILE *stream, char const *template, va_list ap); +int vprintf (char const *format, va_list ap); +int vsprintf (char *str, char const *format, va_list ap); +int vsnprintf(char *str, size_t size, char const *format, va_list ap); +int vsscanf (char const *s, char const *template, va_list ap); + +#endif /* __MES_STDARG_H */ diff --git a/vendor/mes-libc/include/stdarg.h b/vendor/mes-libc/include/stdarg.h @@ -1,94 +0,0 @@ -/* -*-comment-start: "//";comment-end:""-*- - * GNU Mes --- Maxwell Equations of Software - * Copyright © 2017,2018,2019 Jan (janneke) Nieuwenhuizen <janneke@gnu.org> - * Copyright © 2021 W. J. van der Laan <laanwj@protonmail.com> - * Copyright © 2023 Andrius Štikonas <andrius@stikonas.eu> - * - * This file is part of GNU Mes. - * - * GNU Mes is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or (at - * your option) any later version. - * - * GNU Mes is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Mes. If not, see <http://www.gnu.org/licenses/>. - */ -#ifndef __MES_STDARG_H -#define __MES_STDARG_H 1 - -#if SYSTEM_LIBC -#undef __MES_STDARG_H -#include_next <stdarg.h> - -#define va_arg8(ap, type) va_arg (ap, type) - -#elif (__GNUC__ || __TINYC__) && __riscv - -// GCC on RISC-V always passes arguments in registers. Implementing -// these macros without the use of built-ins would be very involved. -// TINYCC tries to be GCC compatible in this case. 
- -#if __TINYC__ -// TINYCC needs some definitions in RISC-V in order to be built -// without it's own code generation tool. -typedef char *__builtin_va_list; -#define __va_reg_size (__riscv_xlen >> 3) -#define _tcc_align(addr, type) \ - (((unsigned long)addr + __alignof__(type) - 1) \ - & -(__alignof__(type))) -#define __builtin_va_arg(ap, type) \ - (*(sizeof (type) > (2*__va_reg_size) \ - ? *(type **)((ap += __va_reg_size) - __va_reg_size) \ - : (ap = (va_list)(_tcc_align (ap, type) \ - + (sizeof (type) + __va_reg_size - 1) \ - & -__va_reg_size), \ - (type *)(ap - ((sizeof (type)+ __va_reg_size - 1) \ - & -__va_reg_size))))) - -#define __builtin_va_end(ap) (void)(ap) -#if !defined (__builtin_va_copy) -#define __builtin_va_copy(dest, src) (dest) = (src) -#endif -#endif // __TINYC__ - -typedef __builtin_va_list va_list; - -#define va_start(v, l) __builtin_va_start (v, l) -#define va_end(v) __builtin_va_end (v) -#define va_arg(v, l) __builtin_va_arg (v, l) -#define va_arg8(ap, type) va_arg (ap, type) -#define va_copy(d, s) __builtin_va_copy (d, s) - -#else // ! SYSTEM_LIBC && ! 
__riscv - -#include <sys/types.h> - -#if __GNUC__ && __x86_64__ -#define __FOO_VARARGS 1 -#endif - -typedef char *va_list; -#define va_start(ap, last) (void)((ap) = (char*)(&(last)) + sizeof (void*)) -#define va_arg(ap, type) (type)(((long*)((ap) = ((ap) + sizeof (void*))))[-1]) -#define va_align(ap, alignment) ((char*)((((unsigned long) (ap)) + (alignment) - 1) &~ ((alignment) - 1))) -#define va_arg8(ap, type) (type)(((double*)((ap) = (va_align((ap), 8) + sizeof(double))))[-1]) -#define va_end(ap) (void)((ap) = 0) -#define va_copy(dest, src) dest = src - -int vexec (char const *file_name, va_list ap); -int vfprintf (FILE * stream, char const *template, va_list ap); -int vfscanf (FILE * stream, char const *template, va_list ap); -int vprintf (char const *format, va_list ap); -int vsprintf (char *str, char const *format, va_list ap); -int vsnprintf (char *str, size_t size, char const *format, va_list ap); -int vsscanf (char const *s, char const *template, va_list ap); - -#endif // ! SYSTEM_LIBC - -#endif // __MES_STDARG_H diff --git a/vendor/mes-libc/patches/stdarg-builtin.after b/vendor/mes-libc/patches/stdarg-builtin.after @@ -1,12 +0,0 @@ -/* Routed through cc.scm's __builtin_va_* parser-recognized names so - * va_start/va_arg/va_end go through cc/cc.scm cg-va-{start,arg,end}. - * The original mes macros lower as raw C expressions; cc.scm's - * general expression path mishandles them (see cc-libc/05). The - * builtin path is exercised by tests/cc/131-vararg-mixed for the - * printf -> vprintf -> vfprintf two-frame forwarding shape. 
*/ -typedef char *va_list; -#define va_start(ap, last) __builtin_va_start((ap), (last)) -#define va_arg(ap, type) __builtin_va_arg((ap), type) -#define va_arg8(ap, type) __builtin_va_arg((ap), type) -#define va_end(ap) __builtin_va_end((ap)) -#define va_copy(dest, src) (dest) = (src) diff --git a/vendor/mes-libc/patches/stdarg-builtin.before b/vendor/mes-libc/patches/stdarg-builtin.before @@ -1,7 +0,0 @@ -typedef char *va_list; -#define va_start(ap, last) (void)((ap) = (char*)(&(last)) + sizeof (void*)) -#define va_arg(ap, type) (type)(((long*)((ap) = ((ap) + sizeof (void*))))[-1]) -#define va_align(ap, alignment) ((char*)((((unsigned long) (ap)) + (alignment) - 1) &~ ((alignment) - 1))) -#define va_arg8(ap, type) (type)(((double*)((ap) = (va_align((ap), 8) + sizeof(double))))[-1]) -#define va_end(ap) (void)((ap) = 0) -#define va_copy(dest, src) dest = src