boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

stage2-alpine.sh (12428B)


      1 #!/bin/sh
      2 ## scripts/stage2-alpine.sh — build tcc-boot0-mes in an alpine container.
      3 ##
      4 ## This is the stand-in slot for our scheme1-hosted C compiler: a real
      5 ## native gcc plays the role our scheme CC will eventually fill. Its
      6 ## job is to take stage 1's tcc.flat.c, build a working tcc, then use
      7 ## that tcc to compile mes libc and link a final static tcc-0.9.26
      8 ## binary against it. See docs/TCC.md.
      9 ##
     10 ## We picked alpine (musl) over debian (glibc) because mes headers
     11 ## declare errno as a plain global, while glibc declares it TLS — a
     12 ## non-TLS / TLS clash at link time. musl exposes errno only via
     13 ## __errno_location(), so a one-line `int errno;` shim is the sole
     14 ## definition and links cleanly.
     15 ##
     16 ## Pre-condition:
     17 ##   build/amd64/vendor/tcc/tcc.flat.c  (run scripts/stage1-flatten.sh)
     18 ##
     19 ## Inside alpine:latest (linux/amd64):
     20 ##   1. apk add gcc musl-dev
     21 ##   2. gcc -static tcc.flat.c errno-shim.c -> tcc-host
     22 ##      (validates the flatten output as well-formed C; tcc-host is a
     23 ##       working musl-linked tcc-0.9.26 binary)
     24 ##   3. unpack mes-0.27.1, set up include tree with arch symlink
     25 ##      (Issue §1 workaround — tcc 0.9.26 SEGVs on missing include)
     26 ##   4. tcc-host compiles each mes libc .c file individually
     27 ##      (Issue §2 workaround — concatenated TU SEGVs around 22+ files)
     28 ##   5. tcc-host -ar -> /lib/libc.a + /lib/tcc/libtcc1.a, build crt1.o
     29 ##   6. tcc-host -static compiles patched real tcc.c against mes libc
     30 ##      directly into tcc-boot0-mes — mirrors live-bootstrap's tcc-boot0
     31 ##      invocation. We skip the tcc-self.o intermediate that an older
     32 ##      iteration used: it was a redundant round-trip and exposed a tcc
     33 ##      0.9.26 bug in the static-link codepath.
     34 ##   7. (best-effort) tcc-boot0-mes -version
     35 ##      Expected to segfault under QEMU x86_64 emulation on macOS arm64
     36 ##      (Issue §3); native x86_64 needed to verify cleanly.
     37 ##
     38 ## Output: build/amd64/vendor/tcc/tcc-boot0-mes (static, mes-libc-linked).
     39 ## This artifact is what stage 3 (busybox) consumes to drive the
     40 ## tcc-boot1 / tcc-boot2 chain.
     41 ##
     42 ## Usage:
     43 ##   scripts/stage2-alpine.sh [--arch X86_64]
     44 
     45 set -eu
     46 
     47 ARCH=X86_64
     48 while [ $# -gt 0 ]; do
     49     case "$1" in
     50         --arch)    ARCH=$2; shift 2 ;;
     51         -h|--help) sed -n 's/^## \{0,1\}//p' "$0"; exit 0 ;;
     52         *) echo "unknown arg: $1" >&2; exit 2 ;;
     53     esac
     54 done
     55 
     56 if [ "$ARCH" != "X86_64" ]; then
     57     echo "stage2 currently only supports X86_64 (live-bootstrap reference path)" >&2
     58     exit 2
     59 fi
     60 MES_ARCH=x86_64
     61 BOOT_ARCH=amd64
     62 
     63 ROOT=$(cd "$(dirname "$0")/.." && pwd)
     64 WORK=$ROOT/build/$BOOT_ARCH/vendor/tcc
     65 # This is the legacy gcc-driven path the cc.scm tcc-boot2 chain
     66 # replaces (see docs/TCC.md). Not on the main `make tcc-boot2`
     67 # build path; kept around as a verification fallback. Requires the
     68 # upstream mes tarball, which lives outside the repo — set
     69 # LIVE_BOOTSTRAP=<path> or MES_TAR=<path-to-mes-0.27.1.tar.gz>
     70 # explicitly. See scripts/diag-livebootstrap-qemu.sh for the same
     71 # pattern.
     72 : "${MES_TAR:=}"
     73 if [ -z "$MES_TAR" ]; then
     74     : "${LIVE_BOOTSTRAP:?set LIVE_BOOTSTRAP=<path-to-live-bootstrap-checkout> or MES_TAR=<mes-0.27.1.tar.gz>}"
     75     MES_TAR=$LIVE_BOOTSTRAP/../lb-work/distfiles/mes-0.27.1.tar.gz
     76 fi
     77 MES_PKG=mes-0.27.1
     78 FLAT=$WORK/tcc.flat.c
     79 
     80 [ -r "$FLAT"    ] || { echo "missing $FLAT — run scripts/stage1-flatten.sh first" >&2; exit 1; }
     81 [ -r "$MES_TAR" ] || { echo "missing $MES_TAR" >&2; exit 1; }
     82 command -v podman >/dev/null 2>&1 || { echo "podman required" >&2; exit 2; }
     83 
     84 # Unpack mes outside the container so it lands on the bind mount.
     85 if [ ! -d "$WORK/$MES_PKG" ]; then
     86     tar -xzf "$MES_TAR" -C "$WORK"
     87 fi
     88 mkdir -p "$WORK/$MES_PKG/include/mes"
     89 : > "$WORK/$MES_PKG/include/mes/config.h"
     90 
     91 # errno shim: tcc.flat.c references errno as a plain global (mes-libc
     92 # convention). musl provides errno only via __errno_location(), so this
     93 # one-line int errno; is the sole storage. Without it the link fails.
     94 printf 'int errno;\n' > "$WORK/errno-shim.c"
     95 
     96 echo "=== stage 2: tcc-boot0-mes via alpine:latest ==="
     97 echo "(slow on macOS arm64 — runs under QEMU linux/amd64)"
     98 
     99 TCC_PKG=tcc-0.9.26-1147-gee75a10c
    100 
    101 podman run --rm -i --platform linux/amd64 \
    102     -v "$ROOT":/work -w /work alpine:latest sh -s "$ARCH" "$MES_ARCH" "$MES_PKG" "$TCC_PKG" "$BOOT_ARCH" <<'CONTAINER_SCRIPT'
    103 set -eu
    104 ARCH=$1
    105 MES_ARCH=$2
    106 MES_PKG=$3
    107 TCC_PKG=$4
    108 BOOT_ARCH=$5
    109 WORK=/work/build/$BOOT_ARCH/vendor/tcc
    110 
    111 # --- (1) install gcc + musl-dev (provides libc.a for -static) --------
    112 apk add --no-cache gcc musl-dev >/dev/null
    113 echo "host gcc: $(gcc --version | head -1)"
    114 
    115 # --- (2) gcc tcc.flat.c -> tcc-host ----------------------------------
    116 echo "--- gcc -static -> tcc-host ---"
    117 gcc -w -static -no-pie -o "$WORK/tcc-host" \
    118     "$WORK/tcc.flat.c" "$WORK/errno-shim.c"
    119 "$WORK/tcc-host" -version
    120 
    121 # --- (3) sanitized include tree --------------------------------------
    122 MES_SRC=$WORK/$MES_PKG
    123 INC=/tmp/mes-inc
    124 rm -rf $INC
    125 mkdir -p $INC
    126 cp -r $MES_SRC/include/. $INC/
    127 ln -sfn linux/$MES_ARCH $INC/arch
    128 
    129 # --- (4) compile mes libc per-file -----------------------------------
    130 echo "--- tcc-host compiling mes libc (per-file to dodge Issue §2) ---"
    131 mkdir -p /tmp/objs
    132 cd $MES_SRC/lib
    133 
    134 ALL_FILES="ctype/isalnum.c ctype/isalpha.c ctype/isascii.c ctype/iscntrl.c \
    135 ctype/isdigit.c ctype/isgraph.c ctype/islower.c ctype/isnumber.c \
    136 ctype/isprint.c ctype/ispunct.c ctype/isspace.c ctype/isupper.c \
    137 ctype/isxdigit.c ctype/tolower.c ctype/toupper.c \
    138 dirent/closedir.c dirent/__getdirentries.c dirent/opendir.c \
    139 linux/readdir.c linux/access.c linux/brk.c linux/chdir.c linux/chmod.c \
    140 linux/clock_gettime.c linux/close.c linux/dup2.c linux/dup.c linux/execve.c \
    141 linux/fcntl.c linux/fork.c linux/fsync.c linux/fstat.c linux/_getcwd.c \
    142 linux/getdents.c linux/getegid.c linux/geteuid.c linux/getgid.c linux/getpid.c \
    143 linux/getppid.c linux/getrusage.c linux/gettimeofday.c linux/getuid.c \
    144 linux/ioctl.c linux/ioctl3.c linux/kill.c linux/link.c linux/lseek.c \
    145 linux/lstat.c linux/malloc.c linux/mkdir.c linux/mknod.c linux/nanosleep.c \
    146 linux/_open3.c linux/pipe.c linux/_read.c linux/readlink.c linux/rename.c \
    147 linux/rmdir.c linux/setgid.c linux/settimer.c linux/setuid.c linux/signal.c \
    148 linux/sigprogmask.c linux/symlink.c linux/stat.c linux/time.c linux/unlink.c \
    149 linux/waitpid.c linux/wait4.c \
    150 linux/${MES_ARCH}-mes-gcc/_exit.c linux/${MES_ARCH}-mes-gcc/syscall.c \
    151 linux/${MES_ARCH}-mes-gcc/_write.c \
    152 math/ceil.c math/fabs.c math/floor.c \
    153 mes/abtod.c mes/abtol.c mes/__assert_fail.c mes/assert_msg.c \
    154 mes/__buffered_read.c mes/__init_io.c mes/cast.c mes/dtoab.c \
    155 mes/eputc.c mes/eputs.c mes/fdgetc.c mes/fdgets.c mes/fdputc.c mes/fdputs.c \
    156 mes/fdungetc.c mes/globals.c mes/itoa.c mes/ltoab.c mes/ltoa.c \
    157 mes/__mes_debug.c mes/mes_open.c mes/ntoab.c mes/oputc.c mes/oputs.c \
    158 mes/search-path.c mes/ultoa.c mes/utoa.c \
    159 posix/alarm.c posix/buffered-read.c posix/execl.c posix/execlp.c \
    160 posix/execv.c posix/execvp.c posix/getcwd.c posix/getenv.c posix/isatty.c \
    161 posix/mktemp.c posix/open.c posix/pathconf.c posix/raise.c posix/sbrk.c \
    162 posix/setenv.c posix/sleep.c posix/unsetenv.c posix/wait.c posix/write.c \
    163 stdio/clearerr.c stdio/fclose.c stdio/fdopen.c stdio/feof.c stdio/ferror.c \
    164 stdio/fflush.c stdio/fgetc.c stdio/fgets.c stdio/fileno.c stdio/fopen.c \
    165 stdio/fprintf.c stdio/fputc.c stdio/fputs.c stdio/fread.c stdio/freopen.c \
    166 stdio/fscanf.c stdio/fseek.c stdio/ftell.c stdio/fwrite.c stdio/getc.c \
    167 stdio/getchar.c stdio/perror.c stdio/printf.c stdio/putc.c stdio/putchar.c \
    168 stdio/remove.c stdio/snprintf.c stdio/sprintf.c stdio/sscanf.c stdio/ungetc.c \
    169 stdio/vfprintf.c stdio/vfscanf.c stdio/vprintf.c stdio/vsnprintf.c \
    170 stdio/vsprintf.c stdio/vsscanf.c \
    171 stdlib/abort.c stdlib/abs.c stdlib/alloca.c stdlib/atexit.c stdlib/atof.c \
    172 stdlib/atoi.c stdlib/atol.c stdlib/calloc.c stdlib/__exit.c stdlib/exit.c \
    173 stdlib/free.c stdlib/mbstowcs.c stdlib/puts.c stdlib/qsort.c stdlib/realloc.c \
    174 stdlib/strtod.c stdlib/strtof.c stdlib/strtol.c stdlib/strtold.c \
    175 stdlib/strtoll.c stdlib/strtoul.c stdlib/strtoull.c \
    176 string/bcmp.c string/bcopy.c string/bzero.c string/index.c string/memchr.c \
    177 string/memcmp.c string/memcpy.c string/memmem.c string/memmove.c string/memset.c \
    178 string/rindex.c string/strcat.c string/strchr.c string/strcmp.c string/strcpy.c \
    179 string/strcspn.c string/strdup.c string/strerror.c string/strlen.c \
    180 string/strlwr.c string/strncat.c string/strncmp.c string/strncpy.c \
    181 string/strpbrk.c string/strrchr.c string/strspn.c string/strstr.c string/strupr.c \
    182 stub/atan2.c stub/bsearch.c stub/chown.c stub/__cleanup.c stub/cos.c \
    183 stub/ctime.c stub/exp.c stub/fpurge.c stub/freadahead.c stub/frexp.c \
    184 stub/getgrgid.c stub/getgrnam.c stub/getlogin.c stub/getpgid.c stub/getpgrp.c \
    185 stub/getpwnam.c stub/getpwuid.c stub/gmtime.c stub/ldexp.c stub/localtime.c \
    186 stub/log.c stub/mktime.c stub/modf.c stub/mprotect.c stub/pclose.c \
    187 stub/popen.c stub/pow.c stub/putenv.c stub/rand.c stub/realpath.c stub/rewind.c \
    188 stub/setbuf.c stub/setgrent.c stub/setlocale.c stub/setvbuf.c stub/sigaction.c \
    189 stub/sigaddset.c stub/sigblock.c stub/sigdelset.c stub/sigemptyset.c \
    190 stub/sigsetmask.c stub/sin.c stub/sys_siglist.c stub/system.c stub/sqrt.c \
    191 stub/strftime.c stub/times.c stub/ttyname.c stub/umask.c stub/utime.c \
    192 ${MES_ARCH}-mes-gcc/setjmp.c"
    193 
    194 OBJS=
    195 n_compiled=0
    196 n_failed=0
    197 for f in $ALL_FILES; do
    198     name=$(echo "$f" | tr / _)
    199     o=/tmp/objs/${name%.c}.o
    200     if "$WORK/tcc-host" -c -D HAVE_CONFIG_H=1 -I "$INC" -I "$INC/linux/$MES_ARCH" \
    201             -o "$o" "$f" 2>/dev/null; then
    202         OBJS="$OBJS $o"
    203         n_compiled=$((n_compiled+1))
    204     else
    205         echo "compile failed: $f" >&2
    206         n_failed=$((n_failed+1))
    207     fi
    208 done
    209 echo "compiled $n_compiled libc .o files (failed: $n_failed)"
    210 [ "$n_failed" -eq 0 ] || { echo "abort: some libc files failed" >&2; exit 1; }
    211 
    212 # --- (5) ar -> libc.a, crt1.o, libtcc1.a -----------------------------
    213 mkdir -p /lib/tcc /include/mes
    214 "$WORK/tcc-host" -ar cr /lib/libc.a $OBJS
    215 
    216 "$WORK/tcc-host" -c -D HAVE_CONFIG_H=1 -I "$INC" -I "$INC/linux/$MES_ARCH" \
    217     -o /lib/crt1.o "linux/$MES_ARCH-mes-gcc/crt1.c"
    218 : > /lib/crtn.o
    219 : > /lib/crti.o
    220 
    221 "$WORK/tcc-host" -c -D HAVE_CONFIG_H=1 -D HAVE_LONG_LONG=1 -D HAVE_FLOAT=1 \
    222     -I "$INC" -I "$INC/linux/$MES_ARCH" \
    223     -o /tmp/libtcc1.o libtcc1.c
    224 "$WORK/tcc-host" -ar cr /lib/tcc/libtcc1.a /tmp/libtcc1.o
    225 
    226 cp -r "$INC/." /include/mes/
    227 ls -la /lib/crt1.o /lib/libc.a /lib/tcc/libtcc1.a
    228 
    229 # --- (6) tcc-host -static compile real tcc.c -> tcc-boot0-mes --------
    230 # Mirrors live-bootstrap pass1.kaem's tcc-boot0 invocation: same flags,
    231 # same source tree, just driven by tcc-host instead of tcc-mes. Direct
    232 # compile+link in one shot — no intermediate .o.
    233 echo "--- tcc-host -static compile+link real tcc.c -> tcc-boot0-mes ---"
    234 cd "$WORK/$TCC_PKG"
    235 "$WORK/tcc-host" \
    236     -g \
    237     -static \
    238     -o "$WORK/tcc-boot0-mes" \
    239     -D BOOTSTRAP=1 \
    240     -D HAVE_FLOAT=1 \
    241     -D HAVE_BITFIELD=1 \
    242     -D HAVE_LONG_LONG=1 \
    243     -D HAVE_SETJMP=1 \
    244     -I . \
    245     -I /include/mes \
    246     -D TCC_TARGET_${ARCH}=1 \
    247     -D CONFIG_TCCDIR=\"/lib/tcc\" \
    248     -D CONFIG_TCC_CRTPREFIX=\"/lib\" \
    249     -D CONFIG_TCC_ELFINTERP=\"/mes/loader\" \
    250     -D CONFIG_TCC_LIBPATHS=\"/lib:/lib/tcc\" \
    251     -D CONFIG_TCC_SYSINCLUDEPATHS=\"/include/mes\" \
    252     -D TCC_LIBGCC=\"/lib/libc.a\" \
    253     -D TCC_LIBTCC1=\"libtcc1.a\" \
    254     -D CONFIG_TCCBOOT=1 \
    255     -D CONFIG_TCC_STATIC=1 \
    256     -D CONFIG_USE_LIBGCC=1 \
    257     -D TCC_VERSION=\"0.9.26\" \
    258     -D ONE_SOURCE=1 \
    259     -L . \
    260     -L /lib \
    261     tcc.c
    262 ls -la "$WORK/tcc-boot0-mes"
    263 
    264 # Stage out mes libc + libtcc1 + crt1.o + headers so stage 3 can mount
    265 # them in next to tcc-boot0-mes without re-running stage 2.
    266 STAGE3=$WORK/stage3-input
    267 rm -rf "$STAGE3"
    268 mkdir -p "$STAGE3/lib/tcc" "$STAGE3/include"
    269 cp /lib/libc.a       "$STAGE3/lib/"
    270 cp /lib/crt1.o       "$STAGE3/lib/"
    271 cp /lib/crtn.o       "$STAGE3/lib/"
    272 cp /lib/crti.o       "$STAGE3/lib/"
    273 cp /lib/tcc/libtcc1.a "$STAGE3/lib/tcc/"
    274 cp -r "$INC/."       "$STAGE3/include/mes/"
    275 echo "staged mes libc bits into $STAGE3 for stage 3"
    276 
    277 # --- (7) best-effort version probe -----------------------------------
    278 echo
    279 echo "--- tcc-boot0-mes -version (Issue §3: expected SEGV under QEMU) ---"
    280 rc=0; "$WORK/tcc-boot0-mes" -version 2>&1 || rc=$?
    281 echo "exit=$rc"
    282 CONTAINER_SCRIPT
    283 
    284 echo
    285 echo "=== stage 2 artifacts ==="
    286 ls -la "$WORK/tcc-host" "$WORK/tcc-boot0-mes" 2>/dev/null || \
    287     echo "(some artifacts missing — see container output above)"