boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

libc-flatten.sh (8135B)


      1 #!/bin/sh
      2 ## libc-flatten.sh — flatten the vendored mes-libc + boot2-syscall.c
      3 ## into a single libc.flat.c using the host preprocessor. Mirrors
      4 ## stage1-flatten.sh; runs on the host, no container — hence the
      5 ## non-`boot-` name (the convention in scripts/ is that boot-*.sh
      6 ## runs inside the minimal container).
      7 ##
      8 ## Steps:
      9 ##   1. stage vendor/mes-libc → build/<arch>/vendor/mes-libc/libc-stage/
     10 ##   2. apply simple-patches (literal-block replacement, idempotent)
     11 ##   3. HOST_CC -E -nostdinc -I staging/include … staging/unified-libc.c
     12 ##      → build/<arch>/vendor/mes-libc/libc.flat.c
     13 ##
     14 ## Stage 4 (cc.scm libc.flat.c → libc.P1pp) is a separate Makefile rule
     15 ## that reuses scripts/boot-build-cc.sh inside the per-arch container.
     16 ##
     17 ## ARCH selects the boot2 target (aarch64/amd64/riscv64). MES_ARCH is
     18 ## the mes header tree we hand the host preprocessor; mes ships
     19 ## x86_64/riscv64 only, so aarch64 builds borrow riscv64's headers (the
     20 ## resulting libc.flat.c references no SYS_* / kernel-stat fields, so
     21 ## the choice only affects type widths, all 64-bit Linux-identical).
     22 ##
     23 ## Usage: scripts/libc-flatten.sh [--arch <aarch64|amd64|riscv64>]
     24 
     25 set -eu
     26 
     27 ARCH=aarch64
     28 while [ $# -gt 0 ]; do
     29     case "$1" in
     30         --arch)    ARCH=$2; shift 2 ;;
     31         -h|--help) awk '/^##/ { sub(/^## ?/, ""); print }' "$0"; exit 0 ;;
     32         *) echo "unknown arg: $1" >&2; exit 2 ;;
     33     esac
     34 done
     35 
     36 case "$ARCH" in
     37     aarch64) MES_ARCH=riscv64 ;;
     38     amd64)   MES_ARCH=x86_64  ;;
     39     riscv64) MES_ARCH=riscv64 ;;
     40     *) echo "unknown ARCH: $ARCH" >&2; exit 2 ;;
     41 esac
     42 
     43 ROOT=$(cd "$(dirname "$0")/.." && pwd)
     44 VENDOR=$ROOT/vendor/mes-libc
     45 WORK=$ROOT/build/$ARCH/vendor/mes-libc
     46 STAGE=$WORK/libc-stage
     47 FLAT=$WORK/libc.flat.c
     48 
     49 [ -d "$VENDOR"          ] || { echo "missing $VENDOR"          >&2; exit 1; }
     50 [ -d "$VENDOR/include"  ] || { echo "missing $VENDOR/include"  >&2; exit 1; }
     51 [ -d "$VENDOR/include/linux/$MES_ARCH" ] \
     52     || { echo "missing $VENDOR/include/linux/$MES_ARCH" >&2; exit 1; }
     53 
     54 # --- (1) stage --------------------------------------------------------
     55 mkdir -p "$WORK"
     56 rm -rf "$STAGE"
     57 mkdir -p "$STAGE"
     58 # cp -R copies symlinks as files; staging is our writable scratch.
     59 cp -R "$VENDOR/." "$STAGE/"
     60 
     61 # mes's sys/stat.h, signal.h, dirent.h reach for <arch/kernel-stat.h>
     62 # and similar; the per-arch tree under include/linux/<MES_ARCH>/ is what
     63 # they want. Copy the per-arch tree into include/arch so the unprefixed
     64 # `arch/...` includes resolve. (cp -R, not ln -sfn — keeps the dep set
     65 # down to coreutils we already use.)
     66 cp -R "$STAGE/include/linux/$MES_ARCH" "$STAGE/include/arch"
     67 
     68 
     69 # --- (2) patches ------------------------------------------------------
     70 # Same literal-block replacer as stage1-flatten.sh apply_simple_patch.
     71 apply_simple_patch() {
     72     target=$1; before=$2; after=$3
     73     [ -r "$target" ] || { echo "patch target missing: $target" >&2; exit 1; }
     74     [ -r "$before" ] || { echo "patch before missing: $before" >&2; exit 1; }
     75     [ -r "$after"  ] || { echo "patch after missing: $after"   >&2; exit 1; }
     76     awk -v BFILE="$before" -v AFILE="$after" '
     77         BEGIN {
     78             while ((getline line < BFILE) > 0) bef = bef line "\n";
     79             close(BFILE);
     80             while ((getline line < AFILE) > 0) aft = aft line "\n";
     81             close(AFILE);
     82         }
     83         { src = src $0 "\n" }
     84         END {
     85             if (index(src, aft) > 0) {
     86                 printf "%s", src;
     87                 exit 0;
     88             }
     89             i = index(src, bef);
     90             if (i == 0) { print "patch did not match" > "/dev/stderr"; exit 1 }
     91             printf "%s%s%s",
     92                 substr(src, 1, i - 1),
     93                 aft,
     94                 substr(src, i + length(bef));
     95         }
     96     ' "$target" > "$target.new"
     97     mv "$target.new" "$target"
     98 }
     99 
    100 PATCHES=$STAGE/patches
    101 apply_simple_patch \
    102     "$STAGE/linux/malloc.c" \
    103     "$PATCHES/malloc-max-align.before" \
    104     "$PATCHES/malloc-max-align.after"
    105 apply_simple_patch \
    106     "$STAGE/linux/malloc.c" \
    107     "$PATCHES/malloc-brk-check.before" \
    108     "$PATCHES/malloc-brk-check.after"
    109 apply_simple_patch \
    110     "$STAGE/string/strstr.c" \
    111     "$PATCHES/strstr-drop-mman.before" \
    112     "$PATCHES/strstr-drop-mman.after"
    113 apply_simple_patch \
    114     "$STAGE/include/mes/lib-mini.h" \
    115     "$PATCHES/libmini-write-proto.before" \
    116     "$PATCHES/libmini-write-proto.after"
    117 apply_simple_patch \
    118     "$STAGE/include/mes/lib-mini.h" \
    119     "$PATCHES/libmini-write-proto2.before" \
    120     "$PATCHES/libmini-write-proto2.after"
    121 apply_simple_patch \
    122     "$STAGE/include/mes/lib.h" \
    123     "$PATCHES/lib-mes-debug-proto.before" \
    124     "$PATCHES/lib-mes-debug-proto.after"
    125 apply_simple_patch \
    126     "$STAGE/mes/ntoab.c" \
    127     "$PATCHES/ntoab-inline-defined.before" \
    128     "$PATCHES/ntoab-inline-defined.after"
    129 # stdio/{printf,sprintf,snprintf}.c carry a mes-mescc-specific
    130 # `ap += (__FOO_VARARGS + ...)` block guarded by `__GNUC__ && __x86_64__`.
    131 # That arithmetic is meaningful only inside mes's compiler; under stock
    132 # gcc preprocessing for amd64 it expands to a reference to an undefined
    133 # `__FOO_VARARGS` and breaks cc.scm. Strip the block — the va_start that
    134 # follows handles varargs correctly under any standard C compiler.
    135 apply_simple_patch \
    136     "$STAGE/stdio/printf.c" \
    137     "$PATCHES/printf-mes-varargs.before" \
    138     "$PATCHES/printf-mes-varargs.after"
    139 apply_simple_patch \
    140     "$STAGE/stdio/sprintf.c" \
    141     "$PATCHES/sprintf-mes-varargs.before" \
    142     "$PATCHES/sprintf-mes-varargs.after"
    143 apply_simple_patch \
    144     "$STAGE/stdio/snprintf.c" \
    145     "$PATCHES/snprintf-mes-varargs.before" \
    146     "$PATCHES/snprintf-mes-varargs.after"
    147 # stdio/vfprintf.c and stdio/vsnprintf.c read every integer / char
    148 # variadic via `va_arg(ap, long)`. On amd64 SysV an `int` arg occupies
    149 # an 8-byte reg-save slot whose upper 32 bits are unspecified — tcc's
    150 # codegen (and most other compilers') doesn't sign-extend ints into
    151 # the slot. Reading as `long` then leaks the garbage upper bits. Track
    152 # the `l` length modifier and dispatch the va_arg type accordingly.
    153 apply_simple_patch \
    154     "$STAGE/stdio/vfprintf.c" \
    155     "$PATCHES/printf-int-promo.before" \
    156     "$PATCHES/printf-int-promo.after"
    157 apply_simple_patch \
    158     "$STAGE/stdio/vsnprintf.c" \
    159     "$PATCHES/vsnprintf-int-promo.before" \
    160     "$PATCHES/vsnprintf-int-promo.after"
    161 # --- (3) flatten via host preprocessor --------------------------------
    162 HOST_CC=${HOST_CC:-cc}
    163 
    164 # Bridge file: post-patch tcc <stdarg.h>. Written by stage1-flatten.sh,
    165 # which boot3.sh / Makefile run first. Required so we can prepend the
    166 # per-arch va_list typedef + __builtin_va_* → tcc __va_* mapping into
    167 # libc.flat.c, eliminating the need for `-I /work/in/tcc-include
    168 # -include /work/in/tcc-include/stdarg.h` on every in-container compile.
    169 BRIDGE=$ROOT/build/$ARCH/vendor/tcc/stdarg-bridge.h
    170 [ -e "$BRIDGE" ] || { echo "missing $BRIDGE — run scripts/stage1-flatten.sh first" >&2; exit 1; }
    171 
    172 # -I order matters: vendor/boot2-include first so our stdarg.h shim
    173 # (routes va_* through __builtin_va_*; see comment in that file) wins
    174 # over mes's. Then $STAGE/include for everything else — <signal.h>,
    175 # <stdio.h>, etc. hit the canonical mes/include versions; arch/<…>
    176 # resolves through the include/arch symlink to include/linux/$MES_ARCH.
    177 # Putting the per-arch directory ahead of include/ makes <signal.h>
    178 # resolve to the partial arch-specific snippet (no stack_t typedef etc)
    179 # and the build breaks.
    180 "$HOST_CC" -E -P \
    181     -nostdinc \
    182     -I "$ROOT/vendor/boot2-include" \
    183     -I "$STAGE/include" \
    184     -I "$STAGE" \
    185     -D HAVE_CONFIG_H=0 \
    186     -D __linux__=1 \
    187     -D __${MES_ARCH}__=1 \
    188     -D __riscv_xlen=64 \
    189     -D inline= \
    190     "$STAGE/unified-libc.c" > "$FLAT.body"
    191 
    192 # Prepend the bridge, guarded by !CCSCM (cc.scm predefines CCSCM and
    193 # handles __builtin_va_* natively, so it must skip this block). Under
    194 # tcc, the per-arch #ifdefs inside the bridge resolve and provide the
    195 # va_list typedef + __builtin_va_* → tcc native __va_* macros.
    196 {
    197     echo '#ifndef CCSCM'
    198     cat "$BRIDGE"
    199     echo '#endif'
    200     cat "$FLAT.body"
    201 } > "$FLAT"
    202 rm -f "$FLAT.body"
    203 
    204 BYTES=$(wc -c < "$FLAT")
    205 echo "produced $FLAT  ($BYTES bytes)"