libc-flatten.sh (8135B)
#!/bin/sh
## libc-flatten.sh — flatten the vendored mes-libc + boot2-syscall.c
## into a single libc.flat.c using the host preprocessor. Mirrors
## stage1-flatten.sh; runs on the host, no container — hence the
## non-`boot-` name (the convention in scripts/ is that boot-*.sh
## runs inside the minimal container).
##
## Steps:
##   1. stage vendor/mes-libc → build/<arch>/vendor/mes-libc/libc-stage/
##   2. apply simple-patches (literal-block replacement, idempotent)
##   3. HOST_CC -E -nostdinc -I staging/include … staging/unified-libc.c
##        → build/<arch>/vendor/mes-libc/libc.flat.c
##
## Stage 4 (cc.scm libc.flat.c → libc.P1pp) is a separate Makefile rule
## that reuses scripts/boot-build-cc.sh inside the per-arch container.
##
## ARCH selects the boot2 target (aarch64/amd64/riscv64). MES_ARCH is
## the mes header tree we hand the host preprocessor; mes ships
## x86_64/riscv64 only, so aarch64 builds borrow riscv64's headers (the
## resulting libc.flat.c references no SYS_* / kernel-stat fields, so
## the choice only affects type widths, all 64-bit Linux-identical).
##
## Usage: scripts/libc-flatten.sh [--arch <aarch64|amd64|riscv64>]

# Maintainer note: --help prints every line of this file that starts
# with two hashes, so ordinary comments must use a single hash.

set -eu

# --- argument parsing -------------------------------------------------
ARCH=aarch64
while [ $# -gt 0 ]; do
    case "$1" in
        --arch)
            # Under `set -u` a bare `--arch` would otherwise die on the
            # unset $2 with an unhelpful "parameter not set" message.
            [ $# -ge 2 ] || { echo "--arch requires a value" >&2; exit 2; }
            ARCH=$2
            shift 2
            ;;
        -h|--help)
            awk '/^##/ { sub(/^## ?/, ""); print }' "$0"
            exit 0
            ;;
        *)
            echo "unknown arg: $1" >&2
            exit 2
            ;;
    esac
done

# Map the boot2 target onto the mes header tree used for preprocessing.
case "$ARCH" in
    aarch64) MES_ARCH=riscv64 ;;
    amd64)   MES_ARCH=x86_64 ;;
    riscv64) MES_ARCH=riscv64 ;;
    *) echo "unknown ARCH: $ARCH" >&2; exit 2 ;;
esac

# Repository root = parent of the directory holding this script.
# CDPATH is cleared for the cd: a relative path such as `scripts/..`
# may be looked up through an exported CDPATH, in which case cd prints
# the directory it chose on stdout and that text would end up in ROOT.
ROOT=$(CDPATH= cd -- "$(dirname "$0")/.." && pwd)
VENDOR=$ROOT/vendor/mes-libc
WORK=$ROOT/build/$ARCH/vendor/mes-libc
STAGE=$WORK/libc-stage
FLAT=$WORK/libc.flat.c

[ -d "$VENDOR" ] || { echo "missing $VENDOR" >&2; exit 1; }
[ -d "$VENDOR/include" ] || { echo "missing $VENDOR/include" >&2; exit 1; }
[ -d "$VENDOR/include/linux/$MES_ARCH" ] \
    || { echo "missing $VENDOR/include/linux/$MES_ARCH" >&2; exit 1; }

# --- (1) stage --------------------------------------------------------
# Staging is rebuilt from scratch on every run, so patches always start
# from the pristine vendored sources.
mkdir -p "$WORK"
rm -rf "$STAGE"
mkdir -p "$STAGE"
# cp -R copies symlinks as files; staging is our writable scratch.
# NOTE(review): POSIX leaves the symlink behaviour of a bare `cp -R`
# unspecified (no -H/-L/-P given) — confirm the claim above holds for
# the host cp if the vendored tree ever contains symlinks.
cp -R "$VENDOR/." "$STAGE/"

# mes's sys/stat.h, signal.h, dirent.h reach for <arch/kernel-stat.h>
# and similar; the per-arch tree under include/linux/<MES_ARCH>/ is what
# they want. Copy the per-arch tree into include/arch so the unprefixed
# `arch/...` includes resolve. (cp -R, not ln -sfn — keeps the dep set
# down to coreutils we already use.)
cp -R "$STAGE/include/linux/$MES_ARCH" "$STAGE/include/arch"


# --- (2) patches ------------------------------------------------------
# Literal-block replacer modelled on stage1-flatten.sh apply_simple_patch.
# apply_simple_patch TARGET BEFORE AFTER
#
# Replace the first literal occurrence of the contents of BEFORE inside
# TARGET with the contents of AFTER (no regexes, no fuzz). TARGET is
# rewritten through "$TARGET.new" + mv. Any failure — a missing file or
# a BEFORE block that is not found — terminates the whole script with
# status 1.
#
# Re-running is a no-op when AFTER is already present. One exception:
# if BEFORE is still present and AFTER is itself a literal piece of
# BEFORE (a deletion-style patch), seeing AFTER proves nothing, so the
# patch is applied. Without that exception such patches were silently
# skipped on pristine sources.
#
# NOTE(review): stage1-flatten.sh carries its own copy of this helper;
# mirror the deletion-patch handling there if that copy has the same
# check order.
#
# POSIX sh has no `local`: target/before/after are script globals.
apply_simple_patch() {
    target=$1; before=$2; after=$3
    [ -r "$target" ] || { echo "patch target missing: $target" >&2; exit 1; }
    [ -r "$before" ] || { echo "patch before missing: $before" >&2; exit 1; }
    [ -r "$after" ]  || { echo "patch after missing: $after" >&2; exit 1; }
    if ! awk -v BFILE="$before" -v AFILE="$after" '
        BEGIN {
            # Slurp both halves whole; getline strips the newline, so
            # it is re-added per line.
            while ((getline line < BFILE) > 0) bef = bef line "\n";
            close(BFILE);
            while ((getline line < AFILE) > 0) aft = aft line "\n";
            close(AFILE);
        }
        { src = src $0 "\n" }
        END {
            # index(s, "") is 0 in some awks and 1 in others; pin the
            # empty-block cases so every host behaves the same way.
            at = (bef == "") ? 0 : index(src, bef);
            applied = (aft != "" && index(src, aft) > 0);
            # Deletion-style patch on unpatched source: AFTER was only
            # found because it lives inside BEFORE.
            if (applied && at > 0 && index(bef, aft) > 0) applied = 0;
            if (applied) {
                printf "%s", src;
                exit 0;
            }
            if (at == 0) { print "patch did not match" > "/dev/stderr"; exit 1 }
            printf "%s%s%s",
                substr(src, 1, at - 1),
                aft,
                substr(src, at + length(bef));
        }
    ' "$target" > "$target.new"; then
        # Do not leave a half-written sibling behind, and say which
        # patch failed (awk only reports that something did not match).
        rm -f "$target.new"
        echo "patch failed: $before -> $target" >&2
        exit 1
    fi
    mv "$target.new" "$target"
}

PATCHES=$STAGE/patches
apply_simple_patch \
    "$STAGE/linux/malloc.c" \
    "$PATCHES/malloc-max-align.before" \
    "$PATCHES/malloc-max-align.after"
apply_simple_patch \
    "$STAGE/linux/malloc.c" \
    "$PATCHES/malloc-brk-check.before" \
    "$PATCHES/malloc-brk-check.after"
apply_simple_patch \
    "$STAGE/string/strstr.c" \
    "$PATCHES/strstr-drop-mman.before" \
    "$PATCHES/strstr-drop-mman.after"
apply_simple_patch \
    "$STAGE/include/mes/lib-mini.h" \
    "$PATCHES/libmini-write-proto.before" \
    "$PATCHES/libmini-write-proto.after"
apply_simple_patch \
    "$STAGE/include/mes/lib-mini.h" \
    "$PATCHES/libmini-write-proto2.before" \
    "$PATCHES/libmini-write-proto2.after"
apply_simple_patch \
    "$STAGE/include/mes/lib.h" \
    "$PATCHES/lib-mes-debug-proto.before" \
    "$PATCHES/lib-mes-debug-proto.after"
apply_simple_patch \
    "$STAGE/mes/ntoab.c" \
    "$PATCHES/ntoab-inline-defined.before" \
    "$PATCHES/ntoab-inline-defined.after"
# stdio/{printf,sprintf,snprintf}.c carry a mes-mescc-specific
# `ap += (__FOO_VARARGS + ...)` block guarded by `__GNUC__ && __x86_64__`.
# That arithmetic is meaningful only inside mes's compiler; under stock
# gcc preprocessing for amd64 it expands to a reference to an undefined
# `__FOO_VARARGS` and breaks cc.scm. Strip the block — the va_start that
# follows handles varargs correctly under any standard C compiler.
apply_simple_patch \
    "$STAGE/stdio/printf.c" \
    "$PATCHES/printf-mes-varargs.before" \
    "$PATCHES/printf-mes-varargs.after"
apply_simple_patch \
    "$STAGE/stdio/sprintf.c" \
    "$PATCHES/sprintf-mes-varargs.before" \
    "$PATCHES/sprintf-mes-varargs.after"
apply_simple_patch \
    "$STAGE/stdio/snprintf.c" \
    "$PATCHES/snprintf-mes-varargs.before" \
    "$PATCHES/snprintf-mes-varargs.after"
# stdio/vfprintf.c and stdio/vsnprintf.c read every integer / char
# variadic via `va_arg(ap, long)`. On amd64 SysV an `int` arg occupies
# an 8-byte reg-save slot whose upper 32 bits are unspecified — tcc's
# codegen (and most other compilers') doesn't sign-extend ints into
# the slot. Reading as `long` then leaks the garbage upper bits. Track
# the `l` length modifier and dispatch the va_arg type accordingly.
apply_simple_patch \
    "$STAGE/stdio/vfprintf.c" \
    "$PATCHES/printf-int-promo.before" \
    "$PATCHES/printf-int-promo.after"
apply_simple_patch \
    "$STAGE/stdio/vsnprintf.c" \
    "$PATCHES/vsnprintf-int-promo.before" \
    "$PATCHES/vsnprintf-int-promo.after"

# --- (3) flatten via host preprocessor --------------------------------
# The host compiler is only used as a preprocessor; override via HOST_CC.
HOST_CC=${HOST_CC:-cc}

# Bridge file: post-patch tcc <stdarg.h>. Written by stage1-flatten.sh,
# which boot3.sh / Makefile run first. Required so we can prepend the
# per-arch va_list typedef + __builtin_va_* → tcc __va_* mapping into
# libc.flat.c, eliminating the need for `-I /work/in/tcc-include
# -include /work/in/tcc-include/stdarg.h` on every in-container compile.
# Location of the stdarg bridge header that gets prepended to the
# flattened output; its absence is a hard error.
BRIDGE=$ROOT/build/$ARCH/vendor/tcc/stdarg-bridge.h
[ -e "$BRIDGE" ] || { echo "missing $BRIDGE — run scripts/stage1-flatten.sh first" >&2; exit 1; }

# -I order matters: vendor/boot2-include first so our stdarg.h shim
# (routes va_* through __builtin_va_*; see comment in that file) wins
# over mes's. Then $STAGE/include for everything else — <signal.h>,
# <stdio.h>, etc. hit the canonical mes/include versions; arch/<…>
# resolves through the include/arch copy of include/linux/$MES_ARCH
# made during staging. Putting the per-arch directory ahead of include/
# makes <signal.h> resolve to the partial arch-specific snippet (no
# stack_t typedef etc) and the build breaks.
#
# -E -P: preprocess only, without line markers. `-D inline=` erases the
# `inline` keyword from the flattened source.
# NOTE(review): __riscv_xlen=64 is defined for every MES_ARCH, x86_64
# included — presumably harmless there; confirm.
"$HOST_CC" -E -P \
    -nostdinc \
    -I "$ROOT/vendor/boot2-include" \
    -I "$STAGE/include" \
    -I "$STAGE" \
    -D HAVE_CONFIG_H=0 \
    -D __linux__=1 \
    -D "__${MES_ARCH}__=1" \
    -D __riscv_xlen=64 \
    -D inline= \
    "$STAGE/unified-libc.c" > "$FLAT.body"

# Prepend the bridge, guarded by !CCSCM (cc.scm predefines CCSCM and
# handles __builtin_va_* natively, so it must skip this block). Under
# tcc, the per-arch #ifdefs inside the bridge resolve and provide the
# va_list typedef + __builtin_va_* → tcc native __va_* macros.
#
# Assemble into a sibling file and rename it into place: a failure
# part-way through must not leave a truncated libc.flat.c with a fresh
# timestamp for make to treat as up to date.
{
    echo '#ifndef CCSCM'
    cat "$BRIDGE"
    echo '#endif'
    cat "$FLAT.body"
} > "$FLAT.new"
rm -f "$FLAT.body"
mv "$FLAT.new" "$FLAT"

# Some wc implementations pad the count with leading blanks; running it
# through arithmetic expansion normalises it to a bare number.
BYTES=$(( $(wc -c < "$FLAT") ))
echo "produced $FLAT ($BYTES bytes)"