boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit f56fbdf806233d2fa60d837b415f909d71ef10a6
parent 7c103f48b35e13efa8aafa4fb2c49a5a50a257dd
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Tue,  5 May 2026 16:17:00 -0700

boot{0..5}: split stage dirs into in/ + out/

Replace the single $STAGE/cpio/ flat namespace with $STAGE/in/ (ro
inputs) + $STAGE/out/ (rw outputs). Both pipelines (lib-runscm for
boot3/4/5; lib-pipeline for boot0/1/2) use the convention; extract-blk
filters dumped tmpfs entries to out/-prefixed only and strips the
prefix on the way to host $STAGE/out/.

Per docs/STAGE-IN-OUT.md.

scripts/boot.sh: per-stage and total wall time via stage wrapper +
EXIT trap.

Diffstat:
M scripts/boot.sh | 25 ++++++++++++++++++-------
M scripts/boot3-run.scm | 22 +++++++++++-----------
M scripts/boot4-gen-runscm.sh | 60 +++++++++++++++++++++++++++++++++---------------------------
M scripts/boot5-gen-runscm.sh | 85 ++++++++++++++++++++++++++++++++++++++++---------------------------------------
M scripts/boot5.sh | 48 +++++++++++++++++++++++++-----------------------
M scripts/lib-pipeline.sh | 82 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------
M scripts/lib-runscm.sh | 127 +++++++++++++++++++++++++++++++------------------------------------------------
M seed-kernel/scripts/extract-blk.sh | 17 +++++++++++++++--
8 files changed, 262 insertions(+), 204 deletions(-)

diff --git a/scripts/boot.sh b/scripts/boot.sh @@ -8,7 +8,7 @@ ## DRIVER (default podman) is exported and consumed by each bootN.sh. ## DRIVER=seed is aarch64-only and requires seed-kernel/build/Image. -set -ex +set -e ARCH=$1 DRIVER=${DRIVER:-podman} @@ -30,9 +30,20 @@ export DRIVER rm -rf build/$ARCH -./scripts/boot0.sh $ARCH -./scripts/boot1.sh $ARCH -./scripts/boot2.sh $ARCH -./scripts/boot3.sh $ARCH -./scripts/boot4.sh $ARCH -./scripts/boot5.sh $ARCH +T0=$(date +%s) +trap 'echo "[boot/$DRIVER $ARCH] elapsed at exit: $(($(date +%s) - T0))s"' EXIT + +stage() { + name=$1; shift + s=$(date +%s) + "$@" + e=$(date +%s) + echo "[boot/$DRIVER $ARCH] $name: $((e - s))s (cum $((e - T0))s)" +} + +stage boot0 ./scripts/boot0.sh $ARCH +stage boot1 ./scripts/boot1.sh $ARCH +stage boot2 ./scripts/boot2.sh $ARCH +stage boot3 ./scripts/boot3.sh $ARCH +stage boot4 ./scripts/boot4.sh $ARCH +stage boot5 ./scripts/boot5.sh $ARCH diff --git a/scripts/boot3-run.scm b/scripts/boot3-run.scm @@ -1,6 +1,6 @@ ;; boot3 run.scm — drive cc.scm → tcc0 inside the seed kernel. -;; Mirrors the podman path's run.sh exactly; intermediate files live -;; in the flat tmpfs at top level (no /tmp/ prefix). +;; Mirrors the podman path's run.sh exactly. Reads use in/<name>; +;; writes use out/<name>. Both drivers see the same flat namespace. 
(define (must r tag) (if (and (car r) (= 0 (cdr r))) @@ -12,27 +12,27 @@ (exit 1)))) (write-string stdout "boot3: catm cc-bundle\n") -(must (run "catm" "cc-bundled.scm" "prelude.scm" "cc.scm" "main.scm") +(must (run "in/catm" "out/cc-bundled.scm" "in/prelude.scm" "in/cc.scm" "in/main.scm") "catm cc-bundle") (write-string stdout "boot3: scheme1 libc\n") -(must (run "scheme1" "cc-bundled.scm" "--lib=libc__" "libc.flat.c" "libc.P1pp") +(must (run "in/scheme1" "out/cc-bundled.scm" "--lib=libc__" "in/libc.flat.c" "out/libc.P1pp") "scheme1 libc") (write-string stdout "boot3: scheme1 tcc\n") -(must (run "scheme1" "cc-bundled.scm" "--lib=tcc__" "tcc.flat.c" "tcc.flat.P1pp") +(must (run "in/scheme1" "out/cc-bundled.scm" "--lib=tcc__" "in/tcc.flat.c" "out/tcc.flat.P1pp") "scheme1 tcc") (write-string stdout "boot3: catm combined.M1pp\n") -(must (run "catm" "combined.M1pp" - "backend.M1pp" "frontend.M1pp" "libp1pp.P1pp" - "entry-libc.P1pp" "libc.P1pp" "tcc.flat.P1pp" "elf-end.P1pp") +(must (run "in/catm" "out/combined.M1pp" + "in/backend.M1pp" "in/frontend.M1pp" "in/libp1pp.P1pp" + "in/entry-libc.P1pp" "out/libc.P1pp" "out/tcc.flat.P1pp" "in/elf-end.P1pp") "catm combined.M1pp") (write-string stdout "boot3: M1pp\n") -(must (run "M1pp" "combined.M1pp" "expanded.hex2pp") +(must (run "in/M1pp" "out/combined.M1pp" "out/expanded.hex2pp") "M1pp") (write-string stdout "boot3: catm linked.hex2pp\n") -(must (run "catm" "linked.hex2pp" "ELF.hex2" "expanded.hex2pp") +(must (run "in/catm" "out/linked.hex2pp" "in/ELF.hex2" "out/expanded.hex2pp") "catm linked.hex2pp") (write-string stdout "boot3: hex2pp\n") -(must (run "hex2pp" "-B" "0x600000" "linked.hex2pp" "tcc0") +(must (run "in/hex2pp" "-B" "0x600000" "out/linked.hex2pp" "out/tcc0") "hex2pp") (exit 0) diff --git a/scripts/boot4-gen-runscm.sh b/scripts/boot4-gen-runscm.sh @@ -4,6 +4,8 @@ ## emission; per-arch values resolved on the host so the .scm body is ## straight-line (run …) calls. 
## +## Reads use in/<name>; writes (intermediates and exports) use out/<name>. +## ## Usage: boot4-gen-runscm.sh <arch> <out.scm> set -eu @@ -29,14 +31,17 @@ case "$ARCH" in *) echo "boot4-gen: unknown arch $ARCH" >&2; exit 2 ;; esac +# emit_helpers — cc reads .S/.c sources from in/, writes .o to out/. +# cc_path is the cwd-relative path to the spawned compiler binary (in/tcc0 +# for round B; out/tcc1, out/tcc2 in later rounds). emit_helpers() { - cc=$1 + cc_path=$1; tag=$2 cat <<EOF -(must (run "$cc" "-nostdlib" "-c" "-o" "start.o" "start.S") "$cc start.o") -(must (run "$cc" "-nostdlib" "-c" "-o" "sys_stubs.o" "sys_stubs.S") "$cc sys_stubs.o") -(must (run "$cc" "-nostdlib" "-c" "-o" "mem.o" "mem.c") "$cc mem.o") -(must (run "$cc" "-nostdlib" "-c" "-o" "libc.o" "libc.flat.c") "$cc libc.o") -(must (run "$cc" "-nostdlib" $LIB_HELPER_DEFS "-c" "-o" "$LIB_HELPER_OBJ" "$LIB_HELPER_SRC") "$cc $LIB_HELPER_OBJ") +(must (run "$cc_path" "-nostdlib" "-c" "-o" "out/start.o" "in/start.S") "$tag start.o") +(must (run "$cc_path" "-nostdlib" "-c" "-o" "out/sys_stubs.o" "in/sys_stubs.S") "$tag sys_stubs.o") +(must (run "$cc_path" "-nostdlib" "-c" "-o" "out/mem.o" "in/mem.c") "$tag mem.o") +(must (run "$cc_path" "-nostdlib" "-c" "-o" "out/libc.o" "in/libc.flat.c") "$tag libc.o") +(must (run "$cc_path" "-nostdlib" $LIB_HELPER_DEFS "-c" "-o" "out/$LIB_HELPER_OBJ" "in/$LIB_HELPER_SRC") "$tag $LIB_HELPER_OBJ") EOF } @@ -47,34 +52,35 @@ EOF # the stage's helper-named .o files; nothing post-archive in the same # stage reads them as standalone .o. 
emit_archive() { - cc=$1; pfx=$2 - echo "(must (run \"catm\" \"${pfx}crt1.o\" \"start.o\") \"copy crt1.o $pfx\")" - echo "(must (run \"$cc\" \"-ar\" \"rcs\" \"${pfx}libc.a\" \"sys_stubs.o\" \"mem.o\" \"libc.o\") \"$cc ${pfx}libc.a\")" + cc_path=$1; tag=$2; pfx=$3 + echo "(must (run \"in/catm\" \"out/${pfx}crt1.o\" \"out/start.o\") \"copy crt1.o $pfx\")" + echo "(must (run \"$cc_path\" \"-ar\" \"rcs\" \"out/${pfx}libc.a\" \"out/sys_stubs.o\" \"out/mem.o\" \"out/libc.o\") \"$tag ${pfx}libc.a\")" libtcc1_objs="" for src in $LIBTCC1_C_SRCS; do obj=${src%.c}.o - echo "(must (run \"$cc\" \"-nostdlib\" $LIBTCC1_C_DEFS \"-c\" \"-o\" \"${obj}\" \"$src\") \"$cc lt ${obj}\")" - libtcc1_objs="$libtcc1_objs \"${obj}\"" + echo "(must (run \"$cc_path\" \"-nostdlib\" $LIBTCC1_C_DEFS \"-c\" \"-o\" \"out/${obj}\" \"in/$src\") \"$tag lt ${obj}\")" + libtcc1_objs="$libtcc1_objs \"out/${obj}\"" done for src in $LIBTCC1_ASM_SRCS; do obj=${src%.S}.o - echo "(must (run \"$cc\" \"-nostdlib\" \"-c\" \"-o\" \"${obj}\" \"$src\") \"$cc lt ${obj}\")" - libtcc1_objs="$libtcc1_objs \"${obj}\"" + echo "(must (run \"$cc_path\" \"-nostdlib\" \"-c\" \"-o\" \"out/${obj}\" \"in/$src\") \"$tag lt ${obj}\")" + libtcc1_objs="$libtcc1_objs \"out/${obj}\"" done - echo "(must (run \"$cc\" \"-ar\" \"rcs\" \"${pfx}libtcc1.a\"$libtcc1_objs) \"$cc ${pfx}libtcc1.a\")" + echo "(must (run \"$cc_path\" \"-ar\" \"rcs\" \"out/${pfx}libtcc1.a\"$libtcc1_objs) \"$tag ${pfx}libtcc1.a\")" } emit_link_tcc() { - cc=$1; pfx=$2; out=$3 - echo "(must (run \"$cc\" \"-nostdlib\" \"${pfx}crt1.o\" \"tcc.flat.c\" \"${pfx}libc.a\" \"${pfx}libtcc1.a\" \"${pfx}libc.a\" \"-o\" \"$out\") \"$cc -> $out\")" + cc_path=$1; tag=$2; pfx=$3; out=$4 + echo "(must (run \"$cc_path\" \"-nostdlib\" \"out/${pfx}crt1.o\" \"in/tcc.flat.c\" \"out/${pfx}libc.a\" \"out/${pfx}libtcc1.a\" \"out/${pfx}libc.a\" \"-o\" \"out/$out\") \"$tag -> $out\")" } { cat <<'PROLOGUE' ;; boot4 run.scm — drive tcc0 -> tcc1 -> tcc2 -> tcc3 inside seed kernel. 
;; Generated by scripts/boot4-gen-runscm.sh; mirrors scripts/boot4.sh's -;; podman path stage-for-stage. Intermediate .o/.a files live flat in -;; the tmpfs (no /tmp/stageN/ prefix; stages are namespaced by filename). +;; podman path stage-for-stage. Reads use in/; writes (intermediates and +;; exports) use out/. tcc0 is staged as in/tcc0; tcc1/tcc2/tcc3 are +;; produced and exported under out/. (define (must r tag) (if (and (car r) (= 0 (cdr r))) @@ -89,20 +95,20 @@ cat <<'PROLOGUE' PROLOGUE # Stage B: tcc0 builds helper objects (no archive). -emit_helpers tcc0 +emit_helpers in/tcc0 tcc0 cat <<EOF (write-string stdout "boot4: stage C (tcc0 -> tcc1)\n") -(must (run "tcc0" "-nostdlib" "start.o" "sys_stubs.o" "mem.o" "libc.o" "$LIB_HELPER_OBJ" "tcc.flat.c" "-o" "tcc1") "tcc0 -> tcc1") +(must (run "in/tcc0" "-nostdlib" "out/start.o" "out/sys_stubs.o" "out/mem.o" "out/libc.o" "out/$LIB_HELPER_OBJ" "in/tcc.flat.c" "-o" "out/tcc1") "tcc0 -> tcc1") (write-string stdout "boot4: stage D (tcc1 -> tcc2)\n") EOF # Stage D: tcc1 rebuilds helpers + archive, links tcc2. -emit_helpers tcc1 -emit_archive tcc1 "s2-" -emit_link_tcc tcc1 "s2-" tcc2 +emit_helpers out/tcc1 tcc1 +emit_archive out/tcc1 tcc1 "s2-" +emit_link_tcc out/tcc1 tcc1 "s2-" tcc2 cat <<EOF @@ -110,14 +116,14 @@ cat <<EOF EOF # Stage E: tcc2 rebuilds helpers + archive, links tcc3. 
-emit_helpers tcc2 -emit_archive tcc2 "s3-" -emit_link_tcc tcc2 "s3-" tcc3 +emit_helpers out/tcc2 tcc2 +emit_archive out/tcc2 tcc2 "s3-" +emit_link_tcc out/tcc2 tcc2 "s3-" tcc3 cat <<'EPILOGUE' (write-string stdout "boot4: linking hello\n") -(must (run "tcc2" "-nostdlib" "s3-crt1.o" "hello.c" "s3-libc.a" "s3-libtcc1.a" "s3-libc.a" "-o" "hello") "tcc2 -> hello") +(must (run "out/tcc2" "-nostdlib" "out/s3-crt1.o" "in/hello.c" "out/s3-libc.a" "out/s3-libtcc1.a" "out/s3-libc.a" "-o" "out/hello") "tcc2 -> hello") (write-string stdout "boot4: ALL-OK\n") (exit 0) EPILOGUE diff --git a/scripts/boot5-gen-runscm.sh b/scripts/boot5-gen-runscm.sh @@ -3,7 +3,7 @@ ## inside the seed kernel. Mirrors scripts/boot5.sh's podman-path script ## generation step-for-step: per-source `tcc -c`, per-arch CRT, archive, ## link hello. Source enumeration done by boot5.sh; this script consumes -## the resulting build-srcs.txt and emits one `(run "tcc" …)` form per TU. +## the resulting build-srcs.txt and emits one `(run "in/tcc" …)` form per TU. 
## ## Usage: ## boot5-gen-runscm.sh <musl-arch> <stage-host-dir> <out.scm> @@ -14,16 +14,16 @@ ## ## Conventions (cwd-relative; resolves to / under seed init, /work under ## podman bind-mount): -## musl tree tmp/musl-1.2.5/<rel-path> (staged in cpio) -## pre-gen hdrs tmp/musl-1.2.5/obj/include/bits/{alltypes,syscall}.h, -## tmp/musl-1.2.5/obj/src/internal/version.h -## .o outputs tmp/musl-1.2.5/obj/<src-with-.o> -## tcc binary tcc (basename in cpio) -## libtcc1.a libtcc1.a -## stdarg bridge tcc-stdarg-bridge.h -## hello.c hello.c -## exports libc.a, crt1.o, crti.o, crtn.o, hello (flat at root -## so runscm_export can pull them by basename) +## musl tree in/tmp/musl-1.2.5/<rel-path> (read-only) +## pre-gen hdrs in/tmp/musl-1.2.5/obj/include/bits/{alltypes,syscall}.h, +## in/tmp/musl-1.2.5/obj/src/internal/version.h +## .o outputs out/obj/musl-1.2.5/<src-with-.o> (rw; pre-mkdir'd by host) +## tcc binary in/tcc (input) +## libtcc1.a in/libtcc1.a (input) +## stdarg bridge in/tcc-stdarg-bridge.h +## hello.c in/hello.c +## exports out/{libc.a,crt1.o,crti.o,crtn.o,hello} +## (flat at out/ root so runscm_export pulls by basename) set -eu [ "$#" -eq 3 ] || { echo "usage: $0 <musl-arch> <stage-host-dir> <out.scm>" >&2; exit 2; } @@ -33,15 +33,16 @@ SRCS=$STAGE_HOST/build-srcs.txt CRT_MODE=$(cat "$STAGE_HOST/crt-mode") [ -e "$SRCS" ] || { echo "missing $SRCS" >&2; exit 1; } -CWORK=tmp/musl-1.2.5 +CIN=in/tmp/musl-1.2.5 +COUT=out/obj/musl-1.2.5 # Mirrors boot5.sh's CFLAGS_BASE exactly; the only difference is that # every per-arg token is quoted as its own scheme bytevector. The leading -# "tcc" is the spawned binary; everything after is its argv. +# "in/tcc" is the spawned binary; everything after is its argv. 
CFLAGS_BASE_QUOTED='"-std=c99" "-nostdinc" "-ffreestanding" "-fno-strict-aliasing" "-D_XOPEN_SOURCE=700"' -CFLAGS_BASE_QUOTED="$CFLAGS_BASE_QUOTED \"-I$CWORK/arch/$MUSL_ARCH\" \"-I$CWORK/arch/generic\" \"-I$CWORK/obj/src/internal\" \"-I$CWORK/src/include\" \"-I$CWORK/src/internal\" \"-I$CWORK/obj/include\" \"-I$CWORK/include\"" +CFLAGS_BASE_QUOTED="$CFLAGS_BASE_QUOTED \"-I$CIN/arch/$MUSL_ARCH\" \"-I$CIN/arch/generic\" \"-I$CIN/obj/src/internal\" \"-I$CIN/src/include\" \"-I$CIN/src/internal\" \"-I$CIN/obj/include\" \"-I$CIN/include\"" CFLAGS_BASE_QUOTED="$CFLAGS_BASE_QUOTED \"-O2\" \"-fomit-frame-pointer\" \"-Werror=implicit-function-declaration\" \"-Werror=implicit-int\" \"-Werror=pointer-sign\" \"-Werror=pointer-arith\"" -CFLAGS_C_QUOTED="$CFLAGS_BASE_QUOTED \"-include\" \"tcc-stdarg-bridge.h\"" +CFLAGS_C_QUOTED="$CFLAGS_BASE_QUOTED \"-include\" \"in/tcc-stdarg-bridge.h\"" CFLAGS_ASM_QUOTED="$CFLAGS_BASE_QUOTED" CRTFLAGS_C_QUOTED="$CFLAGS_C_QUOTED \"-fno-stack-protector\" \"-DCRT\"" CRTFLAGS_ASM_QUOTED="$CFLAGS_ASM_QUOTED \"-fno-stack-protector\" \"-DCRT\"" @@ -51,10 +52,10 @@ cat <<'PROLOGUE' ;; boot5 run.scm — drive musl-1.2.5 (~500 TUs) + hello. ;; Generated by scripts/boot5-gen-runscm.sh; consumed by both DRIVER=podman ;; (cwd=/work bind mount) and DRIVER=seed (cwd=/, cpio rootfs). The musl -;; source tree is staged at tmp/musl-1.2.5/...; per-source .o outputs go -;; to tmp/musl-1.2.5/obj/...; final artefacts (libc.a, crt1.o, crti.o, -;; crtn.o, hello) land at flat root paths so runscm_export can pull them -;; by basename. +;; source tree is staged read-only at in/tmp/musl-1.2.5/...; per-source .o +;; outputs go to out/obj/musl-1.2.5/...; final artefacts (libc.a, crt1.o, +;; crti.o, crtn.o, hello) land at flat out/ paths so runscm_export can +;; pull them by basename. (define (must r tag) (if (and (car r) (= 0 (cdr r))) @@ -70,16 +71,16 @@ PROLOGUE # Stage A: per-source compile. 
Each line of build-srcs.txt is a path # relative to musl-1.2.5/; choose flags by extension. -awk -v CFLAGS_C="$CFLAGS_C_QUOTED" -v CFLAGS_ASM="$CFLAGS_ASM_QUOTED" -v CWORK="$CWORK" ' +awk -v CFLAGS_C="$CFLAGS_C_QUOTED" -v CFLAGS_ASM="$CFLAGS_ASM_QUOTED" -v CIN="$CIN" -v COUT="$COUT" ' { src = $0 - obj = "obj/" src + obj = src sub(/\.[^.]*$/, ".o", obj) if (src ~ /\.c$/) flags = CFLAGS_C else if (src ~ /\.[sS]$/) flags = CFLAGS_ASM else flags = CFLAGS_C - printf "(must (run \"tcc\" %s \"-c\" \"%s/%s\" \"-o\" \"%s/%s\") \"%s\")\n", \ - flags, CWORK, src, CWORK, obj, src + printf "(must (run \"in/tcc\" %s \"-c\" \"%s/%s\" \"-o\" \"%s/%s\") \"%s\")\n", \ + flags, CIN, src, COUT, obj, src }' "$SRCS" cat <<EOF @@ -87,20 +88,20 @@ cat <<EOF (write-string stdout "boot5: stage B (CRT)\n") ;; Position-independent + non-PIC CRT helpers. -fPIC objects are needed ;; for shared-binding tools, even though our hello is fully static. -(must (run "tcc" $CRTFLAGS_C_QUOTED "-fPIC" "-c" "$CWORK/crt/Scrt1.c" "-o" "$CWORK/obj/crt/Scrt1.o") "Scrt1.o") -(must (run "tcc" $CRTFLAGS_C_QUOTED "-c" "$CWORK/crt/crt1.c" "-o" "$CWORK/obj/crt/crt1.o") "crt1.o") -(must (run "tcc" $CRTFLAGS_C_QUOTED "-fPIC" "-c" "$CWORK/crt/rcrt1.c" "-o" "$CWORK/obj/crt/rcrt1.o") "rcrt1.o") +(must (run "in/tcc" $CRTFLAGS_C_QUOTED "-fPIC" "-c" "$CIN/crt/Scrt1.c" "-o" "$COUT/crt/Scrt1.o") "Scrt1.o") +(must (run "in/tcc" $CRTFLAGS_C_QUOTED "-c" "$CIN/crt/crt1.c" "-o" "$COUT/crt/crt1.o") "crt1.o") +(must (run "in/tcc" $CRTFLAGS_C_QUOTED "-fPIC" "-c" "$CIN/crt/rcrt1.c" "-o" "$COUT/crt/rcrt1.o") "rcrt1.o") EOF if [ "$CRT_MODE" = asm ]; then cat <<EOF -(must (run "tcc" $CRTFLAGS_ASM_QUOTED "-c" "$CWORK/crt/$MUSL_ARCH/crti.s" "-o" "$CWORK/obj/crt/crti.o") "crti.o") -(must (run "tcc" $CRTFLAGS_ASM_QUOTED "-c" "$CWORK/crt/$MUSL_ARCH/crtn.s" "-o" "$CWORK/obj/crt/crtn.o") "crtn.o") +(must (run "in/tcc" $CRTFLAGS_ASM_QUOTED "-c" "$CIN/crt/$MUSL_ARCH/crti.s" "-o" "$COUT/crt/crti.o") "crti.o") +(must (run "in/tcc" 
$CRTFLAGS_ASM_QUOTED "-c" "$CIN/crt/$MUSL_ARCH/crtn.s" "-o" "$COUT/crt/crtn.o") "crtn.o") EOF else cat <<EOF -(must (run "tcc" $CRTFLAGS_C_QUOTED "-c" "$CWORK/crt/crti.c" "-o" "$CWORK/obj/crt/crti.o") "crti.o") -(must (run "tcc" $CRTFLAGS_C_QUOTED "-c" "$CWORK/crt/crtn.c" "-o" "$CWORK/obj/crt/crtn.o") "crtn.o") +(must (run "in/tcc" $CRTFLAGS_C_QUOTED "-c" "$CIN/crt/crti.c" "-o" "$COUT/crt/crti.o") "crti.o") +(must (run "in/tcc" $CRTFLAGS_C_QUOTED "-c" "$CIN/crt/crtn.c" "-o" "$COUT/crt/crtn.o") "crtn.o") EOF fi @@ -109,27 +110,27 @@ fi # ~60 KB on a single line) but the prelude reader handles it fine. { printf '\n(write-string stdout "boot5: stage C (libc.a)\\n")\n' - printf '(must (run "tcc" "-ar" "rcs" "libc.a"' - awk -v CWORK="$CWORK" '{ - obj = "obj/" $0 + printf '(must (run "in/tcc" "-ar" "rcs" "out/libc.a"' + awk -v COUT="$COUT" '{ + obj = $0 sub(/\.[^.]*$/, ".o", obj) - printf " \"%s/%s\"", CWORK, obj + printf " \"%s/%s\"", COUT, obj }' "$SRCS" printf ') "libc.a")\n' } cat <<EOF -;; Publish CRT objects at flat cwd-relative paths so runscm_export can pull them. -(must (run "catm" "crt1.o" "$CWORK/obj/crt/crt1.o") "crt1.o publish") -(must (run "catm" "crti.o" "$CWORK/obj/crt/crti.o") "crti.o publish") -(must (run "catm" "crtn.o" "$CWORK/obj/crt/crtn.o") "crtn.o publish") +;; Publish CRT objects at flat out/ paths so runscm_export can pull them. +(must (run "in/catm" "out/crt1.o" "$COUT/crt/crt1.o") "crt1.o publish") +(must (run "in/catm" "out/crti.o" "$COUT/crt/crti.o") "crti.o publish") +(must (run "in/catm" "out/crtn.o" "$COUT/crt/crtn.o") "crtn.o publish") (write-string stdout "boot5: stage D (link hello)\n") -;; -L. picks up libc.a + libtcc1.a from the flat staging root (cwd). -(must (run "tcc" "-static" "-nostdinc" "-nostdlib" "-include" "tcc-stdarg-bridge.h" - "-I$CWORK/include" "-I$CWORK/arch/$MUSL_ARCH" "-I$CWORK/arch/generic" "-I$CWORK/obj/include" - "crt1.o" "hello.c" "-L." "-lc" "-L." "-ltcc1" "-L." 
"-lc" "-o" "hello") "link hello") +;; -Lout pulls libc.a (just built); -Lin pulls libtcc1.a (input). +(must (run "in/tcc" "-static" "-nostdinc" "-nostdlib" "-include" "in/tcc-stdarg-bridge.h" + "-I$CIN/include" "-I$CIN/arch/$MUSL_ARCH" "-I$CIN/arch/generic" "-I$CIN/obj/include" + "out/crt1.o" "in/hello.c" "-Lout" "-lc" "-Lin" "-ltcc1" "-Lout" "-lc" "-o" "out/hello") "link hello") (write-string stdout "boot5: ALL-OK\n") (exit 0) diff --git a/scripts/boot5.sh b/scripts/boot5.sh @@ -111,23 +111,23 @@ fi export IMAGE PLATFORM DRIVER # ── prepare staging dirs and musl tree on host ──────────────────────── -# $STAGE/cpio/ — flat staging root (becomes /work bind mount or cpio rootfs) +# $STAGE/in/ — read-only inputs (becomes /work/in or in/ in tmpfs) +# $STAGE/out/ — writable outputs (becomes /work/out or out/ in tmpfs) # $STAGE/_host/ — host-side scratch (enumeration outputs, intermediates); # never visible to the container/kernel -# runscm_init wipes $STAGE then mkdirs cpio/ and out/. Do that first so +# runscm_init wipes $STAGE then mkdirs in/ and out/. Do that first so # we control the layout below. . scripts/lib-runscm.sh runscm_init "$STAGE" "$OUT" mkdir -p "$STAGE/_host" -# Extract musl directly into the cpio at tmp/musl-1.2.5/, then apply -# overrides + deletes — gives us a fully-prepared tree we can enumerate -# to drive the (kaem-friendly) flat run.scm. The podman bind mount -# reads it in place; the seed driver registers it via -# runscm_register_tree below. -MUSL_DIR=$STAGE/cpio/tmp/musl-1.2.5 -mkdir -p "$STAGE/cpio/tmp" -tar xzf "$MUSL_TARBALL" -C "$STAGE/cpio/tmp/" +# Extract musl directly into in/tmp/musl-1.2.5/, then apply overrides + +# deletes — gives us a fully-prepared tree we can enumerate to drive the +# (kaem-friendly) flat run.scm. The podman bind mount reads it in place; +# the seed driver picks it up via the `find in -type f` cpio walk. 
+MUSL_DIR=$STAGE/in/tmp/musl-1.2.5 +mkdir -p "$STAGE/in/tmp" +tar xzf "$MUSL_TARBALL" -C "$STAGE/in/tmp/" cp -R "$MUSL_OVERRIDES/." "$MUSL_DIR/" while read -r p; do [ -n "$p" ] && rm -rf "$MUSL_DIR/$p" @@ -210,20 +210,23 @@ else echo c > "$STAGE/_host/crt-mode" fi -# Pre-create per-source obj/ directories under the cpio so scheme1's -# (run "tcc" -c …) doesn't need to mkdir at runtime (tcc errors out if -# the parent dir is missing, and scheme1 has no mkdir primitive). +# Pre-create per-source obj/ directories under $STAGE/out/obj/musl-1.2.5/ +# so scheme1's (run "in/tcc" -c …) doesn't need to mkdir at runtime (tcc +# errors out if the parent dir is missing, and scheme1 has no mkdir +# primitive). awk ' { sub(/\.[^.]*$/, "") - if (match($0, /\/[^\/]*$/)) print "obj/" substr($0, 1, RSTART - 1) + if (match($0, /\/[^\/]*$/)) print substr($0, 1, RSTART - 1) } ' "$STAGE/_host/build-srcs.txt" | sort -u > "$STAGE/_host/build-objdirs.txt" -mkdir -p "$MUSL_DIR/obj/include/bits" "$MUSL_DIR/obj/src/internal" "$MUSL_DIR/obj/lib" "$MUSL_DIR/obj/crt" "$MUSL_DIR/lib" -while read -r d; do mkdir -p "$MUSL_DIR/$d"; done < "$STAGE/_host/build-objdirs.txt" +COBJ=$STAGE/out/obj/musl-1.2.5 +mkdir -p "$COBJ/crt" +while read -r d; do mkdir -p "$COBJ/$d"; done < "$STAGE/_host/build-objdirs.txt" -# Pre-generated alltypes.h + syscall.h for $MUSL_ARCH; replace the -# in-container awk that ran mkalltypes.sed and the SYS_ rewrite. +# Pre-generated alltypes.h + syscall.h for $MUSL_ARCH; live under in/ +# (read at compile time via -I$CIN/obj/include and -I$CIN/obj/src/internal). 
+mkdir -p "$MUSL_DIR/obj/include/bits" "$MUSL_DIR/obj/src/internal" cp "$MUSL_GENERATED/alltypes.h" "$MUSL_DIR/obj/include/bits/alltypes.h" cp "$MUSL_GENERATED/syscall.h" "$MUSL_DIR/obj/include/bits/syscall.h" echo '#define VERSION "1.2.5-tcc-boot5"' > "$MUSL_DIR/obj/src/internal/version.h" @@ -237,17 +240,16 @@ runscm_scheme1 "$BOOT2/scheme1" runscm_prelude scheme1/prelude.scm runscm_runscm "$RUNSCM" -# Chain binaries staged at flat root (cwd-relative names in run.scm). +# Chain binaries staged at flat in/ root (cwd-relative names in run.scm). runscm_input tcc "$BOOT4/tcc3" runscm_input libtcc1.a "$BOOT4/libtcc1.a" runscm_input catm "$BOOT2/catm" runscm_input tcc-stdarg-bridge.h "$BRIDGE_FILE" runscm_input hello.c scripts/boot-hello.c -# Musl tree is already laid out under $STAGE/cpio/tmp/musl-1.2.5/ above; -# register names so the seed driver knows which files to pack into cpio. -# (The podman driver bind-mounts the dir directly and ignores S_NAMES.) -runscm_register_tree tmp/musl-1.2.5 +# Musl tree is already laid out under $STAGE/in/tmp/musl-1.2.5/ above; +# both drivers pick it up automatically (podman bind-mounts $STAGE/in; +# seed packs `find in -type f` into the cpio). runscm_export libc.a runscm_export crt1.o diff --git a/scripts/lib-pipeline.sh b/scripts/lib-pipeline.sh @@ -7,9 +7,14 @@ # podman — accumulate stages into one /work/run.sh, run once in a # container against $IMAGE / $PLATFORM (env-set by caller). # seed — run each stage as one qemu boot of seed-kernel via -# tier1-gate.sh's pattern (cpio /init + inputs on virtio-blk -# hd0, output dumped to virtio-blk hd1 as SEEDFS, extract). -# aarch64 only. +# tier1-gate.sh's pattern (cpio /init + in/<inputs> on +# virtio-blk hd0, output dumped to virtio-blk hd1 as SEEDFS, +# extract). aarch64 only. +# +# Both drivers respect the `in/`+`out/` convention: inputs read from +# `in/<name>`, outputs written to `out/<name>`. 
The stage primitive +# rewrites argv tokens that match input/output names with the +# appropriate prefix; bare flag/literal tokens pass through untouched. # # DSL (source as `. scripts/lib-pipeline.sh`): # @@ -33,23 +38,28 @@ P_OUT_DIR= P_SCRIPT= P_IDX=0 P_EXPORTS= +P_INPUT_NAMES= +P_PRODUCED_NAMES= pipeline_init() { P_STAGE_DIR=$1; P_OUT_DIR=$2; P_DRIVER=$3 rm -rf "$P_STAGE_DIR" - mkdir -p "$P_STAGE_DIR/in" "$P_OUT_DIR" + mkdir -p "$P_STAGE_DIR/in" "$P_STAGE_DIR/out" "$P_OUT_DIR" P_IDX=0 P_EXPORTS= + P_INPUT_NAMES= + P_PRODUCED_NAMES= case "$P_DRIVER" in podman) - mkdir -p "$P_STAGE_DIR/out" P_SCRIPT=$P_STAGE_DIR/run.sh { echo '#!/bin/sh' echo 'set -eu' # Stage everything in /tmp (RAM tmpfs) — the seed-stage tools # do one syscall per byte, virtiofs round-trips would dominate. - echo 'cp /work/in/* /tmp/' + # Mirror the in/ + out/ split so argv references resolve. + echo 'mkdir -p /tmp/in /tmp/out' + echo 'cp /work/in/* /tmp/in/' echo 'cd /tmp' } > "$P_SCRIPT" ;; @@ -69,11 +79,31 @@ pipeline_input() { if [ "$P_DRIVER" = "seed" ]; then cp "$src" "$P_STAGE_DIR/work/$name" fi + P_INPUT_NAMES="$P_INPUT_NAMES $name" +} + +# Look up a token: if it names an input, prefix `in/`; if it names a +# previously produced output, prefix `out/`; else leave unchanged. +_p_lookup() { + tok=$1 + for n in $P_IN; do [ "$tok" = "$n" ] && { echo "in/$tok"; return; }; done + for n in $P_OUT; do [ "$tok" = "$n" ] && { echo "out/$tok"; return; }; done + echo "$tok" +} + +# Resolve where the bin binary lives: in/ if it's a pipeline_input, out/ +# if a prior stage produced it. Stages with the same name as both an +# input and a produced output use the produced one. 
+_p_bin_path() { + b=$1 + for n in $P_PRODUCED_NAMES; do [ "$b" = "$n" ] && { echo "out/$b"; return; }; done + for n in $P_INPUT_NAMES; do [ "$b" = "$n" ] && { echo "in/$b"; return; }; done + echo "$b" } stage() { bin=$1; shift - P_HEAD=""; P_IN=""; P_OUT=""; _s=head + P_HEAD_RAW=""; P_IN=""; P_OUT=""; _s=head while [ $# -gt 0 ]; do if [ "$1" = "--" ]; then case "$_s" in @@ -84,38 +114,56 @@ stage() { shift; continue fi case "$_s" in - head) P_HEAD="$P_HEAD $1" ;; + head) P_HEAD_RAW="$P_HEAD_RAW $1" ;; in) P_IN="$P_IN $1" ;; out) P_OUT="$P_OUT $1" ;; esac shift done [ "$_s" = "out" ] || { echo "lib-pipeline: stage needs '<bin> argv... -- inputs... -- outputs...'" >&2; exit 2; } + + # Rewrite head tokens with in/ or out/ prefixes. + P_HEAD="" + for tok in $P_HEAD_RAW; do + P_HEAD="$P_HEAD $(_p_lookup "$tok")" + done + P_BIN_PATH=$(_p_bin_path "$bin") + P_IDX=$((P_IDX + 1)) case "$P_DRIVER" in podman) _stage_podman ;; seed) _stage_seed ;; esac + + # Track produced names so later stages can locate the binary if a + # subsequent `stage` uses one of these as its bin. + for o in $P_OUT; do P_PRODUCED_NAMES="$P_PRODUCED_NAMES $o"; done } _stage_podman() { { echo "# stage $P_IDX: $bin$P_HEAD" - echo "chmod +x ./$bin" - echo "./$bin$P_HEAD" + echo "chmod +x ./$P_BIN_PATH" + echo "./$P_BIN_PATH$P_HEAD" + # Mirror this stage's outputs back into in/ so a later stage that + # declares one of them as an input finds it under in/<name>. + # (The seed driver does this naturally via its per-stage cpio.) 
+ for o in $P_OUT; do + echo "cp -f out/$o in/$o" + done } >> "$P_SCRIPT" } _stage_seed() { cpio_dir=$P_STAGE_DIR/s$(printf '%02d' "$P_IDX") - rm -rf "$cpio_dir"; mkdir -p "$cpio_dir/cpio" + rm -rf "$cpio_dir"; mkdir -p "$cpio_dir/cpio/in" cp "$P_STAGE_DIR/work/$bin" "$cpio_dir/cpio/init" chmod +x "$cpio_dir/cpio/init" NAMES="init" for inp in $P_IN; do - cp "$P_STAGE_DIR/work/$inp" "$cpio_dir/cpio/$inp" + cp "$P_STAGE_DIR/work/$inp" "$cpio_dir/cpio/in/$inp" NAMES="$NAMES -$inp" +in/$inp" done ( cd "$cpio_dir/cpio" && printf '%s\n' "$NAMES" | cpio -o -H newc 2>/dev/null ) > "$cpio_dir/initramfs.cpio" sz=$(wc -c < "$cpio_dir/initramfs.cpio") @@ -143,6 +191,7 @@ $inp" QPID=$! ( sleep 240; kill -9 $QPID 2>/dev/null ) </dev/null >/dev/null 2>&1 & WATCHER=$! + disown $WATCHER 2>/dev/null || true wait $QPID 2>/dev/null || true kill $WATCHER 2>/dev/null || true @@ -186,7 +235,7 @@ _run_podman() { : "${IMAGE:?lib-pipeline:podman: IMAGE not set}" if [ -n "$P_EXPORTS" ]; then cmd="cp" - for n in $P_EXPORTS; do cmd="$cmd $n"; done + for n in $P_EXPORTS; do cmd="$cmd out/$n"; done cmd="$cmd /work/out/" echo "$cmd" >> "$P_SCRIPT" fi @@ -194,6 +243,9 @@ _run_podman() { SDIR=$(cd "$P_STAGE_DIR" && pwd) podman run --rm -i --pull=never --platform "$PLATFORM" \ --tmpfs /tmp:size=512M \ - -v "$SDIR:/work" -w /work "$IMAGE" \ + -v "$SDIR/run.sh:/work/run.sh:ro" \ + -v "$SDIR/in:/work/in:ro" \ + -v "$SDIR/out:/work/out:rw" \ + -w /work "$IMAGE" \ sh -eu /work/run.sh } diff --git a/scripts/lib-runscm.sh b/scripts/lib-runscm.sh @@ -2,16 +2,18 @@ # # Boot3/4/5 each drive a per-stage pipeline by invoking scheme1 against a # host-generated run.scm. Two transports: -# DRIVER=podman → bind-mount the staging dir at /work in a scratch+busybox -# container, exec scheme1 ./combined.scm with cwd=/work. +# DRIVER=podman → bind-mount in/ ro and out/ rw under /work in a +# scratch+busybox container, exec in/scheme1 +# in/combined.scm with cwd=/work. 
# DRIVER=seed → pack the staging dir into a cpio on a virtio-blk read- -# only disk, boot the seed kernel with init=scheme1 and -# combined.scm, recover outputs via the SEEDFS dump on a -# second virtio-blk disk. +# only disk (init at cpio root + in/ subtree), boot the +# seed kernel with init=init and combined.scm, recover +# outputs via the SEEDFS dump on a second virtio-blk +# disk. The host extractor filters to out/-prefixed +# entries, strips the prefix, writes to $STAGE/out/. # -# Both drivers see the same flat namespace: scheme1's cwd is the staging -# root, and run.scm uses cwd-relative paths exclusively. The host stages -# inputs once into <staging-dir>/cpio/<name>; runscm_run dispatches. +# Both drivers see the same flat namespace; run.scm uses explicit +# in/<name> for reads and out/<name> for writes. # # DSL (source as `. scripts/lib-runscm.sh`): # @@ -19,8 +21,8 @@ # runscm_scheme1 <path> # init=scheme1 (boot2) # runscm_prelude <path> # scheme1/prelude.scm # runscm_runscm <path> # the host-generated driver -# runscm_input <name> <host-path> # repeatable; staged at cpio/<name> -# runscm_input_tree <prefix> <src-root> # repeatable; tree under cpio/<prefix> +# runscm_input <name> <host-path> # repeatable; staged at in/<name> +# runscm_input_tree <prefix> <src-root> # repeatable; tree under in/<prefix> # runscm_export <name> # repeatable; out file # runscm_run [timeout-s] # default 600s # @@ -34,15 +36,14 @@ S_OUT_DIR= S_SCHEME1= S_PRELUDE= S_RUNSCM= -S_NAMES= S_EXPORTS= runscm_init() { S_STAGE_DIR=$1; S_OUT_DIR=$2 rm -rf "$S_STAGE_DIR" - mkdir -p "$S_STAGE_DIR/cpio" "$S_OUT_DIR" + mkdir -p "$S_STAGE_DIR/in" "$S_STAGE_DIR/out" "$S_OUT_DIR" S_SCHEME1=; S_PRELUDE=; S_RUNSCM= - S_NAMES=; S_EXPORTS= + S_EXPORTS= } runscm_scheme1() { S_SCHEME1=$1; } @@ -52,67 +53,41 @@ runscm_runscm() { S_RUNSCM=$1; } runscm_input() { name=$1; src=$2 case "$name" in - */*) mkdir -p "$S_STAGE_DIR/cpio/$(dirname "$name")" ;; + */*) mkdir -p "$S_STAGE_DIR/in/$(dirname "$name")" ;; 
esac - cp "$src" "$S_STAGE_DIR/cpio/$name" - S_NAMES="$S_NAMES -$name" + cp "$src" "$S_STAGE_DIR/in/$name" } -# Stage every regular file under <src-root> into the cpio at <prefix>/..., -# preserving the relative directory tree. Names are appended to S_NAMES. -# The find pipeline runs in a subshell so it can't mutate S_NAMES directly; -# names are collected via a tempfile then appended at the end. +# Stage every regular file under <src-root> into in/<prefix>/..., +# preserving the relative directory tree. runscm_input_tree() { prefix=$1; src_root=$2 [ -d "$src_root" ] || { echo "runscm: input_tree: $src_root not a dir" >&2; exit 2; } - tmp=$S_STAGE_DIR/.tree-names - : > "$tmp" ( cd "$src_root" && find . -type f ) | sed 's|^\./||' | sort | while read -r rel; do [ -n "$rel" ] || continue - mkdir -p "$S_STAGE_DIR/cpio/$prefix/$(dirname "$rel")" - cp "$src_root/$rel" "$S_STAGE_DIR/cpio/$prefix/$rel" - printf '%s/%s\n' "$prefix" "$rel" >> "$tmp" + mkdir -p "$S_STAGE_DIR/in/$prefix/$(dirname "$rel")" + cp "$src_root/$rel" "$S_STAGE_DIR/in/$prefix/$rel" done - while read -r n; do - S_NAMES="$S_NAMES -$n" - done < "$tmp" - rm -f "$tmp" } runscm_export() { S_EXPORTS="$S_EXPORTS $1" } -# Like runscm_input_tree but the tree is already laid out under cpio/<prefix>; -# this just enumerates the regular files so the seed driver's cpio packer -# includes them. Use when the caller has staged a large source tree in -# place (e.g. tar xzf … -C <staging-root>/cpio/<prefix>) and wants to -# avoid a second filesystem copy. -runscm_register_tree() { - prefix=$1 - [ -d "$S_STAGE_DIR/cpio/$prefix" ] || { - echo "runscm: register_tree: cpio/$prefix not a dir" >&2; exit 2; } - tmp=$S_STAGE_DIR/.tree-names - ( cd "$S_STAGE_DIR/cpio/$prefix" && find . 
-type f ) | sed 's|^\./||' | sort > "$tmp" - while read -r rel; do - [ -n "$rel" ] || continue - S_NAMES="$S_NAMES -$prefix/$rel" - done < "$tmp" - rm -f "$tmp" -} - runscm_run() { timeout=${1:-600} [ -n "$S_SCHEME1" ] || { echo "runscm: scheme1 not set" >&2; exit 2; } [ -n "$S_PRELUDE" ] || { echo "runscm: prelude not set" >&2; exit 2; } [ -n "$S_RUNSCM" ] || { echo "runscm: run.scm not set" >&2; exit 2; } - cp "$S_SCHEME1" "$S_STAGE_DIR/cpio/scheme1" - chmod +x "$S_STAGE_DIR/cpio/scheme1" - cat "$S_PRELUDE" "$S_RUNSCM" > "$S_STAGE_DIR/cpio/combined.scm" - cp "$S_RUNSCM" "$S_STAGE_DIR/cpio/run.scm" + cp "$S_SCHEME1" "$S_STAGE_DIR/in/scheme1" + chmod +x "$S_STAGE_DIR/in/scheme1" + cat "$S_PRELUDE" "$S_RUNSCM" > "$S_STAGE_DIR/in/combined.scm" + # Top-level reference copy of run.scm for human inspection. + # boot4/5 gen scripts already write here; skip the self-copy. + case "$S_RUNSCM" in + "$S_STAGE_DIR/run.scm") : ;; + *) cp "$S_RUNSCM" "$S_STAGE_DIR/run.scm" ;; + esac case "${DRIVER:-podman}" in podman) _runscm_run_podman "$timeout" ;; @@ -121,43 +96,44 @@ runscm_run() { esac for n in $S_EXPORTS; do - if [ ! -f "$S_DUMP_DIR/$n" ]; then + if [ ! -f "$S_STAGE_DIR/out/$n" ]; then echo "[runscm/$DRIVER] FAIL: missing output '$n'" >&2 - ls "$S_DUMP_DIR" >&2 || true + ls "$S_STAGE_DIR/out" >&2 || true exit 5 fi - cp "$S_DUMP_DIR/$n" "$S_OUT_DIR/$n" + cp "$S_STAGE_DIR/out/$n" "$S_OUT_DIR/$n" chmod 0700 "$S_OUT_DIR/$n" done } -# Podman: bind-mount cpio dir at /work writeable, exec scheme1 there. -# Outputs land in $cpio/<name> directly via the bind mount. +# Podman: bind-mount in/ ro and out/ rw under /work; exec in/scheme1. +# Outputs land in $S_STAGE_DIR/out/ directly via the rw bind mount. 
_runscm_run_podman() { : "${IMAGE:?lib-runscm: IMAGE not set}" : "${PLATFORM:?lib-runscm: PLATFORM not set}" - abs=$(cd "$S_STAGE_DIR/cpio" && pwd) + in_abs=$(cd "$S_STAGE_DIR/in" && pwd) + out_abs=$(cd "$S_STAGE_DIR/out" && pwd) echo "[runscm/podman] scheme1 combined.scm under $IMAGE" >&2 podman run --rm -i --pull=never --platform "$PLATFORM" \ - -v "$abs:/work" -w /work "$IMAGE" \ - ./scheme1 ./combined.scm - S_DUMP_DIR=$S_STAGE_DIR/cpio + -v "$in_abs:/work/in:ro" \ + -v "$out_abs:/work/out:rw" \ + -w /work "$IMAGE" \ + in/scheme1 in/combined.scm } -# Seed: stage cpio onto virtio-blk hd0 (read-only), boot kernel with -# init=scheme1, recover outputs from the SEEDFS dump on hd1. +# Seed: pack cpio with `init` at the root and the in/ subtree under it; +# boot kernel with init=init combined.scm; recover outputs from the +# SEEDFS dump on hd1; extract filters out/-prefixed entries directly +# into $S_STAGE_DIR/out/. _runscm_run_seed() { timeout=$1 : "${KERNEL_IMAGE:?lib-runscm: KERNEL_IMAGE not set}" : "${EXTRACT:?lib-runscm: EXTRACT not set}" mem=${QEMU_MEM:-2048M} - cp "$S_STAGE_DIR/cpio/scheme1" "$S_STAGE_DIR/cpio/init" - chmod +x "$S_STAGE_DIR/cpio/init" - NAMES="init -combined.scm -run.scm -scheme1$S_NAMES" - ( cd "$S_STAGE_DIR/cpio" && printf '%s\n' "$NAMES" | cpio -o -H newc 2>/dev/null ) > "$S_STAGE_DIR/initramfs.cpio" + cp "$S_STAGE_DIR/in/scheme1" "$S_STAGE_DIR/init" + chmod +x "$S_STAGE_DIR/init" + ( cd "$S_STAGE_DIR" && { echo init; find in -type f; } | sort -u | cpio -o -H newc 2>/dev/null ) \ + > "$S_STAGE_DIR/initramfs.cpio" sz=$(wc -c < "$S_STAGE_DIR/initramfs.cpio") pad=$(( (512 - sz % 512) % 512 )) if [ "$pad" -gt 0 ]; then @@ -177,7 +153,7 @@ scheme1$S_NAMES" -device virtio-blk-device,drive=hd0 \ -drive file="$S_STAGE_DIR/out.img",if=none,format=raw,id=hd1 \ -device virtio-blk-device,drive=hd1 \ - -append "init combined.scm" \ + -append "init in/combined.scm" \ > "$TRANSCRIPT" 2>&1 & QPID=$! 
( sleep "$timeout"; kill -9 $QPID 2>/dev/null ) </dev/null >/dev/null 2>&1 & @@ -192,8 +168,7 @@ scheme1$S_NAMES" wait $QPID 2>/dev/null || true kill $WATCHER 2>/dev/null || true - mkdir -p "$S_STAGE_DIR/dump" - if ! "$EXTRACT" "$S_STAGE_DIR/dump" "$S_STAGE_DIR/out.img" >/dev/null 2>&1; then + if ! "$EXTRACT" "$S_STAGE_DIR/out" "$S_STAGE_DIR/out.img" >/dev/null 2>&1; then echo "[runscm/seed] FAIL: extract-blk failed (kernel didn't reach exit?)" >&2 tail -40 "$TRANSCRIPT" >&2 exit 3 @@ -205,6 +180,4 @@ scheme1$S_NAMES" tail -40 "$TRANSCRIPT" >&2 exit 4 ;; esac - - S_DUMP_DIR=$S_STAGE_DIR/dump } diff --git a/seed-kernel/scripts/extract-blk.sh b/seed-kernel/scripts/extract-blk.sh @@ -9,6 +9,10 @@ Layout (sector-aligned, little-endian): (entry size 112 B, 4 entries/sector → T=ceil(nfiles/4)) sector T+1..: file data, each padded up to a 512-byte boundary. +The kernel dumps every tmpfs entry, including in/-prefixed inputs. We +filter to out/-prefixed entries here, strip the prefix, and write to +<outdir>/<rel> — so callers can point <outdir> at $STAGE/out/ directly. 
+ Usage: extract-blk.sh <outdir> <out.img> """ @@ -52,6 +56,7 @@ def main() -> int: if len(table) < nfiles * ENT_SIZE: fail("image truncated mid-table") + written = 0 for i in range(nfiles): path_bytes, off_sectors, _pad, size_bytes = struct.unpack_from( ENT_FMT, table, i * ENT_SIZE @@ -60,11 +65,18 @@ def main() -> int: if not path: fail(f"empty path at entry {i}") - out = os.path.join(outdir, path) + if not path.startswith("out/"): + continue + rel = path[len("out/"):] + if not rel: + continue + + out = os.path.join(outdir, rel) os.makedirs(os.path.dirname(out) or ".", exist_ok=True) if size_bytes == 0: open(out, "wb").close() + written += 1 continue f.seek(off_sectors * 512) @@ -76,8 +88,9 @@ def main() -> int: fail(f"image truncated reading {path}") g.write(chunk) remaining -= len(chunk) + written += 1 - print(f"extract-blk: wrote {nfiles} file(s) to {outdir}", file=sys.stderr) + print(f"extract-blk: wrote {written} file(s) to {outdir} (filtered from {nfiles})", file=sys.stderr) return 0