boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 47e13ea04f3c6287b8aab1f6d125a7596d309047
parent df64aa7a117864615950a5249af34444eb75b249
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Wed, 20 May 2026 14:34:57 -0700

release: rev-free tarball name; mes-libc strtoll + execvp stubs

Two pieces, surfaced while validating per-arch releases:

1. Release tarballs are now content-addressed: the tarball name,
   top-level directory, and bundled docs no longer embed the git rev.
   sha256 reflects only the bundled inputs + manifests, so two builds
   from different repo paths or different commits with identical
   content hash identically. Provenance (git rev with a `-dirty` marker
   when the working tree is dirty, build timestamp, build host arch) is
   emitted as a sidecar `dist/boot2-<arch>.tar.gz.provenance` outside
   the tarball, so it doesn't perturb the content hash.

   - `tools/mkrelease.sh`: NAME=boot2-$ARCH (no $REV); REV env dropped;
     @REV@ substitution dropped from shipped README + verify.sh.
   - `tools/release.sh`: same naming; writes the .provenance sidecar
     when minting; tags rev with `-dirty` if working tree is dirty.
   - `tools/release/{README.md, verify.sh}`: rev-free wording.

2. mes-libc gained two stubs the per-arch tcc backends need:

   - `strtoll`: riscv64's tcc backend has a manual 64-bit `.quad` asm
     parser (`vl = strtoll(p, ...)`); aarch64/amd64 use the simple
     `size=8; goto asm_data;` path so they didn't hit this. Shimmed
     to delegate to `strtol` (matches the existing `strtoull` shim).
   - `execvp`: amd64's `tcc_tool_cross()` re-execs as `i386-tcc` /
     `x86_64-tcc` for `-m32`/`-m64`. Dead code at runtime for the
     bootstrap (we never pass those flags), but cc.scm + hex2pp still
     demand the symbol at link time. Added as an ENOSYS stub matching
     `execve`.

Validated: aarch64 re-mints cleanly under the rev-free layout
(sha 749f746c…). amd64 + riscv64 reproductions next.

Diffstat:
M bootprep/headers/stdlib.h | 1 +
M tools/mkrelease.sh | 17 ++++++++---------
M tools/release.sh | 33 +++++++++++++++++++++++++--------
M tools/release/README.md | 13 +++++++++----
M tools/release/verify.sh | 2 +-
M vendor/mes-libc/libc.c | 11 +++++++++--
6 files changed, 53 insertions(+), 24 deletions(-)

diff --git a/bootprep/headers/stdlib.h b/bootprep/headers/stdlib.h @@ -13,6 +13,7 @@ long atol(const char *); long strtol(const char *, char **, int); unsigned long strtoul(const char *, char **, int); double strtod(const char *, char **); +long long strtoll(const char *, char **, int); unsigned long long strtoull(const char *, char **, int); char *getenv(const char *); int system(const char *); diff --git a/tools/mkrelease.sh b/tools/mkrelease.sh @@ -28,8 +28,11 @@ ## DRIVER podman (default) | seed — which build tree to hash for the ## output manifest. The manifest is ## claimed driver-agnostic regardless. -## REV short rev string to tag the tarball name; auto-detected from -## `git rev-parse --short HEAD` when unset, "norev" otherwise. +## +## The tarball name is `boot2-<arch>.tar.gz` — deliberately rev-free, so +## the tarball's sha256 reflects content and nothing else. Provenance +## (git rev, build timestamp) lives in a sidecar produced by the +## validated mint path (tools/release.sh), not inside the tarball. set -eu @@ -54,11 +57,7 @@ case "$ARCH" in riscv64) KERNEL_NAME=kernel.elf ;; esac -REV=${REV:-} -if [ -z "$REV" ]; then - REV=$(git rev-parse --short HEAD 2>/dev/null || echo norev) -fi -NAME=boot2-$ARCH-$REV +NAME=boot2-$ARCH SRC_TREE=build/$ARCH/src BUILD_TREE=build/$ARCH/$DRIVER @@ -91,11 +90,11 @@ cp tools/release/README.md "$STAGING/README.md" cp tools/release/verify.sh "$STAGING/verify.sh" chmod +x "$STAGING/verify.sh" -# Substitute @ARCH@ / @REV@ / @KERNEL_NAME@ into shipped docs/scripts. +# Substitute @ARCH@ / @KERNEL_NAME@ into shipped docs/scripts. No git +# rev is embedded — content stays rev-free so sha256 reflects content. 
for f in "$STAGING/README.md" "$STAGING/verify.sh"; do sed -i.bak \ -e "s/@ARCH@/$ARCH/g" \ - -e "s/@REV@/$REV/g" \ -e "s/@KERNEL_NAME@/$KERNEL_NAME/g" \ "$f" rm -f "$f.bak" diff --git a/tools/release.sh b/tools/release.sh @@ -29,8 +29,11 @@ ## <arch> ∈ {aarch64, amd64, riscv64} ## Env: ## DRIVER podman (default) | seed — passed through to make. -## REV short rev string for the tarball name; auto-detected from -## git when unset. +## +## Tarball name is `boot2-<arch>.tar.gz` — no git rev embedded, so its +## sha256 is a pure content hash. Provenance (rev, build date, build +## host) is written to `dist/boot2-<arch>.tar.gz.provenance` next to +## the tarball. set -eu @@ -49,11 +52,13 @@ esac ROOT=$(cd "$(dirname "$0")/.." && pwd) cd "$ROOT" -REV=${REV:-} -if [ -z "$REV" ]; then - REV=$(git rev-parse --short HEAD 2>/dev/null || echo norev) +REV=$(git rev-parse --short HEAD 2>/dev/null || echo norev) +DIRTY= +if ! git diff --quiet HEAD 2>/dev/null || \ + ! git diff --quiet --cached HEAD 2>/dev/null; then + DIRTY=-dirty fi -NAME=boot2-$ARCH-$REV +NAME=boot2-$ARCH REL_DIR=build/$ARCH/release DIST=dist @@ -82,8 +87,8 @@ do_pass() { hr "pass $_label" log "make clean" make clean >/dev/null - log "make package ARCH=$ARCH DRIVER=$DRIVER REV=$REV" - REV=$REV make package ARCH="$ARCH" DRIVER="$DRIVER" + log "make package ARCH=$ARCH DRIVER=$DRIVER" + make package ARCH="$ARCH" DRIVER="$DRIVER" _src=$REL_DIR/$NAME.tar.gz _dst=$VAULT/$NAME.pass-$_label.tar.gz [ -f "$_src" ] || { log "FAIL: pass $_label produced no $_src"; exit 1; } @@ -138,6 +143,18 @@ mkdir -p "$DIST" cp "$VAULT/$NAME.pass-A.tar.gz" "$DIST/$NAME.tar.gz" printf '%s %s\n' "$SHA_A" "$NAME.tar.gz" > "$DIST/$NAME.tar.gz.sha256" +# Provenance sidecar — keeps git rev / build host / driver / timestamp +# outside the tarball so they don't perturb the content hash. 
+{ + printf 'tarball: %s.tar.gz\n' "$NAME" + printf 'sha256: %s\n' "$SHA_A" + printf 'arch: %s\n' "$ARCH" + printf 'driver: %s\n' "$DRIVER" + printf 'git_rev: %s%s\n' "$REV" "$DIRTY" + printf 'built_at: %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" + printf 'built_on: %s %s\n' "$(uname -s)" "$(uname -m)" +} > "$DIST/$NAME.tar.gz.provenance" + # Clean up the verify dir (it's reproducible from the tarball). rm -rf "$VERIFY_BASE" diff --git a/tools/release/README.md b/tools/release/README.md @@ -1,14 +1,19 @@ -# boot2 release — @ARCH@ @REV@ +# boot2 release — @ARCH@ This tarball is a self-contained input bundle for the boot2 bootstrap chain on `@ARCH@`. Extract it, run `./verify.sh`, and the chain will be re-run from boot0 through boot6 using only the bundled inputs. The outputs are sha256-compared against `OUTPUT_MANIFEST.txt`. +The tarball is content-addressed: its sha256 reflects only the bundled +inputs + manifests. Provenance (the git rev it was built from, build +date, build host arch) lives in the sidecar `<tarball>.provenance` +file in the same directory as the tarball, not inside it. + ## Contents ``` -boot2-@ARCH@-@REV@/ +boot2-@ARCH@/ ├── README.md this file ├── verify.sh build + diff driver ├── INPUT_MANIFEST.txt sha256 of every file under src/ + boot/ @@ -44,8 +49,8 @@ You can audit the manifest end-to-end before running anything. > with a clearer message. ```sh -tar xzf boot2-@ARCH@-@REV@.tar.gz -cd boot2-@ARCH@-@REV@ +tar xzf boot2-@ARCH@.tar.gz +cd boot2-@ARCH@ # Default: DRIVER=podman. Builds container images on first run. ./verify.sh diff --git a/tools/release/verify.sh b/tools/release/verify.sh @@ -2,7 +2,7 @@ ## verify.sh — drive boot0..boot6 off the bundled inputs and compare ## the outputs against OUTPUT_MANIFEST.txt. ## -## This script ships inside a boot2-@ARCH@-@REV@ release tarball. It +## This script ships inside a boot2-@ARCH@ release tarball. 
It ## stages the sealed src/ tree at build/@ARCH@/src/ (the layout every ## boot/bootN.sh expects), runs the chain end-to-end, then diffs each ## per-stage artifact's sha256 against the bundled manifest. diff --git a/vendor/mes-libc/libc.c b/vendor/mes-libc/libc.c @@ -6,14 +6,14 @@ * Provides: * syscalls _read _write _open3 close lseek brk unlink _exit * raise abort environ getenv __libc_init - * ENOSYS stubs: access execve fsync rmdir stat strtod + * ENOSYS stubs: access execve execvp fsync rmdir stat strtod * I/O stdin/stdout/stderr (FILE *), FILE = long alias for fd * fopen fdopen fclose fflush fseek ftell remove * fread fwrite fputs fputc fgetc puts strdup * fprintf printf snprintf sprintf * vfprintf vsnprintf vprintf vsprintf * stdlib malloc free realloc qsort exit atoi - * strtol strtoul strtoull strtof + * strtol strtoul strtoll strtoull strtof * string strlen strcmp strcpy strncmp strncpy strchr strrchr * strstr strcat strdup memmem * ctype isdigit islower isnumber isspace isxdigit toupper @@ -204,6 +204,12 @@ execve (char const *path, char *const argv[], char *const envp[]) } int +execvp (char const *file, char *const argv[]) +{ + (void) file; (void) argv; errno = ENOSYS; return -1; +} + +int fsync (int fd) { (void) fd; return 0; @@ -448,6 +454,7 @@ strtol (char const *s, char **tail, int base) } unsigned long strtoul (char const *s, char **tail, int base) { return strtol (s, tail, base); } +long long strtoll (char const *s, char **tail, int base) { return strtol (s, tail, base); } unsigned long long strtoull (char const *s, char **tail, int base) { return strtol (s, tail, base); } float strtof (char const *s, char **tail) { return strtod (s, tail); }