boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 9df676723aaea35b2c541a711b6219dc74a50c46
parent a5249ddd82cee9881f5db480ce7773c660453804
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sun, 26 Apr 2026 02:39:12 -0700

make: wire cc-* suites into run-tests.sh + add cc-e2e

Six new suites plumbed through the existing Make + run-tests.sh
machinery, mirroring the m1pp / p1 / scheme1 pattern:

  make test SUITE=cc-util    14 fixtures
  make test SUITE=cc-lex     16 fixtures
  make test SUITE=cc-pp      22 fixtures
  make test SUITE=cc-cg      15 fixtures
  make test SUITE=cc-parse   15 fixtures
  make test SUITE=cc-e2e      1 fixture (00-return-argc -> ELF -> run)

Bare `make test` (no SUITE) now sweeps all nine suites.

Build artifacts:
  build/$ARCH/cc/cc.scm        — catm'd compiler source: the scheme1
                                 prelude plus the seven cc/*.scm files.
                                 Per-arch by directory naming, identical
                                 content (catm runs in the per-arch
                                 container).

  make cc                      — builds the artifact for $ARCH.

Suite-internal infrastructure files renamed with `_` prefix so
discover() ignores them automatically (matches the existing convention
that filenames starting with `_` are skipped):

  tests/cc-lex/run-lex.scm   -> _run-lex.scm
  tests/cc-pp/run-pp.scm     -> _run-pp.scm
  tests/cc-parse/cg-trace.scm    -> _cg-trace.scm
  tests/cc-parse/mini-prelude.scm -> _mini-prelude.scm

  tests/cc-lex/run.sh          deleted (subsumed by run-tests.sh)

Two `.expected` files restored for cc-util's die tests (09, 10) and
two added for cc-pp's negative tests (#error, #include) so the
merged-stdout+stderr capture path matches the scheme1 suite's
convention (every fixture has a deterministic .expected).

Lessons-learned (recorded in run-tests.sh comments):

- `cmd >file 2>&1; rc=$?` swallows non-zero exit under busybox sh's
  redirection semantics. All cc suite runners use the if/else pattern
  that the existing scheme1 suite already uses.
- `expout=$(grep -v '^;;' file)` exits non-zero when zero matches,
  killing the script. Guarded with `|| true`.

Diffstat:
M Makefile | 41 +++++++++++++++++++++++++++++++++++++++--
M scripts/run-tests.sh | 201 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
R tests/cc-lex/run-lex.scm -> tests/cc-lex/_run-lex.scm | 0
D tests/cc-lex/run.sh | 104 -------------------------------------------------------------------------------
R tests/cc-parse/cg-trace.scm -> tests/cc-parse/_cg-trace.scm | 0
R tests/cc-parse/mini-prelude.scm -> tests/cc-parse/_mini-prelude.scm | 0
A tests/cc-pp/16-error.expected | 1 +
A tests/cc-pp/17-include-rejected.expected | 1 +
R tests/cc-pp/run-pp.scm -> tests/cc-pp/_run-pp.scm | 0
A tests/cc-util/09-die-noloc.expected | 1 +
A tests/cc-util/10-die-no-irritants.expected | 1 +
11 files changed, 239 insertions(+), 111 deletions(-)

diff --git a/Makefile b/Makefile @@ -16,6 +16,7 @@ # make pokem build pokem for ARCH # make hello build hello via the bootstrap chain # make scheme1 build the scheme1 interpreter for ARCH +# make cc catm the cc compiler source for ARCH # make run run hello in the container # make test every suite, every arch # make test SUITE=m1pp m1pp suite, every arch @@ -61,7 +62,7 @@ PODMAN = podman run --rm --pull=never --platform $(PLATFORM_$(1)) \ # --- Targets -------------------------------------------------------------- -.PHONY: all m1pp pokem hello scheme1 run test image tools tables \ +.PHONY: all m1pp pokem hello scheme1 cc run test image tools tables \ tools-native cloc clean help all: m1pp pokem @@ -156,10 +157,18 @@ HELLO_BINS := $(foreach a,$(ALL_ARCHES),build/$(a)/hello) SCHEME1_SRC := scheme1/scheme1.P1pp SCHEME1_BINS := $(foreach a,$(ALL_ARCHES),build/$(a)/scheme1) +# Catm'd cc compiler source. Per-arch only because catm runs in the +# per-arch container; the resulting .scm is identical across arches but +# we keep it under build/$arch/ for naming consistency. +CC_SRCS := scheme1/prelude.scm cc/util.scm cc/data.scm cc/lex.scm \ + cc/pp.scm cc/cg.scm cc/parse.scm cc/main.scm +CC_BINS := $(foreach a,$(ALL_ARCHES),build/$(a)/cc/cc.scm) + m1pp: $(OUT_DIR)/m1pp pokem: $(OUT_DIR)/pokem hello: $(OUT_DIR)/hello scheme1: $(OUT_DIR)/scheme1 +cc: $(OUT_DIR)/cc/cc.scm # Per-arch deps for .P1/.M1 builds (raw M1, no macro expansion). P1_BUILD_DEPS = scripts/lint.sh scripts/boot-build-p1.sh \ @@ -187,6 +196,12 @@ $(HELLO_BINS): build/%/hello: $(HELLO_SRC) $(P1_BUILD_DEPS) $(SCHEME1_BINS): build/%/scheme1: $(SCHEME1_SRC) $(P1PP_BUILD_DEPS) $(call PODMAN,$*) sh scripts/boot-build-p1pp.sh $(SCHEME1_SRC) $@ +# cc.scm: catm prelude + the six cc/*.scm files into one source the +# scheme1 interpreter can run. Catm runs inside the per-arch container. 
+$(CC_BINS): build/%/cc/cc.scm: $(CC_SRCS) build/%/.image build/%/tools/M0 + mkdir -p $(@D) + $(call PODMAN,$*) build/$*/tools/catm $@ $(CC_SRCS) + run: $(OUT_DIR)/hello $(IMAGE_STAMP) $(call PODMAN,$(ARCH)) ./$(OUT_DIR)/hello @@ -237,11 +252,27 @@ TEST_P1_DEPS := $(foreach a,$(TEST_ARCHES), \ TEST_SCHEME1_DEPS := $(foreach a,$(TEST_ARCHES), \ build/$(a)/.image build/$(a)/tools/M0 build/$(a)/m1pp build/$(a)/scheme1) +# cc-* unit suites (lex / pp / cg / parse / util) just need scheme1 + +# the catm'd cc compiler source as artifacts. +TEST_CC_UNIT_DEPS := $(foreach a,$(TEST_ARCHES), \ + build/$(a)/.image build/$(a)/tools/M0 build/$(a)/m1pp build/$(a)/scheme1 \ + build/$(a)/cc/cc.scm) + +# cc-e2e additionally needs the P1pp toolchain to assemble cc-emitted +# P1pp into native ELF. +TEST_CC_E2E_DEPS := $(TEST_CC_UNIT_DEPS) + test: ifeq ($(SUITE),) @$(MAKE) --no-print-directory test SUITE=m1pp @$(MAKE) --no-print-directory test SUITE=p1 @$(MAKE) --no-print-directory test SUITE=scheme1 + @$(MAKE) --no-print-directory test SUITE=cc-util + @$(MAKE) --no-print-directory test SUITE=cc-lex + @$(MAKE) --no-print-directory test SUITE=cc-pp + @$(MAKE) --no-print-directory test SUITE=cc-cg + @$(MAKE) --no-print-directory test SUITE=cc-parse + @$(MAKE) --no-print-directory test SUITE=cc-e2e else ifeq ($(SUITE),m1pp) @$(MAKE) --no-print-directory $(TEST_M1PP_DEPS) sh scripts/run-tests.sh --suite=m1pp $(if $(ARCH_FILTER),--arch=$(ARCH_FILTER)) @@ -251,6 +282,12 @@ else ifeq ($(SUITE),p1) else ifeq ($(SUITE),scheme1) @$(MAKE) --no-print-directory $(TEST_SCHEME1_DEPS) sh scripts/run-tests.sh --suite=scheme1 $(if $(ARCH_FILTER),--arch=$(ARCH_FILTER)) +else ifeq ($(filter $(SUITE),cc-util cc-lex cc-pp cc-cg cc-parse),$(SUITE)) + @$(MAKE) --no-print-directory $(TEST_CC_UNIT_DEPS) + sh scripts/run-tests.sh --suite=$(SUITE) $(if $(ARCH_FILTER),--arch=$(ARCH_FILTER)) +else ifeq ($(SUITE),cc-e2e) + @$(MAKE) --no-print-directory $(TEST_CC_E2E_DEPS) + sh scripts/run-tests.sh --suite=cc-e2e 
$(if $(ARCH_FILTER),--arch=$(ARCH_FILTER)) else - @echo "unknown SUITE='$(SUITE)' (expected m1pp | p1 | scheme1)" >&2; exit 2 + @echo "unknown SUITE='$(SUITE)' (m1pp | p1 | scheme1 | cc-util | cc-lex | cc-pp | cc-cg | cc-parse | cc-e2e)" >&2; exit 2 endif diff --git a/scripts/run-tests.sh b/scripts/run-tests.sh @@ -47,8 +47,8 @@ while [ "$#" -gt 0 ]; do done case "$SUITE" in - m1pp|p1|scheme1) ;; - "") echo "$0: --suite required (m1pp | p1 | scheme1)" >&2; exit 2 ;; + m1pp|p1|scheme1|cc-util|cc-lex|cc-pp|cc-cg|cc-parse|cc-e2e) ;; + "") echo "$0: --suite required (m1pp | p1 | scheme1 | cc-util | cc-lex | cc-pp | cc-cg | cc-parse | cc-e2e)" >&2; exit 2 ;; *) echo "$0: unknown suite '$SUITE'" >&2; exit 2 ;; esac @@ -280,10 +280,201 @@ run_scheme1_suite() { done } +## --- cc-* suites -------------------------------------------------------- +## +## Six suites, all sharing the same shape: catm a list of layer files +## into a per-fixture combined .scm, run scheme1 on the result, diff +## merged stdout+stderr against an expected-output file, diff exit +## status against .expected-exit (default 0). +## +## Caller (Make) ensures build/<arch>/scheme1 and build/<arch>/cc/cc.scm +## already exist. The cc-e2e suite additionally requires the P1pp +## toolchain (m1pp + tools), which the Make deps already cover. +## +## Filenames starting with `_` are skipped by `discover()` — that's how +## suite-internal driver and mock files (_run-lex.scm, _cg-trace.scm, +## _mini-prelude.scm, _run-pp.scm) avoid being picked up as fixtures. 
+ +# _cc_check <label> <expected-stdout> <expected-exit> <actual-stdout> <actual-exit> +_cc_check() { + lbl=$1; exp_out=$2; exp_exit=$3; act_out=$4; act_exit=$5 + if [ "$act_out" = "$exp_out" ] && [ "$act_exit" = "$exp_exit" ]; then + report "$lbl" PASS + else + report "$lbl" FAIL + if [ "$act_out" != "$exp_out" ]; then show_diff "$exp_out" "$act_out"; fi + if [ "$act_exit" != "$exp_exit" ]; then + echo " exit: expected $exp_exit, got $act_exit" + fi + fi +} + +# _cc_arches: aarch64 amd64 riscv64 by default; honors --arch=... +_cc_arches() { + if [ -z "$ARCH" ]; then echo "aarch64 amd64 riscv64" + else echo "$ARCH"; fi +} + +# _cc_unit_suite <suite-name> <expected-ext> <layer-list> +# Generic unit-suite runner for cc-util / cc-pp / cc-cg / cc-parse. +# Each fixture is `tests/<suite>/<name>.scm`; expected stdout in +# `tests/<suite>/<name>.<expected-ext>` (default empty); exit in +# `tests/<suite>/<name>.expected-exit` (default 0). +_cc_unit_suite() { + suite=$1; ext=$2; layers=$3 + [ -n "$NAMES" ] || NAMES=$(discover tests/$suite scm) + for arch in $(_cc_arches); do + for name in $NAMES; do + fixture=tests/$suite/$name.scm + [ -e "$fixture" ] || { echo " SKIP $name (no .scm)"; continue; } + if [ -e "tests/$suite/$name.$ext" ]; then + expout=$(cat "tests/$suite/$name.$ext") + else + expout= + fi + if [ -e "tests/$suite/$name.expected-exit" ]; then + expexit=$(cat "tests/$suite/$name.expected-exit") + else + expexit=0 + fi + tmp=$(mktemp) + if run_in_container "$arch" sh -c " + build/$arch/tools/catm /tmp/cc-test.scm $layers $fixture + exec build/$arch/scheme1 /tmp/cc-test.scm + " >"$tmp" 2>&1; then + act_exit=0 + else + act_exit=$? + fi + act_out=$(cat "$tmp"); rm -f "$tmp" + _cc_check "[$arch] $suite/$name" "$expout" "$expexit" "$act_out" "$act_exit" + done + done +} + +# cc-util: scheme1 prelude + util only. +run_cc_util_suite() { + _cc_unit_suite cc-util expected "scheme1/prelude.scm cc/util.scm" +} + +# cc-pp: prelude + util + data + pp. 
+run_cc_pp_suite() { + _cc_unit_suite cc-pp expected \ + "scheme1/prelude.scm cc/util.scm cc/data.scm cc/pp.scm" +} + +# cc-cg: prelude + util + data + cg. +run_cc_cg_suite() { + _cc_unit_suite cc-cg expected \ + "scheme1/prelude.scm cc/util.scm cc/data.scm cc/cg.scm" +} + +# cc-parse: mini-prelude + util + data + cg-trace mock + parse. +# (Full prelude+everything would push past scheme1's source-buffer cap +# — see CC-INTERNALS.md and the project memory note.) +run_cc_parse_suite() { + _cc_unit_suite cc-parse expected-trace \ + "tests/cc-parse/_mini-prelude.scm cc/util.scm cc/data.scm tests/cc-parse/_cg-trace.scm cc/parse.scm" +} + +# cc-lex: needs the _run-lex.scm driver + the .c fixture as scheme1 argv. +# Expected file is .expected-toks; `;;` lines are stripped (negative +# fixtures use them for human notes). +run_cc_lex_suite() { + [ -n "$NAMES" ] || NAMES=$(discover tests/cc-lex c) + layers="scheme1/prelude.scm cc/util.scm cc/data.scm cc/lex.scm tests/cc-lex/_run-lex.scm" + for arch in $(_cc_arches); do + for name in $NAMES; do + fixture=tests/cc-lex/$name.c + [ -e "$fixture" ] || { echo " SKIP $name (no .c)"; continue; } + if [ -e "tests/cc-lex/$name.expected-toks" ]; then + # `|| true` because grep returns 1 when zero lines match + # (negative-test fixtures have only ;; comments). + expout=$(grep -v '^;;' "tests/cc-lex/$name.expected-toks" || true) + else + expout= + fi + if [ -e "tests/cc-lex/$name.expected-exit" ]; then + expexit=$(cat "tests/cc-lex/$name.expected-exit") + else + expexit=0 + fi + tmp=$(mktemp) + # Negative fixtures (expected-exit != 0) write diagnostics to + # stderr; we don't compare their output, only exit code. + # The merged stdout+stderr capture is uniform either way. + if run_in_container "$arch" sh -c " + build/$arch/tools/catm /tmp/cc-test.scm $layers + exec build/$arch/scheme1 /tmp/cc-test.scm $fixture + " >"$tmp" 2>&1; then + act_exit=0 + else + act_exit=$? 
+ fi + if [ "$expexit" != "0" ]; then + act_out= + else + act_out=$(cat "$tmp") + fi + rm -f "$tmp" + _cc_check "[$arch] cc-lex/$name" "$expout" "$expexit" "$act_out" "$act_exit" + done + done +} + +# cc-e2e: compile a .c through cc, assemble to ELF, run. +# Fixture: <name>.c; expected stdout in <name>.expected (default empty); +# exit in <name>.expected-exit (default 0). +run_cc_e2e_suite() { + [ -n "$NAMES" ] || NAMES=$(discover tests/cc-e2e c) + for arch in $(_cc_arches); do + for name in $NAMES; do + src=tests/cc-e2e/$name.c + [ -e "$src" ] || { echo " SKIP $name (no .c)"; continue; } + expout=$([ -e tests/cc-e2e/$name.expected ] \ + && cat tests/cc-e2e/$name.expected || echo "") + expexit=$([ -e tests/cc-e2e/$name.expected-exit ] \ + && cat tests/cc-e2e/$name.expected-exit || echo 0) + outdir=build/$arch/cc-e2e/$name + p1pp=$outdir/$name.P1pp + elf=$outdir/$name + mkdir -p "$outdir" + if ! run_in_container "$arch" sh -c \ + "build/$arch/scheme1 build/$arch/cc/cc.scm $src $p1pp" \ + >/dev/null 2>&1; then + report "[$arch] cc-e2e/$name" FAIL + echo " cc compile failed" + continue + fi + if ! run_in_container "$arch" sh scripts/boot-build-p1pp.sh \ + "$p1pp" "$elf" >/dev/null 2>&1; then + report "[$arch] cc-e2e/$name" FAIL + run_in_container "$arch" sh scripts/boot-build-p1pp.sh \ + "$p1pp" "$elf" 2>&1 | sed 's/^/ /' >&2 || true + continue + fi + tmp=$(mktemp) + if run_in_container "$arch" "./$elf" >"$tmp" 2>&1; then + act_exit=0 + else + act_exit=$? 
+ fi + act_out=$(cat "$tmp"); rm -f "$tmp" + _cc_check "[$arch] cc-e2e/$name" "$expout" "$expexit" "$act_out" "$act_exit" + done + done +} + case "$SUITE" in - m1pp) run_m1pp_suite ;; - p1) run_p1_suite ;; - scheme1) run_scheme1_suite ;; + m1pp) run_m1pp_suite ;; + p1) run_p1_suite ;; + scheme1) run_scheme1_suite ;; + cc-util) run_cc_util_suite ;; + cc-lex) run_cc_lex_suite ;; + cc-pp) run_cc_pp_suite ;; + cc-cg) run_cc_cg_suite ;; + cc-parse) run_cc_parse_suite ;; + cc-e2e) run_cc_e2e_suite ;; esac echo "$PASS passed, $FAIL failed" diff --git a/tests/cc-lex/run-lex.scm b/tests/cc-lex/_run-lex.scm diff --git a/tests/cc-lex/run.sh b/tests/cc-lex/run.sh @@ -1,104 +0,0 @@ -#!/bin/sh -## tests/cc-lex/run.sh — fixture-by-fixture lex test loop. -## -## Usage: -## tests/cc-lex/run.sh # run all fixtures -## tests/cc-lex/run.sh 02 05 # run only matching prefixes -## -## Per-fixture protocol: -## - <NN>-<name>.c is the input. -## - <NN>-<name>.expected-toks is the golden token serialization -## (one tok per line, per CC-CONTRACTS §2.1). Lines beginning -## with `;;` are ignored, allowing negative tests to carry a -## human-readable note in their .expected-toks file. -## - <NN>-<name>.expected-exit overrides the expected exit status -## (default: 0). -## -## Each fixture is run inside boot2-busybox:aarch64 by catm'ing -## prelude+lex+data+util+driver, then invoking scheme1 on the combined -## file with the .c fixture as argv[2]. Stdout is diffed against the -## (filtered) .expected-toks; exit status is diffed against -## .expected-exit. stderr is suppressed (negative tests only check -## exit status). - -set -eu -REPO=$(cd "$(dirname "$0")/../.." 
&& pwd) -cd "$REPO" - -ARCH=${ARCH:-aarch64} -case "$ARCH" in - aarch64) PLATFORM=linux/arm64 ;; - amd64) PLATFORM=linux/amd64 ;; - riscv64) PLATFORM=linux/riscv64 ;; - *) echo "unknown ARCH=$ARCH" >&2; exit 2 ;; -esac - -PASS=0 -FAIL=0 - -discover() { - ls tests/cc-lex \ - | sed -n 's/^\([^_][^.]*\)\.c$/\1/p' \ - | sort -u -} - -NAMES="$*" -[ -n "$NAMES" ] || NAMES=$(discover) - -# Strip ;; comment lines from a file; emit the rest. Used to filter -# expected-toks so negative tests can carry a human note. -strip_comments() { - sed -n '/^;;/d; p' "$1" -} - -for prefix in $NAMES; do - for c in tests/cc-lex/${prefix}*.c; do - [ -e "$c" ] || continue - name=$(basename "$c" .c) - expected_toks=tests/cc-lex/$name.expected-toks - expected_exit_file=tests/cc-lex/$name.expected-exit - - if [ ! -e "$expected_toks" ]; then - echo " SKIP $name (no .expected-toks)" - continue - fi - expected=$(strip_comments "$expected_toks") - if [ -e "$expected_exit_file" ]; then - expected_exit=$(cat "$expected_exit_file" | tr -d '\n') - else - expected_exit=0 - fi - - # Build the combined source and run. - actual=$(podman run --rm --pull=never --platform "$PLATFORM" \ - --tmpfs /tmp:size=512M -v "$REPO":/work -w /work \ - "boot2-busybox:$ARCH" \ - sh -c "build/$ARCH/tools/catm /tmp/c.scm \ - scheme1/prelude.scm cc/util.scm cc/data.scm \ - cc/lex.scm tests/cc-lex/run-lex.scm; \ - build/$ARCH/scheme1 /tmp/c.scm tests/cc-lex/$name.c" \ - 2>/dev/null) || actual_exit=$? 
- actual_exit=${actual_exit:-0} - - if [ "$actual" = "$expected" ] && [ "$actual_exit" = "$expected_exit" ]; then - PASS=$((PASS + 1)) - echo " PASS $name" - else - FAIL=$((FAIL + 1)) - echo " FAIL $name" - if [ "$actual" != "$expected" ]; then - echo " --- expected ---" - printf '%s\n' "$expected" | sed 's/^/ /' - echo " --- actual ---" - printf '%s\n' "$actual" | sed 's/^/ /' - fi - if [ "$actual_exit" != "$expected_exit" ]; then - echo " exit: expected $expected_exit, got $actual_exit" - fi - fi - unset actual_exit - done -done - -echo "$PASS passed, $FAIL failed" -[ "$FAIL" -eq 0 ] diff --git a/tests/cc-parse/cg-trace.scm b/tests/cc-parse/_cg-trace.scm diff --git a/tests/cc-parse/mini-prelude.scm b/tests/cc-parse/_mini-prelude.scm diff --git a/tests/cc-pp/16-error.expected b/tests/cc-pp/16-error.expected @@ -0,0 +1 @@ +t.c:1:2: error: #error: boom diff --git a/tests/cc-pp/17-include-rejected.expected b/tests/cc-pp/17-include-rejected.expected @@ -0,0 +1 @@ +t.c:1:10: error: #include: file inclusion is handled upstream by pre-flatten diff --git a/tests/cc-pp/run-pp.scm b/tests/cc-pp/_run-pp.scm diff --git a/tests/cc-util/09-die-noloc.expected b/tests/cc-util/09-die-noloc.expected @@ -0,0 +1 @@ +error: bad token: unexpected 42 diff --git a/tests/cc-util/10-die-no-irritants.expected b/tests/cc-util/10-die-no-irritants.expected @@ -0,0 +1 @@ +error: something went wrong