boot2

Playing with the bootstrap
git clone https://git.ryansepassi.com/git/boot2.git
Log | Files | Refs | README

commit 684090878187d2a4704d4bcac4aadd7f6f8f4a67
parent b67efec9e6f40e94adf984d9e31165e297b6f1e7
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Wed, 29 Apr 2026 17:55:28 -0700

regularize build/ dir

Diffstat:
MMakefile | 133++++++++++++++++++++++++++++++++++++++++++-------------------------------------
MP1/gen/p1_gen.py | 76++++++++++++++++++++++++++++++++++++++--------------------------------------
Mdocs/LIBC.md | 43+++++++++++++++++++++++--------------------
Mdocs/TCC-TODO.md | 56++++++++++++++++++++++++++++++++++++++++++++++++++------
Mdocs/TCC.md | 14+++++++-------
Mscripts/boot-build-cc.sh | 13++++++++++---
Mscripts/boot-build-p1.sh | 10+++++++++-
Mscripts/boot-build-p1pp.sh | 26++++++++++++++++++++++----
Mscripts/boot-run-scheme1.sh | 8++++----
Mscripts/boot-run-tests.sh | 138++++++++++++++++++++++++++++++++++++++++++-------------------------------------
Mscripts/boot-undef.sh | 2+-
Mscripts/boot2.sh | 6+++---
Mscripts/disasm-elf.sh | 63++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
Mscripts/libc-flatten.sh | 6+++---
Mscripts/run-tests.sh | 62++++++++++++++++++++++++++++++++++++++------------------------
Mscripts/stage1-flatten.sh | 4++--
Mscripts/stage2-alpine.sh | 8++++----
Mscripts/stage3-rebuild.sh | 12++++++------
Rtests/M1pp/00-hello.M1 -> tests/P1/00-hello.P1 | 0
Rtests/M1pp/00-hello.expected -> tests/P1/00-hello.expected | 0
20 files changed, 422 insertions(+), 258 deletions(-)

diff --git a/Makefile b/Makefile @@ -14,12 +14,10 @@ # make all (m1pp + pokem for ARCH) # make m1pp build the m1pp expander for ARCH # make pokem build pokem for ARCH -# make hello build hello via the bootstrap chain # make scheme1 build the scheme1 interpreter for ARCH # make cc catm the cc compiler source for ARCH # make tcc-flat flatten upstream tcc.c into one TU # make tcc-boot2 cc.scm + P1pp pipeline → tcc-boot2 ELF -# make run run hello in the container # make test every suite, every arch # make test SUITE=m1pp m1pp suite, every arch # make test SUITE=p1 ARCH=amd64 p1 suite, one arch @@ -30,6 +28,11 @@ # make tools-native build host-native M1/hex2/m1pp (opt-in) # make cloc line counts for the core sources # make clean rm -rf build/ +# +# Output layout: every binary lives at build/<arch>/<src-path-without-ext>, +# mirroring the source path under the repo root (e.g. M1pp/M1pp.P1 -> +# build/<arch>/M1pp/M1pp; tests/cc/foo.c -> build/<arch>/tests/cc/foo). +# Per-source intermediates land under build/<arch>/.work/<src-path>/. ARCH ?= aarch64 @@ -64,7 +67,7 @@ PODMAN = podman run --rm --pull=never --platform $(PLATFORM_$(1)) \ # --- Targets -------------------------------------------------------------- -.PHONY: all m1pp pokem hello scheme1 cc run test image tools tables \ +.PHONY: all m1pp pokem scheme1 cc test image tools tables \ tools-native cloc clean help tcc-boot2 tcc-flat all: m1pp pokem @@ -140,28 +143,26 @@ $(TOOLS_M0): build/%/tools/M0: scripts/boot1.sh build/%/.image \ # tables` after editing P1/gen/*.py or any of the prune-source files # below, then commit the updated P1/*.M1. 
-P1_PRUNE_SRCS := M1pp/M1pp.P1 pokem/pokem.P1 $(wildcard tests/M1pp/*.M1) +P1_PRUNE_SRCS := M1pp/M1pp.P1 pokem/pokem.P1 $(wildcard tests/P1/*.P1) tables: $(foreach a,$(ALL_ARCHES),P1/P1-$(a).M1) -build/p1/aarch64.M1 build/p1/amd64.M1 build/p1/riscv64.M1 &: \ - $(wildcard P1/gen/*.py) - mkdir -p build/p1 - python3 P1/gen/p1_gen.py --arch aarch64 build/p1 - python3 P1/gen/p1_gen.py --arch amd64 build/p1 - python3 P1/gen/p1_gen.py --arch riscv64 build/p1 +build/%/P1/P1.M1: $(wildcard P1/gen/*.py) + mkdir -p $(@D) + python3 P1/gen/p1_gen.py --arch $* --out $@ + +# Keep the unpruned per-arch tables around after the prune step. +.SECONDARY: $(foreach a,$(ALL_ARCHES),build/$(a)/P1/P1.M1) -P1/P1-%.M1: build/p1/%.M1 scripts/prune-p1-table.sh $(P1_PRUNE_SRCS) +P1/P1-%.M1: build/%/P1/P1.M1 scripts/prune-p1-table.sh $(P1_PRUNE_SRCS) sh scripts/prune-p1-table.sh $< $@ $(P1_PRUNE_SRCS) # --- Programs (per arch) -------------------------------------------------- -M1PP_BINS := $(foreach a,$(ALL_ARCHES),build/$(a)/m1pp) -POKEM_BINS := $(foreach a,$(ALL_ARCHES),build/$(a)/pokem) -HELLO_SRC := tests/M1pp/00-hello.M1 -HELLO_BINS := $(foreach a,$(ALL_ARCHES),build/$(a)/hello) +M1PP_BINS := $(foreach a,$(ALL_ARCHES),build/$(a)/M1pp/M1pp) +POKEM_BINS := $(foreach a,$(ALL_ARCHES),build/$(a)/pokem/pokem) SCHEME1_SRC := scheme1/scheme1.P1pp -SCHEME1_BINS := $(foreach a,$(ALL_ARCHES),build/$(a)/scheme1) +SCHEME1_BINS := $(foreach a,$(ALL_ARCHES),build/$(a)/scheme1/scheme1) # Catm'd cc compiler source. 
Per-arch only because catm runs in the # per-arch container; the resulting .scm is identical across arches but @@ -169,10 +170,9 @@ SCHEME1_BINS := $(foreach a,$(ALL_ARCHES),build/$(a)/scheme1) CC_SRCS := scheme1/prelude.scm cc/cc.scm cc/main.scm CC_BINS := $(foreach a,$(ALL_ARCHES),build/$(a)/cc/cc.scm) -m1pp: $(OUT_DIR)/m1pp -pokem: $(OUT_DIR)/pokem -hello: $(OUT_DIR)/hello -scheme1: $(OUT_DIR)/scheme1 +m1pp: $(OUT_DIR)/M1pp/M1pp +pokem: $(OUT_DIR)/pokem/pokem +scheme1: $(OUT_DIR)/scheme1/scheme1 cc: $(OUT_DIR)/cc/cc.scm # Per-arch deps for .P1/.M1 builds (raw M1, no macro expansion). @@ -182,23 +182,19 @@ P1_BUILD_DEPS = scripts/lint.sh scripts/boot-build-p1.sh \ # Per-arch deps for .P1pp builds (m1pp expansion + libp1pp). P1PP_BUILD_DEPS = scripts/boot-build-p1pp.sh \ - build/%/.image build/%/tools/M0 build/%/m1pp \ + build/%/.image build/%/tools/M0 build/%/M1pp/M1pp \ vendor/seed/%/ELF.hex2 \ P1/P1-%.M1pp P1/P1.M1pp P1/P1pp.P1pp -$(M1PP_BINS): build/%/m1pp: M1pp/M1pp.P1 $(P1_BUILD_DEPS) +$(M1PP_BINS): build/%/M1pp/M1pp: M1pp/M1pp.P1 $(P1_BUILD_DEPS) ARCH=$* sh scripts/lint.sh M1pp/M1pp.P1 $(call PODMAN,$*) sh scripts/boot-build-p1.sh M1pp/M1pp.P1 $@ -$(POKEM_BINS): build/%/pokem: pokem/pokem.P1 $(P1_BUILD_DEPS) +$(POKEM_BINS): build/%/pokem/pokem: pokem/pokem.P1 $(P1_BUILD_DEPS) ARCH=$* sh scripts/lint.sh pokem/pokem.P1 $(call PODMAN,$*) sh scripts/boot-build-p1.sh pokem/pokem.P1 $@ -$(HELLO_BINS): build/%/hello: $(HELLO_SRC) $(P1_BUILD_DEPS) - ARCH=$* sh scripts/lint.sh $(HELLO_SRC) - $(call PODMAN,$*) sh scripts/boot-build-p1.sh $(HELLO_SRC) $@ - -$(SCHEME1_BINS): build/%/scheme1: $(SCHEME1_SRC) $(P1PP_BUILD_DEPS) +$(SCHEME1_BINS): build/%/scheme1/scheme1: $(SCHEME1_SRC) $(P1PP_BUILD_DEPS) $(call PODMAN,$*) sh scripts/boot-build-p1pp.sh $@ $(SCHEME1_SRC) # cc.scm: catm prelude + cc.scm + main.scm entry into one source the @@ -207,21 +203,19 @@ $(CC_BINS): build/%/cc/cc.scm: $(CC_SRCS) build/%/.image build/%/tools/M0 mkdir -p $(@D) $(call PODMAN,$*) 
build/$*/tools/catm $@ $(CC_SRCS) -run: $(OUT_DIR)/hello $(IMAGE_STAMP) - $(call PODMAN,$(ARCH)) ./$(OUT_DIR)/hello - # --- tcc-boot2 end-to-end harness ----------------------------------------- # # Drives stage1-flatten.sh (host preprocessor only — no container) to -# produce build/cc-bootstrap/$(TCC_TARGET)/tcc.flat.c, then runs cc.scm -# inside the per-arch container against the flattened TU, then assembles -# the resulting P1pp into a runnable ELF using the standard P1pp -# pipeline. The resulting binary embeds tcc's $(TCC_TARGET) codegen, so -# match $(ARCH) to it (amd64↔X86_64, riscv64↔RISCV64) if you want to -# run the binary natively in the container. +# produce build/tcc/$(TCC_TARGET)/tcc.flat.c, then runs cc.scm inside +# the per-arch container against the flattened TU, then assembles the +# resulting P1pp into a runnable ELF using the standard P1pp pipeline. +# The resulting binary embeds tcc's $(TCC_TARGET) codegen, so match +# $(ARCH) to it (amd64↔X86_64, riscv64↔RISCV64) if you want to run the +# binary natively in the container. tcc.flat.c lives outside the per- +# arch tree because it depends on TCC_TARGET, not the build arch. TCC_TARGET ?= X86_64 -TCC_FLAT := build/cc-bootstrap/$(TCC_TARGET)/tcc.flat.c +TCC_FLAT := build/tcc/$(TCC_TARGET)/tcc.flat.c TCC_BOOT2_BINS := $(foreach a,$(ALL_ARCHES),build/$(a)/tcc-boot2/tcc-boot2) TCC_BOOT2_P1PPS := $(foreach a,$(ALL_ARCHES),build/$(a)/tcc-boot2/tcc.flat.P1pp) @@ -230,8 +224,8 @@ TCC_BOOT2_P1PPS := $(foreach a,$(ALL_ARCHES),build/$(a)/tcc-boot2/tcc.flat.P1pp) # linked into tcc-boot2 itself so the unresolved libc symbols (printf, # malloc, fopen, …) resolve against our own libc.P1pp instead of the # host's. Phase A of docs/LIBC.md. 
-LIBC_FLATS := $(foreach a,$(ALL_ARCHES),build/cc-bootstrap/$(a)/libc.flat.c) -LIBC_P1PPS := $(foreach a,$(ALL_ARCHES),build/$(a)/libc.P1pp) +LIBC_FLATS := $(foreach a,$(ALL_ARCHES),build/$(a)/vendor/mes-libc/libc.flat.c) +LIBC_P1PPS := $(foreach a,$(ALL_ARCHES),build/$(a)/vendor/mes-libc/libc.P1pp) tcc-flat: $(TCC_FLAT) tcc-boot2: $(OUT_DIR)/tcc-boot2/tcc-boot2 @@ -244,7 +238,7 @@ LIBC_VENDOR_SRCS := $(shell find vendor/mes-libc -type f \( -name '*.c' -o -name $(wildcard vendor/mes-libc/patches/*.before) \ $(wildcard vendor/mes-libc/patches/*.after) -$(LIBC_FLATS): build/cc-bootstrap/%/libc.flat.c: \ +$(LIBC_FLATS): build/%/vendor/mes-libc/libc.flat.c: \ scripts/libc-flatten.sh $(LIBC_VENDOR_SRCS) sh scripts/libc-flatten.sh --arch $* @@ -253,26 +247,39 @@ $(LIBC_FLATS): build/cc-bootstrap/%/libc.flat.c: \ # and P1/elf-end.P1pp at link time). Distinct prefixes keep the # anonymous string labels (cc__str_N) from colliding when both TUs # end up in the same catm chain. -$(LIBC_P1PPS): build/%/libc.P1pp: \ - build/cc-bootstrap/%/libc.flat.c \ - build/%/scheme1 build/%/cc/cc.scm \ +# CC_DEBUG=1 / CC_TRACE_EMIT=1 forward to scripts/boot-build-cc.sh, +# which threads them as --cc-debug / --cc-trace-emit on the cc.scm +# command line. Both are opt-in so the default tcc-boot2 build stays +# clean; the trace emit fattens output by one libp1pp call per fn. 
+CC_DEBUG ?= 0 +CC_TRACE_EMIT ?= 0 + +$(LIBC_P1PPS): build/%/vendor/mes-libc/libc.P1pp: \ + build/%/vendor/mes-libc/libc.flat.c \ + build/%/scheme1/scheme1 build/%/cc/cc.scm \ scripts/boot-build-cc.sh build/%/.image - $(call PODMAN,$*) env CC_LIB=libc__ sh scripts/boot-build-cc.sh $< $@ + $(call PODMAN,$*) env CC_LIB=libc__ \ + CC_DEBUG=$(CC_DEBUG) CC_TRACE_EMIT=$(CC_TRACE_EMIT) \ + sh scripts/boot-build-cc.sh $< $@ $(TCC_BOOT2_P1PPS): build/%/tcc-boot2/tcc.flat.P1pp: \ - $(TCC_FLAT) build/%/scheme1 build/%/cc/cc.scm \ + $(TCC_FLAT) build/%/scheme1/scheme1 build/%/cc/cc.scm \ scripts/boot-build-cc.sh build/%/.image - $(call PODMAN,$*) env CC_LIB=tcc__ sh scripts/boot-build-cc.sh $(TCC_FLAT) $@ + $(call PODMAN,$*) env CC_LIB=tcc__ \ + CC_DEBUG=$(CC_DEBUG) CC_TRACE_EMIT=$(CC_TRACE_EMIT) \ + sh scripts/boot-build-cc.sh $(TCC_FLAT) $@ # tcc-boot2 link: pure catm chain — entry stub, libc, client TU, # elf terminator. boot-build-p1pp.sh concatenates them in order # ahead of the M1pp expander/M0/hex2 pipeline. $(TCC_BOOT2_BINS): build/%/tcc-boot2/tcc-boot2: \ - build/%/tcc-boot2/tcc.flat.P1pp build/%/libc.P1pp \ + build/%/tcc-boot2/tcc.flat.P1pp build/%/vendor/mes-libc/libc.P1pp \ P1/entry-libc.P1pp P1/elf-end.P1pp \ $(P1PP_BUILD_DEPS) - $(call PODMAN,$*) env P1PP_TRACE=1 sh scripts/boot-build-p1pp.sh $@ \ - P1/entry-libc.P1pp build/$*/libc.P1pp $< P1/elf-end.P1pp + $(call PODMAN,$*) env P1PP_TRACE=1 WORK_SUBPATH=tcc-boot2/tcc-boot2 \ + sh scripts/boot-build-p1pp.sh $@ \ + P1/entry-libc.P1pp build/$*/vendor/mes-libc/libc.P1pp \ + $< P1/elf-end.P1pp # --- Native tools (opt-in dev-loop helpers) ------------------------------- @@ -310,17 +317,18 @@ endif # m1pp suite per-arch deps: image, tools, table, expander. TEST_M1PP_DEPS := $(foreach a,$(TEST_ARCHES), \ - build/$(a)/.image build/$(a)/tools/M0 P1/P1-$(a).M1 build/$(a)/m1pp) + build/$(a)/.image build/$(a)/tools/M0 P1/P1-$(a).M1 build/$(a)/M1pp/M1pp) -# p1 suite per-arch deps: image, tools, expander. 
+# p1 suite per-arch deps: image, tools, table, expander. TEST_P1_DEPS := $(foreach a,$(TEST_ARCHES), \ - build/$(a)/.image build/$(a)/tools/M0 build/$(a)/m1pp) + build/$(a)/.image build/$(a)/tools/M0 P1/P1-$(a).M1 build/$(a)/M1pp/M1pp) # scheme1 suite per-arch deps: image, tools, expander, scheme1 binary. # (run-tests.sh runs the pre-built binary against each .scm fixture; it # does not rebuild the interpreter per fixture.) TEST_SCHEME1_DEPS := $(foreach a,$(TEST_ARCHES), \ - build/$(a)/.image build/$(a)/tools/M0 build/$(a)/m1pp build/$(a)/scheme1) + build/$(a)/.image build/$(a)/tools/M0 build/$(a)/M1pp/M1pp \ + build/$(a)/scheme1/scheme1) # cc-* suites: scheme1 + m1pp cover everything. cc-util / cc-lex / # cc-pp byte-diff their pure transformations; cc-cg / cc compile the @@ -329,7 +337,8 @@ TEST_SCHEME1_DEPS := $(foreach a,$(TEST_ARCHES), \ # the catm'd compiler against a .c fixture); the rest catm their own # per-suite layer list. TEST_CC_UNIT_DEPS := $(foreach a,$(TEST_ARCHES), \ - build/$(a)/.image build/$(a)/tools/M0 build/$(a)/m1pp build/$(a)/scheme1) + build/$(a)/.image build/$(a)/tools/M0 build/$(a)/M1pp/M1pp \ + build/$(a)/scheme1/scheme1) TEST_CC_DEPS := $(TEST_CC_UNIT_DEPS) \ $(foreach a,$(TEST_ARCHES),build/$(a)/cc/cc.scm) @@ -338,7 +347,7 @@ TEST_CC_DEPS := $(TEST_CC_UNIT_DEPS) \ # fixture, plus the entry/elf-end fragments the catm chain depends on. # Targeted red-green TDD on the cc.scm + libc combination. 
TEST_CC_LIBC_DEPS := $(TEST_CC_DEPS) \ - $(foreach a,$(TEST_ARCHES),build/$(a)/libc.P1pp) \ + $(foreach a,$(TEST_ARCHES),build/$(a)/vendor/mes-libc/libc.P1pp) \ P1/entry-libc.P1pp P1/elf-end.P1pp test: @@ -354,22 +363,22 @@ ifeq ($(SUITE),) @$(MAKE) --no-print-directory test SUITE=cc-libc else ifeq ($(SUITE),m1pp) @$(MAKE) --no-print-directory $(TEST_M1PP_DEPS) - sh scripts/run-tests.sh --suite=m1pp $(if $(ARCH_FILTER),--arch=$(ARCH_FILTER)) + sh scripts/run-tests.sh --suite=m1pp $(if $(ARCH_FILTER),--arch=$(ARCH_FILTER)) $(NAMES) else ifeq ($(SUITE),p1) @$(MAKE) --no-print-directory $(TEST_P1_DEPS) - sh scripts/run-tests.sh --suite=p1 $(if $(ARCH_FILTER),--arch=$(ARCH_FILTER)) + sh scripts/run-tests.sh --suite=p1 $(if $(ARCH_FILTER),--arch=$(ARCH_FILTER)) $(NAMES) else ifeq ($(SUITE),scheme1) @$(MAKE) --no-print-directory $(TEST_SCHEME1_DEPS) - sh scripts/run-tests.sh --suite=scheme1 $(if $(ARCH_FILTER),--arch=$(ARCH_FILTER)) + sh scripts/run-tests.sh --suite=scheme1 $(if $(ARCH_FILTER),--arch=$(ARCH_FILTER)) $(NAMES) else ifeq ($(filter $(SUITE),cc-util cc-lex cc-pp cc-cg),$(SUITE)) @$(MAKE) --no-print-directory $(TEST_CC_UNIT_DEPS) - sh scripts/run-tests.sh --suite=$(SUITE) $(if $(ARCH_FILTER),--arch=$(ARCH_FILTER)) + sh scripts/run-tests.sh --suite=$(SUITE) $(if $(ARCH_FILTER),--arch=$(ARCH_FILTER)) $(NAMES) else ifeq ($(SUITE),cc) @$(MAKE) --no-print-directory $(TEST_CC_DEPS) - sh scripts/run-tests.sh --suite=cc $(if $(ARCH_FILTER),--arch=$(ARCH_FILTER)) + sh scripts/run-tests.sh --suite=cc $(if $(ARCH_FILTER),--arch=$(ARCH_FILTER)) $(NAMES) else ifeq ($(SUITE),cc-libc) @$(MAKE) --no-print-directory $(TEST_CC_LIBC_DEPS) - sh scripts/run-tests.sh --suite=cc-libc $(if $(ARCH_FILTER),--arch=$(ARCH_FILTER)) + sh scripts/run-tests.sh --suite=cc-libc $(if $(ARCH_FILTER),--arch=$(ARCH_FILTER)) $(NAMES) else @echo "unknown SUITE='$(SUITE)' (m1pp | p1 | scheme1 | cc-util | cc-lex | cc-pp | cc-cg | cc | cc-libc)" >&2; exit 2 endif diff --git a/P1/gen/p1_gen.py 
b/P1/gen/p1_gen.py @@ -8,8 +8,8 @@ than the full theoretical spec space, so extending coverage is a one-line data edit instead of an architecture rewrite. Usage: - python3 p1/gen/p1_gen.py [--arch ARCH] [build-root] - python3 p1/gen/p1_gen.py --check [--arch ARCH] [build-root] + python3 p1/gen/p1_gen.py --arch ARCH --out PATH + python3 p1/gen/p1_gen.py --check --arch ARCH --out PATH python3 p1/gen/p1_gen.py --list-archs """ @@ -203,55 +203,55 @@ def emit(arch_name): def parse_args(argv): check = False - archs = [] - positional = [] + arch = None + out = None i = 0 while i < len(argv): - arg = argv[i] - if arg == '--check': + a = argv[i] + if a == '--check': check = True - elif arg == '--list-archs': + elif a == '--list-archs': print('\n'.join(sorted(ARCHES))) sys.exit(0) - elif arg == '--arch': + elif a == '--arch': i += 1 if i >= len(argv): raise SystemExit('--arch requires a value') - archs.append(argv[i]) + arch = argv[i] + elif a == '--out': + i += 1 + if i >= len(argv): + raise SystemExit('--out requires a value') + out = argv[i] else: - positional.append(arg) + raise SystemExit(f'unexpected argument: {a}') i += 1 - build_root = positional[0] if positional else os.path.join('build', 'p1') - if not archs: - archs = list(sorted(ARCHES)) - return check, archs, build_root + if arch is None: + raise SystemExit('--arch is required') + if out is None: + raise SystemExit('--out is required') + if arch not in ARCHES: + raise SystemExit(f'unknown arch: {arch}') + return check, arch, out def main(argv=None): - check, archs, build_root = parse_args(argv or sys.argv[1:]) - had_diff = False - - for arch_name in archs: - arch = ARCHES[arch_name] - path = os.path.join(build_root, f'{arch.name}.M1') - content = emit(arch.name) - if check: - try: - with open(path) as f: - existing = f.read() - except FileNotFoundError: - existing = '' - if existing != content: - sys.stderr.write(f'DIFF: {path}\n') - had_diff = True - continue - os.makedirs(build_root, exist_ok=True) - with 
open(path, 'w') as f: - f.write(content) - print(f'wrote {path} ({len(content)} bytes)') - - if check and had_diff: - sys.exit(1) + check, arch_name, path = parse_args(argv or sys.argv[1:]) + content = emit(arch_name) + if check: + try: + with open(path) as f: + existing = f.read() + except FileNotFoundError: + existing = '' + if existing != content: + sys.stderr.write(f'DIFF: {path}\n') + sys.exit(1) + return + os.makedirs(os.path.dirname(path) or '.', exist_ok=True) + with open(path, 'w') as f: + f.write(content) + print(f'wrote {path} ({len(content)} bytes)') if __name__ == '__main__': diff --git a/docs/LIBC.md b/docs/LIBC.md @@ -174,14 +174,14 @@ The only file we author. Provides: `scripts/libc-flatten.sh --arch <a>` (host): -1. Stage `vendor/mes-libc/` to `build/cc-bootstrap/$ARCH/libc-stage/` +1. Stage `vendor/mes-libc/` to `build/$ARCH/vendor/mes-libc/libc-stage/` so patching is non-destructive. 2. `ln -sfn linux/$MES_ARCH include/arch` so mes's `<arch/...>` includes resolve through the canonical `<sys/stat.h>` chain. 3. Apply patches on the staged copy. 4. `host_cc -E -nostdinc -I include -I . -D __linux__=1 -D __${MES_ARCH}__=1 -D __riscv_xlen=64 unified-libc.c - → build/cc-bootstrap/$ARCH/libc.flat.c` (~52 KB, ~2400 lines). + → build/$ARCH/vendor/mes-libc/libc.flat.c` (~52 KB, ~2400 lines). `MES_ARCH` mapping is `aarch64→riscv64`, `amd64→x86_64`, `riscv64→riscv64`. mes ships no aarch64 headers; the riscv64 set @@ -195,8 +195,8 @@ holds a partial `signal.h` that shadows the canonical `typedef long stack_t;`. `scripts/boot-build-cc.sh` (container) then runs `cc.scm` over -`libc.flat.c` to produce `build/$ARCH/libc.P1pp` (~520 KB, -~21 K lines). +`libc.flat.c` to produce `build/$ARCH/vendor/mes-libc/libc.P1pp` +(~520 KB, ~21 K lines). ### Linking — catm chain @@ -213,10 +213,10 @@ string shows up in `.data`. 
Wired together, the link is just `catm`: ``` -P1/entry-libc.P1pp # :p1_main → __libc_init → main -build/$ARCH/libc.P1pp # cc.scm --lib=libc__ → libc__cc__str_* -<client>.P1pp # cc.scm --lib=<pfx>__ → <pfx>__cc__str_* -P1/elf-end.P1pp # :ELF_end +P1/entry-libc.P1pp # :p1_main → __libc_init → main +build/$ARCH/vendor/mes-libc/libc.P1pp # cc.scm --lib=libc__ → libc__cc__str_* +<client>.P1pp # cc.scm --lib=<pfx>__ → <pfx>__cc__str_* +P1/elf-end.P1pp # :ELF_end ``` `scripts/boot-build-p1pp.sh` already cats its inputs in front of @@ -285,19 +285,22 @@ make test SUITE=cc-libc ARCH=aarch64 make test SUITE=cc-libc ARCH=aarch64 -- 05-printf-int # one fixture ``` -Per-fixture artefacts at `build/$ARCH/cc-libc/<name>/`: +Per-fixture artefacts: -- `<name>.client.P1pp` — cc.scm output for the fixture (lib mode, - prefix `app__`) -- `<name>` — final ELF -- `cc.log` / `p1pp.log` — captured stdout+stderr from each pipeline - stage; the suite handler dumps the relevant log under the FAIL row - when a stage exits non-zero. +- `build/$ARCH/tests/cc-libc/<name>` — final ELF. +- `build/$ARCH/.work/tests/cc-libc/<name>/` — scratch: + - `<name>.client.P1pp` — cc.scm output for the fixture (lib mode, + prefix `app__`) + - `cc.log` / `p1pp.log` — captured stdout+stderr from each pipeline + stage; the suite handler dumps the relevant log under the FAIL + row when a stage exits non-zero. When triaging a failure, the catm'd source the M1pp expander sees -lives at `build/$ARCH/.work/<name>/combined.M1pp` (boot-build-p1pp.sh -copies it there alongside the rest of the per-stage scratch outputs); -grep that for the symbol or sequence in question. +lives at `build/$ARCH/.work/tests/cc-libc/<name>/combined.M1pp` +(boot-build-p1pp.sh copies it there alongside the rest of the per- +stage scratch outputs; the path is also recorded in the sidecar +`<elf>.workdir` next to the binary). Grep that for the symbol or +sequence in question. 
## Phase B — build the on-disk archives tcc-boot2 needs @@ -350,8 +353,8 @@ The file is already vendored implicitly via `stage1-flatten.sh` Add `scripts/boot-build-libtcc1.sh`: ```sh -TCC_BOOT2=build/$ARCH/tcc-boot2 -TCC_SRC=build/cc-bootstrap/$ARCH/tcc-0.9.26-1147-gee75a10c +TCC_BOOT2=build/$ARCH/tcc-boot2/tcc-boot2 +TCC_SRC=build/tcc/$TCC_TARGET/tcc-0.9.26-1147-gee75a10c $TCC_BOOT2 -c -D HAVE_CONFIG_H=1 -D HAVE_LONG_LONG=1 -D HAVE_FLOAT=1 \ -I vendor/mes-libc/include \ -I vendor/mes-libc/include/linux/$MES_ARCH \ diff --git a/docs/TCC-TODO.md b/docs/TCC-TODO.md @@ -12,7 +12,7 @@ preprocessor: ``` sh scripts/stage1-flatten.sh --arch X86_64 -# -> build/cc-bootstrap/X86_64/tcc.flat.c (608 KB, 18 896 lines, 0 directives) +# -> build/tcc/X86_64/tcc.flat.c (608 KB, 18 896 lines, 0 directives) ``` Run the catm'd scheme cc against it inside the per-arch container. @@ -22,8 +22,8 @@ The cc-debug flag prints heap usage between phases on stderr: podman run --rm --pull=never --platform linux/arm64 \ --tmpfs /tmp:size=512M -e ARCH=aarch64 \ -v "$(pwd)":/work -w /work boot2-busybox:aarch64 \ - build/aarch64/scheme1 build/aarch64/cc/cc.scm --cc-debug \ - build/cc-bootstrap/X86_64/tcc.flat.c /tmp/tcc.flat.P1pp + build/aarch64/scheme1/scheme1 build/aarch64/cc/cc.scm --cc-debug \ + build/tcc/X86_64/tcc.flat.c /tmp/tcc.flat.P1pp ``` Prerequisites: `make scheme1 cc ARCH=aarch64` (or any other arch) so @@ -32,8 +32,8 @@ Prerequisites: `make scheme1 cc ARCH=aarch64` (or any other arch) so For triage, the small-prefix probe is useful: ``` -head -c 50000 build/cc-bootstrap/X86_64/tcc.flat.c \ - > build/cc-bootstrap/X86_64/tcc.head.c +head -c 50000 build/tcc/X86_64/tcc.flat.c \ + > build/tcc/X86_64/tcc.head.c # then re-run the podman invocation against tcc.head.c ``` @@ -195,7 +195,7 @@ initializer unit rewind path ([CC-INIT-SCRATCH.md](CC-INIT-SCRATCH.md)) plus the recent scope-bind alist / scratch reclamation work. 
Current full-file aarch64 run against -`build/cc-bootstrap/X86_64/tcc.flat.c` — parse + cg-finish complete: +`build/tcc/X86_64/tcc.flat.c` — parse + cg-finish complete: ``` [cc] phase=start: heap 1 225 052 @@ -277,6 +277,50 @@ enum constants) overflowed even 128 MiB of scratch because O(N²) in member count. The recent scratch / alist work makes that decl complete with parse heap at ~31 MB on the 1612-line cut. +## Tracepoint instrumentation (`%trace` / `--cc-trace-emit`) + +For debugging a built ELF that segfaults at startup, the cheapest +"how far did we get?" probe is the libp1pp `%trace(tag)` macro +(defined in [P1/P1pp.P1pp](../P1/P1pp.P1pp), §Tracepoint). At each +call site it emits one stderr line of the form: + +``` +[trace @0x000000000060013c] +``` + +where the address is the runtime location of the instruction +immediately following the trace's call sequence. To map an address +back to a function: disassemble (`scripts/disasm-elf.sh <elf>`) and +locate the line at that PC — the surrounding `:scope__*` label and +the `%fn(...)` it sits in identify the source. + +The `tag` argument is purely source-level documentation (`grep` for +`%trace(<tag>)` in your inputs / in the expanded `.M1`). Embedding +the tag bytes in the runtime print would require length-dependent +padding to keep the next instruction aligned; we skip that. + +`cc.scm --cc-trace-emit` (or `CC_TRACE_EMIT=1` to +`scripts/boot-build-cc.sh`) injects a `%trace(<mangled>)` line at the +top of every emitted function body, between argument-spill and body. +With this on, every call into compiled C produces a stderr line — +the printed address falls on a known function-entry boundary, so the +disassembly lookup is one-step. 
+ +End-to-end on tcc-boot2: + +``` +make tcc-boot2 ARCH=aarch64 CC_TRACE_EMIT=1 +./build/aarch64/tcc-boot2/tcc-boot2 -version 2> trace.log +scripts/disasm-elf.sh build/aarch64/tcc-boot2/tcc-boot2 \ + | grep -A 2 "<address from last trace.log line>" +``` + +Cost per probe: ~6 instrs + the `libp1pp__trace` call. Free-form +manual `%trace(tag)` inserts work too — drop them into any +`build/$ARCH/.work/<src-path>/combined.M1pp` snapshot (path is also +recorded in the sidecar `<elf>.workdir` next to each binary), re-run +the m1pp/M0/hex2 stages, and bisect by stderr position. + ## Expected next-tier blockers (downstream of cc.scm) The semantic parser has covered every construct in this TU. The next diff --git a/docs/TCC.md b/docs/TCC.md @@ -45,7 +45,7 @@ tcc-0.9.26-1147-gee75a10c.tar.gz live-bootstrap source │ • apply 2 simple-patches │ • host cc -E -nostdinc with mes headers + tcc-mes defines ▼ -build/cc-bootstrap/X86_64/tcc.flat.c 608 KB single-file C +build/tcc/X86_64/tcc.flat.c 608 KB single-file C │ │ stage2-alpine.sh (alpine:latest) │ • apk add gcc musl-dev @@ -55,18 +55,18 @@ build/cc-bootstrap/X86_64/tcc.flat.c 608 KB single-file C │ • tcc-host -static compiles+links real tcc.c -> tcc-boot0-mes │ (mirrors live-bootstrap's tcc-boot0 invocation) ▼ -build/cc-bootstrap/X86_64/tcc-boot0-mes ~750 KB tcc-0.9.26 ELF +build/tcc/X86_64/tcc-boot0-mes ~750 KB tcc-0.9.26 ELF │ │ stage3-rebuild.sh (busybox:musl) │ • tcc-boot0-mes rebuilds libc, then compiles real tcc.c -> tcc-boot1 │ • tcc-boot1 rebuilds libc, then compiles real tcc.c -> tcc-boot2 ▼ -build/cc-bootstrap/X86_64/tcc-boot2 final tcc-0.9.26 +build/tcc/X86_64/tcc-boot2 final tcc-0.9.26 ``` Two containers, three scripts, one host-side step. Stage 1's `tcc.flat.c` is a portable artifact; stage 2's `tcc-boot0-mes` plus -mes libc bits cross into stage 3 via `build/cc-bootstrap/<arch>/stage3-input/`. +mes libc bits cross into stage 3 via `build/tcc/<arch>/stage3-input/`. 
## Stage 1 — flatten tcc.c into tcc.flat.c @@ -85,7 +85,7 @@ regardless of where stage 1 ran. ### Sub-steps -1. **Unpack** `tcc-0.9.26.tar.gz` into `build/cc-bootstrap/X86_64/`. +1. **Unpack** `tcc-0.9.26.tar.gz` into `build/tcc/X86_64/`. 2. **Apply simple-patches**: `remove-fileopen.before/.after` then `addback-fileopen.before/.after` against `tcctools.c`. Implemented as an `awk` literal-block replacer (live-bootstrap's `simple-patch` @@ -190,7 +190,7 @@ libc, where errno is the plain global mes expects. live-bootstrap-style direct invocation doesn't already validate. ~750 KB output. 7. **Stage out** mes libc + libtcc1 + crt1.o + headers into - `build/cc-bootstrap/X86_64/stage3-input/`, so stage 3 can mount + `build/tcc/X86_64/stage3-input/`, so stage 3 can mount them without re-running stage 2. 8. **Smoke test**: `tcc-boot0-mes -version`. **Expected to SEGV under QEMU on macOS arm64** (Issue §3); native x86_64 needed to verify @@ -281,7 +281,7 @@ scripts/stage2-alpine.sh --arch X86_64 scripts/stage3-rebuild.sh --arch X86_64 # blocked on Issue §3 today ``` -Artifacts in `build/cc-bootstrap/X86_64/`: +Artifacts in `build/tcc/X86_64/`: | File | Stage | Size | Built by | What it is | |-------------------|-------|---------|-----------------------|-------------------------------------------| diff --git a/scripts/boot-build-cc.sh b/scripts/boot-build-cc.sh @@ -9,6 +9,12 @@ ## Env: ARCH=aarch64|amd64|riscv64 ## CC_DEBUG=1 (optional) — pass --cc-debug to cc.scm so it prints ## per-phase heap usage on stderr. +## CC_TRACE_EMIT=1 (optional) — pass --cc-trace-emit so cc.scm +## wraps every emitted function with a `%trace(<mangled>)` +## call at entry. Pair with libp1pp's %trace macro and +## libp1pp__trace runtime helper (in P1/P1pp.P1pp) to +## produce a stderr line per function entry at runtime. +## See docs/TCC-TODO.md "Tracepoint" section. ## CC_LIB=PFX (optional) — compile in library mode (cc.scm ## --lib=PFX). 
Skips cc.scm's auto-emitted entry ## stub and trailing :ELF_end so the output catm's @@ -28,7 +34,7 @@ set -eu SRC=$1 OUT=$2 -SCHEME1_BIN=build/$ARCH/scheme1 +SCHEME1_BIN=build/$ARCH/scheme1/scheme1 CC_SRC=build/$ARCH/cc/cc.scm [ -x "$SCHEME1_BIN" ] || { echo "missing $SCHEME1_BIN" >&2; exit 1; } @@ -40,7 +46,8 @@ mkdir -p "$(dirname "$OUT")" # Build cc-flag list once. Order doesn't matter to cc-main but # stays stable for log readability. set -- -[ "${CC_DEBUG:-0}" = "1" ] && set -- "$@" --cc-debug -[ -n "${CC_LIB:-}" ] && set -- "$@" "--lib=$CC_LIB" +[ "${CC_DEBUG:-0}" = "1" ] && set -- "$@" --cc-debug +[ "${CC_TRACE_EMIT:-0}" = "1" ] && set -- "$@" --cc-trace-emit +[ -n "${CC_LIB:-}" ] && set -- "$@" "--lib=$CC_LIB" "$SCHEME1_BIN" "$CC_SRC" "$@" "$SRC" "$OUT" diff --git a/scripts/boot-build-p1.sh b/scripts/boot-build-p1.sh @@ -14,6 +14,13 @@ ## Stages through /tmp because the stage0 tools do one syscall per byte; ## virtiofs round-trips would dominate otherwise. ## +## Per-call intermediates (combined.M1, prog.hex2, linked.hex2) land at +## build/$ARCH/.work/<src-without-ext>/, mirroring the source path under +## the repo root (e.g. tests/P1/00-hello.P1 -> build/aarch64/.work/ +## tests/P1/00-hello/). A one-line sidecar at <out>.workdir records +## that path so tooling (scripts/disasm-elf.sh) can find the artifacts +## from the binary alone. +## ## Env: ARCH=aarch64|amd64|riscv64 ## Usage: boot-build-p1.sh <src> <out> @@ -28,7 +35,7 @@ OUT=$2 TABLE=P1/P1-$ARCH.M1 ELF_HDR=vendor/seed/$ARCH/ELF.hex2 TOOLS=build/$ARCH/tools -NAME=$(basename "$SRC" | sed 's/\.[^.]*$//') +NAME=${SRC%.*} WORK=build/$ARCH/.work/$NAME mkdir -p "$WORK" "$(dirname "$OUT")" @@ -44,3 +51,4 @@ cp /tmp/prog.hex2 "$WORK/prog.hex2" cp /tmp/linked.hex2 "$WORK/linked.hex2" cp /tmp/prog.bin "$OUT" chmod 0700 "$OUT" +printf '%s\n' "$WORK" > "$OUT.workdir" diff --git a/scripts/boot-build-p1pp.sh b/scripts/boot-build-p1pp.sh @@ -3,7 +3,7 @@ ## ## Pure transformation. 
Caller (the Makefile) ensures every fixed-path ## input below already exists, including the per-arch self-hosted m1pp -## ELF binary (build/$ARCH/m1pp, built by boot2.sh / boot-build-p1.sh). +## ELF binary (build/$ARCH/M1pp/M1pp, built by boot2.sh / boot-build-p1.sh). ## ## Pipeline: ## cat <P1-$ARCH.M1pp> <P1.M1pp> <P1pp.P1pp> <srcs...> -> /tmp/combined.M1pp @@ -21,13 +21,26 @@ ## ## Multiple <srcs> are concatenated in the order given. This is how ## libc-using executables compose: a typical chain is -## P1/entry-libc.P1pp build/$ARCH/libc.P1pp client.P1pp P1/elf-end.P1pp +## P1/entry-libc.P1pp build/$ARCH/vendor/mes-libc/libc.P1pp client.P1pp P1/elf-end.P1pp ## with libc.P1pp / client.P1pp produced by cc.scm --lib=PFX so they ## omit the entry stub and trailing :ELF_end (those come from the ## fixed fragments instead). For a single-TU exec, pass exactly one ## source built without --lib= and the fragments are unnecessary. ## +## Per-call intermediates land at build/$ARCH/.work/<work-subpath>/. +## <work-subpath> defaults to the first src's path with extension +## stripped — fine for single-source builds (scheme1, p1 tests). For +## catm chains where the first src is a wrapper (e.g. P1/entry-libc.P1pp +## or a generated build/.../*.P1pp), the caller MUST set WORK_SUBPATH +## explicitly so the work dir mirrors the logical primary source path +## (e.g. tests/cc-libc/00-exit). A one-line sidecar at <out>.workdir +## records the resolved work dir so tooling (scripts/disasm-elf.sh) can +## locate the artifacts from the binary alone. +## ## Env: ARCH=aarch64|amd64|riscv64 +## WORK_SUBPATH=<repo-relative-path-without-ext> — overrides the +## work-dir name; required when the first src isn't +## the logical primary source. ## P1PP_TRACE=1 — print a one-line marker (phase, in/out path, size) ## before each pipeline stage so the failing tool is ## obvious. Off by default to keep test runs quiet. 
@@ -53,8 +66,12 @@ FRONTEND=P1/P1.M1pp LIBP1PP=P1/P1pp.P1pp ELF_HDR=vendor/seed/$ARCH/ELF.hex2 TOOLS=build/$ARCH/tools -M1PP_BIN=build/$ARCH/m1pp -NAME=$(basename "$OUT") +M1PP_BIN=build/$ARCH/M1pp/M1pp +if [ -n "${WORK_SUBPATH:-}" ]; then + NAME=$WORK_SUBPATH +else + NAME=${1%.*} +fi WORK=build/$ARCH/.work/$NAME mkdir -p "$WORK" "$(dirname "$OUT")" @@ -77,3 +94,4 @@ cp /tmp/prog.hex2 "$WORK/prog.hex2" cp /tmp/linked.hex2 "$WORK/linked.hex2" cp /tmp/prog.bin "$OUT" chmod 0700 "$OUT" +printf '%s\n' "$WORK" > "$OUT.workdir" diff --git a/scripts/boot-run-scheme1.sh b/scripts/boot-run-scheme1.sh @@ -1,12 +1,12 @@ #!/bin/sh ## boot-run-scheme1.sh — in-container wrapper that runs the scheme1 ## binary on USER_SRC with scheme1/prelude.scm catm'd in front. Caller -## (Make / the test runner) ensures build/$ARCH/{scheme1,tools/catm} -## and scheme1/prelude.scm exist before invoking. +## (Make / the test runner) ensures build/$ARCH/scheme1/scheme1 and +## build/$ARCH/tools/catm and scheme1/prelude.scm exist before invoking. 
## ## Pipeline: ## catm /tmp/combined.scm scheme1/prelude.scm <user_src> -## build/$ARCH/scheme1 /tmp/combined.scm +## build/$ARCH/scheme1/scheme1 /tmp/combined.scm ## ## Env: ARCH=aarch64|amd64|riscv64 ## Usage: boot-run-scheme1.sh <user_src> @@ -18,7 +18,7 @@ set -eu USER_SRC=$1 TOOLS=build/$ARCH/tools -SCHEME1=build/$ARCH/scheme1 +SCHEME1=build/$ARCH/scheme1/scheme1 PRELUDE=scheme1/prelude.scm COMBINED=/tmp/scheme1-combined.scm diff --git a/scripts/boot-run-tests.sh b/scripts/boot-run-tests.sh @@ -40,6 +40,10 @@ case "$SUITE" in *) echo "$0: unknown suite '$SUITE'" >&2; exit 2 ;; esac +CC_EXTRA_FLAGS= +[ "${CC_TRACE_EMIT:-0}" = "1" ] && CC_EXTRA_FLAGS="$CC_EXTRA_FLAGS --cc-trace-emit" +[ "${CC_DEBUG:-0}" = "1" ] && CC_EXTRA_FLAGS="$CC_EXTRA_FLAGS --cc-debug" + discover() { dir=$1; ext=$2 ls "$dir" 2>/dev/null \ @@ -77,15 +81,16 @@ fail() { run_m1pp_suite() { if [ -z "$NAMES" ]; then - m1=$(discover tests/M1pp M1) - m1pp=$(discover tests/M1pp M1pp) - NAMES=$(printf '%s\n%s\n' "$m1" "$m1pp" | sort -u | tr '\n' ' ') + NAMES=$(discover tests/M1pp M1pp) fi for name in $NAMES; do expected=tests/M1pp/$name.expected - m1_src=tests/M1pp/$name.M1 m1pp_src=tests/M1pp/$name.M1pp + if [ ! -e "$m1pp_src" ]; then + echo " SKIP $name (no .M1pp)" + continue + fi if [ ! -e "$expected" ]; then echo " SKIP $name (no .expected)" continue @@ -93,29 +98,14 @@ run_m1pp_suite() { expected_content=$(cat "$expected") label="[$ARCH] $name" - if [ -e "$m1pp_src" ]; then - outfile=build/$ARCH/m1pp-out/$name - mkdir -p "$(dirname "$outfile")" - rm -f "$outfile" - "./build/$ARCH/m1pp" "$m1pp_src" "$outfile" >/dev/null 2>&1 || true - if [ -e "$outfile" ]; then - actual=$(cat "$outfile") - else - actual= - fi - elif [ -e "$m1_src" ]; then - bin=build/$ARCH/m1pp-tests/$name - log=$bin.build.log - mkdir -p "$(dirname "$bin")" - if ! 
sh scripts/boot-build-p1.sh "$m1_src" "$bin" \ - >"$log" 2>&1; then - fail "$label" "" "$log" - continue - fi - actual=$("./$bin" 2>&1 || true) + outfile=build/$ARCH/tests/M1pp/$name.M1 + mkdir -p "$(dirname "$outfile")" + rm -f "$outfile" + "./build/$ARCH/M1pp/M1pp" "$m1pp_src" "$outfile" >/dev/null 2>&1 || true + if [ -e "$outfile" ]; then + actual=$(cat "$outfile") else - echo " SKIP $name (no .M1 or .M1pp)" - continue + actual= fi if [ "$actual" = "$expected_content" ]; then @@ -131,22 +121,35 @@ run_m1pp_suite() { run_p1_suite() { if [ -z "$NAMES" ]; then - NAMES=$(discover tests/P1 P1pp) + raw=$(discover tests/P1 P1) + pp=$(discover tests/P1 P1pp) + NAMES=$(printf '%s\n%s\n' "$raw" "$pp" | sort -u | tr '\n' ' ') fi for name in $NAMES; do - fixture=tests/P1/$name.P1pp + raw_src=tests/P1/$name.P1 + pp_src=tests/P1/$name.P1pp expected=tests/P1/$name.expected - if [ ! -e "$fixture" ]; then echo " SKIP $name (no .P1pp)"; continue; fi if [ ! -e "$expected" ]; then echo " SKIP $name (no .expected)"; continue; fi expected_content=$(cat "$expected") label="[$ARCH] $name" - bin=build/$ARCH/p1-tests/$name - log=$bin.build.log - mkdir -p "$(dirname "$bin")" - if ! sh scripts/boot-build-p1pp.sh "$bin" "$fixture" \ - >"$log" 2>&1; then - fail "$label" "" "$log" + bin=build/$ARCH/tests/P1/$name + log=build/$ARCH/.work/tests/P1/$name/build.log + mkdir -p "$(dirname "$bin")" "$(dirname "$log")" + if [ -e "$pp_src" ]; then + if ! sh scripts/boot-build-p1pp.sh "$bin" "$pp_src" \ + >"$log" 2>&1; then + fail "$label" "" "$log" + continue + fi + elif [ -e "$raw_src" ]; then + if ! sh scripts/boot-build-p1.sh "$raw_src" "$bin" \ + >"$log" 2>&1; then + fail "$label" "" "$log" + continue + fi + else + echo " SKIP $name (no .P1 or .P1pp)" continue fi actual=$("./$bin" 2>&1 || true) @@ -183,7 +186,7 @@ run_scheme1_suite() { fi label="[$ARCH] $name" - bin=build/$ARCH/scheme1 + bin=build/$ARCH/scheme1/scheme1 if [ ! 
-x "$bin" ]; then report "$label" FAIL echo " (missing $bin -- run 'make scheme1 ARCH=$ARCH')" >&2 @@ -251,7 +254,7 @@ _cc_unit_suite() { # scheme1 still runs (matches host runner's prior behavior). if sh -c " build/$ARCH/tools/catm /tmp/cc-test.scm $layers $fixture - exec build/$ARCH/scheme1 /tmp/cc-test.scm + exec build/$ARCH/scheme1/scheme1 /tmp/cc-test.scm " >"$tmp" 2>&1; then act_exit=0 else @@ -286,7 +289,7 @@ _cc_pipeline_suite() { tmp=$(mktemp) if sh -c " build/$ARCH/tools/catm /tmp/cc-test.scm $layers - exec build/$ARCH/scheme1 /tmp/cc-test.scm $fixture + exec build/$ARCH/scheme1/scheme1 /tmp/cc-test.scm $fixture " >"$tmp" 2>&1; then act_exit=0 else @@ -339,31 +342,32 @@ _cc_runtime_suite() { expexit=0 fi - outdir=build/$ARCH/$suite/$name - p1pp=$outdir/$name.P1pp - elf=$outdir/$name - mkdir -p "$outdir" + elf=build/$ARCH/tests/$suite/$name + workdir=build/$ARCH/.work/tests/$suite/$name + p1pp=$workdir/$name.P1pp + mkdir -p "$(dirname "$elf")" "$workdir" if [ "$arg_pass" = "1" ]; then cmd=" build/$ARCH/tools/catm /tmp/cc-test.scm $layers - exec build/$ARCH/scheme1 /tmp/cc-test.scm $fixture + exec build/$ARCH/scheme1/scheme1 /tmp/cc-test.scm $fixture " else cmd=" build/$ARCH/tools/catm /tmp/cc-test.scm $layers $fixture - exec build/$ARCH/scheme1 /tmp/cc-test.scm + exec build/$ARCH/scheme1/scheme1 /tmp/cc-test.scm " fi label="[$ARCH] $suite/$name" - cg_log=$outdir/cg.log + cg_log=$workdir/cg.log if ! sh -c "$cmd" >"$p1pp" 2>"$cg_log"; then fail "$label" "cg emission failed:" "$cg_log" continue fi - p1pp_log=$outdir/p1pp.log - if ! sh scripts/boot-build-p1pp.sh "$elf" "$p1pp" \ + p1pp_log=$workdir/p1pp.log + if ! 
WORK_SUBPATH=tests/$suite/$name \ + sh scripts/boot-build-p1pp.sh "$elf" "$p1pp" \ >"$p1pp_log" 2>&1; then fail "$label" "P1pp assemble failed:" "$p1pp_log" continue @@ -400,21 +404,23 @@ run_cc_suite() { else expexit=0 fi - outdir=build/$ARCH/cc/$name - p1pp=$outdir/$name.P1pp - elf=$outdir/$name + elf=build/$ARCH/tests/cc/$name + workdir=build/$ARCH/.work/tests/cc/$name + p1pp=$workdir/$name.P1pp label="[$ARCH] cc/$name" - mkdir -p "$outdir" + mkdir -p "$(dirname "$elf")" "$workdir" - cc_log=$outdir/cc.log - if ! build/$ARCH/scheme1 build/$ARCH/cc/cc.scm "$src" "$p1pp" \ - >"$cc_log" 2>&1; then + cc_log=$workdir/cc.log + # shellcheck disable=SC2086 # CC_EXTRA_FLAGS is intentionally word-split. + if ! build/$ARCH/scheme1/scheme1 build/$ARCH/cc/cc.scm $CC_EXTRA_FLAGS \ + "$src" "$p1pp" >"$cc_log" 2>&1; then fail "$label" "cc compile failed:" "$cc_log" continue fi - p1pp_log=$outdir/p1pp.log - if ! sh scripts/boot-build-p1pp.sh "$elf" "$p1pp" \ + p1pp_log=$workdir/p1pp.log + if ! WORK_SUBPATH=tests/cc/$name \ + sh scripts/boot-build-p1pp.sh "$elf" "$p1pp" \ >"$p1pp_log" 2>&1; then fail "$label" "P1pp assemble failed:" "$p1pp_log" continue @@ -452,18 +458,19 @@ run_cc_libc_suite() { else expexit=0 fi - outdir=build/$ARCH/cc-libc/$name - client_p1pp=$outdir/$name.client.P1pp - elf=$outdir/$name + elf=build/$ARCH/tests/cc-libc/$name + workdir=build/$ARCH/.work/tests/cc-libc/$name + client_p1pp=$workdir/$name.client.P1pp label="[$ARCH] cc-libc/$name" - mkdir -p "$outdir" + mkdir -p "$(dirname "$elf")" "$workdir" # Compile the client TU in lib mode so it doesn't emit its # own :p1_main / :ELF_end and namespaces its anonymous string # labels under app__cc__str_N — distinct from libc.P1pp's # libc__cc__str_N. - cc_log=$outdir/cc.log - if ! build/$ARCH/scheme1 build/$ARCH/cc/cc.scm \ + cc_log=$workdir/cc.log + # shellcheck disable=SC2086 # CC_EXTRA_FLAGS is intentionally word-split. + if ! 
build/$ARCH/scheme1/scheme1 build/$ARCH/cc/cc.scm $CC_EXTRA_FLAGS \ --lib=app__ "$src" "$client_p1pp" \ >"$cc_log" 2>&1; then fail "$label" "cc compile failed:" "$cc_log" @@ -473,9 +480,10 @@ run_cc_libc_suite() { # catm chain: entry-libc supplies :p1_main (calls __libc_init # then main), libc.P1pp supplies the libc routines, the client # supplies :main, elf-end supplies the :ELF_end terminator. - p1pp_log=$outdir/p1pp.log - if ! sh scripts/boot-build-p1pp.sh "$elf" \ - P1/entry-libc.P1pp build/$ARCH/libc.P1pp \ + p1pp_log=$workdir/p1pp.log + if ! WORK_SUBPATH=tests/cc-libc/$name \ + sh scripts/boot-build-p1pp.sh "$elf" \ + P1/entry-libc.P1pp build/$ARCH/vendor/mes-libc/libc.P1pp \ "$client_p1pp" P1/elf-end.P1pp \ >"$p1pp_log" 2>&1; then fail "$label" "P1pp assemble failed:" "$p1pp_log" diff --git a/scripts/boot-undef.sh b/scripts/boot-undef.sh @@ -34,7 +34,7 @@ while [ $# -gt 0 ]; do done ROOT=$(cd "$(dirname "$0")/.." && pwd) -: "${LINKED:=$ROOT/build/$ARCH/.work/tcc.flat/linked.hex2}" +: "${LINKED:=$ROOT/build/$ARCH/.work/tcc-boot2/tcc-boot2/linked.hex2}" [ -r "$LINKED" ] || { echo "missing $LINKED" >&2 diff --git a/scripts/boot2.sh b/scripts/boot2.sh @@ -5,7 +5,7 @@ ## checked-in pre-pruned P1 backend table (P1/P1-$ARCH.M1) plus their ## sources, by calling scripts/boot-build-p1.sh. ## -## Outputs: build/$ARCH/m1pp, build/$ARCH/pokem +## Outputs: build/$ARCH/M1pp/M1pp, build/$ARCH/pokem/pokem ## ## Env: ARCH=aarch64|amd64|riscv64 @@ -18,5 +18,5 @@ case "$ARCH" in *) echo "boot2.sh: unsupported arch '$ARCH'" >&2; exit 1 ;; esac -sh scripts/boot-build-p1.sh M1pp/M1pp.P1 build/$ARCH/m1pp -sh scripts/boot-build-p1.sh pokem/pokem.P1 build/$ARCH/pokem +sh scripts/boot-build-p1.sh M1pp/M1pp.P1 build/$ARCH/M1pp/M1pp +sh scripts/boot-build-p1.sh pokem/pokem.P1 build/$ARCH/pokem/pokem diff --git a/scripts/disasm-elf.sh b/scripts/disasm-elf.sh @@ -17,6 +17,16 @@ ## instructions. Pass an explicit --start-address (e.g. 0x600000) to ## override and see the header bytes. 
## +## boot-build-p1*.sh writes a one-line sidecar at <elf>.workdir pointing +## at build/$ARCH/.work/<src-without-ext>/, where prog.hex2 lives. When +## that sidecar is present we extract a label map via scripts/m1-symbols.py +## and: +## - default --stop-address to :_text_end if that sentinel label is +## present, so trailing rodata doesn't decode as bogus instructions +## - inject "<label>:" headers and rewrite "<PT_LOAD#0+0xNNN>" xrefs +## in the disasm output +## Pass NO_LABELS=1 to disable both behaviors. +## ## Usage: disasm-elf.sh <elf> [llvm-objdump args...] ## defaults to `-d` (text only). For data + text, pass `-D`. @@ -30,9 +40,11 @@ ELF=$1; shift OBJDUMP=${LLVM_OBJDUMP:-llvm-objdump} TRIPLE=${TRIPLE:-aarch64-linux-gnu} -# Extract ph_filesz from the first program header (only one in our -# layout; e_phoff = 0x40, ph_filesz at offset 0x20 inside it = 0x60, -# ph_memsz at 0x28 = 0x68). Both little-endian 8-byte. +# ELF fields we read (little-endian 8-byte): +# e_entry at file offset 0x18 +# ph_filesz at file offset 0x60 (e_phoff 0x40 + 0x20) +# ph_memsz at file offset 0x68 (e_phoff 0x40 + 0x28) +# Single-program-header layout, per our seed ELF. read_le8() { od -An -tu8 -N8 -j"$2" "$1" | tr -d ' \n' } @@ -63,13 +75,54 @@ fi # Auto-skip the ELF header + program header by defaulting # --start-address to e_entry, unless the user supplied their own. have_start=0 +have_stop=0 for arg in "$@"; do case "$arg" in - --start-address=*|--start-address) have_start=1; break;; + --start-address=*|--start-address) have_start=1;; + --stop-address=*|--stop-address) have_stop=1;; esac done if [ "$have_start" -eq 0 ]; then set -- "--start-address=0x$(printf '%x' "$ENTRY")" "$@" fi -exec "$OBJDUMP" --triple="$TRIPLE" "$@" "$TMP" +# Locate prog.hex2 via the <elf>.workdir sidecar produced by +# boot-build-p1*.sh. 
The sidecar holds a repo-relative path +# (build/$ARCH/.work/<src>/), so resolve it against the repo root +# inferred from this script's location — works regardless of cwd. +HERE=$(dirname "$0") +REPO_ROOT=$(cd "$HERE/.." && pwd) +HEX2="" +if [ -e "$ELF.workdir" ]; then + workdir=$(cat "$ELF.workdir") + case "$workdir" in + /*) ;; # absolute, leave alone + *) workdir="$REPO_ROOT/$workdir" ;; + esac + if [ -e "$workdir/prog.hex2" ]; then + HEX2="$workdir/prog.hex2" + else + echo "disasm-elf: $ELF.workdir -> $workdir, but no prog.hex2 there" >&2 + fi +elif [ "${NO_LABELS:-0}" != "1" ]; then + echo "disasm-elf: no $ELF.workdir sidecar; rebuild for label annotation" >&2 +fi +MAP="" +if [ "${NO_LABELS:-0}" != "1" ] && [ -n "$HEX2" ]; then + MAP=$(mktemp -t disasm-elf-map.XXXXXX) + trap 'rm -f "$TMP" "$MAP"' EXIT + "$HERE/m1-symbols.py" map "$HEX2" > "$MAP" + # Default --stop-address to :_text_end if no user value and the + # sentinel exists in the map. + if [ "$have_stop" -eq 0 ]; then + text_end=$(awk '$2 == "_text_end" {print $1; exit}' "$MAP") + [ -n "$text_end" ] && set -- "--stop-address=$text_end" "$@" + fi +fi + +if [ -n "$MAP" ]; then + "$OBJDUMP" --triple="$TRIPLE" "$@" "$TMP" \ + | "$HERE/m1-symbols.py" annotate "$MAP" +else + exec "$OBJDUMP" --triple="$TRIPLE" "$@" "$TMP" +fi diff --git a/scripts/libc-flatten.sh b/scripts/libc-flatten.sh @@ -6,10 +6,10 @@ ## runs inside the minimal container). ## ## Steps: -## 1. stage vendor/mes-libc → build/cc-bootstrap/<arch>/libc-stage/ +## 1. stage vendor/mes-libc → build/<arch>/vendor/mes-libc/libc-stage/ ## 2. apply simple-patches (literal-block replacement, idempotent) ## 3. HOST_CC -E -nostdinc -I staging/include … staging/unified-libc.c -## → build/cc-bootstrap/<arch>/libc.flat.c +## → build/<arch>/vendor/mes-libc/libc.flat.c ## ## Stage 4 (cc.scm libc.flat.c → libc.P1pp) is a separate Makefile rule ## that reuses scripts/boot-build-cc.sh inside the per-arch container. 
@@ -42,7 +42,7 @@ esac ROOT=$(cd "$(dirname "$0")/.." && pwd) VENDOR=$ROOT/vendor/mes-libc -WORK=$ROOT/build/cc-bootstrap/$ARCH +WORK=$ROOT/build/$ARCH/vendor/mes-libc STAGE=$WORK/libc-stage FLAT=$WORK/libc.flat.c diff --git a/scripts/run-tests.sh b/scripts/run-tests.sh @@ -8,17 +8,17 @@ ## container per fixture (and per build/run within a fixture); now ## a whole arch's suite is one podman invocation. ## -## The one bit of work that stays on the host is the m1pp lint -## preflight: scripts/lint.sh runs python, which the busybox -## container doesn't carry. Names that fail lint are reported here -## (FAIL + diagnostic) and excluded from the in-container batch. +## The one bit of work that stays on the host is the lint preflight +## for the m1pp and p1 suites: scripts/lint.sh runs python, which the +## busybox container doesn't carry. Names that fail lint are reported +## here (FAIL + diagnostic) and excluded from the in-container batch. ## ## Suites: -## m1pp tests/M1pp/<name>.M1 — built via boot-build-p1.sh -## and run; stdout diffed. -## tests/M1pp/<name>.M1pp — m1pp expander parity test. +## m1pp tests/M1pp/<name>.M1pp — m1pp expander parity test. ## p1 tests/P1/<name>.P1pp — built via boot-build-p1pp.sh ## and run; stdout diffed. +## tests/P1/<name>.P1 — raw P1, built via +## boot-build-p1.sh (no expander). ## scheme1 tests/scheme1/<name>.scm — run by per-arch scheme1. ## cc-util tests/cc-util/<name>.scm — scheme1 prelude+util byte-diff. ## cc-lex tests/cc-lex/<name>.c — lex pipeline byte-diff. @@ -72,6 +72,9 @@ run_in_container() { podman run --rm --pull=never --platform "$(platform_of "$arch")" \ --tmpfs /tmp:size=512M \ -e "ARCH=$arch" \ + -e "CC_TRACE_EMIT=${CC_TRACE_EMIT:-0}" \ + -e "CC_DEBUG=${CC_DEBUG:-0}" \ + -e "P1PP_TRACE=${P1PP_TRACE:-0}" \ -v "$REPO":/work -w /work \ "boot2-busybox:$arch" "$@" } @@ -85,29 +88,34 @@ fi PASS=0 FAIL=0 -# m1pp lint preflight: lint.sh uses python (host-only). 
Discover the -# fixture set if --names was empty, lint each .M1 fixture, emit a -# host-side FAIL + diagnostic for any miss, write the kept name list -# to $keep_file. FAIL line goes to stdout to interleave with the +# Lint preflight: lint.sh uses python (host-only). Discover the +# fixture set if --names was empty, lint each raw fixture (.M1pp / .P1), +# emit a host-side FAIL + diagnostic for any miss, write the kept name +# list to $keep_file. FAIL line goes to stdout to interleave with the # container's PASS/FAIL output; the FAIL counter updates in-scope. -m1pp_preflight() { - arch=$1; keep_file=$2 +# +# Suite layout: +# m1pp: tests/M1pp/<name>.M1pp (no raw .M1 fixtures any more) +# p1: tests/P1/<name>.P1pp (lint skipped — expander output) +# tests/P1/<name>.P1 (lint runs) +lint_preflight() { + arch=$1; keep_file=$2; dir=$3; raw_ext=$4; pp_ext=$5 : > "$keep_file" if [ -z "$NAMES" ]; then - m1=$(ls tests/M1pp 2>/dev/null \ - | sed -n 's/^\([^_][^.]*\)\.M1$/\1/p') - m1pp_n=$(ls tests/M1pp 2>/dev/null \ - | sed -n 's/^\([^_][^.]*\)\.M1pp$/\1/p') - all=$(printf '%s\n%s\n' "$m1" "$m1pp_n" | sort -u | tr '\n' ' ') + raw=$(ls "$dir" 2>/dev/null \ + | sed -n "s/^\([^_][^.]*\)\.${raw_ext}\$/\1/p") + pp=$(ls "$dir" 2>/dev/null \ + | sed -n "s/^\([^_][^.]*\)\.${pp_ext}\$/\1/p") + all=$(printf '%s\n%s\n' "$raw" "$pp" | sort -u | tr '\n' ' ') else all=$NAMES fi for name in $all; do - m1_src=tests/M1pp/$name.M1 - if [ -e "$m1_src" ] \ - && ! ARCH=$arch sh scripts/lint.sh "$m1_src" >/dev/null 2>&1; then + raw_src=$dir/$name.$raw_ext + if [ -e "$raw_src" ] \ + && ! 
ARCH=$arch sh scripts/lint.sh "$raw_src" >/dev/null 2>&1; then echo " FAIL [$arch] $name" - ARCH=$arch sh scripts/lint.sh "$m1_src" 2>&1 \ + ARCH=$arch sh scripts/lint.sh "$raw_src" 2>&1 \ | sed 's/^/ /' >&2 || true FAIL=$((FAIL + 1)) else @@ -117,9 +125,15 @@ m1pp_preflight() { } for arch in $ARCHES; do - if [ "$SUITE" = "m1pp" ]; then + case "$SUITE" in + m1pp) preflight_args="tests/M1pp M1 M1pp" ;; + p1) preflight_args="tests/P1 P1 P1pp" ;; + *) preflight_args= ;; + esac + if [ -n "$preflight_args" ]; then keep_file=$(mktemp) - m1pp_preflight "$arch" "$keep_file" + # shellcheck disable=SC2086 # $preflight_args is intentionally word-split. + lint_preflight "$arch" "$keep_file" $preflight_args names=$(cat "$keep_file") rm -f "$keep_file" # Skip the container call only when the user gave names AND diff --git a/scripts/stage1-flatten.sh b/scripts/stage1-flatten.sh @@ -9,7 +9,7 @@ ## 1. unpack tcc-0.9.26-1147-gee75a10c.tar.gz ## 2. apply live-bootstrap simple-patches (tcctools.c file-open reorder) ## 3. host cc -E -nostdinc with mes-bundled headers + tcc-mes defines -## 4. emit build/cc-bootstrap/<arch>/tcc.flat.c +## 4. emit build/tcc/<arch>/tcc.flat.c ## 5. (--verify) compile tcc.flat.c with host cc to confirm well-formedness ## ## Stage 1 deliberately stays on the host: it is just text manipulation @@ -43,7 +43,7 @@ esac # --- paths ------------------------------------------------------------ ROOT=$(cd "$(dirname "$0")/.." && pwd) -WORK=$ROOT/build/cc-bootstrap/$ARCH +WORK=$ROOT/build/tcc/$ARCH DISTFILES=$ROOT/../lb-work/distfiles LB_PATCHES=$ROOT/../live-bootstrap/steps/tcc-0.9.26/simple-patches OUR_PATCHES=$ROOT/scripts/simple-patches/tcc-0.9.26 diff --git a/scripts/stage2-alpine.sh b/scripts/stage2-alpine.sh @@ -14,7 +14,7 @@ ## definition and links cleanly. ## ## Pre-condition: -## build/cc-bootstrap/<arch>/tcc.flat.c (run scripts/stage1-flatten.sh) +## build/tcc/<arch>/tcc.flat.c (run scripts/stage1-flatten.sh) ## ## Inside alpine:latest (linux/amd64): ## 1. 
apk add gcc musl-dev @@ -35,7 +35,7 @@ ## Expected to segfault under QEMU x86_64 emulation on macOS arm64 ## (Issue §3); native x86_64 needed to verify cleanly. ## -## Output: build/cc-bootstrap/<arch>/tcc-boot0-mes (static, mes-libc-linked). +## Output: build/tcc/<arch>/tcc-boot0-mes (static, mes-libc-linked). ## This artifact is what stage 3 (busybox) consumes to drive the ## tcc-boot1 / tcc-boot2 chain. ## @@ -60,7 +60,7 @@ fi MES_ARCH=x86_64 ROOT=$(cd "$(dirname "$0")/.." && pwd) -WORK=$ROOT/build/cc-bootstrap/$ARCH +WORK=$ROOT/build/tcc/$ARCH DISTFILES=$ROOT/../lb-work/distfiles MES_TAR=$DISTFILES/mes-0.27.1.tar.gz MES_PKG=mes-0.27.1 @@ -94,7 +94,7 @@ ARCH=$1 MES_ARCH=$2 MES_PKG=$3 TCC_PKG=$4 -WORK=/work/build/cc-bootstrap/$ARCH +WORK=/work/build/tcc/$ARCH # --- (1) install gcc + musl-dev (provides libc.a for -static) -------- apk add --no-cache gcc musl-dev >/dev/null diff --git a/scripts/stage3-rebuild.sh b/scripts/stage3-rebuild.sh @@ -14,10 +14,10 @@ ## flatten set). ## ## Pre-condition: -## build/cc-bootstrap/<arch>/tcc-boot0-mes -## build/cc-bootstrap/<arch>/stage3-input/ (staged by stage 2) -## build/cc-bootstrap/<arch>/tcc-0.9.26-1147-gee75a10c/ (patched, from stage 1) -## build/cc-bootstrap/<arch>/mes-0.27.1/ (from stage 2) +## build/tcc/<arch>/tcc-boot0-mes +## build/tcc/<arch>/stage3-input/ (staged by stage 2) +## build/tcc/<arch>/tcc-0.9.26-1147-gee75a10c/ (patched, from stage 1) +## build/tcc/<arch>/mes-0.27.1/ (from stage 2) ## ## Container: docker.io/library/busybox:musl on linux/amd64. ## Tools used inside: busybox sh + tcc-boot0-mes (which provides its own @@ -51,7 +51,7 @@ fi MES_ARCH=x86_64 ROOT=$(cd "$(dirname "$0")/.." 
&& pwd) -WORK=$ROOT/build/cc-bootstrap/$ARCH +WORK=$ROOT/build/tcc/$ARCH TCC_PKG=tcc-0.9.26-1147-gee75a10c MES_PKG=mes-0.27.1 @@ -72,7 +72,7 @@ ARCH=$1 MES_ARCH=$2 TCC_PKG=$3 MES_PKG=$4 -WORK=/work/build/cc-bootstrap/$ARCH +WORK=/work/build/tcc/$ARCH # --- install tcc-boot0-mes + mes libc bits at baked-in paths -------- mkdir -p /lib/tcc /include/mes /bin diff --git a/tests/M1pp/00-hello.M1 b/tests/P1/00-hello.P1 diff --git a/tests/M1pp/00-hello.expected b/tests/P1/00-hello.expected