kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit d37d6b3f8c61a24a3b2f259c22817b69525bf27d
parent c77e51615803de12a2838a0c3b72b9f850e4af86
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sun, 24 May 2026 10:47:00 -0700

Integrate self-hosted bootstrap

Build bootstrap stages through the normal Makefile with cfree busybox-style tool symlinks, and remove the old stage2 link script.

Stage libcfree archive inputs with unique member names so cfree ar does not replace same-basename objects, and keep rt outputs under BUILD_DIR.

Tighten SSA slot promotion checks so mismatched load/store operand types do not get promoted.

Diffstat:
M Makefile | 108 ++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------------
M rt/Makefile | 2 +-
D scripts/stage2_link.sh | 205 -------------------------------------------------------------------------------
M src/opt/pass_ssa.c | 8 ++++++--
4 files changed, 75 insertions(+), 248 deletions(-)

diff --git a/Makefile b/Makefile @@ -1,6 +1,9 @@ CC = clang +AR = ar +LD = ld +BUILD_DIR ?= build SYSROOT = $(shell xcrun --show-sdk-path) -HOST_OPTFLAGS ?= -O2 +HOST_OPTFLAGS ?= -O1 # -isysroot lives in its own var so stage/bootstrap recipes can override # host SDK handling when cfree is used as the compiler. @@ -61,35 +64,36 @@ LANG_TOY_SRCS = $(wildcard lang/toy/*.c) LANG_OBJS = ifeq ($(CFREE_LANG_CPP_ENABLED),1) -LANG_OBJS += $(patsubst lang/cpp/%.c,build/lang/cpp/%.o,$(LANG_CPP_SRCS)) +LANG_OBJS += $(patsubst lang/cpp/%.c,$(BUILD_DIR)/lang/cpp/%.o,$(LANG_CPP_SRCS)) endif ifeq ($(CFREE_LANG_C_ENABLED),1) -LANG_OBJS += $(patsubst lang/c/%.c,build/lang/c/%.o,$(LANG_C_SRCS)) +LANG_OBJS += $(patsubst lang/c/%.c,$(BUILD_DIR)/lang/c/%.o,$(LANG_C_SRCS)) endif ifeq ($(CFREE_LANG_WASM_ENABLED),1) -LANG_OBJS += $(patsubst lang/wasm/%.c,build/lang/wasm/%.o,$(LANG_WASM_SRCS)) +LANG_OBJS += $(patsubst lang/wasm/%.c,$(BUILD_DIR)/lang/wasm/%.o,$(LANG_WASM_SRCS)) endif ifeq ($(CFREE_LANG_TOY_ENABLED),1) -LANG_OBJS += $(patsubst lang/toy/%.c,build/lang/toy/%.o,$(LANG_TOY_SRCS)) +LANG_OBJS += $(patsubst lang/toy/%.c,$(BUILD_DIR)/lang/toy/%.o,$(LANG_TOY_SRCS)) endif LIB_ASMS = $(shell find src -name '*.S') -LIB_OBJS = $(patsubst src/%.c,build/lib/%.o,$(LIB_SRCS)) \ +LIB_OBJS = $(patsubst src/%.c,$(BUILD_DIR)/lib/%.o,$(LIB_SRCS)) \ $(LANG_OBJS) \ - $(patsubst src/%.S,build/lib/%.o,$(LIB_ASMS)) + $(patsubst src/%.S,$(BUILD_DIR)/lib/%.o,$(LIB_ASMS)) LIB_DEPS = $(LIB_OBJS:.o=.d) +LIB_AR_STAGING = $(BUILD_DIR)/ar/libcfree DRIVER_SRCS = $(wildcard driver/*.c) ifneq ($(CFREE_LANG_CPP_ENABLED),1) DRIVER_SRCS := $(filter-out driver/cpp.c,$(DRIVER_SRCS)) endif -DRIVER_OBJS = $(patsubst driver/%.c,build/driver/%.o,$(DRIVER_SRCS)) +DRIVER_OBJS = $(patsubst driver/%.c,$(BUILD_DIR)/driver/%.o,$(DRIVER_SRCS)) DRIVER_DEPS = $(DRIVER_OBJS:.o=.d) -LIB_AR = build/libcfree.a -BIN = build/cfree +LIB_AR = $(BUILD_DIR)/libcfree.a +BIN = $(BUILD_DIR)/cfree -.PHONY: all lib bin format clean bootstrap 
bench-opt +.PHONY: all lib bin format clean bootstrap bootstrap-test-toy bench-opt all: lib bin @@ -102,74 +106,98 @@ bin: $(BIN) $(LIB_AR): $(LIB_OBJS) @mkdir -p $(dir $@) @rm -f $@ - ar rcs $@ $(LIB_OBJS) + @rm -rf $(LIB_AR_STAGING) + @mkdir -p $(LIB_AR_STAGING) + @set -e; \ + for obj in $(LIB_OBJS); do \ + rel=$${obj#$(BUILD_DIR)/}; \ + name=$$(printf '%s' "$$rel" | tr '/' '_'); \ + case "$$obj" in \ + /*) target="$$obj" ;; \ + *) target="$$PWD/$$obj" ;; \ + esac; \ + ln -sf "$$target" "$(LIB_AR_STAGING)/$$name"; \ + done + $(AR) rcs $@ $(LIB_AR_STAGING)/*.o $(BIN): $(DRIVER_OBJS) $(LIB_AR) $(CC) $(HOST_SYSROOT_LDFLAGS) -o $@ $(DRIVER_OBJS) $(LIB_AR) -build/lib/%.o: src/%.c Makefile +$(BUILD_DIR)/lib/%.o: src/%.c Makefile @mkdir -p $(dir $@) $(CC) $(LIB_CFLAGS) $(DEPFLAGS) -c $< -o $@ # lang_registry.c is the one libcfree source that crosses into lang/*; it # uses -Ilang so the frontend headers can be reached as "c/c.h" etc. -build/lib/api/lang_registry.o: src/api/lang_registry.c Makefile +$(BUILD_DIR)/lib/api/lang_registry.o: src/api/lang_registry.c Makefile @mkdir -p $(dir $@) $(CC) $(LIB_CFLAGS) -Ilang $(DEPFLAGS) -c $< -o $@ -build/lang/cpp/%.o: lang/cpp/%.c Makefile +$(BUILD_DIR)/lang/cpp/%.o: lang/cpp/%.c Makefile @mkdir -p $(dir $@) $(CC) $(CFLAGS_COMMON) -ffreestanding -Iinclude -Ilang/cpp $(DEPFLAGS) -c $< -o $@ # The C frontend includes the lexer and preprocessor headers (pp/pp.h, # lex/lex.h) which now live under lang/cpp/, and cpp_support.h is the # shared substrate. So lang/c objects build with -Ilang/cpp -Ilang/c. 
-build/lang/c/%.o: lang/c/%.c Makefile +$(BUILD_DIR)/lang/c/%.o: lang/c/%.c Makefile @mkdir -p $(dir $@) $(CC) $(CFLAGS_COMMON) -ffreestanding -Iinclude -Ilang/cpp -Ilang/c $(DEPFLAGS) -c $< -o $@ -build/lang/wasm/%.o: lang/wasm/%.c Makefile +$(BUILD_DIR)/lang/wasm/%.o: lang/wasm/%.c Makefile @mkdir -p $(dir $@) $(CC) $(CFLAGS_COMMON) -ffreestanding -Iinclude -Ilang/wasm $(DEPFLAGS) -c $< -o $@ -build/lib/%.o: src/%.S Makefile +$(BUILD_DIR)/lib/%.o: src/%.S Makefile @mkdir -p $(dir $@) $(CC) $(LIB_CFLAGS) $(DEPFLAGS) -c $< -o $@ -build/driver/%.o: driver/%.c Makefile +$(BUILD_DIR)/driver/%.o: driver/%.c Makefile @mkdir -p $(dir $@) $(CC) $(DRIVER_CFLAGS) $(DEPFLAGS) -c $< -o $@ -build/lang/toy/%.o: lang/toy/%.c Makefile +$(BUILD_DIR)/lang/toy/%.o: lang/toy/%.c Makefile @mkdir -p $(dir $@) $(CC) $(CFLAGS_COMMON) -ffreestanding -Iinclude -Ilang/toy $(DEPFLAGS) -c $< -o $@ include rt/Makefile -# Bootstrap: build cfree with the host compiler as stage 1, rebuild it with -# stage 1 as stage 2, rebuild it again with stage 2 as stage 3, then require -# stages 2 and 3 to be bitwise identical. -STAGE1_BIN = build/cfree-stage1 -BOOTSTRAP_STAGE2_OUT = build/stage2-probe -BOOTSTRAP_STAGE3_OUT = build/stage3-probe -BOOTSTRAP_STAGE2_BIN = build/cfree-stage2 -BOOTSTRAP_STAGE3_BIN = build/cfree-stage3 +# Bootstrap: build cfree with the host compiler as stage 1, then rebuild it +# twice through the normal Makefile using cfree's busybox-style cc/ar/ld +# symlinks. Stages 2 and 3 must be bitwise identical. 
+BOOTSTRAP_DIR = $(BUILD_DIR)/bootstrap +BOOTSTRAP_STAGE1_DIR = $(BOOTSTRAP_DIR)/stage1 +BOOTSTRAP_STAGE2_DIR = $(BOOTSTRAP_DIR)/stage2 +BOOTSTRAP_STAGE3_DIR = $(BOOTSTRAP_DIR)/stage3 +BOOTSTRAP_STAGE1_BIN = $(BOOTSTRAP_STAGE1_DIR)/cfree +BOOTSTRAP_STAGE2_BIN = $(BOOTSTRAP_STAGE2_DIR)/cfree +BOOTSTRAP_STAGE3_BIN = $(BOOTSTRAP_STAGE3_DIR)/cfree +BOOTSTRAP_TOOLS = cc ld ar ranlib as bootstrap: $(BIN) rt - cp $(BIN) $(STAGE1_BIN) - rm -rf $(BOOTSTRAP_STAGE2_OUT) $(BOOTSTRAP_STAGE3_OUT) \ - $(BOOTSTRAP_STAGE2_BIN) $(BOOTSTRAP_STAGE3_BIN) - CFREE_STAGE_BIN='$(abspath $(STAGE1_BIN))' \ - CFREE_STAGE_OUT='$(abspath $(BOOTSTRAP_STAGE2_OUT))' \ - CFREE_STAGE_OUTPUT='$(abspath $(BOOTSTRAP_STAGE2_BIN))' \ - scripts/stage2_link.sh - CFREE_STAGE_BIN='$(abspath $(BOOTSTRAP_STAGE2_BIN))' \ - CFREE_STAGE_OUT='$(abspath $(BOOTSTRAP_STAGE3_OUT))' \ - CFREE_STAGE_OUTPUT='$(abspath $(BOOTSTRAP_STAGE3_BIN))' \ - scripts/stage2_link.sh + rm -rf $(BOOTSTRAP_DIR) + @mkdir -p $(BOOTSTRAP_STAGE1_DIR) + cp $(BIN) $(BOOTSTRAP_STAGE1_BIN) + @for tool in $(BOOTSTRAP_TOOLS); do ln -sf cfree "$(BOOTSTRAP_STAGE1_DIR)/$$tool"; done + $(MAKE) lib bin \ + BUILD_DIR='$(abspath $(BOOTSTRAP_STAGE2_DIR))' \ + HOST_OPTFLAGS='$(HOST_OPTFLAGS)' \ + CC='$(abspath $(BOOTSTRAP_STAGE1_DIR))/cc' \ + AR='$(abspath $(BOOTSTRAP_STAGE1_DIR))/ar' \ + LD='$(abspath $(BOOTSTRAP_STAGE1_DIR))/ld' + @for tool in $(BOOTSTRAP_TOOLS); do ln -sf cfree "$(BOOTSTRAP_STAGE2_DIR)/$$tool"; done + $(MAKE) lib bin \ + BUILD_DIR='$(abspath $(BOOTSTRAP_STAGE3_DIR))' \ + HOST_OPTFLAGS='$(HOST_OPTFLAGS)' \ + CC='$(abspath $(BOOTSTRAP_STAGE2_DIR))/cc' \ + AR='$(abspath $(BOOTSTRAP_STAGE2_DIR))/ar' \ + LD='$(abspath $(BOOTSTRAP_STAGE2_DIR))/ld' cmp $(BOOTSTRAP_STAGE2_BIN) $(BOOTSTRAP_STAGE3_BIN) shasum -a 256 $(BOOTSTRAP_STAGE2_BIN) $(BOOTSTRAP_STAGE3_BIN) +bootstrap-test-toy: bootstrap + @CFREE='$(abspath $(BOOTSTRAP_STAGE3_BIN))' test/toy/run.sh + bench-opt: bin @bash scripts/opt_bench.sh @@ -177,7 +205,7 @@ format: find src include 
driver lang test rt -path test/pp -prune -o \( -name '*.c' -o -name '*.h' \) -print | xargs clang-format -i --style=google clean: - rm -rf build + rm -rf $(BUILD_DIR) -include $(LIB_DEPS) -include $(DRIVER_DEPS) diff --git a/rt/Makefile b/rt/Makefile @@ -5,7 +5,7 @@ RT_AR ?= $(BIN) ar RT_AS ?= $(BIN) as RT_AS_COMPILE_FLAGS ?= -RT_BUILD_DIR = build/rt +RT_BUILD_DIR ?= $(BUILD_DIR)/rt RT_COMMON_CFLAGS = -Werror RT_LIB_INCS = -Irt/lib/include/common -Irt/lib/impl diff --git a/scripts/stage2_link.sh b/scripts/stage2_link.sh @@ -1,205 +0,0 @@ -#!/usr/bin/env bash -# Stage-2 standalone link probe. -# -# 1. Compile every src/**/*.c with cfree-stage1. -# 2. Compile every driver/*.c with cfree-stage1. -# 3. Link all the resulting objects with `cfree ld` against libSystem.B.tbd. -# -# Runs out-of-tree under build/stage2-probe/ so the Makefile's build/ tree -# is left alone. -set -u - -ROOT="$(cd "$(dirname "$0")/.." && pwd)" -cd "$ROOT" - -BIN="${CFREE_STAGE_BIN:-$ROOT/build/cfree}" -if [ ! -x "$BIN" ]; then - echo "missing $BIN — run \`make\` first" >&2 - exit 2 -fi - -SDK="$(xcrun --show-sdk-path)" -OUT="${CFREE_STAGE_OUT:-$ROOT/build/stage2-probe}" -LIB_OUT="$OUT/lib" -LANG_CPP_OUT="$OUT/lang/cpp" -LANG_C_OUT="$OUT/lang/c" -LANG_WASM_OUT="$OUT/lang/wasm" -LANG_TOY_OUT="$OUT/lang/toy" -DRV_OUT="$OUT/driver" -LOG="$OUT/log" -mkdir -p "$LIB_OUT" "$LANG_CPP_OUT" "$LANG_C_OUT" "$LANG_WASM_OUT" \ - "$LANG_TOY_OUT" "$DRV_OUT" "$LOG" - -CFREE_FLAGS="--support-dir $ROOT -Iinclude -Isrc" -# The lexer/preprocessor substrate (lang/cpp) is part of libcfree. -LANG_CPP_FLAGS="--support-dir $ROOT -Iinclude -Ilang/cpp" -# lang/c sources reach the lexer/preprocessor and cpp_support.h substrate under -# lang/cpp, mirroring the Makefile's `-Ilang/cpp -Ilang/c`. 
-LANG_C_FLAGS="--support-dir $ROOT -Iinclude -Ilang/cpp -Ilang/c" -LANG_WASM_FLAGS="--support-dir $ROOT -Iinclude -Ilang/wasm" -LANG_TOY_FLAGS="--support-dir $ROOT -Iinclude" -# `cc` reaches the C frontend header as "c/c.h"; -Ilang mirrors DRIVER_CFLAGS. -DRIVER_FLAGS="--support-dir $ROOT -Iinclude -I. -Ilang" - -cfree_objs=() -fail_src=() -fail_lang=() -fail_driver=() - -compile_with_cfree() { - local src="$1" obj="$2" flags="$3" - mkdir -p "$(dirname "$obj")" - if "$BIN" cc $flags -c "$src" -o "$obj" >"$LOG/$(basename "$obj").log" 2>&1; then - return 0 - fi - return 1 -} - -echo "=== compiling src/ with cfree ===" -while IFS= read -r src; do - rel="${src#src/}" - obj="$LIB_OUT/${rel%.*}.o" - src_flags="$CFREE_FLAGS" - # lang_registry.c is the one libcfree source that reaches into lang/* and - # includes frontend headers as "c/c.h" — matches the Makefile's -Ilang. - if [ "$src" = "src/api/lang_registry.c" ]; then src_flags="$src_flags -Ilang"; fi - # emu/cpu.c pulls in the system <math.h> for sqrt; cfree's bundled freestanding - # header set has no math.h, so reach for the SDK's like the host build does. 
- if [ "$src" = "src/emu/cpu.c" ]; then src_flags="$src_flags -isystem $SDK/usr/include"; fi - if compile_with_cfree "$src" "$obj" "$src_flags"; then - cfree_objs+=("$obj") - printf ' ok %s\n' "$src" - else - fail_src+=("$src") - head -1 "$LOG/$(basename "$obj").log" | sed "s|^| FAIL $src: |" - fi -done < <(find src \( -name '*.c' -o -name '*.S' \) | sort) - -echo -echo "=== compiling lang/cpp with cfree ===" -while IFS= read -r src; do - rel="${src#lang/cpp/}" - obj="$LANG_CPP_OUT/${rel%.c}.o" - if compile_with_cfree "$src" "$obj" "$LANG_CPP_FLAGS"; then - cfree_objs+=("$obj") - printf ' ok %s\n' "$src" - else - fail_lang+=("$src") - head -1 "$LOG/$(basename "$obj").log" | sed "s|^| FAIL $src: |" - fi -done < <(find lang/cpp -name '*.c' | sort) - -echo -echo "=== compiling lang/c with cfree ===" -while IFS= read -r src; do - rel="${src#lang/c/}" - obj="$LANG_C_OUT/${rel%.c}.o" - if compile_with_cfree "$src" "$obj" "$LANG_C_FLAGS"; then - cfree_objs+=("$obj") - printf ' ok %s\n' "$src" - else - fail_lang+=("$src") - head -1 "$LOG/$(basename "$obj").log" | sed "s|^| FAIL $src: |" - fi -done < <(find lang/c -name '*.c' | sort) - -echo -echo "=== compiling lang/wasm with cfree ===" -while IFS= read -r src; do - rel="${src#lang/wasm/}" - obj="$LANG_WASM_OUT/${rel%.c}.o" - if compile_with_cfree "$src" "$obj" "$LANG_WASM_FLAGS"; then - cfree_objs+=("$obj") - printf ' ok %s\n' "$src" - else - fail_lang+=("$src") - head -1 "$LOG/$(basename "$obj").log" | sed "s|^| FAIL $src: |" - fi -done < <(find lang/wasm -name '*.c' 2>/dev/null | sort) - -echo -echo "=== compiling lang/toy with cfree ===" -while IFS= read -r src; do - rel="${src#lang/toy/}" - obj="$LANG_TOY_OUT/${rel%.c}.o" - if compile_with_cfree "$src" "$obj" "$LANG_TOY_FLAGS"; then - cfree_objs+=("$obj") - printf ' ok %s\n' "$src" - else - fail_lang+=("$src") - head -1 "$LOG/$(basename "$obj").log" | sed "s|^| FAIL $src: |" - fi -done < <(find lang/toy -name '*.c' | sort) - -echo -echo "=== compiling driver/ ===" 
-for src in $(ls driver/*.c | sort); do - base="$(basename "$src")" - obj="$DRV_OUT/${base%.c}.o" - extra_flags="" - if [ "$base" = "env.c" ]; then extra_flags="--sysroot $SDK -lc"; fi - if compile_with_cfree "$src" "$obj" "$DRIVER_FLAGS $extra_flags"; then - cfree_objs+=("$obj") - printf ' ok %s\n' "$src" - else - fail_driver+=("$src (cfree)") - head -1 "$LOG/$(basename "$obj").log" | sed "s|^| FAIL $src: |" - fi -done - -echo -echo "=== compile summary ===" -echo " cfree objects: ${#cfree_objs[@]}" -echo " src failures: ${#fail_src[@]}" -echo " lang failures: ${#fail_lang[@]}" -echo " driver failures: ${#fail_driver[@]}" - -if [ "${#fail_src[@]}" -gt 0 ] || [ "${#fail_lang[@]}" -gt 0 ] || \ - [ "${#fail_driver[@]}" -gt 0 ]; then - echo - echo "compile failures present; skipping link" >&2 - exit 1 -fi - -echo -echo "=== linking with cfree ld ===" -BIN_OUT="${CFREE_STAGE_OUTPUT:-$OUT/cfree-stage2}" -LIBSYS_DIR="$SDK/usr/lib" -if [ ! -f "$LIBSYS_DIR/libSystem.B.tbd" ] && \ - [ ! -f "$LIBSYS_DIR/libSystem.tbd" ]; then - echo "libSystem stub not found under $LIBSYS_DIR" >&2 - exit 2 -fi - -# cfree lowers some operations (e.g. 128-bit multiply -> __multi3) to -# compiler-rt-style builtins that libSystem does not provide. The host build -# resolves these from clang's compiler-rt; here we link cfree's own runtime for -# the host architecture (built by `make rt`). -case "$(uname -m)" in - arm64|aarch64) RT_TRIPLE=aarch64-apple-darwin ;; - x86_64) RT_TRIPLE=x86_64-apple-darwin ;; - *) echo "unknown host arch $(uname -m) for runtime selection" >&2; exit 2 ;; -esac -RT_LIB="$ROOT/build/rt/$RT_TRIPLE/libcfree_rt.a" -if [ ! -f "$RT_LIB" ]; then - echo "missing $RT_LIB — run \`make rt\` first" >&2 - exit 2 -fi - -set -x -"$BIN" ld -o "$BIN_OUT" -pie \ - "${cfree_objs[@]}" \ - "$RT_LIB" \ - -L "$LIBSYS_DIR" -lSystem -status=$? 
-set +x - -if [ "$status" -ne 0 ]; then - echo "cfree ld failed (exit $status)" >&2 - exit "$status" -fi - -echo -echo "=== link succeeded ===" -file "$BIN_OUT" -ls -l "$BIN_OUT" diff --git a/src/opt/pass_ssa.c b/src/opt/pass_ssa.c @@ -131,7 +131,9 @@ static int slot_access_promotable(const Func* f, const Inst* in, * slot's declared type. */ const IRFrameSlot* s = &f->frame_slots[slot_id - 1u]; CfreeCgTypeId at = in->extra.mem.type; - return !at || at == s->type; + if (at && at != s->type) return 0; + if (in->opnds[0].type && in->opnds[0].type != s->type) return 0; + return 1; } if ((IROp)in->op == IR_STORE) { if (in->nopnds < 2 || opnd_slot_id(&in->opnds[0]) != slot_id) return 1; @@ -140,7 +142,9 @@ static int slot_access_promotable(const Func* f, const Inst* in, return 0; const IRFrameSlot* s = &f->frame_slots[slot_id - 1u]; CfreeCgTypeId at = in->extra.mem.type; - return !at || at == s->type; + if (at && at != s->type) return 0; + if (in->opnds[1].type && in->opnds[1].type != s->type) return 0; + return 1; } for (u32 i = 0; i < in->nopnds; ++i) if (opnd_slot_id(&in->opnds[i]) == slot_id) return 0;