kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 28f717c79bf761e9f3bcf3b33276733f67a37347
parent f9d2f3384870b09083786f88b7ecdf5209f68794
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Fri, 22 May 2026 04:54:01 -0700

Add OPT benchmark harness

Diffstat:
M Makefile | 5 ++++-
M doc/OPT.md | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
M driver/run.c | 116 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
A scripts/opt_bench.sh | 463 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 646 insertions(+), 10 deletions(-)

diff --git a/Makefile b/Makefile @@ -42,7 +42,7 @@ LIB_AR = build/libcfree.a LANG_TOY_AR = build/libcfree_toy.a BIN = build/cfree -.PHONY: all lib bin format clean bootstrap +.PHONY: all lib bin format clean bootstrap bench-opt all: lib bin @@ -115,6 +115,9 @@ bootstrap: $(BIN) cmp $(BOOTSTRAP_STAGE2_BIN) $(BOOTSTRAP_STAGE3_BIN) shasum -a 256 $(BOOTSTRAP_STAGE2_BIN) $(BOOTSTRAP_STAGE3_BIN) +bench-opt: bin + @bash scripts/opt_bench.sh + format: find src include driver lang test rt \( -path test/lex -o -path test/pp \) -prune -o \( -name '*.c' -o -name '*.h' \) -print | xargs clang-format -i --style=google diff --git a/doc/OPT.md b/doc/OPT.md @@ -1151,14 +1151,84 @@ Benchmark set: benchmark that requires only supported libc/runtime features. - cfree-specific stress cases for ABI, TLS, atomics, and inline asm. +Primary harness: + +``` +make bench-opt +``` + +`scripts/opt_bench.sh` compares MIR `c2m`, `clang`, `gcc-15`, `cfree cc`, +and `cfree run` at `-O0`, `-O1`, and `-O2` on MIR's `c-benchmarks` sources +from `~/tmp/mir/c-benchmarks`. It writes per-case logs, `results.csv`, and a +geomean summary under `build/bench/opt/`. + +The hosted `cfree cc` lane passes `--sysroot` and `-lc`; on Darwin the +sysroot defaults to `xcrun --show-sdk-path`, and it can be overridden with +`CFREE_OPT_BENCH_SYSROOT`. The `cfree run` lane is an in-process JIT run for +direct comparison with MIR; it also passes `--sysroot` and `-lc`, expands the +hosted libc headers/defines in the driver, and relies on host-symbol fallback +for libc calls. + +The benchmark intentionally records unsupported cases instead of stopping at +the first failure. A cfree row that cannot compile a hosted benchmark is a +`COMPILE_FAIL` data point, not a harness failure; this lets optimizer work +track coverage and performance in the same run. 
+ +Useful focused runs: + +``` +CFREE_OPT_BENCHES="sieve spectral-norm" make bench-opt +CFREE_OPT_BENCH_LEVELS="1 2" CFREE_OPT_BENCH_RUN_REPEATS=5 make bench-opt +GCC=/opt/homebrew/bin/gcc-15 MIR_DIR=~/tmp/mir make bench-opt +``` + Measure: - Compile wall time for `-O0`, `-O1`, `-O2`. -- Executable run time against clang/gcc `-O2` when available. +- Executable run time against `gcc-15 -O2`, `clang -O2`, and MIR `c2m -O2` + when available. +- MIR-specific split: `compile_ms` is C-to-binary-MIR time, `codegen_ms` is + the JIT link/generation slice reported by `c2m -v`, and `runtime_ms` is the + generated function execution slice. Native compiler rows use compile+link + wall time for `compile_ms` and executable wall time for `runtime_ms`. +- `cfree-run` uses `cfree run --bench-time`: `compile_ms` is compile+JIT time, + and `runtime_ms` is the in-process entry-call execution slice. - Code size for hot text sections. - Pass counters: removed GVN expressions, folded branches, removed stores, coalesced moves, spills/restores, split ranges, post-RA deleted moves. +Initial representative run, 2026-05-22: + +- Scope: `array`, `binary-trees`, `hash`, `hash2`, `matrix`, `nbody`, + `sieve`, and `spectral-norm`; levels `0 1 2`; one compile repeat and one + run repeat. Output was written to `build/bench/opt/results.csv` and + `build/bench/opt/summary.md`. +- Coverage: 120 data rows; 91 `OK`, 18 `COMPILE_FAIL`, 9 `RUN_FAIL`, and + 2 `OUTPUT_FAIL`. +- `gcc-15` and `clang` completed all rows. cfree and MIR are blocked on some + hosted/math cases: `binary-trees`, `nbody`, and `spectral-norm` hit Darwin + `math.h`/builtin compatibility issues. cfree also has an `-O2` wrong-code + failure on `matrix`. +- Runtime geomean versus `gcc-15 -O2` on completed rows: + `cfree-run` improved from `0.372x` at `-O0` to `0.445x` at `-O1` and + `0.480x` at `-O2`; MIR improved from `0.554x` to `0.609x` to `0.796x`. + `clang -O2` measured `1.076x`. 
+- Compile/JIT geomean versus `gcc-15 -O2`: `cfree-run` was about `11.5x` to + `12.0x` faster, while MIR was about `2.7x` faster. On cases where both + completed, cfree-run compile/JIT time was roughly 4-5x faster than MIR. +- Direct cfree-run versus MIR runtime on common successful cases: + `0.67x` at `-O0`, `0.73x` at `-O1`, and `0.57x` at `-O2`. This confirms + the current split: cfree's compile/JIT path is very fast, but generated-code + quality still trails MIR, especially at `-O2`. + +Immediate benchmark blockers: + +- Fix the `matrix -O2` wrong-code regression before trusting O2 timing. +- Add or model the hosted math builtins needed by Darwin `math.h`, starting + with `__builtin_fabsf`. +- Re-run the full MIR benchmark set after those blockers, then increase repeat + counts for stable numbers. + Target: - `-O1` should be the fast optimized tier and materially faster to compile diff --git a/driver/run.c b/driver/run.c @@ -1,15 +1,15 @@ +#include <cfree/compile.h> +#include <cfree/core.h> +#include <cfree/jit.h> +#include <cfree/link.h> #include <stdint.h> #include <stdlib.h> #include "cflags.h" #include "driver.h" +#include "hosted.h" #include "inputs.h" -#include <cfree/compile.h> -#include <cfree/core.h> -#include <cfree/jit.h> -#include <cfree/link.h> - /* `cfree run` — JIT-compile one or more inputs and invoke the entry symbol * (default `main`) in-process. Args after `--` are passed to the JITed * program as argv. 
Mirrors the cc front-end for input shape (.c / - sources, @@ -30,10 +30,14 @@ typedef struct RunOptions { int opt_level; int debug_info; int metrics; + int bench_time; int warnings_are_errors; /* -Werror */ uint32_t max_errors; /* -fmax-errors=N */ const char* entry; /* -e, default "main" */ + const char* sysroot; /* --sysroot */ + int wants_hosted_libc; /* -lc */ CfreeTarget target; /* -target / host */ + DriverHostedPlan hosted; DriverCflags cf; DriverInputs inputs; @@ -75,8 +79,8 @@ static void run_metrics_scope_end(void* user, const char* name) { f = m->stack[depth]; end_ns = driver_now_ns(); elapsed = (end_ns >= f.start_ns) ? (end_ns - f.start_ns) : 0; - driver_logf("%*s%s %.3f ms", (int)(depth * 2u), "", - f.name ? f.name : name, (double)elapsed / 1000000.0); + driver_logf("%*s%s %.3f ms", (int)(depth * 2u), "", f.name ? f.name : name, + (double)elapsed / 1000000.0); } static void run_metrics_count(void* user, const char* name, uint64_t value) { @@ -109,6 +113,10 @@ static void run_metrics_finish(RunMetrics* m) { } } +static void run_bench_time(const char* name, uint64_t ns) { + driver_logf("cfree-run %s -- %.3f msec", name, (double)ns / 1000000.0); +} + static void run_usage(void) { driver_errf(RUN_TOOL, "%s", "usage: cfree run [options] inputs... 
[-- prog-arg...]\n" @@ -146,8 +154,12 @@ void driver_help_run(void) { " -O0 -O1 -O2 Optimization level (default -O0)\n" " -g Emit DWARF debug info\n" " --time, --metrics Emit scoped compile/link/JIT timing to stderr\n" + " --bench-time Emit parseable compile/JIT/execution timings\n" " -e SYMBOL Entry symbol (default `main`)\n" " -target TRIPLE Cross-compile target (see `cfree cc --help`)\n" + " --sysroot DIR Hosted libc sysroot for headers/defines with -lc\n" + " -lc Enable hosted libc headers/defines; calls resolve " + "via host dlsym\n" " -fPIC -fpic Position-independent code (no-op for the JIT)\n" " -fPIE -fpie Position-independent executable (no-op for the " "JIT)\n" @@ -192,7 +204,9 @@ static int run_alloc_arrays(RunOptions* o, int argc) { return 1; } if (driver_inputs_init(&o->inputs, o->env, RUN_TOOL, argc) != 0) return 1; - if (driver_cflags_init(&o->cf, o->env, argc) != 0) { + if (driver_cflags_init( + &o->cf, o->env, + argc + DRIVER_HOSTED_MAX_INCLUDES + DRIVER_HOSTED_MAX_DEFINES) != 0) { driver_errf(RUN_TOOL, "out of memory"); return 1; } @@ -245,6 +259,31 @@ static int run_classify_positional(RunOptions* o, const char* a) { return 0; } +static int run_apply_hosted_profile(RunOptions* o) { + DriverHostedRequest req; + uint32_t i; + if (!o->wants_hosted_libc) return 0; + { + DriverHostedRequest z = {0}; + req = z; + } + req.env = o->env; + req.tool = RUN_TOOL; + req.target = o->target; + req.sysroot = o->sysroot; + req.static_link = 0; + req.link_inputs = 0; + if (driver_hosted_resolve(&req, &o->hosted) != 0) return 1; + for (i = 0; i < o->hosted.nsystem_includes; ++i) { + o->cf.system_include_dirs[o->cf.nsystem_include_dirs++] = + o->hosted.system_includes[i]; + } + for (i = 0; i < o->hosted.ndefines; ++i) { + o->cf.defines[o->cf.ndefines++] = o->hosted.defines[i]; + } + return 0; +} + static int run_parse(int argc, char** argv, RunOptions* o) { int i; int after_dash_dash = 0; @@ -278,6 +317,10 @@ static int run_parse(int argc, char** argv, RunOptions* 
o) { o->debug_info = 1; continue; } + if (driver_streq(a, "--bench-time")) { + o->bench_time = 1; + continue; + } if (driver_streq(a, "--time") || driver_streq(a, "--metrics")) { o->metrics = 1; continue; @@ -342,6 +385,43 @@ static int run_parse(int argc, char** argv, RunOptions* o) { } continue; } + if (driver_streq(a, "--sysroot") || driver_streq(a, "-isysroot")) { + if (++i >= argc) { + driver_errf(RUN_TOOL, "%s requires an argument", a); + return 1; + } + o->sysroot = argv[i]; + continue; + } + if (driver_strneq(a, "--sysroot=", 10)) { + o->sysroot = a + 10; + continue; + } + if (driver_streq(a, "-lc")) { + o->wants_hosted_libc = 1; + continue; + } + if (driver_streq(a, "-l")) { + if (++i >= argc) { + driver_errf(RUN_TOOL, "-l requires an argument"); + return 1; + } + if (!driver_streq(argv[i], "c")) { + driver_errf(RUN_TOOL, "unsupported hosted library for JIT: -l%s", + argv[i]); + return 1; + } + o->wants_hosted_libc = 1; + continue; + } + if (driver_strneq(a, "-l", 2)) { + if (!driver_streq(a + 2, "c")) { + driver_errf(RUN_TOOL, "unsupported hosted library for JIT: %s", a); + return 1; + } + o->wants_hosted_libc = 1; + continue; + } if (driver_streq(a, "-e")) { if (++i >= argc) { @@ -370,6 +450,7 @@ static int run_parse(int argc, char** argv, RunOptions* o) { return 1; } if (!o->entry) o->entry = "main"; + if (run_apply_hosted_profile(o) != 0) return 1; /* Synthetic argv[0]. 
Hosted programs conventionally read argv[0] as * the program name; under `cfree run` there is no executable path, so @@ -380,6 +461,7 @@ static int run_parse(int argc, char** argv, RunOptions* o) { static void run_options_release(RunOptions* o) { size_t bound = o->argv_bound; + driver_hosted_plan_fini(o->env, &o->hosted); driver_inputs_release(&o->inputs); driver_cflags_fini(&o->cf, o->env); driver_free(o->env, o->prog_argv, bound * sizeof(*o->prog_argv)); @@ -467,6 +549,11 @@ int driver_run(int argc, char** argv) { void* sym; MainFn entry_fn; int rc; + uint64_t bench_total_start = 0; + uint64_t bench_compile_start = 0; + uint64_t bench_compile_end = 0; + uint64_t bench_exec_start = 0; + uint64_t bench_exec_end = 0; if (argc < 2 || driver_argv_wants_help(argc, argv, 1)) { driver_help_run(); @@ -489,6 +576,7 @@ int driver_run(int argc, char** argv) { driver_logf("cfree metrics:"); run_metrics_begin(metrics, "run.total"); } + if (ro.bench_time) bench_total_start = driver_now_ns(); /* Compiler backs the JIT image — keep it alive across cfree_jit_lookup * and the entry call, free after cfree_jit_free. 
*/ @@ -502,10 +590,15 @@ int driver_run(int argc, char** argv) { return 1; } + if (ro.bench_time) bench_compile_start = driver_now_ns(); run_metrics_begin(metrics, "run.compile_and_jit"); rc = run_compile_and_jit(&ro, compiler, &jhost, &jit); run_metrics_end(metrics, "run.compile_and_jit"); + if (ro.bench_time) bench_compile_end = driver_now_ns(); if (rc != 0) { + if (ro.bench_time) + run_bench_time("compile_and_jit", + bench_compile_end - bench_compile_start); driver_compiler_free(compiler); run_metrics_finish(metrics); run_options_release(&ro); @@ -536,9 +629,16 @@ int driver_run(int argc, char** argv) { } run_metrics_begin(metrics, "run.entry_call"); + if (ro.bench_time) bench_exec_start = driver_now_ns(); if (!run_call_wasm_entry(&ro, jit, sym, &rc)) rc = entry_fn((int)ro.prog_argc, ro.prog_argv); + if (ro.bench_time) bench_exec_end = driver_now_ns(); run_metrics_end(metrics, "run.entry_call"); + if (ro.bench_time) { + run_bench_time("compile_and_jit", bench_compile_end - bench_compile_start); + run_bench_time("execution", bench_exec_end - bench_exec_start); + run_bench_time("total", bench_exec_end - bench_total_start); + } cfree_jit_free(jit); driver_compiler_free(compiler); diff --git a/scripts/opt_bench.sh b/scripts/opt_bench.sh @@ -0,0 +1,463 @@ +#!/usr/bin/env bash +set -uo pipefail + +ROOT="$(cd "$(dirname "$0")/.." 
&& pwd)" +MIR_DIR="${MIR_DIR:-$HOME/tmp/mir}" +MIR_C2M="${MIR_C2M:-$MIR_DIR/c2m}" +CFREE="${CFREE:-$ROOT/build/cfree}" +CLANG="${CLANG:-clang}" +GCC="${GCC:-gcc-15}" +OUT_DIR="${CFREE_OPT_BENCH_OUT:-$ROOT/build/bench/opt}" +CFREE_SYSROOT="${CFREE_OPT_BENCH_SYSROOT:-}" + +LEVELS="${CFREE_OPT_BENCH_LEVELS:-0 1 2}" +BENCHES="${CFREE_OPT_BENCHES:-array binary-trees except funnkuch-reduce hash hash2 heapsort lists matrix method-call mandelbrot nbody sieve spectral-norm strcat}" +COMPILE_REPEATS="${CFREE_OPT_BENCH_COMPILE_REPEATS:-1}" +RUN_REPEATS="${CFREE_OPT_BENCH_RUN_REPEATS:-3}" +MIR_MAKE="${MIR_MAKE:-}" + +case "$(uname -s 2>/dev/null || true)" in + Darwin) + DEFAULT_MATH_LIBS="" + if command -v xcrun >/dev/null 2>&1; then + DEFAULT_SYSROOT="$(xcrun --show-sdk-path)" + DEFAULT_CFLAGS_EXTRA="-isysroot $DEFAULT_SYSROOT" + else + DEFAULT_SYSROOT="" + DEFAULT_CFLAGS_EXTRA="" + fi + ;; + *) + DEFAULT_MATH_LIBS="-lm" + DEFAULT_SYSROOT="${SYSROOT:-}" + DEFAULT_CFLAGS_EXTRA="" + ;; +esac +if [ -z "$CFREE_SYSROOT" ]; then + CFREE_SYSROOT="$DEFAULT_SYSROOT" +fi +MATH_LIBS="${CFREE_OPT_BENCH_MATH_LIBS:-$DEFAULT_MATH_LIBS}" +CFLAGS_EXTRA="${CFREE_OPT_BENCH_CFLAGS:-$DEFAULT_CFLAGS_EXTRA}" +CFREE_FLAGS_EXTRA="${CFREE_OPT_BENCH_CFREE_FLAGS:-}" +CFREE_RUN_FLAGS_EXTRA="${CFREE_OPT_BENCH_CFREE_RUN_FLAGS:-}" + +CSV="$OUT_DIR/results.csv" +SUMMARY="$OUT_DIR/summary.md" +LOG_DIR="$OUT_DIR/logs" +BIN_DIR="$OUT_DIR/bin" + +mkdir -p "$OUT_DIR" "$LOG_DIR" "$BIN_DIR" + +now_ns() { + python3 -c 'import time; print(time.monotonic_ns())' +} + +ns_to_ms() { + awk -v ns="$1" 'BEGIN { printf "%.3f", ns / 1000000.0 }' +} + +min_ms() { + awk -v a="$1" -v b="$2" 'BEGIN { + if (a == "" || a == "NA") printf "%s", b; + else if (b + 0 < a + 0) printf "%s", b; + else printf "%s", a; + }' +} + +csv_field() { + printf '%s' "$1" | sed 's/"/""/g; s/^/"/; s/$/"/' +} + +record_row() { + local bench="$1" tool="$2" opt="$3" status="$4" + local compile_ms="$5" codegen_ms="$6" runtime_ms="$7" rc="$8" log="$9" + { + 
csv_field "$bench"; printf ',' + csv_field "$tool"; printf ',' + csv_field "$opt"; printf ',' + csv_field "$status"; printf ',' + csv_field "$compile_ms"; printf ',' + csv_field "$codegen_ms"; printf ',' + csv_field "$runtime_ms"; printf ',' + csv_field "$rc"; printf ',' + csv_field "$log"; printf '\n' + } >>"$CSV" +} + +run_timed() { + local out="$1" err="$2" + shift 2 + local t0 t1 + t0="$(now_ns)" + "$@" >"$out" 2>"$err" + RUN_RC=$? + t1="$(now_ns)" + RUN_MS="$(ns_to_ms "$((t1 - t0))")" +} + +read_arg_file() { + local f="$1" + if [ -f "$f" ]; then + sh "$f" + fi +} + +check_expected() { + local expect="$1" got="$2" diff_out="$3" + if [ ! -f "$expect" ]; then + return 0 + fi + cmp "$expect" "$got" >/dev/null 2>&1 && return 0 + diff -u "$expect" "$got" >"$diff_out" 2>&1 + return 1 +} + +parse_mir_ms() { + local pattern="$1" file="$2" + awk -v pat="$pattern" ' + $0 ~ pat { + v = $(NF - 1) + unit = $NF + if (unit == "usec") v = v / 1000.0 + if (unit == "msec") v = v + 0.0 + printf "%.3f\n", v + exit + } + ' "$file" +} + +ensure_mir() { + if [ -x "$MIR_C2M" ]; then + return 0 + fi + if [ ! 
-d "$MIR_DIR" ]; then + printf 'opt-bench: MIR_DIR does not exist: %s\n' "$MIR_DIR" >&2 + return 1 + fi + if [ -z "$MIR_MAKE" ]; then + if command -v gmake >/dev/null 2>&1; then + MIR_MAKE=gmake + else + MIR_MAKE=make + fi + fi + printf 'opt-bench: building MIR c2m with %s -C %s c2m\n' "$MIR_MAKE" "$MIR_DIR" + "$MIR_MAKE" -C "$MIR_DIR" c2m +} + +tool_label() { + basename "$1" +} + +bench_source_dir() { + printf '%s/c-benchmarks' "$MIR_DIR" +} + +compile_native() { + local bench="$1" tool="$2" cc="$3" opt="$4" src="$5" exe="$6" log_base="$7" + local best="NA" rep out err + local cmd=() cflags=() libs=() cfree_flags=() + read -r -a cmd <<<"$cc" + read -r -a cflags <<<"$CFLAGS_EXTRA" + read -r -a libs <<<"$MATH_LIBS" + read -r -a cfree_flags <<<"$CFREE_FLAGS_EXTRA" + if [ "$tool" = "cfree" ]; then + cflags=("${cfree_flags[@]}") + if [ -n "$CFREE_SYSROOT" ]; then + cflags+=(--sysroot "$CFREE_SYSROOT") + fi + libs+=("-lc") + fi + for rep in $(seq 1 "$COMPILE_REPEATS"); do + out="$log_base.compile.$rep.out" + err="$log_base.compile.$rep.err" + rm -f "$exe" + run_timed "$out" "$err" "${cmd[@]}" -std=c99 "-O$opt" -I"$(bench_source_dir)" \ + "${cflags[@]}" "$src" "${libs[@]}" -o "$exe" + if [ "$RUN_RC" -ne 0 ]; then + record_row "$bench" "$tool" "$opt" "COMPILE_FAIL" "$RUN_MS" "NA" \ + "NA" "$RUN_RC" "$err" + return 1 + fi + best="$(min_ms "$best" "$RUN_MS")" + done + COMPILE_MS="$best" + return 0 +} + +run_native() { + local bench="$1" tool="$2" opt="$3" exe="$4" expect="$5" arg_line="$6" log_base="$7" + local best="NA" best_rc=0 rep out err diff_out + local args=() + read -r -a args <<<"$arg_line" + for rep in $(seq 1 "$RUN_REPEATS"); do + out="$log_base.run.$rep.out" + err="$log_base.run.$rep.err" + run_timed "$out" "$err" "$exe" "${args[@]}" + if [ "$RUN_RC" -ne 0 ]; then + record_row "$bench" "$tool" "$opt" "RUN_FAIL" "$COMPILE_MS" "NA" \ + "$RUN_MS" "$RUN_RC" "$err" + return 1 + fi + diff_out="$log_base.run.$rep.diff" + if ! 
check_expected "$expect" "$out" "$diff_out"; then + record_row "$bench" "$tool" "$opt" "OUTPUT_FAIL" "$COMPILE_MS" "NA" \ + "$RUN_MS" "$RUN_RC" "$diff_out" + return 1 + fi + best="$(min_ms "$best" "$RUN_MS")" + best_rc="$RUN_RC" + done + record_row "$bench" "$tool" "$opt" "OK" "$COMPILE_MS" "NA" "$best" \ + "$best_rc" "$log_base" +} + +bench_native_tool() { + local bench="$1" tool="$2" cc="$3" opt="$4" src="$5" expect="$6" arg_line="$7" + local exe="$BIN_DIR/$tool.O$opt.$bench.exe" + local log_base="$LOG_DIR/$tool.O$opt.$bench" + local cmd=() + read -r -a cmd <<<"$cc" + if ! command -v "${cmd[0]}" >/dev/null 2>&1 && [ ! -x "${cmd[0]}" ]; then + record_row "$bench" "$tool" "$opt" "TOOL_MISSING" "NA" "NA" "NA" \ + "127" "$cc" + return 0 + fi + compile_native "$bench" "$tool" "$cc" "$opt" "$src" "$exe" "$log_base" || return 0 + run_native "$bench" "$tool" "$opt" "$exe" "$expect" "$arg_line" "$log_base" +} + +bench_mir() { + local bench="$1" opt="$2" src="$3" expect="$4" arg_line="$5" + local tool="mir-c2m" + local bmir="$BIN_DIR/$tool.O$opt.$bench.bmir" + local log_base="$LOG_DIR/$tool.O$opt.$bench" + local best_run="NA" best_codegen="NA" best_rc=0 rep out err diff_out exec_ms cg_ms + local args=() + read -r -a args <<<"$arg_line" + + run_timed "$log_base.compile.out" "$log_base.compile.err" \ + "$MIR_C2M" "-O$opt" -c -I"$(bench_source_dir)" -o "$bmir" "$src" + if [ "$RUN_RC" -ne 0 ]; then + record_row "$bench" "$tool" "$opt" "COMPILE_FAIL" "$RUN_MS" "NA" \ + "NA" "$RUN_RC" "$log_base.compile.err" + return 0 + fi + COMPILE_MS="$RUN_MS" + + for rep in $(seq 1 "$RUN_REPEATS"); do + out="$log_base.run.$rep.out" + err="$log_base.run.$rep.err" + run_timed "$out" "$err" "$MIR_C2M" -v "-O$opt" "$bmir" -eg "${args[@]}" + exec_ms="$(parse_mir_ms 'execution' "$err")" + cg_ms="$(parse_mir_ms 'MIR link finish' "$err")" + [ -z "$exec_ms" ] && exec_ms="$RUN_MS" + [ -z "$cg_ms" ] && cg_ms="NA" + if [ "$RUN_RC" -ne 0 ]; then + record_row "$bench" "$tool" "$opt" "RUN_FAIL" 
"$COMPILE_MS" "$cg_ms" \ + "$exec_ms" "$RUN_RC" "$err" + return 0 + fi + diff_out="$log_base.run.$rep.diff" + if ! check_expected "$expect" "$out" "$diff_out"; then + record_row "$bench" "$tool" "$opt" "OUTPUT_FAIL" "$COMPILE_MS" "$cg_ms" \ + "$exec_ms" "$RUN_RC" "$diff_out" + return 0 + fi + best_run="$(min_ms "$best_run" "$exec_ms")" + if [ "$cg_ms" != "NA" ]; then + best_codegen="$(min_ms "$best_codegen" "$cg_ms")" + fi + best_rc="$RUN_RC" + done + record_row "$bench" "$tool" "$opt" "OK" "$COMPILE_MS" "$best_codegen" \ + "$best_run" "$best_rc" "$log_base" +} + +bench_cfree_run() { + local bench="$1" opt="$2" src="$3" expect="$4" arg_line="$5" + local tool="cfree-run" + local log_base="$LOG_DIR/$tool.O$opt.$bench" + local best_run="NA" best_compile="NA" best_rc=0 rep out err diff_out cm_ms exec_ms + local args=() cfree_flags=() run_flags=() cmd=() + read -r -a args <<<"$arg_line" + read -r -a cfree_flags <<<"$CFREE_FLAGS_EXTRA" + read -r -a run_flags <<<"$CFREE_RUN_FLAGS_EXTRA" + + if [ ! -x "$CFREE" ]; then + record_row "$bench" "$tool" "$opt" "TOOL_MISSING" "NA" "NA" "NA" \ + "127" "$CFREE" + return 0 + fi + + for rep in $(seq 1 "$RUN_REPEATS"); do + out="$log_base.run.$rep.out" + err="$log_base.run.$rep.err" + cmd=("$CFREE" run --bench-time "-O$opt" -I"$(bench_source_dir)") + if [ -n "$CFREE_SYSROOT" ]; then + cmd+=(--sysroot "$CFREE_SYSROOT") + fi + cmd+=("${cfree_flags[@]}" "${run_flags[@]}" "$src" -lc -- "${args[@]}") + run_timed "$out" "$err" "${cmd[@]}" + cm_ms="$(parse_mir_ms 'cfree-run compile_and_jit' "$err")" + exec_ms="$(parse_mir_ms 'cfree-run execution' "$err")" + [ -z "$cm_ms" ] && cm_ms="NA" + [ -z "$exec_ms" ] && exec_ms="$RUN_MS" + if [ "$RUN_RC" -ne 0 ]; then + record_row "$bench" "$tool" "$opt" "RUN_FAIL" "$cm_ms" "NA" \ + "$exec_ms" "$RUN_RC" "$err" + return 0 + fi + diff_out="$log_base.run.$rep.diff" + if ! 
check_expected "$expect" "$out" "$diff_out"; then + record_row "$bench" "$tool" "$opt" "OUTPUT_FAIL" "$cm_ms" "NA" \ + "$exec_ms" "$RUN_RC" "$diff_out" + return 0 + fi + if [ "$cm_ms" != "NA" ]; then + best_compile="$(min_ms "$best_compile" "$cm_ms")" + fi + best_run="$(min_ms "$best_run" "$exec_ms")" + best_rc="$RUN_RC" + done + record_row "$bench" "$tool" "$opt" "OK" "$best_compile" "NA" "$best_run" \ + "$best_rc" "$log_base" +} + +write_summary() { + python3 - "$CSV" "$SUMMARY" "$(tool_label "$GCC")" <<'PY' +import csv +import math +import sys +from collections import defaultdict + +csv_path, out_path, base_tool = sys.argv[1:4] +with open(csv_path, newline="") as f: + rows = list(csv.DictReader(f)) + +def fnum(v): + if v in ("", "NA", None): + return None + try: + return float(v) + except ValueError: + return None + +ok = [r for r in rows if r["status"] == "OK"] +groups = defaultdict(list) +for r in ok: + groups[(r["tool"], r["opt"])].append(r) + +base_runtime = {} +base_compile = {} +for r in ok: + if r["tool"] == base_tool and r["opt"] == "2": + rt = fnum(r["runtime_ms"]) + cm = fnum(r["compile_ms"]) + if rt and rt > 0: + base_runtime[r["bench"]] = rt + if cm and cm > 0: + base_compile[r["bench"]] = cm + +def geo(xs): + xs = [x for x in xs if x and x > 0] + if not xs: + return "NA" + return f"{math.exp(sum(math.log(x) for x in xs) / len(xs)):.3f}" + +def avg(xs): + xs = [x for x in xs if x is not None] + if not xs: + return "NA" + return f"{sum(xs) / len(xs):.3f}" + +status_counts = defaultdict(int) +for r in rows: + status_counts[r["status"]] += 1 + +lines = [] +lines.append("# OPT Benchmark Summary") +lines.append("") +lines.append(f"Base for speed ratios: `{base_tool} -O2`.") +lines.append("For MIR, `compile_ms` is C-to-binary-MIR time and `codegen_ms` is the JIT link/generation slice reported by `c2m -v`; compile ratios use their sum. 
`cfree-run` uses `--bench-time`: `compile_ms` is compile+JIT time, and `runtime_ms` is the in-process entry-call execution slice.") +lines.append("") +lines.append("## Status") +lines.append("") +lines.append("| status | rows |") +lines.append("| --- | ---: |") +for k in sorted(status_counts): + lines.append(f"| {k} | {status_counts[k]} |") +lines.append("") +lines.append("## Geomean Ratios") +lines.append("") +lines.append("| tool | opt | ok cases | compile speed vs base | runtime speed vs base | avg compile+codegen ms | avg runtime ms |") +lines.append("| --- | ---: | ---: | ---: | ---: | ---: | ---: |") +for key in sorted(groups): + vals = groups[key] + comp_totals = [] + run_times = [] + comp_ratios = [] + run_ratios = [] + for r in vals: + cm = fnum(r["compile_ms"]) + cg = fnum(r["codegen_ms"]) or 0.0 + rt = fnum(r["runtime_ms"]) + total = None if cm is None else cm + cg + comp_totals.append(total) + run_times.append(rt) + b = r["bench"] + if total and b in base_compile: + comp_ratios.append(base_compile[b] / total) + if rt and b in base_runtime: + run_ratios.append(base_runtime[b] / rt) + lines.append( + f"| {key[0]} | {key[1]} | {len(vals)} | {geo(comp_ratios)} | " + f"{geo(run_ratios)} | {avg(comp_totals)} | {avg(run_times)} |" + ) +lines.append("") +lines.append(f"Raw CSV: `{csv_path}`") +with open(out_path, "w") as f: + f.write("\n".join(lines) + "\n") +PY +} + +printf 'bench,tool,opt,status,compile_ms,codegen_ms,runtime_ms,exit_code,log\n' >"$CSV" + +BENCH_DIR="$(bench_source_dir)" +if [ ! -d "$BENCH_DIR" ]; then + printf 'opt-bench: benchmark directory not found: %s\n' "$BENCH_DIR" >&2 + exit 2 +fi +if [ ! 
-x "$CFREE" ]; then + printf 'opt-bench: cfree binary not found: %s\n' "$CFREE" >&2 + printf 'opt-bench: run `make bin` or set CFREE=/path/to/cfree\n' >&2 + exit 2 +fi +ensure_mir || exit 2 + +printf 'opt-bench: output: %s\n' "$OUT_DIR" +printf 'opt-bench: benches: %s\n' "$BENCHES" +printf 'opt-bench: levels: %s\n' "$LEVELS" +printf 'opt-bench: compile repeats=%s run repeats=%s\n' "$COMPILE_REPEATS" "$RUN_REPEATS" + +for bench in $BENCHES; do + src="$BENCH_DIR/$bench.c" + expect="$BENCH_DIR/$bench.expect" + arg_line="$(read_arg_file "$BENCH_DIR/$bench.arg")" + if [ ! -f "$src" ]; then + printf 'opt-bench: skipping missing benchmark source: %s\n' "$src" >&2 + continue + fi + printf '+++++ %s %s +++++\n' "$bench" "$arg_line" + for opt in $LEVELS; do + bench_native_tool "$bench" "$(tool_label "$GCC")" "$GCC" "$opt" "$src" "$expect" "$arg_line" + bench_native_tool "$bench" "$(tool_label "$CLANG")" "$CLANG" "$opt" "$src" "$expect" "$arg_line" + bench_native_tool "$bench" "cfree" "$CFREE cc" "$opt" "$src" "$expect" "$arg_line" + bench_cfree_run "$bench" "$opt" "$src" "$expect" "$arg_line" + bench_mir "$bench" "$opt" "$src" "$expect" "$arg_line" + done +done + +write_summary +printf 'opt-bench: wrote %s\n' "$CSV" +printf 'opt-bench: wrote %s\n' "$SUMMARY"