commit 28f717c79bf761e9f3bcf3b33276733f67a37347
parent f9d2f3384870b09083786f88b7ecdf5209f68794
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Fri, 22 May 2026 04:54:01 -0700
Add OPT benchmark harness
Diffstat:
| M | Makefile | | | 5 | ++++- |
| M | doc/OPT.md | | | 72 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- |
| M | driver/run.c | | | 116 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------ |
| A | scripts/opt_bench.sh | | | 463 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
4 files changed, 646 insertions(+), 10 deletions(-)
diff --git a/Makefile b/Makefile
@@ -42,7 +42,7 @@ LIB_AR = build/libcfree.a
LANG_TOY_AR = build/libcfree_toy.a
BIN = build/cfree
-.PHONY: all lib bin format clean bootstrap
+.PHONY: all lib bin format clean bootstrap bench-opt
all: lib bin
@@ -115,6 +115,9 @@ bootstrap: $(BIN)
cmp $(BOOTSTRAP_STAGE2_BIN) $(BOOTSTRAP_STAGE3_BIN)
shasum -a 256 $(BOOTSTRAP_STAGE2_BIN) $(BOOTSTRAP_STAGE3_BIN)
+bench-opt: bin
+ @bash scripts/opt_bench.sh
+
format:
find src include driver lang test rt \( -path test/lex -o -path test/pp \) -prune -o \( -name '*.c' -o -name '*.h' \) -print | xargs clang-format -i --style=google
diff --git a/doc/OPT.md b/doc/OPT.md
@@ -1151,14 +1151,84 @@ Benchmark set:
benchmark that requires only supported libc/runtime features.
- cfree-specific stress cases for ABI, TLS, atomics, and inline asm.
+Primary harness:
+
+```
+make bench-opt
+```
+
+`scripts/opt_bench.sh` compares MIR `c2m`, `clang`, `gcc-15`, `cfree cc`,
+and `cfree run` at `-O0`, `-O1`, and `-O2` on MIR's `c-benchmarks` sources
+from `~/tmp/mir/c-benchmarks`. It writes per-case logs, `results.csv`, and a
+geomean summary under `build/bench/opt/`.
+
+The hosted `cfree cc` lane passes `--sysroot` and `-lc`; on Darwin the
+sysroot defaults to `xcrun --show-sdk-path`, and it can be overridden with
+`CFREE_OPT_BENCH_SYSROOT`. The `cfree run` lane is an in-process JIT run for
+direct comparison with MIR; it also passes `--sysroot` and `-lc`, expands the
+hosted libc headers/defines in the driver, and relies on host-symbol fallback
+for libc calls.
+
+The benchmark intentionally records unsupported cases instead of stopping at
+the first failure. A cfree row that cannot compile a hosted benchmark is a
+`COMPILE_FAIL` data point, not a harness failure; this lets optimizer work
+track coverage and performance in the same run.
+
+Useful focused runs:
+
+```
+CFREE_OPT_BENCHES="sieve spectral-norm" make bench-opt
+CFREE_OPT_BENCH_LEVELS="1 2" CFREE_OPT_BENCH_RUN_REPEATS=5 make bench-opt
+GCC=/opt/homebrew/bin/gcc-15 MIR_DIR=~/tmp/mir make bench-opt
+```
+
Measure:
- Compile wall time for `-O0`, `-O1`, `-O2`.
-- Executable run time against clang/gcc `-O2` when available.
+- Executable run time against `gcc-15 -O2`, `clang -O2`, and MIR `c2m -O2`
+ when available.
+- MIR-specific split: `compile_ms` is C-to-binary-MIR time, `codegen_ms` is
+ the JIT link/generation slice reported by `c2m -v`, and `runtime_ms` is the
+ generated function execution slice. Native compiler rows use compile+link
+ wall time for `compile_ms` and executable wall time for `runtime_ms`.
+- `cfree-run` uses `cfree run --bench-time`: `compile_ms` is compile+JIT time,
+ and `runtime_ms` is the in-process entry-call execution slice.
- Code size for hot text sections.
- Pass counters: removed GVN expressions, folded branches, removed stores,
coalesced moves, spills/restores, split ranges, post-RA deleted moves.
+Initial representative run, 2026-05-22:
+
+- Scope: `array`, `binary-trees`, `hash`, `hash2`, `matrix`, `nbody`,
+ `sieve`, and `spectral-norm`; levels `0 1 2`; one compile repeat and one
+ run repeat. Output was written to `build/bench/opt/results.csv` and
+ `build/bench/opt/summary.md`.
+- Coverage: 120 data rows; 91 `OK`, 18 `COMPILE_FAIL`, 9 `RUN_FAIL`, and
+ 2 `OUTPUT_FAIL`.
+- `gcc-15` and `clang` completed all rows. cfree and MIR are blocked on some
+ hosted/math cases: `binary-trees`, `nbody`, and `spectral-norm` hit Darwin
+ `math.h`/builtin compatibility issues. cfree also has an `-O2` wrong-code
+ failure on `matrix`.
+- Runtime geomean versus `gcc-15 -O2` on completed rows:
+ `cfree-run` improved from `0.372x` at `-O0` to `0.445x` at `-O1` and
+ `0.480x` at `-O2`; MIR improved from `0.554x` to `0.609x` to `0.796x`.
+ `clang -O2` measured `1.076x`.
+- Compile/JIT geomean versus `gcc-15 -O2`: `cfree-run` was about `11.5x` to
+ `12.0x` faster, while MIR was about `2.7x` faster. On cases where both
+ completed, cfree-run compile/JIT time was roughly 4-5x faster than MIR.
+- Direct cfree-run versus MIR runtime on common successful cases:
+ `0.67x` at `-O0`, `0.73x` at `-O1`, and `0.57x` at `-O2`. This confirms
+ the current split: cfree's compile/JIT path is very fast, but generated-code
+ quality still trails MIR, especially at `-O2`.
+
+Immediate benchmark blockers:
+
+- Fix the `matrix -O2` wrong-code regression before trusting O2 timing.
+- Add or model the hosted math builtins needed by Darwin `math.h`, starting
+ with `__builtin_fabsf`.
+- Re-run the full MIR benchmark set after those blockers, then increase repeat
+ counts for stable numbers.
+
Target:
- `-O1` should be the fast optimized tier and materially faster to compile
diff --git a/driver/run.c b/driver/run.c
@@ -1,15 +1,15 @@
+#include <cfree/compile.h>
+#include <cfree/core.h>
+#include <cfree/jit.h>
+#include <cfree/link.h>
#include <stdint.h>
#include <stdlib.h>
#include "cflags.h"
#include "driver.h"
+#include "hosted.h"
#include "inputs.h"
-#include <cfree/compile.h>
-#include <cfree/core.h>
-#include <cfree/jit.h>
-#include <cfree/link.h>
-
/* `cfree run` — JIT-compile one or more inputs and invoke the entry symbol
* (default `main`) in-process. Args after `--` are passed to the JITed
* program as argv. Mirrors the cc front-end for input shape (.c / - sources,
@@ -30,10 +30,14 @@ typedef struct RunOptions {
int opt_level;
int debug_info;
int metrics;
+ int bench_time;
int warnings_are_errors; /* -Werror */
uint32_t max_errors; /* -fmax-errors=N */
const char* entry; /* -e, default "main" */
+ const char* sysroot; /* --sysroot */
+ int wants_hosted_libc; /* -lc */
CfreeTarget target; /* -target / host */
+ DriverHostedPlan hosted;
DriverCflags cf;
DriverInputs inputs;
@@ -75,8 +79,8 @@ static void run_metrics_scope_end(void* user, const char* name) {
f = m->stack[depth];
end_ns = driver_now_ns();
elapsed = (end_ns >= f.start_ns) ? (end_ns - f.start_ns) : 0;
- driver_logf("%*s%s %.3f ms", (int)(depth * 2u), "",
- f.name ? f.name : name, (double)elapsed / 1000000.0);
+ driver_logf("%*s%s %.3f ms", (int)(depth * 2u), "", f.name ? f.name : name,
+ (double)elapsed / 1000000.0);
}
static void run_metrics_count(void* user, const char* name, uint64_t value) {
@@ -109,6 +113,10 @@ static void run_metrics_finish(RunMetrics* m) {
}
}
+static void run_bench_time(const char* name, uint64_t ns) {
+ driver_logf("cfree-run %s -- %.3f msec", name, (double)ns / 1000000.0);
+}
+
static void run_usage(void) {
driver_errf(RUN_TOOL, "%s",
"usage: cfree run [options] inputs... [-- prog-arg...]\n"
@@ -146,8 +154,12 @@ void driver_help_run(void) {
" -O0 -O1 -O2 Optimization level (default -O0)\n"
" -g Emit DWARF debug info\n"
" --time, --metrics Emit scoped compile/link/JIT timing to stderr\n"
+ " --bench-time Emit parseable compile/JIT/execution timings\n"
" -e SYMBOL Entry symbol (default `main`)\n"
" -target TRIPLE Cross-compile target (see `cfree cc --help`)\n"
+ " --sysroot DIR Hosted libc sysroot for headers/defines with -lc\n"
+ " -lc Enable hosted libc headers/defines; calls resolve "
+ "via host dlsym\n"
" -fPIC -fpic Position-independent code (no-op for the JIT)\n"
" -fPIE -fpie Position-independent executable (no-op for the "
"JIT)\n"
@@ -192,7 +204,9 @@ static int run_alloc_arrays(RunOptions* o, int argc) {
return 1;
}
if (driver_inputs_init(&o->inputs, o->env, RUN_TOOL, argc) != 0) return 1;
- if (driver_cflags_init(&o->cf, o->env, argc) != 0) {
+ if (driver_cflags_init(
+ &o->cf, o->env,
+ argc + DRIVER_HOSTED_MAX_INCLUDES + DRIVER_HOSTED_MAX_DEFINES) != 0) {
driver_errf(RUN_TOOL, "out of memory");
return 1;
}
@@ -245,6 +259,31 @@ static int run_classify_positional(RunOptions* o, const char* a) {
return 0;
}
+static int run_apply_hosted_profile(RunOptions* o) {
+ DriverHostedRequest req;
+ uint32_t i;
+ if (!o->wants_hosted_libc) return 0;
+ {
+ DriverHostedRequest z = {0};
+ req = z;
+ }
+ req.env = o->env;
+ req.tool = RUN_TOOL;
+ req.target = o->target;
+ req.sysroot = o->sysroot;
+ req.static_link = 0;
+ req.link_inputs = 0;
+ if (driver_hosted_resolve(&req, &o->hosted) != 0) return 1;
+ for (i = 0; i < o->hosted.nsystem_includes; ++i) {
+ o->cf.system_include_dirs[o->cf.nsystem_include_dirs++] =
+ o->hosted.system_includes[i];
+ }
+ for (i = 0; i < o->hosted.ndefines; ++i) {
+ o->cf.defines[o->cf.ndefines++] = o->hosted.defines[i];
+ }
+ return 0;
+}
+
static int run_parse(int argc, char** argv, RunOptions* o) {
int i;
int after_dash_dash = 0;
@@ -278,6 +317,10 @@ static int run_parse(int argc, char** argv, RunOptions* o) {
o->debug_info = 1;
continue;
}
+ if (driver_streq(a, "--bench-time")) {
+ o->bench_time = 1;
+ continue;
+ }
if (driver_streq(a, "--time") || driver_streq(a, "--metrics")) {
o->metrics = 1;
continue;
@@ -342,6 +385,43 @@ static int run_parse(int argc, char** argv, RunOptions* o) {
}
continue;
}
+ if (driver_streq(a, "--sysroot") || driver_streq(a, "-isysroot")) {
+ if (++i >= argc) {
+ driver_errf(RUN_TOOL, "%s requires an argument", a);
+ return 1;
+ }
+ o->sysroot = argv[i];
+ continue;
+ }
+ if (driver_strneq(a, "--sysroot=", 10)) {
+ o->sysroot = a + 10;
+ continue;
+ }
+ if (driver_streq(a, "-lc")) {
+ o->wants_hosted_libc = 1;
+ continue;
+ }
+ if (driver_streq(a, "-l")) {
+ if (++i >= argc) {
+ driver_errf(RUN_TOOL, "-l requires an argument");
+ return 1;
+ }
+ if (!driver_streq(argv[i], "c")) {
+ driver_errf(RUN_TOOL, "unsupported hosted library for JIT: -l%s",
+ argv[i]);
+ return 1;
+ }
+ o->wants_hosted_libc = 1;
+ continue;
+ }
+ if (driver_strneq(a, "-l", 2)) {
+ if (!driver_streq(a + 2, "c")) {
+ driver_errf(RUN_TOOL, "unsupported hosted library for JIT: %s", a);
+ return 1;
+ }
+ o->wants_hosted_libc = 1;
+ continue;
+ }
if (driver_streq(a, "-e")) {
if (++i >= argc) {
@@ -370,6 +450,7 @@ static int run_parse(int argc, char** argv, RunOptions* o) {
return 1;
}
if (!o->entry) o->entry = "main";
+ if (run_apply_hosted_profile(o) != 0) return 1;
/* Synthetic argv[0]. Hosted programs conventionally read argv[0] as
* the program name; under `cfree run` there is no executable path, so
@@ -380,6 +461,7 @@ static int run_parse(int argc, char** argv, RunOptions* o) {
static void run_options_release(RunOptions* o) {
size_t bound = o->argv_bound;
+ driver_hosted_plan_fini(o->env, &o->hosted);
driver_inputs_release(&o->inputs);
driver_cflags_fini(&o->cf, o->env);
driver_free(o->env, o->prog_argv, bound * sizeof(*o->prog_argv));
@@ -467,6 +549,11 @@ int driver_run(int argc, char** argv) {
void* sym;
MainFn entry_fn;
int rc;
+ uint64_t bench_total_start = 0;
+ uint64_t bench_compile_start = 0;
+ uint64_t bench_compile_end = 0;
+ uint64_t bench_exec_start = 0;
+ uint64_t bench_exec_end = 0;
if (argc < 2 || driver_argv_wants_help(argc, argv, 1)) {
driver_help_run();
@@ -489,6 +576,7 @@ int driver_run(int argc, char** argv) {
driver_logf("cfree metrics:");
run_metrics_begin(metrics, "run.total");
}
+ if (ro.bench_time) bench_total_start = driver_now_ns();
/* Compiler backs the JIT image — keep it alive across cfree_jit_lookup
* and the entry call, free after cfree_jit_free. */
@@ -502,10 +590,15 @@ int driver_run(int argc, char** argv) {
return 1;
}
+ if (ro.bench_time) bench_compile_start = driver_now_ns();
run_metrics_begin(metrics, "run.compile_and_jit");
rc = run_compile_and_jit(&ro, compiler, &jhost, &jit);
run_metrics_end(metrics, "run.compile_and_jit");
+ if (ro.bench_time) bench_compile_end = driver_now_ns();
if (rc != 0) {
+ if (ro.bench_time)
+ run_bench_time("compile_and_jit",
+ bench_compile_end - bench_compile_start);
driver_compiler_free(compiler);
run_metrics_finish(metrics);
run_options_release(&ro);
@@ -536,9 +629,16 @@ int driver_run(int argc, char** argv) {
}
run_metrics_begin(metrics, "run.entry_call");
+ if (ro.bench_time) bench_exec_start = driver_now_ns();
if (!run_call_wasm_entry(&ro, jit, sym, &rc))
rc = entry_fn((int)ro.prog_argc, ro.prog_argv);
+ if (ro.bench_time) bench_exec_end = driver_now_ns();
run_metrics_end(metrics, "run.entry_call");
+ if (ro.bench_time) {
+ run_bench_time("compile_and_jit", bench_compile_end - bench_compile_start);
+ run_bench_time("execution", bench_exec_end - bench_exec_start);
+ run_bench_time("total", bench_exec_end - bench_total_start);
+ }
cfree_jit_free(jit);
driver_compiler_free(compiler);
diff --git a/scripts/opt_bench.sh b/scripts/opt_bench.sh
@@ -0,0 +1,463 @@
+#!/usr/bin/env bash
+set -uo pipefail
+
+ROOT="$(cd "$(dirname "$0")/.." && pwd)"
+MIR_DIR="${MIR_DIR:-$HOME/tmp/mir}"
+MIR_C2M="${MIR_C2M:-$MIR_DIR/c2m}"
+CFREE="${CFREE:-$ROOT/build/cfree}"
+CLANG="${CLANG:-clang}"
+GCC="${GCC:-gcc-15}"
+OUT_DIR="${CFREE_OPT_BENCH_OUT:-$ROOT/build/bench/opt}"
+CFREE_SYSROOT="${CFREE_OPT_BENCH_SYSROOT:-}"
+
+LEVELS="${CFREE_OPT_BENCH_LEVELS:-0 1 2}"
+BENCHES="${CFREE_OPT_BENCHES:-array binary-trees except funnkuch-reduce hash hash2 heapsort lists matrix method-call mandelbrot nbody sieve spectral-norm strcat}"
+COMPILE_REPEATS="${CFREE_OPT_BENCH_COMPILE_REPEATS:-1}"
+RUN_REPEATS="${CFREE_OPT_BENCH_RUN_REPEATS:-3}"
+MIR_MAKE="${MIR_MAKE:-}"
+
+case "$(uname -s 2>/dev/null || true)" in
+ Darwin)
+ DEFAULT_MATH_LIBS=""
+ if command -v xcrun >/dev/null 2>&1; then
+ DEFAULT_SYSROOT="$(xcrun --show-sdk-path)"
+ DEFAULT_CFLAGS_EXTRA="-isysroot $DEFAULT_SYSROOT"
+ else
+ DEFAULT_SYSROOT=""
+ DEFAULT_CFLAGS_EXTRA=""
+ fi
+ ;;
+ *)
+ DEFAULT_MATH_LIBS="-lm"
+ DEFAULT_SYSROOT="${SYSROOT:-}"
+ DEFAULT_CFLAGS_EXTRA=""
+ ;;
+esac
+if [ -z "$CFREE_SYSROOT" ]; then
+ CFREE_SYSROOT="$DEFAULT_SYSROOT"
+fi
+MATH_LIBS="${CFREE_OPT_BENCH_MATH_LIBS:-$DEFAULT_MATH_LIBS}"
+CFLAGS_EXTRA="${CFREE_OPT_BENCH_CFLAGS:-$DEFAULT_CFLAGS_EXTRA}"
+CFREE_FLAGS_EXTRA="${CFREE_OPT_BENCH_CFREE_FLAGS:-}"
+CFREE_RUN_FLAGS_EXTRA="${CFREE_OPT_BENCH_CFREE_RUN_FLAGS:-}"
+
+CSV="$OUT_DIR/results.csv"
+SUMMARY="$OUT_DIR/summary.md"
+LOG_DIR="$OUT_DIR/logs"
+BIN_DIR="$OUT_DIR/bin"
+
+mkdir -p "$OUT_DIR" "$LOG_DIR" "$BIN_DIR"
+
+now_ns() {
+ python3 -c 'import time; print(time.monotonic_ns())'
+}
+
+ns_to_ms() {
+ awk -v ns="$1" 'BEGIN { printf "%.3f", ns / 1000000.0 }'
+}
+
+min_ms() {
+ awk -v a="$1" -v b="$2" 'BEGIN {
+ if (a == "" || a == "NA") printf "%s", b;
+ else if (b + 0 < a + 0) printf "%s", b;
+ else printf "%s", a;
+ }'
+}
+
+csv_field() {
+ printf '%s' "$1" | sed 's/"/""/g; s/^/"/; s/$/"/'
+}
+
+record_row() {
+ local bench="$1" tool="$2" opt="$3" status="$4"
+ local compile_ms="$5" codegen_ms="$6" runtime_ms="$7" rc="$8" log="$9"
+ {
+ csv_field "$bench"; printf ','
+ csv_field "$tool"; printf ','
+ csv_field "$opt"; printf ','
+ csv_field "$status"; printf ','
+ csv_field "$compile_ms"; printf ','
+ csv_field "$codegen_ms"; printf ','
+ csv_field "$runtime_ms"; printf ','
+ csv_field "$rc"; printf ','
+ csv_field "$log"; printf '\n'
+ } >>"$CSV"
+}
+
+run_timed() {
+ local out="$1" err="$2"
+ shift 2
+ local t0 t1
+ t0="$(now_ns)"
+ "$@" >"$out" 2>"$err"
+ RUN_RC=$?
+ t1="$(now_ns)"
+ RUN_MS="$(ns_to_ms "$((t1 - t0))")"
+}
+
+read_arg_file() {
+ local f="$1"
+ if [ -f "$f" ]; then
+ sh "$f"
+ fi
+}
+
+check_expected() {
+ local expect="$1" got="$2" diff_out="$3"
+ if [ ! -f "$expect" ]; then
+ return 0
+ fi
+ cmp "$expect" "$got" >/dev/null 2>&1 && return 0
+ diff -u "$expect" "$got" >"$diff_out" 2>&1
+ return 1
+}
+
+parse_mir_ms() {
+ local pattern="$1" file="$2"
+ awk -v pat="$pattern" '
+ $0 ~ pat {
+ v = $(NF - 1)
+ unit = $NF
+ if (unit == "usec") v = v / 1000.0
+ if (unit == "msec") v = v + 0.0
+ printf "%.3f\n", v
+ exit
+ }
+ ' "$file"
+}
+
+ensure_mir() {
+ if [ -x "$MIR_C2M" ]; then
+ return 0
+ fi
+ if [ ! -d "$MIR_DIR" ]; then
+ printf 'opt-bench: MIR_DIR does not exist: %s\n' "$MIR_DIR" >&2
+ return 1
+ fi
+ if [ -z "$MIR_MAKE" ]; then
+ if command -v gmake >/dev/null 2>&1; then
+ MIR_MAKE=gmake
+ else
+ MIR_MAKE=make
+ fi
+ fi
+ printf 'opt-bench: building MIR c2m with %s -C %s c2m\n' "$MIR_MAKE" "$MIR_DIR"
+ "$MIR_MAKE" -C "$MIR_DIR" c2m
+}
+
+tool_label() {
+ basename "$1"
+}
+
+bench_source_dir() {
+ printf '%s/c-benchmarks' "$MIR_DIR"
+}
+
+compile_native() {
+ local bench="$1" tool="$2" cc="$3" opt="$4" src="$5" exe="$6" log_base="$7"
+ local best="NA" rep out err
+ local cmd=() cflags=() libs=() cfree_flags=()
+ read -r -a cmd <<<"$cc"
+ read -r -a cflags <<<"$CFLAGS_EXTRA"
+ read -r -a libs <<<"$MATH_LIBS"
+ read -r -a cfree_flags <<<"$CFREE_FLAGS_EXTRA"
+ if [ "$tool" = "cfree" ]; then
+ cflags=("${cfree_flags[@]}")
+ if [ -n "$CFREE_SYSROOT" ]; then
+ cflags+=(--sysroot "$CFREE_SYSROOT")
+ fi
+ libs+=("-lc")
+ fi
+ for rep in $(seq 1 "$COMPILE_REPEATS"); do
+ out="$log_base.compile.$rep.out"
+ err="$log_base.compile.$rep.err"
+ rm -f "$exe"
+ run_timed "$out" "$err" "${cmd[@]}" -std=c99 "-O$opt" -I"$(bench_source_dir)" \
+ "${cflags[@]}" "$src" "${libs[@]}" -o "$exe"
+ if [ "$RUN_RC" -ne 0 ]; then
+ record_row "$bench" "$tool" "$opt" "COMPILE_FAIL" "$RUN_MS" "NA" \
+ "NA" "$RUN_RC" "$err"
+ return 1
+ fi
+ best="$(min_ms "$best" "$RUN_MS")"
+ done
+ COMPILE_MS="$best"
+ return 0
+}
+
+run_native() {
+ local bench="$1" tool="$2" opt="$3" exe="$4" expect="$5" arg_line="$6" log_base="$7"
+ local best="NA" best_rc=0 rep out err diff_out
+ local args=()
+ read -r -a args <<<"$arg_line"
+ for rep in $(seq 1 "$RUN_REPEATS"); do
+ out="$log_base.run.$rep.out"
+ err="$log_base.run.$rep.err"
+ run_timed "$out" "$err" "$exe" "${args[@]}"
+ if [ "$RUN_RC" -ne 0 ]; then
+ record_row "$bench" "$tool" "$opt" "RUN_FAIL" "$COMPILE_MS" "NA" \
+ "$RUN_MS" "$RUN_RC" "$err"
+ return 1
+ fi
+ diff_out="$log_base.run.$rep.diff"
+ if ! check_expected "$expect" "$out" "$diff_out"; then
+ record_row "$bench" "$tool" "$opt" "OUTPUT_FAIL" "$COMPILE_MS" "NA" \
+ "$RUN_MS" "$RUN_RC" "$diff_out"
+ return 1
+ fi
+ best="$(min_ms "$best" "$RUN_MS")"
+ best_rc="$RUN_RC"
+ done
+ record_row "$bench" "$tool" "$opt" "OK" "$COMPILE_MS" "NA" "$best" \
+ "$best_rc" "$log_base"
+}
+
+bench_native_tool() {
+ local bench="$1" tool="$2" cc="$3" opt="$4" src="$5" expect="$6" arg_line="$7"
+ local exe="$BIN_DIR/$tool.O$opt.$bench.exe"
+ local log_base="$LOG_DIR/$tool.O$opt.$bench"
+ local cmd=()
+ read -r -a cmd <<<"$cc"
+ if ! command -v "${cmd[0]}" >/dev/null 2>&1 && [ ! -x "${cmd[0]}" ]; then
+ record_row "$bench" "$tool" "$opt" "TOOL_MISSING" "NA" "NA" "NA" \
+ "127" "$cc"
+ return 0
+ fi
+ compile_native "$bench" "$tool" "$cc" "$opt" "$src" "$exe" "$log_base" || return 0
+ run_native "$bench" "$tool" "$opt" "$exe" "$expect" "$arg_line" "$log_base"
+}
+
+bench_mir() {
+ local bench="$1" opt="$2" src="$3" expect="$4" arg_line="$5"
+ local tool="mir-c2m"
+ local bmir="$BIN_DIR/$tool.O$opt.$bench.bmir"
+ local log_base="$LOG_DIR/$tool.O$opt.$bench"
+ local best_run="NA" best_codegen="NA" best_rc=0 rep out err diff_out exec_ms cg_ms
+ local args=()
+ read -r -a args <<<"$arg_line"
+
+ run_timed "$log_base.compile.out" "$log_base.compile.err" \
+ "$MIR_C2M" "-O$opt" -c -I"$(bench_source_dir)" -o "$bmir" "$src"
+ if [ "$RUN_RC" -ne 0 ]; then
+ record_row "$bench" "$tool" "$opt" "COMPILE_FAIL" "$RUN_MS" "NA" \
+ "NA" "$RUN_RC" "$log_base.compile.err"
+ return 0
+ fi
+ COMPILE_MS="$RUN_MS"
+
+ for rep in $(seq 1 "$RUN_REPEATS"); do
+ out="$log_base.run.$rep.out"
+ err="$log_base.run.$rep.err"
+ run_timed "$out" "$err" "$MIR_C2M" -v "-O$opt" "$bmir" -eg "${args[@]}"
+ exec_ms="$(parse_mir_ms 'execution' "$err")"
+ cg_ms="$(parse_mir_ms 'MIR link finish' "$err")"
+ [ -z "$exec_ms" ] && exec_ms="$RUN_MS"
+ [ -z "$cg_ms" ] && cg_ms="NA"
+ if [ "$RUN_RC" -ne 0 ]; then
+ record_row "$bench" "$tool" "$opt" "RUN_FAIL" "$COMPILE_MS" "$cg_ms" \
+ "$exec_ms" "$RUN_RC" "$err"
+ return 0
+ fi
+ diff_out="$log_base.run.$rep.diff"
+ if ! check_expected "$expect" "$out" "$diff_out"; then
+ record_row "$bench" "$tool" "$opt" "OUTPUT_FAIL" "$COMPILE_MS" "$cg_ms" \
+ "$exec_ms" "$RUN_RC" "$diff_out"
+ return 0
+ fi
+ best_run="$(min_ms "$best_run" "$exec_ms")"
+ if [ "$cg_ms" != "NA" ]; then
+ best_codegen="$(min_ms "$best_codegen" "$cg_ms")"
+ fi
+ best_rc="$RUN_RC"
+ done
+ record_row "$bench" "$tool" "$opt" "OK" "$COMPILE_MS" "$best_codegen" \
+ "$best_run" "$best_rc" "$log_base"
+}
+
+bench_cfree_run() {
+ local bench="$1" opt="$2" src="$3" expect="$4" arg_line="$5"
+ local tool="cfree-run"
+ local log_base="$LOG_DIR/$tool.O$opt.$bench"
+ local best_run="NA" best_compile="NA" best_rc=0 rep out err diff_out cm_ms exec_ms
+ local args=() cfree_flags=() run_flags=() cmd=()
+ read -r -a args <<<"$arg_line"
+ read -r -a cfree_flags <<<"$CFREE_FLAGS_EXTRA"
+ read -r -a run_flags <<<"$CFREE_RUN_FLAGS_EXTRA"
+
+ if [ ! -x "$CFREE" ]; then
+ record_row "$bench" "$tool" "$opt" "TOOL_MISSING" "NA" "NA" "NA" \
+ "127" "$CFREE"
+ return 0
+ fi
+
+ for rep in $(seq 1 "$RUN_REPEATS"); do
+ out="$log_base.run.$rep.out"
+ err="$log_base.run.$rep.err"
+ cmd=("$CFREE" run --bench-time "-O$opt" -I"$(bench_source_dir)")
+ if [ -n "$CFREE_SYSROOT" ]; then
+ cmd+=(--sysroot "$CFREE_SYSROOT")
+ fi
+ cmd+=("${cfree_flags[@]}" "${run_flags[@]}" "$src" -lc -- "${args[@]}")
+ run_timed "$out" "$err" "${cmd[@]}"
+ cm_ms="$(parse_mir_ms 'cfree-run compile_and_jit' "$err")"
+ exec_ms="$(parse_mir_ms 'cfree-run execution' "$err")"
+ [ -z "$cm_ms" ] && cm_ms="NA"
+ [ -z "$exec_ms" ] && exec_ms="$RUN_MS"
+ if [ "$RUN_RC" -ne 0 ]; then
+ record_row "$bench" "$tool" "$opt" "RUN_FAIL" "$cm_ms" "NA" \
+ "$exec_ms" "$RUN_RC" "$err"
+ return 0
+ fi
+ diff_out="$log_base.run.$rep.diff"
+ if ! check_expected "$expect" "$out" "$diff_out"; then
+ record_row "$bench" "$tool" "$opt" "OUTPUT_FAIL" "$cm_ms" "NA" \
+ "$exec_ms" "$RUN_RC" "$diff_out"
+ return 0
+ fi
+ if [ "$cm_ms" != "NA" ]; then
+ best_compile="$(min_ms "$best_compile" "$cm_ms")"
+ fi
+ best_run="$(min_ms "$best_run" "$exec_ms")"
+ best_rc="$RUN_RC"
+ done
+ record_row "$bench" "$tool" "$opt" "OK" "$best_compile" "NA" "$best_run" \
+ "$best_rc" "$log_base"
+}
+
+write_summary() {
+ python3 - "$CSV" "$SUMMARY" "$(tool_label "$GCC")" <<'PY'
+import csv
+import math
+import sys
+from collections import defaultdict
+
+csv_path, out_path, base_tool = sys.argv[1:4]
+with open(csv_path, newline="") as f:
+ rows = list(csv.DictReader(f))
+
+def fnum(v):
+ if v in ("", "NA", None):
+ return None
+ try:
+ return float(v)
+ except ValueError:
+ return None
+
+ok = [r for r in rows if r["status"] == "OK"]
+groups = defaultdict(list)
+for r in ok:
+ groups[(r["tool"], r["opt"])].append(r)
+
+base_runtime = {}
+base_compile = {}
+for r in ok:
+ if r["tool"] == base_tool and r["opt"] == "2":
+ rt = fnum(r["runtime_ms"])
+ cm = fnum(r["compile_ms"])
+ if rt and rt > 0:
+ base_runtime[r["bench"]] = rt
+ if cm and cm > 0:
+ base_compile[r["bench"]] = cm
+
+def geo(xs):
+ xs = [x for x in xs if x and x > 0]
+ if not xs:
+ return "NA"
+ return f"{math.exp(sum(math.log(x) for x in xs) / len(xs)):.3f}"
+
+def avg(xs):
+ xs = [x for x in xs if x is not None]
+ if not xs:
+ return "NA"
+ return f"{sum(xs) / len(xs):.3f}"
+
+status_counts = defaultdict(int)
+for r in rows:
+ status_counts[r["status"]] += 1
+
+lines = []
+lines.append("# OPT Benchmark Summary")
+lines.append("")
+lines.append(f"Base for speed ratios: `{base_tool} -O2`.")
+lines.append("For MIR, `compile_ms` is C-to-binary-MIR time and `codegen_ms` is the JIT link/generation slice reported by `c2m -v`; compile ratios use their sum. `cfree-run` uses `--bench-time`: `compile_ms` is compile+JIT time, and `runtime_ms` is the in-process entry-call execution slice.")
+lines.append("")
+lines.append("## Status")
+lines.append("")
+lines.append("| status | rows |")
+lines.append("| --- | ---: |")
+for k in sorted(status_counts):
+ lines.append(f"| {k} | {status_counts[k]} |")
+lines.append("")
+lines.append("## Geomean Ratios")
+lines.append("")
+lines.append("| tool | opt | ok cases | compile speed vs base | runtime speed vs base | avg compile+codegen ms | avg runtime ms |")
+lines.append("| --- | ---: | ---: | ---: | ---: | ---: | ---: |")
+for key in sorted(groups):
+ vals = groups[key]
+ comp_totals = []
+ run_times = []
+ comp_ratios = []
+ run_ratios = []
+ for r in vals:
+ cm = fnum(r["compile_ms"])
+ cg = fnum(r["codegen_ms"]) or 0.0
+ rt = fnum(r["runtime_ms"])
+ total = None if cm is None else cm + cg
+ comp_totals.append(total)
+ run_times.append(rt)
+ b = r["bench"]
+ if total and b in base_compile:
+ comp_ratios.append(base_compile[b] / total)
+ if rt and b in base_runtime:
+ run_ratios.append(base_runtime[b] / rt)
+ lines.append(
+ f"| {key[0]} | {key[1]} | {len(vals)} | {geo(comp_ratios)} | "
+ f"{geo(run_ratios)} | {avg(comp_totals)} | {avg(run_times)} |"
+ )
+lines.append("")
+lines.append(f"Raw CSV: `{csv_path}`")
+with open(out_path, "w") as f:
+ f.write("\n".join(lines) + "\n")
+PY
+}
+
+printf 'bench,tool,opt,status,compile_ms,codegen_ms,runtime_ms,exit_code,log\n' >"$CSV"
+
+BENCH_DIR="$(bench_source_dir)"
+if [ ! -d "$BENCH_DIR" ]; then
+ printf 'opt-bench: benchmark directory not found: %s\n' "$BENCH_DIR" >&2
+ exit 2
+fi
+if [ ! -x "$CFREE" ]; then
+ printf 'opt-bench: cfree binary not found: %s\n' "$CFREE" >&2
+ printf 'opt-bench: run `make bin` or set CFREE=/path/to/cfree\n' >&2
+ exit 2
+fi
+ensure_mir || exit 2
+
+printf 'opt-bench: output: %s\n' "$OUT_DIR"
+printf 'opt-bench: benches: %s\n' "$BENCHES"
+printf 'opt-bench: levels: %s\n' "$LEVELS"
+printf 'opt-bench: compile repeats=%s run repeats=%s\n' "$COMPILE_REPEATS" "$RUN_REPEATS"
+
+for bench in $BENCHES; do
+ src="$BENCH_DIR/$bench.c"
+ expect="$BENCH_DIR/$bench.expect"
+ arg_line="$(read_arg_file "$BENCH_DIR/$bench.arg")"
+ if [ ! -f "$src" ]; then
+ printf 'opt-bench: skipping missing benchmark source: %s\n' "$src" >&2
+ continue
+ fi
+ printf '+++++ %s %s +++++\n' "$bench" "$arg_line"
+ for opt in $LEVELS; do
+ bench_native_tool "$bench" "$(tool_label "$GCC")" "$GCC" "$opt" "$src" "$expect" "$arg_line"
+ bench_native_tool "$bench" "$(tool_label "$CLANG")" "$CLANG" "$opt" "$src" "$expect" "$arg_line"
+ bench_native_tool "$bench" "cfree" "$CFREE cc" "$opt" "$src" "$expect" "$arg_line"
+ bench_cfree_run "$bench" "$opt" "$src" "$expect" "$arg_line"
+ bench_mir "$bench" "$opt" "$src" "$expect" "$arg_line"
+ done
+done
+
+write_summary
+printf 'opt-bench: wrote %s\n' "$CSV"
+printf 'opt-bench: wrote %s\n' "$SUMMARY"