bench: replace FAST flag with per-tool skip flags; add compare script - kit

commit 42c5ca3a53352e4fd3148c52257cb3a02a911be2
parent 67344f35ae4b8850515b5741595103ef9bbdd65b
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Wed, 27 May 2026 11:38:43 -0700

bench: replace FAST flag with per-tool skip flags; add compare script

- Drop the FAST/slow binary mode in opt_bench.sh.  Default is now always
  the quick subset (O0+O1, 5 benches, 3 repeats each), with the full set
  described in a comment for easy opt-in via CFREE_OPT_BENCHES.
- Add SKIP_GCC / SKIP_CLANG / SKIP_MIR flags so individual reference
  tools can be disabled without touching anything else.  Default keeps
  gcc and MIR, skips clang (slow on macOS, rarely useful for quick runs).
- Add CFREE_OPT_BENCH_ARG_OVERRIDE to pin a single bench arg across all
  benchmarks (useful for quick smoke runs with small N).
- Add scripts/opt_bench_compare.py: reads the CSV and prints a
  per-benchmark + geomean table of cfree vs the gcc baseline.  The bench
  script auto-invokes it at the end when present.

Diffstat:
M scripts/opt_bench.sh  | 43 +++++++++++++++++++++++++------------------
A scripts/opt_bench_compare.py  | 182 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

2 files changed, 207 insertions(+), 18 deletions(-)
diff --git a/scripts/opt_bench.sh b/scripts/opt_bench.sh
@@ -10,22 +10,25 @@ GCC="${GCC:-gcc-15}"
 OUT_DIR="${CFREE_OPT_BENCH_OUT:-$ROOT/build/bench/opt}"
 CFREE_SYSROOT="${CFREE_OPT_BENCH_SYSROOT:-}"
 
-FAST="${CFREE_OPT_BENCH_FAST:-0}"
-if [ "$FAST" = "1" ]; then
-  DEFAULT_LEVELS="1"
-  DEFAULT_BENCHES="array hash hash2 matrix sieve"
-  DEFAULT_COMPILE_REPEATS="3"
-  DEFAULT_RUN_REPEATS="3"
-else
-  DEFAULT_LEVELS="0 1 2"
-  DEFAULT_BENCHES="array binary-trees except funnkuch-reduce hash hash2 heapsort lists matrix method-call mandelbrot nbody sieve spectral-norm strcat"
-  DEFAULT_COMPILE_REPEATS="1"
-  DEFAULT_RUN_REPEATS="3"
-fi
+# Full benchmark set (override with CFREE_OPT_BENCHES to use it):
+#   array binary-trees except funnkuch-reduce hash hash2 heapsort lists matrix
+#   method-call mandelbrot nbody sieve spectral-norm strcat
+# `except` in particular runs for many seconds at -O0, which is why the default
+# below is a small, quick subset at O0+O1 (skip the heavy O2 sweep).
+DEFAULT_LEVELS="0 1"
+DEFAULT_BENCHES="array hash hash2 matrix sieve"
+DEFAULT_COMPILE_REPEATS="3"
+DEFAULT_RUN_REPEATS="3"
+# Each setting below takes its value from the matching environment variable
+# when that variable is set and non-empty, otherwise from the default above.
 LEVELS="${CFREE_OPT_BENCH_LEVELS:-$DEFAULT_LEVELS}"
 BENCHES="${CFREE_OPT_BENCHES:-$DEFAULT_BENCHES}"
 COMPILE_REPEATS="${CFREE_OPT_BENCH_COMPILE_REPEATS:-$DEFAULT_COMPILE_REPEATS}"
 RUN_REPEATS="${CFREE_OPT_BENCH_RUN_REPEATS:-$DEFAULT_RUN_REPEATS}"
+# Per-tool skip flags: the value "1" skips the tool, any other value runs it.
+# By default keep gcc (the baseline) and MIR, and skip clang.
+# Override individually, e.g. CFREE_OPT_BENCH_SKIP_MIR=1 or
+# CFREE_OPT_BENCH_SKIP_CLANG=0.
+SKIP_GCC="${CFREE_OPT_BENCH_SKIP_GCC:-0}"
+SKIP_CLANG="${CFREE_OPT_BENCH_SKIP_CLANG:-1}"
+SKIP_MIR="${CFREE_OPT_BENCH_SKIP_MIR:-0}"
 MIR_MAKE="${MIR_MAKE:-}"
 
 case "$(uname -s 2>/dev/null || true)" in
@@ -493,23 +496,27 @@ printf 'opt-bench: compile repeats=%s run repeats=%s\n' "$COMPILE_REPEATS" "$RUN
 for bench in $BENCHES; do
   src="$BENCH_DIR/$bench.c"
   expect="$BENCH_DIR/$bench.expect"
-  arg_line="$(read_arg_file "$BENCH_DIR/$bench.arg")"
+  # A non-empty CFREE_OPT_BENCH_ARG_OVERRIDE replaces the benchmark's own
+  # .arg file contents, so every benchmark runs with the same argument line.
+  arg_line="${CFREE_OPT_BENCH_ARG_OVERRIDE:-$(read_arg_file "$BENCH_DIR/$bench.arg")}"
   if [ ! -f "$src" ]; then
     printf 'opt-bench: skipping missing benchmark source: %s\n' "$src" >&2
     continue
   fi
   printf '+++++ %s %s +++++\n' "$bench" "$arg_line"
   for opt in $LEVELS; do
-    if [ "$FAST" != "1" ]; then
-      bench_native_tool "$bench" "$(tool_label "$GCC")" "$GCC" "$opt" "$src" "$expect" "$arg_line"
-      bench_native_tool "$bench" "$(tool_label "$CLANG")" "$CLANG" "$opt" "$src" "$expect" "$arg_line"
-    fi
+    # The gcc, clang and MIR reference runs are each gated on their own SKIP_*
+    # flag (skipped only when it is exactly "1"); the two cfree runs are not
+    # gated and always execute.
+    [ "$SKIP_GCC" != "1" ] && bench_native_tool "$bench" "$(tool_label "$GCC")" "$GCC" "$opt" "$src" "$expect" "$arg_line"
+    [ "$SKIP_CLANG" != "1" ] && bench_native_tool "$bench" "$(tool_label "$CLANG")" "$CLANG" "$opt" "$src" "$expect" "$arg_line"
     bench_native_tool "$bench" "cfree" "$CFREE cc" "$opt" "$src" "$expect" "$arg_line"
     bench_cfree_run "$bench" "$opt" "$src" "$expect" "$arg_line"
-    bench_mir "$bench" "$opt" "$src" "$expect" "$arg_line"
+    [ "$SKIP_MIR" != "1" ] && bench_mir "$bench" "$opt" "$src" "$expect" "$arg_line"
   done
 done
 
 write_summary
 printf 'opt-bench: wrote %s\n' "$CSV"
 printf 'opt-bench: wrote %s\n' "$SUMMARY"
+
+# Print the comparison table when the helper script exists. The `|| true`
+# makes this best-effort: a failing report does not fail the benchmark run.
+COMPARE_SCRIPT="$ROOT/scripts/opt_bench_compare.py"
+if [ -f "$COMPARE_SCRIPT" ]; then
+  printf '\n'
+  python3 "$COMPARE_SCRIPT" "$CSV" || true
+fi
diff --git a/scripts/opt_bench_compare.py b/scripts/opt_bench_compare.py
@@ -0,0 +1,182 @@
+#!/usr/bin/env python3
+"""Compare cfree tools vs a baseline (default: gcc-15 -O0).
+
+Usage:
+    python3 scripts/opt_bench_compare.py [results.csv]
+    python3 scripts/opt_bench_compare.py results.csv --base-tool gcc-15 --base-opt 0
+"""
+import csv
+import math
+import os
+import sys
+
+
+def fnum(v):
+    """Parse *v* as a float, returning None when it cannot be converted.
+
+    Inputs for which ``float()`` raises TypeError or ValueError (for example
+    None or an empty string) yield None instead of propagating the error.
+    """
+    try:
+        parsed = float(v)
+    except (TypeError, ValueError):
+        return None
+    return parsed
+
+
+def speedup_label(s):
+    """Format a speedup ratio as a human-readable label.
+
+    Args:
+        s: Speedup ratio, where a value above 1 means faster than the
+            baseline, or None when no ratio could be computed.
+
+    Returns:
+        "NA" for None or for a zero, negative, or NaN ratio; "~tied" when the
+        ratio is within 0.03 of 1.0; otherwise "<r>x faster" (s > 1) or
+        "<r>x slower" (s < 1, shown as the reciprocal).
+    """
+    # A ratio of 0 would divide by zero in the "slower" branch, and negative
+    # or NaN ratios have no meaningful label, so report them as unavailable.
+    if s is None or not s > 0:
+        return "NA"
+    if abs(s - 1.0) < 0.03:
+        return "~tied"
+    return f"{s:.2f}x faster" if s > 1 else f"{1/s:.2f}x slower"
+
+
+def compare(val, base):
+    """Return the speedup of *val* relative to *base*, computed as base / val.
+
+    A result above 1 means *val* is the smaller (faster) time.  None is
+    returned when either operand is falsy (None, 0) or not positive.
+    """
+    usable = bool(val) and bool(base) and not (base <= 0 or val <= 0)
+    return base / val if usable else None
+
+
+def geo(xs):
+    """Return the geometric mean of the positive entries of *xs*.
+
+    Falsy entries (None, 0) and non-positive entries are dropped first; None
+    is returned when no entries remain.
+    """
+    positives = [value for value in xs if value and value > 0]
+    if positives:
+        mean_log = sum(map(math.log, positives)) / len(positives)
+        return math.exp(mean_log)
+    return None
+
+
+def compile_total(r):
+    """Return compile_ms plus codegen_ms for a CSV row mapping, or None.
+
+    A missing or unparseable codegen_ms counts as 0.0; a missing or
+    unparseable compile_ms makes the whole total None.
+    """
+    compile_ms = fnum(r.get("compile_ms"))
+    codegen_ms = fnum(r.get("codegen_ms")) or 0.0
+    if compile_ms is None:
+        return None
+    return compile_ms + codegen_ms
+
+
+def main():
+    """Print per-benchmark and geomean comparisons of cfree against a baseline.
+
+    Command line: an optional CSV path (default: build/bench/opt/results.csv
+    under the parent of this script's directory), ``--base-tool TOOL`` and
+    ``--base-opt LEVEL`` (default "0").  Only rows whose status column is
+    "OK" are used.  When no base tool is given, the first OK row whose tool
+    name starts with "gcc-15", else "gcc", selects it.
+
+    Exits with a message when the CSV is missing, contains no OK rows, no
+    gcc tool can be found, or the chosen baseline has no rows.
+    """
+    # argparse replaces a hand-rolled loop that silently treated a flag with
+    # a missing value (or any unknown option) as the CSV path.
+    import argparse
+
+    parser = argparse.ArgumentParser(
+        description="Compare cfree tools vs a baseline from an opt-bench CSV."
+    )
+    parser.add_argument("csv_path", nargs="?", default=None, metavar="results.csv")
+    parser.add_argument("--base-tool", default=None, metavar="TOOL")
+    parser.add_argument("--base-opt", default="0", metavar="LEVEL")
+    ns = parser.parse_args()
+    csv_path = ns.csv_path
+    base_tool_arg = ns.base_tool
+    base_opt = ns.base_opt
+
+    if csv_path is None:
+        root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+        csv_path = os.path.join(root, "build", "bench", "opt", "results.csv")
+
+    if not os.path.exists(csv_path):
+        sys.exit(f"compare: CSV not found: {csv_path}")
+
+    with open(csv_path, newline="") as f:
+        # .get() so a CSV without a status column reports "no OK rows"
+        # instead of raising KeyError.
+        ok = [r for r in csv.DictReader(f) if r.get("status") == "OK"]
+
+    if not ok:
+        sys.exit("compare: no OK rows in CSV")
+
+    # Find base tool
+    base_tool = base_tool_arg
+    if not base_tool:
+        for prefix in ("gcc-15", "gcc"):
+            base_tool = next((r["tool"] for r in ok if r["tool"].startswith(prefix)), None)
+            if base_tool:
+                break
+    if not base_tool:
+        sys.exit("compare: no gcc tool found; use --base-tool TOOL")
+
+    # Baseline map: bench -> (compile_total_ms, runtime_ms)
+    baseline = {}
+    for r in ok:
+        if r["tool"] == base_tool and r["opt"] == base_opt:
+            baseline[r["bench"]] = (compile_total(r), fnum(r["runtime_ms"]))
+
+    if not baseline:
+        sys.exit(f"compare: no rows for tool={base_tool} opt={base_opt}")
+
+    # Later rows win when (tool, opt, bench) repeats.
+    idx = {(r["tool"], r["opt"], r["bench"]): r for r in ok}
+    # Only benchmarks that have a baseline row are reported.
+    all_benches = sorted(baseline)
+    # Numeric opt levels sort numerically; anything else sorts after them.
+    all_opts = sorted(
+        {r["opt"] for r in ok},
+        key=lambda x: (int(x) if x.isdigit() else 99, x),
+    )
+
+    # Show order: base row first, then cfree/cfree-run at each opt level
+    show = [(base_tool, base_opt)]
+    seen = {(base_tool, base_opt)}
+    for t in ("cfree", "cfree-run"):
+        for o in all_opts:
+            key = (t, o)
+            if key not in seen and any((t, o, b) in idx for b in all_benches):
+                show.append(key)
+                seen.add(key)
+
+    print(f"cfree vs {base_tool} -O{base_opt}  [{os.path.basename(csv_path)}]")
+    print()
+
+    # Per-benchmark table
+    bench_label = all_benches[0] if len(all_benches) == 1 else f"{len(all_benches)} benches"
+    print(f"Per-benchmark: {bench_label}")
+    print()
+    hdr = (
+        f"{'bench':<16} {'tool':<12} opt  "
+        f"{'compile_ms':>10}  {'compile vs base':>16}  "
+        f"{'runtime_ms':>10}  {'runtime vs base':>16}"
+    )
+    print(hdr)
+    print("-" * len(hdr))
+
+    for bench in all_benches:
+        bcm, brt = baseline[bench]
+        first = True
+        for tool, opt in show:
+            r = idx.get((tool, opt, bench))
+            if r is None:
+                continue
+            ct = compile_total(r)
+            rt = fnum(r["runtime_ms"])
+            is_base = tool == base_tool and opt == base_opt
+            cl = "baseline" if is_base else speedup_label(compare(ct, bcm))
+            rl = "baseline" if is_base else speedup_label(compare(rt, brt))
+            # The benchmark name is printed only on its first row.
+            bench_col = bench if first else ""
+            first = False
+            ct_s = f"{ct:.1f}" if ct is not None else "NA"
+            rt_s = f"{rt:.1f}" if rt is not None else "NA"
+            print(
+                f"{bench_col:<16} {tool:<12} O{opt:<2}  "
+                f"{ct_s:>10}  {cl:>16}  "
+                f"{rt_s:>10}  {rl:>16}"
+            )
+        print()
+
+    # A geomean over a single benchmark would just repeat the table above.
+    if len(all_benches) <= 1:
+        return
+
+    # Geomean summary
+    print(f"Geomean ({len(all_benches)} benches)")
+    print()
+    hdr2 = f"{'tool':<12} opt  {'compile speedup':>16}  {'runtime speedup':>16}"
+    print(hdr2)
+    print("-" * len(hdr2))
+    for tool, opt in show:
+        crs, rrs = [], []
+        for bench in all_benches:
+            bcm, brt = baseline.get(bench, (None, None))
+            r = idx.get((tool, opt, bench))
+            if r is None:
+                continue
+            s = compare(compile_total(r), bcm)
+            if s is not None:
+                crs.append(s)
+            s = compare(fnum(r["runtime_ms"]), brt)
+            if s is not None:
+                rrs.append(s)
+        is_base = tool == base_tool and opt == base_opt
+        cl = "baseline" if is_base else speedup_label(geo(crs))
+        rl = "baseline" if is_base else speedup_label(geo(rrs))
+        print(f"{tool:<12} O{opt:<2}  {cl:>16}  {rl:>16}")
+
+
+if __name__ == "__main__":
+    main()

	kit kit
	git clone https://git.ryansepassi.com/git/kit.git
	Log \| Files \| Refs \| README

M	scripts/opt_bench.sh	\|	43	+++++++++++++++++++++++++------------------
A	scripts/opt_bench_compare.py	\|	182	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++