kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

opt_bench.sh (18989B)


      1 #!/usr/bin/env bash
      2 set -uo pipefail
      3 
      4 ROOT="$(cd "$(dirname "$0")/.." && pwd)"
      5 MIR_DIR="${MIR_DIR:-$HOME/tmp/mir}"
      6 MIR_C2M="${MIR_C2M:-$MIR_DIR/c2m}"
      7 KIT="${KIT:-$ROOT/build/kit}"
      8 CLANG="${CLANG:-clang}"
      9 GCC="${GCC:-gcc-15}"
     10 OUT_DIR="${KIT_OPT_BENCH_OUT:-$ROOT/build/bench/opt}"
     11 KIT_SYSROOT="${KIT_OPT_BENCH_SYSROOT:-}"
     12 
     13 # Full benchmark set used for baseline caching (override with KIT_OPT_BENCHES):
     14 #   array binary-trees funnkuch-reduce hash hash2 heapsort lists matrix
     15 #   method-call mandelbrot nbody sieve spectral-norm strcat
     16 # `except` is excluded from the full set: it's setjmp/longjmp-bound and runs
     17 # for ~2.5 minutes per O0 sample, which inflates wall-clock without telling us
     18 # anything about codegen quality. Pass it explicitly via KIT_OPT_BENCHES if
     19 # wanted.
     20 FULL_BENCHES="array binary-trees funnkuch-reduce hash hash2 heapsort lists matrix method-call mandelbrot nbody sieve spectral-norm strcat"
     21 
     22 # Cached baseline timings for the fixed compilers (gcc/clang/MIR). Their codegen
     23 # does not change as we iterate on kit, so we measure them once into this file
     24 # (checked into scripts/) and reuse it for comparisons. Regenerate with
     25 # `KIT_OPT_BENCH_MODE=baseline scripts/opt_bench.sh`.
     26 BASELINE_CSV="${KIT_OPT_BENCH_BASELINE_CSV:-$ROOT/scripts/opt_bench_baseline.csv}"
     27 
     28 # Mode selects which tools run and where results go:
     29 #   kit    (default) - only kit/kit-run; writes build/bench/opt/results.csv
     30 #   baseline           - only gcc/clang/MIR over the full set; writes BASELINE_CSV
     31 MODE="${KIT_OPT_BENCH_MODE:-kit}"
     32 
     33 DEFAULT_LEVELS="0 1"
     34 DEFAULT_COMPILE_REPEATS="3"
     35 DEFAULT_RUN_REPEATS="3"
     36 
     37 case "$MODE" in
     38   baseline)
     39     # Measure the fixed compilers across the full set; kit is skipped.
     40     DEFAULT_BENCHES="$FULL_BENCHES"
     41     DEF_SKIP_GCC=0; DEF_SKIP_CLANG=0; DEF_SKIP_MIR=0; DEF_SKIP_KIT=1
     42     DEFAULT_CSV="$BASELINE_CSV"
     43     ;;
     44   kit)
     45     # Default working mode: only kit, compared against the cached baseline.
     46     DEFAULT_BENCHES="array hash hash2 matrix sieve"
     47     DEF_SKIP_GCC=1; DEF_SKIP_CLANG=1; DEF_SKIP_MIR=1; DEF_SKIP_KIT=0
     48     DEFAULT_CSV="$OUT_DIR/results.csv"
     49     ;;
     50   *)
     51     printf 'opt-bench: unknown KIT_OPT_BENCH_MODE: %s (want kit|baseline)\n' "$MODE" >&2
     52     exit 2
     53     ;;
     54 esac
     55 
     56 LEVELS="${KIT_OPT_BENCH_LEVELS:-$DEFAULT_LEVELS}"
     57 BENCHES="${KIT_OPT_BENCHES:-$DEFAULT_BENCHES}"
     58 COMPILE_REPEATS="${KIT_OPT_BENCH_COMPILE_REPEATS:-$DEFAULT_COMPILE_REPEATS}"
     59 RUN_REPEATS="${KIT_OPT_BENCH_RUN_REPEATS:-$DEFAULT_RUN_REPEATS}"
     60 # Per-tool skip flags. Defaults come from the mode above; override individually,
     61 # e.g. KIT_OPT_BENCH_SKIP_MIR=1 or KIT_OPT_BENCH_SKIP_KIT=0.
     62 SKIP_GCC="${KIT_OPT_BENCH_SKIP_GCC:-$DEF_SKIP_GCC}"
     63 SKIP_CLANG="${KIT_OPT_BENCH_SKIP_CLANG:-$DEF_SKIP_CLANG}"
     64 SKIP_MIR="${KIT_OPT_BENCH_SKIP_MIR:-$DEF_SKIP_MIR}"
     65 SKIP_KIT="${KIT_OPT_BENCH_SKIP_KIT:-$DEF_SKIP_KIT}"
     66 MIR_MAKE="${MIR_MAKE:-}"
     67 
     68 case "$(uname -s 2>/dev/null || true)" in
     69   Darwin)
     70     DEFAULT_MATH_LIBS=""
     71     if command -v xcrun >/dev/null 2>&1; then
     72       DEFAULT_SYSROOT="$(xcrun --show-sdk-path)"
     73       DEFAULT_CFLAGS_EXTRA="-isysroot $DEFAULT_SYSROOT"
     74     else
     75       DEFAULT_SYSROOT=""
     76       DEFAULT_CFLAGS_EXTRA=""
     77     fi
     78     ;;
     79   *)
     80     DEFAULT_MATH_LIBS="-lm"
     81     DEFAULT_SYSROOT="${SYSROOT:-}"
     82     DEFAULT_CFLAGS_EXTRA=""
     83     ;;
     84 esac
     85 if [ -z "$KIT_SYSROOT" ]; then
     86   KIT_SYSROOT="$DEFAULT_SYSROOT"
     87 fi
     88 MATH_LIBS="${KIT_OPT_BENCH_MATH_LIBS:-$DEFAULT_MATH_LIBS}"
     89 CFLAGS_EXTRA="${KIT_OPT_BENCH_CFLAGS:-$DEFAULT_CFLAGS_EXTRA}"
     90 KIT_FLAGS_EXTRA="${KIT_OPT_BENCH_KIT_FLAGS:-}"
     91 KIT_RUN_FLAGS_EXTRA="${KIT_OPT_BENCH_KIT_RUN_FLAGS:-}"
     92 
     93 CSV="${KIT_OPT_BENCH_CSV:-$DEFAULT_CSV}"
     94 SUMMARY="$OUT_DIR/summary.md"
     95 LOG_DIR="$OUT_DIR/logs"
     96 BIN_DIR="$OUT_DIR/bin"
     97 
     98 mkdir -p "$OUT_DIR" "$LOG_DIR" "$BIN_DIR"
     99 
    100 now_ns() {
    101   python3 -c 'import time; print(time.monotonic_ns())'
    102 }
    103 
    104 ns_to_ms() {
    105   awk -v ns="$1" 'BEGIN { printf "%.3f", ns / 1000000.0 }'
    106 }
    107 
    108 min_ms() {
    109   awk -v a="$1" -v b="$2" 'BEGIN {
    110     if (a == "" || a == "NA") printf "%s", b;
    111     else if (b + 0 < a + 0) printf "%s", b;
    112     else printf "%s", a;
    113   }'
    114 }
    115 
    116 csv_field() {
    117   printf '%s' "$1" | sed 's/"/""/g; s/^/"/; s/$/"/'
    118 }
    119 
    120 record_row() {
    121   local bench="$1" tool="$2" opt="$3" status="$4"
    122   local compile_ms="$5" codegen_ms="$6" runtime_ms="$7" rc="$8" log="$9"
    123   {
    124     csv_field "$bench"; printf ','
    125     csv_field "$tool"; printf ','
    126     csv_field "$opt"; printf ','
    127     csv_field "$status"; printf ','
    128     csv_field "$compile_ms"; printf ','
    129     csv_field "$codegen_ms"; printf ','
    130     csv_field "$runtime_ms"; printf ','
    131     csv_field "$rc"; printf ','
    132     csv_field "$log"; printf '\n'
    133   } >>"$CSV"
    134 }
    135 
    136 run_timed() {
    137   local out="$1" err="$2"
    138   shift 2
    139   local t0 t1
    140   t0="$(now_ns)"
    141   "$@" >"$out" 2>"$err"
    142   RUN_RC=$?
    143   t1="$(now_ns)"
    144   RUN_MS="$(ns_to_ms "$((t1 - t0))")"
    145 }
    146 
    147 read_arg_file() {
    148   local f="$1"
    149   if [ -f "$f" ]; then
    150     sh "$f"
    151   fi
    152 }
    153 
    154 check_expected() {
    155   local expect="$1" got="$2" diff_out="$3"
    156   if [ ! -f "$expect" ]; then
    157     return 0
    158   fi
    159   cmp "$expect" "$got" >/dev/null 2>&1 && return 0
    160   diff -u "$expect" "$got" >"$diff_out" 2>&1
    161   return 1
    162 }
    163 
    164 parse_mir_ms() {
    165   local pattern="$1" file="$2"
    166   awk -v pat="$pattern" '
    167     $0 ~ pat {
    168       v = $(NF - 1)
    169       unit = $NF
    170       if (unit == "usec") v = v / 1000.0
    171       if (unit == "msec") v = v + 0.0
    172       printf "%.3f\n", v
    173       exit
    174     }
    175   ' "$file"
    176 }
    177 
    178 # Sum all matches for a pattern. kit-run --bench-time emits per-pass timings
    179 # once per function compiled, so summing yields the per-pass total for the
    180 # whole TU. Empty (no matches) prints nothing.
    181 sum_pattern_ms() {
    182   local pattern="$1" file="$2"
    183   awk -v pat="$pattern" '
    184     $0 ~ pat {
    185       v = $(NF - 1) + 0.0
    186       unit = $NF
    187       if (unit == "usec") v = v / 1000.0
    188       total += v
    189       hits++
    190     }
    191     END { if (hits > 0) printf "%.3f\n", total }
    192   ' "$file"
    193 }
    194 
    195 ensure_mir() {
    196   if [ -x "$MIR_C2M" ]; then
    197     return 0
    198   fi
    199   if [ ! -d "$MIR_DIR" ]; then
    200     printf 'opt-bench: MIR_DIR does not exist: %s\n' "$MIR_DIR" >&2
    201     return 1
    202   fi
    203   if [ -z "$MIR_MAKE" ]; then
    204     if command -v gmake >/dev/null 2>&1; then
    205       MIR_MAKE=gmake
    206     else
    207       MIR_MAKE=make
    208     fi
    209   fi
    210   printf 'opt-bench: building MIR c2m with %s -C %s c2m\n' "$MIR_MAKE" "$MIR_DIR"
    211   "$MIR_MAKE" -C "$MIR_DIR" c2m
    212 }
    213 
    214 tool_label() {
    215   basename "$1"
    216 }
    217 
    218 bench_source_dir() {
    219   printf '%s/c-benchmarks' "$MIR_DIR"
    220 }
    221 
    222 compile_native() {
    223   local bench="$1" tool="$2" cc="$3" opt="$4" src="$5" exe="$6" log_base="$7"
    224   local best="NA" rep out err
    225   local cmd=() cflags=() libs=() kit_flags=()
    226   read -r -a cmd <<<"$cc"
    227   read -r -a cflags <<<"$CFLAGS_EXTRA"
    228   read -r -a libs <<<"$MATH_LIBS"
    229   read -r -a kit_flags <<<"$KIT_FLAGS_EXTRA"
    230   if [ "$tool" = "kit" ]; then
    231     cflags=("${kit_flags[@]}")
    232     if [ -n "$KIT_SYSROOT" ]; then
    233       cflags+=(--sysroot "$KIT_SYSROOT")
    234     fi
    235     libs+=("-lc")
    236   fi
    237   for rep in $(seq 1 "$COMPILE_REPEATS"); do
    238     out="$log_base.compile.$rep.out"
    239     err="$log_base.compile.$rep.err"
    240     rm -f "$exe"
    241     run_timed "$out" "$err" "${cmd[@]}" -std=c99 "-O$opt" -I"$(bench_source_dir)" \
    242       "${cflags[@]}" "$src" "${libs[@]}" -o "$exe"
    243     if [ "$RUN_RC" -ne 0 ]; then
    244       record_row "$bench" "$tool" "$opt" "COMPILE_FAIL" "$RUN_MS" "NA" \
    245         "NA" "$RUN_RC" "$err"
    246       return 1
    247     fi
    248     best="$(min_ms "$best" "$RUN_MS")"
    249   done
    250   COMPILE_MS="$best"
    251   return 0
    252 }
    253 
    254 run_native() {
    255   local bench="$1" tool="$2" opt="$3" exe="$4" expect="$5" arg_line="$6" log_base="$7"
    256   local best="NA" best_rc=0 rep out err diff_out
    257   local args=()
    258   read -r -a args <<<"$arg_line"
    259   for rep in $(seq 1 "$RUN_REPEATS"); do
    260     out="$log_base.run.$rep.out"
    261     err="$log_base.run.$rep.err"
    262     run_timed "$out" "$err" "$exe" "${args[@]}"
    263     if [ "$RUN_RC" -ne 0 ]; then
    264       record_row "$bench" "$tool" "$opt" "RUN_FAIL" "$COMPILE_MS" "NA" \
    265         "$RUN_MS" "$RUN_RC" "$err"
    266       return 1
    267     fi
    268     diff_out="$log_base.run.$rep.diff"
    269     if ! check_expected "$expect" "$out" "$diff_out"; then
    270       record_row "$bench" "$tool" "$opt" "OUTPUT_FAIL" "$COMPILE_MS" "NA" \
    271         "$RUN_MS" "$RUN_RC" "$diff_out"
    272       return 1
    273     fi
    274     best="$(min_ms "$best" "$RUN_MS")"
    275     best_rc="$RUN_RC"
    276   done
    277   record_row "$bench" "$tool" "$opt" "OK" "$COMPILE_MS" "NA" "$best" \
    278     "$best_rc" "$log_base"
    279 }
    280 
    281 bench_native_tool() {
    282   local bench="$1" tool="$2" cc="$3" opt="$4" src="$5" expect="$6" arg_line="$7"
    283   local exe="$BIN_DIR/$tool.O$opt.$bench.exe"
    284   local log_base="$LOG_DIR/$tool.O$opt.$bench"
    285   local cmd=()
    286   read -r -a cmd <<<"$cc"
    287   if ! command -v "${cmd[0]}" >/dev/null 2>&1 && [ ! -x "${cmd[0]}" ]; then
    288     record_row "$bench" "$tool" "$opt" "TOOL_MISSING" "NA" "NA" "NA" \
    289       "127" "$cc"
    290     return 0
    291   fi
    292   compile_native "$bench" "$tool" "$cc" "$opt" "$src" "$exe" "$log_base" || return 0
    293   run_native "$bench" "$tool" "$opt" "$exe" "$expect" "$arg_line" "$log_base"
    294 }
    295 
    296 bench_mir() {
    297   local bench="$1" opt="$2" src="$3" expect="$4" arg_line="$5"
    298   local tool="mir-c2m"
    299   local bmir="$BIN_DIR/$tool.O$opt.$bench.bmir"
    300   local log_base="$LOG_DIR/$tool.O$opt.$bench"
    301   local best_run="NA" best_codegen="NA" best_rc=0 rep out err diff_out exec_ms cg_ms
    302   local args=()
    303   read -r -a args <<<"$arg_line"
    304 
    305   run_timed "$log_base.compile.out" "$log_base.compile.err" \
    306     "$MIR_C2M" "-O$opt" -c -I"$(bench_source_dir)" -o "$bmir" "$src"
    307   if [ "$RUN_RC" -ne 0 ]; then
    308     record_row "$bench" "$tool" "$opt" "COMPILE_FAIL" "$RUN_MS" "NA" \
    309       "NA" "$RUN_RC" "$log_base.compile.err"
    310     return 0
    311   fi
    312   COMPILE_MS="$RUN_MS"
    313 
    314   for rep in $(seq 1 "$RUN_REPEATS"); do
    315     out="$log_base.run.$rep.out"
    316     err="$log_base.run.$rep.err"
    317     run_timed "$out" "$err" "$MIR_C2M" -v "-O$opt" "$bmir" -eg "${args[@]}"
    318     exec_ms="$(parse_mir_ms 'execution' "$err")"
    319     cg_ms="$(parse_mir_ms 'MIR link finish' "$err")"
    320     [ -z "$exec_ms" ] && exec_ms="$RUN_MS"
    321     [ -z "$cg_ms" ] && cg_ms="NA"
    322     if [ "$RUN_RC" -ne 0 ]; then
    323       record_row "$bench" "$tool" "$opt" "RUN_FAIL" "$COMPILE_MS" "$cg_ms" \
    324         "$exec_ms" "$RUN_RC" "$err"
    325       return 0
    326     fi
    327     diff_out="$log_base.run.$rep.diff"
    328     if ! check_expected "$expect" "$out" "$diff_out"; then
    329       record_row "$bench" "$tool" "$opt" "OUTPUT_FAIL" "$COMPILE_MS" "$cg_ms" \
    330         "$exec_ms" "$RUN_RC" "$diff_out"
    331       return 0
    332     fi
    333     best_run="$(min_ms "$best_run" "$exec_ms")"
    334     if [ "$cg_ms" != "NA" ]; then
    335       best_codegen="$(min_ms "$best_codegen" "$cg_ms")"
    336     fi
    337     best_rc="$RUN_RC"
    338   done
    339   record_row "$bench" "$tool" "$opt" "OK" "$COMPILE_MS" "$best_codegen" \
    340     "$best_run" "$best_rc" "$log_base"
    341 }
    342 
    343 bench_kit_run() {
    344   local bench="$1" opt="$2" src="$3" expect="$4" arg_line="$5"
    345   local tool="kit-run"
    346   local log_base="$LOG_DIR/$tool.O$opt.$bench"
    347   local best_run="NA" best_compile="NA" best_rc=0 rep out err diff_out cm_ms exec_ms
    348   local args=() kit_flags=() run_flags=() cmd=()
    349   read -r -a args <<<"$arg_line"
    350   read -r -a kit_flags <<<"$KIT_FLAGS_EXTRA"
    351   read -r -a run_flags <<<"$KIT_RUN_FLAGS_EXTRA"
    352 
    353   if [ ! -x "$KIT" ]; then
    354     record_row "$bench" "$tool" "$opt" "TOOL_MISSING" "NA" "NA" "NA" \
    355       "127" "$KIT"
    356     return 0
    357   fi
    358 
    359   for rep in $(seq 1 "$RUN_REPEATS"); do
    360     out="$log_base.run.$rep.out"
    361     err="$log_base.run.$rep.err"
    362     cmd=("$KIT" run --bench-time "-O$opt" -I"$(bench_source_dir)")
    363     if [ -n "$KIT_SYSROOT" ]; then
    364       cmd+=(--sysroot "$KIT_SYSROOT")
    365     fi
    366     cmd+=("${kit_flags[@]}" "${run_flags[@]}" "$src" -lc -- "${args[@]}")
    367     run_timed "$out" "$err" "${cmd[@]}"
    368     cm_ms="$(parse_mir_ms 'kit-run compile_and_jit' "$err")"
    369     exec_ms="$(parse_mir_ms 'kit-run execution' "$err")"
    370     [ -z "$cm_ms" ] && cm_ms="NA"
    371     [ -z "$exec_ms" ] && exec_ms="$RUN_MS"
    372     if [ "$RUN_RC" -ne 0 ]; then
    373       record_row "$bench" "$tool" "$opt" "RUN_FAIL" "$cm_ms" "NA" \
    374         "$exec_ms" "$RUN_RC" "$err"
    375       return 0
    376     fi
    377     diff_out="$log_base.run.$rep.diff"
    378     if ! check_expected "$expect" "$out" "$diff_out"; then
    379       record_row "$bench" "$tool" "$opt" "OUTPUT_FAIL" "$cm_ms" "NA" \
    380         "$exec_ms" "$RUN_RC" "$diff_out"
    381       return 0
    382     fi
    383     if [ "$cm_ms" != "NA" ]; then
    384       best_compile="$(min_ms "$best_compile" "$cm_ms")"
    385     fi
    386     best_run="$(min_ms "$best_run" "$exec_ms")"
    387     best_rc="$RUN_RC"
    388   done
    389   record_row "$bench" "$tool" "$opt" "OK" "$best_compile" "NA" "$best_run" \
    390     "$best_rc" "$log_base"
    391 }
    392 
    393 write_summary() {
    394   python3 - "$CSV" "$SUMMARY" "$(tool_label "$GCC")" "$BASELINE_CSV" <<'PY'
    395 import csv
    396 import math
    397 import os
    398 import sys
    399 from collections import defaultdict
    400 
    401 csv_path, out_path, base_tool, baseline_path = sys.argv[1:5]
    402 with open(csv_path, newline="") as f:
    403     rows = list(csv.DictReader(f))
    404 
    405 # Merge cached baseline timings (gcc/clang/MIR) so the summary tables include
    406 # the fixed compilers even though the default run only measures kit.
    407 seen = {(r["tool"], r["opt"], r["bench"]) for r in rows}
    408 if baseline_path and os.path.exists(baseline_path) and os.path.abspath(baseline_path) != os.path.abspath(csv_path):
    409     with open(baseline_path, newline="") as f:
    410         for r in csv.DictReader(f):
    411             key = (r["tool"], r["opt"], r["bench"])
    412             if key not in seen:
    413                 rows.append(r)
    414                 seen.add(key)
    415 
    416 def fnum(v):
    417     if v in ("", "NA", None):
    418         return None
    419     try:
    420         return float(v)
    421     except ValueError:
    422         return None
    423 
    424 ok = [r for r in rows if r["status"] == "OK"]
    425 groups = defaultdict(list)
    426 for r in ok:
    427     groups[(r["tool"], r["opt"])].append(r)
    428 
    429 base_runtime = {}
    430 base_compile = {}
    431 for r in ok:
    432     if r["tool"] == base_tool and r["opt"] == "2":
    433         rt = fnum(r["runtime_ms"])
    434         cm = fnum(r["compile_ms"])
    435         if rt and rt > 0:
    436             base_runtime[r["bench"]] = rt
    437         if cm and cm > 0:
    438             base_compile[r["bench"]] = cm
    439 
    440 def geo(xs):
    441     xs = [x for x in xs if x and x > 0]
    442     if not xs:
    443         return "NA"
    444     return f"{math.exp(sum(math.log(x) for x in xs) / len(xs)):.3f}"
    445 
    446 def avg(xs):
    447     xs = [x for x in xs if x is not None]
    448     if not xs:
    449         return "NA"
    450     return f"{sum(xs) / len(xs):.3f}"
    451 
    452 status_counts = defaultdict(int)
    453 for r in rows:
    454     status_counts[r["status"]] += 1
    455 
    456 lines = []
    457 lines.append("# OPT Benchmark Summary")
    458 lines.append("")
    459 lines.append(f"Base for speed ratios: `{base_tool} -O2`.")
    460 lines.append("Unified compile-speed ratios use `compile_ms + codegen_ms` where both are available. For MIR, `compile_ms` is C-to-binary-MIR time and `codegen_ms` is the JIT link/generation slice reported by `c2m -v`; this keeps the headline number comparable to `kit-run` as a whole while the split table below shows where time is spent. `kit-run` uses `--bench-time`: `compile_ms` is compile+JIT time, and `runtime_ms` is the in-process entry-call execution slice.")
    461 lines.append("")
    462 lines.append("## Status")
    463 lines.append("")
    464 lines.append("| status | rows |")
    465 lines.append("| --- | ---: |")
    466 for k in sorted(status_counts):
    467     lines.append(f"| {k} | {status_counts[k]} |")
    468 lines.append("")
    469 lines.append("## Geomean Ratios")
    470 lines.append("")
    471 lines.append("| tool | opt | ok cases | compile speed vs base | runtime speed vs base | avg compile+codegen ms | avg runtime ms |")
    472 lines.append("| --- | ---: | ---: | ---: | ---: | ---: | ---: |")
    473 for key in sorted(groups):
    474     vals = groups[key]
    475     comp_totals = []
    476     run_times = []
    477     comp_ratios = []
    478     run_ratios = []
    479     for r in vals:
    480         cm = fnum(r["compile_ms"])
    481         cg = fnum(r["codegen_ms"]) or 0.0
    482         rt = fnum(r["runtime_ms"])
    483         total = None if cm is None else cm + cg
    484         comp_totals.append(total)
    485         run_times.append(rt)
    486         b = r["bench"]
    487         if total and b in base_compile:
    488             comp_ratios.append(base_compile[b] / total)
    489         if rt and b in base_runtime:
    490             run_ratios.append(base_runtime[b] / rt)
    491     lines.append(
    492         f"| {key[0]} | {key[1]} | {len(vals)} | {geo(comp_ratios)} | "
    493         f"{geo(run_ratios)} | {avg(comp_totals)} | {avg(run_times)} |"
    494     )
    495 lines.append("")
    496 lines.append("## Split Compile Timings")
    497 lines.append("")
    498 lines.append("| tool | opt | ok cases | avg compile/frontend ms | avg codegen/JIT ms | avg unified compile ms |")
    499 lines.append("| --- | ---: | ---: | ---: | ---: | ---: |")
    500 for key in sorted(groups):
    501     vals = groups[key]
    502     compile_times = []
    503     codegen_times = []
    504     comp_totals = []
    505     for r in vals:
    506         cm = fnum(r["compile_ms"])
    507         cg = fnum(r["codegen_ms"])
    508         total = None if cm is None else cm + (cg or 0.0)
    509         compile_times.append(cm)
    510         codegen_times.append(cg)
    511         comp_totals.append(total)
    512     lines.append(
    513         f"| {key[0]} | {key[1]} | {len(vals)} | {avg(compile_times)} | "
    514         f"{avg(codegen_times)} | {avg(comp_totals)} |"
    515     )
    516 lines.append("")
    517 lines.append(f"Raw CSV: `{csv_path}`")
    518 with open(out_path, "w") as f:
    519     f.write("\n".join(lines) + "\n")
    520 PY
    521 }
    522 
    523 printf 'bench,tool,opt,status,compile_ms,codegen_ms,runtime_ms,exit_code,log\n' >"$CSV"
    524 
    525 BENCH_DIR="$(bench_source_dir)"
    526 if [ ! -d "$BENCH_DIR" ]; then
    527   printf 'opt-bench: benchmark directory not found: %s\n' "$BENCH_DIR" >&2
    528   exit 2
    529 fi
    530 if [ "$SKIP_KIT" != "1" ] && [ ! -x "$KIT" ]; then
    531   printf 'opt-bench: kit binary not found: %s\n' "$KIT" >&2
    532   printf 'opt-bench: run `make bin` or set KIT=/path/to/kit\n' >&2
    533   exit 2
    534 fi
    535 # c2m is only required when MIR is part of the run.
    536 [ "$SKIP_MIR" != "1" ] && { ensure_mir || exit 2; }
    537 
    538 printf 'opt-bench: mode: %s\n' "$MODE"
    539 printf 'opt-bench: csv: %s\n' "$CSV"
    540 printf 'opt-bench: output: %s\n' "$OUT_DIR"
    541 printf 'opt-bench: benches: %s\n' "$BENCHES"
    542 printf 'opt-bench: levels: %s\n' "$LEVELS"
    543 printf 'opt-bench: compile repeats=%s run repeats=%s\n' "$COMPILE_REPEATS" "$RUN_REPEATS"
    544 
    545 for bench in $BENCHES; do
    546   src="$BENCH_DIR/$bench.c"
    547   expect="$BENCH_DIR/$bench.expect"
    548   arg_line="${KIT_OPT_BENCH_ARG_OVERRIDE:-$(read_arg_file "$BENCH_DIR/$bench.arg")}"
    549   if [ ! -f "$src" ]; then
    550     printf 'opt-bench: skipping missing benchmark source: %s\n' "$src" >&2
    551     continue
    552   fi
    553   printf '+++++ %s %s +++++\n' "$bench" "$arg_line"
    554   for opt in $LEVELS; do
    555     [ "$SKIP_GCC" != "1" ] && bench_native_tool "$bench" "$(tool_label "$GCC")" "$GCC" "$opt" "$src" "$expect" "$arg_line"
    556     [ "$SKIP_CLANG" != "1" ] && bench_native_tool "$bench" "$(tool_label "$CLANG")" "$CLANG" "$opt" "$src" "$expect" "$arg_line"
    557     if [ "$SKIP_KIT" != "1" ]; then
    558       bench_native_tool "$bench" "kit" "$KIT cc" "$opt" "$src" "$expect" "$arg_line"
    559       bench_kit_run "$bench" "$opt" "$src" "$expect" "$arg_line"
    560     fi
    561     [ "$SKIP_MIR" != "1" ] && bench_mir "$bench" "$opt" "$src" "$expect" "$arg_line"
    562   done
    563 done
    564 
    565 if [ "$MODE" = "baseline" ]; then
    566   printf 'opt-bench: wrote baseline cache %s\n' "$CSV"
    567   printf 'opt-bench: commit this file so kit runs can compare against it\n'
    568   exit 0
    569 fi
    570 
    571 write_summary
    572 printf 'opt-bench: wrote %s\n' "$CSV"
    573 printf 'opt-bench: wrote %s\n' "$SUMMARY"
    574 
    575 COMPARE_SCRIPT="$ROOT/scripts/opt_bench_compare.py"
    576 if [ -f "$COMPARE_SCRIPT" ]; then
    577   printf '\n'
    578   python3 "$COMPARE_SCRIPT" "$CSV" || true
    579 fi