opt_bench_compare.py - kit

opt_bench_compare.py (7510B)
      1 #!/usr/bin/env python3
      2 """Compare kit tools vs a baseline (default: gcc-15 -O0).
      3 
      4 The fixed compilers (gcc/clang/MIR) don't change as kit evolves, so their
      5 timings are measured once into a cached CSV (scripts/opt_bench_baseline.csv via
      6 `KIT_OPT_BENCH_MODE=baseline scripts/opt_bench.sh`). This script auto-merges
      7 that cache with the fresh kit-only results so comparisons still work even when
      8 the results CSV contains only kit rows.
      9 
     10 Usage:
     11     python3 scripts/opt_bench_compare.py [results.csv]
     12     python3 scripts/opt_bench_compare.py results.csv --base-tool gcc-15 --base-opt 0
     13     python3 scripts/opt_bench_compare.py results.csv --baseline-csv path.csv
     14     python3 scripts/opt_bench_compare.py results.csv --no-baseline
     15 """
     16 import csv
     17 import math
     18 import os
     19 import sys
     20 
     21 DEFAULT_BASELINE_CSV = os.path.join(
     22     os.path.dirname(os.path.abspath(__file__)), "opt_bench_baseline.csv"
     23 )
     24 
     25 
     26 def fnum(v):
     27     try:
     28         return float(v)
     29     except (TypeError, ValueError):
     30         return None
     31 
     32 
     33 def speedup_label(s):
     34     if s is None:
     35         return "NA"
     36     if abs(s - 1.0) < 0.03:
     37         return "~tied"
     38     return f"{s:.2f}x faster" if s > 1 else f"{1/s:.2f}x slower"
     39 
     40 
     41 def compare(val, base):
     42     """Return speedup ratio: >1 means val is faster (lower time) than base."""
     43     if not val or not base or base <= 0 or val <= 0:
     44         return None
     45     return base / val
     46 
     47 
     48 def geo(xs):
     49     xs = [x for x in xs if x and x > 0]
     50     if not xs:
     51         return None
     52     return math.exp(sum(math.log(x) for x in xs) / len(xs))
     53 
     54 
     55 def compile_total(r):
     56     cm = fnum(r.get("compile_ms"))
     57     cg = fnum(r.get("codegen_ms")) or 0.0
     58     return (cm + cg) if cm is not None else None
     59 
     60 
     61 def main():
     62     args = sys.argv[1:]
     63     csv_path = None
     64     base_tool_arg = None
     65     base_opt = "0"
     66     baseline_csv = DEFAULT_BASELINE_CSV
     67     i = 0
     68     while i < len(args):
     69         if args[i] == "--base-tool" and i + 1 < len(args):
     70             base_tool_arg = args[i + 1]
     71             i += 2
     72         elif args[i] == "--base-opt" and i + 1 < len(args):
     73             base_opt = args[i + 1]
     74             i += 2
     75         elif args[i] == "--baseline-csv" and i + 1 < len(args):
     76             baseline_csv = args[i + 1]
     77             i += 2
     78         elif args[i] == "--no-baseline":
     79             baseline_csv = None
     80             i += 1
     81         else:
     82             csv_path = args[i]
     83             i += 1
     84 
     85     if csv_path is None:
     86         root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
     87         csv_path = os.path.join(root, "build", "bench", "opt", "results.csv")
     88 
     89     if not os.path.exists(csv_path):
     90         sys.exit(f"compare: CSV not found: {csv_path}")
     91 
     92     with open(csv_path, newline="") as f:
     93         ok = [r for r in csv.DictReader(f) if r["status"] == "OK"]
     94 
     95     # Benches measured by this run (typically kit-only); used to scope output.
     96     result_benches = {r["bench"] for r in ok}
     97 
     98     # Merge cached baseline timings (gcc/clang/MIR) unless disabled. Rows already
     99     # present in the fresh CSV win, so an explicit baseline run still overrides.
    100     if (
    101         baseline_csv
    102         and os.path.exists(baseline_csv)
    103         and os.path.abspath(baseline_csv) != os.path.abspath(csv_path)
    104     ):
    105         seen = {(r["tool"], r["opt"], r["bench"]) for r in ok}
    106         with open(baseline_csv, newline="") as f:
    107             for r in csv.DictReader(f):
    108                 if r["status"] != "OK":
    109                     continue
    110                 key = (r["tool"], r["opt"], r["bench"])
    111                 if key not in seen:
    112                     ok.append(r)
    113                     seen.add(key)
    114 
    115     if not ok:
    116         sys.exit("compare: no OK rows in CSV")
    117 
    118     # Find base tool
    119     base_tool = base_tool_arg
    120     if not base_tool:
    121         for prefix in ("gcc-15", "gcc"):
    122             base_tool = next((r["tool"] for r in ok if r["tool"].startswith(prefix)), None)
    123             if base_tool:
    124                 break
    125     if not base_tool:
    126         sys.exit("compare: no gcc tool found; use --base-tool TOOL")
    127 
    128     # Baseline map: bench -> (compile_total_ms, runtime_ms)
    129     baseline = {}
    130     for r in ok:
    131         if r["tool"] == base_tool and r["opt"] == base_opt:
    132             baseline[r["bench"]] = (compile_total(r), fnum(r["runtime_ms"]))
    133 
    134     if not baseline:
    135         sys.exit(f"compare: no rows for tool={base_tool} opt={base_opt}")
    136 
    137     idx = {(r["tool"], r["opt"], r["bench"]): r for r in ok}
    138     # Only report benches this run covered (and that the baseline has). When the
    139     # CSV is itself a baseline dump (no separate result benches), show them all.
    140     shown = (result_benches & set(baseline)) if result_benches else set(baseline)
    141     all_benches = sorted(shown) or sorted(baseline)
    142     all_opts = sorted(
    143         {r["opt"] for r in ok},
    144         key=lambda x: (int(x) if x.isdigit() else 99, x),
    145     )
    146 
    147     # Show order: base row first, then kit/kit-run at each opt level
    148     show = [(base_tool, base_opt)]
    149     seen = {(base_tool, base_opt)}
    150     for t in ("kit", "kit-run", "mir-c2m"):
    151         for o in all_opts:
    152             key = (t, o)
    153             if key not in seen and any((t, o, b) in idx for b in all_benches):
    154                 show.append(key)
    155                 seen.add(key)
    156 
    157     print(f"kit/mir vs {base_tool} -O{base_opt}  [{os.path.basename(csv_path)}]")
    158     print()
    159 
    160     # Per-benchmark table
    161     bench_label = all_benches[0] if len(all_benches) == 1 else f"{len(all_benches)} benches"
    162     print(f"Per-benchmark: {bench_label}")
    163     print()
    164     hdr = (
    165         f"{'bench':<16} {'tool':<12} opt  "
    166         f"{'compile_ms':>10}  {'compile vs base':>16}  "
    167         f"{'runtime_ms':>10}  {'runtime vs base':>16}"
    168     )
    169     print(hdr)
    170     print("-" * len(hdr))
    171 
    172     for bench in all_benches:
    173         bcm, brt = baseline[bench]
    174         first = True
    175         for tool, opt in show:
    176             r = idx.get((tool, opt, bench))
    177             if r is None:
    178                 continue
    179             ct = compile_total(r)
    180             rt = fnum(r["runtime_ms"])
    181             is_base = tool == base_tool and opt == base_opt
    182             cl = "baseline" if is_base else speedup_label(compare(ct, bcm))
    183             rl = "baseline" if is_base else speedup_label(compare(rt, brt))
    184             bench_col = bench if first else ""
    185             first = False
    186             ct_s = f"{ct:.1f}" if ct is not None else "NA"
    187             rt_s = f"{rt:.1f}" if rt is not None else "NA"
    188             print(
    189                 f"{bench_col:<16} {tool:<12} O{opt:<2}  "
    190                 f"{ct_s:>10}  {cl:>16}  "
    191                 f"{rt_s:>10}  {rl:>16}"
    192             )
    193         print()
    194 
    195     if len(all_benches) <= 1:
    196         return
    197 
    198     # Geomean summary
    199     print(f"Geomean ({len(all_benches)} benches)")
    200     print()
    201     hdr2 = f"{'tool':<12} opt  {'compile speedup':>16}  {'runtime speedup':>16}"
    202     print(hdr2)
    203     print("-" * len(hdr2))
    204     for tool, opt in show:
    205         crs, rrs = [], []
    206         for bench in all_benches:
    207             bcm, brt = baseline.get(bench, (None, None))
    208             r = idx.get((tool, opt, bench))
    209             if r is None:
    210                 continue
    211             s = compare(compile_total(r), bcm)
    212             if s is not None:
    213                 crs.append(s)
    214             s = compare(fnum(r["runtime_ms"]), brt)
    215             if s is not None:
    216                 rrs.append(s)
    217         is_base = tool == base_tool and opt == base_opt
    218         cl = "baseline" if is_base else speedup_label(geo(crs))
    219         rl = "baseline" if is_base else speedup_label(geo(rrs))
    220         print(f"{tool:<12} O{opt:<2}  {cl:>16}  {rl:>16}")
    221 
    222 
    223 if __name__ == "__main__":
    224     main()
	kit kit
	git clone https://git.ryansepassi.com/git/kit.git
	Log \| Files \| Refs \| README