opt_bench_compare.py (7510B)
#!/usr/bin/env python3
"""Compare kit tools vs a baseline (default: gcc-15 -O0).

The fixed compilers (gcc/clang/MIR) don't change as kit evolves, so their
timings are measured once into a cached CSV (scripts/opt_bench_baseline.csv via
`KIT_OPT_BENCH_MODE=baseline scripts/opt_bench.sh`). This script auto-merges
that cache with the fresh kit-only results so comparisons still work even when
the results CSV contains only kit rows.

Usage:
    python3 scripts/opt_bench_compare.py [results.csv]
    python3 scripts/opt_bench_compare.py results.csv --base-tool gcc-15 --base-opt 0
    python3 scripts/opt_bench_compare.py results.csv --baseline-csv path.csv
    python3 scripts/opt_bench_compare.py results.csv --no-baseline
"""
import csv
import math
import os
import sys

DEFAULT_BASELINE_CSV = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), "opt_bench_baseline.csv"
)

# Columns every row is indexed by; a CSV without them cannot be compared.
_REQUIRED_COLUMNS = ("tool", "opt", "bench", "status")

# Options that consume the following argument, mapped to their settings key.
_VALUE_FLAGS = {
    "--base-tool": "base_tool",
    "--base-opt": "base_opt",
    "--baseline-csv": "baseline_csv",
}


def fnum(v):
    """Return ``float(v)``, or None when *v* cannot be converted."""
    try:
        return float(v)
    except (TypeError, ValueError):
        return None


def speedup_label(s):
    """Format a speedup ratio as a human-readable label.

    Returns "NA" for None or a non-positive ratio, "~tied" when the ratio is
    within 0.03 of 1.0, otherwise "<ratio>x faster" (ratio > 1) or
    "<1/ratio>x slower" (ratio < 1).
    """
    # compare() and geo() already treat non-positive values as "no data";
    # do the same here instead of dividing by zero or printing a negative
    # slowdown.
    if s is None or s <= 0:
        return "NA"
    if abs(s - 1.0) < 0.03:
        return "~tied"
    return f"{s:.2f}x faster" if s > 1 else f"{1/s:.2f}x slower"


def compare(val, base):
    """Return speedup ratio: >1 means val is faster (lower time) than base.

    Returns None when either value is missing, zero or negative.
    """
    if not val or not base or base <= 0 or val <= 0:
        return None
    return base / val


def geo(xs):
    """Return the geometric mean of the positive entries of *xs*.

    Falsy and non-positive entries are dropped first; returns None when
    nothing remains.
    """
    xs = [x for x in xs if x and x > 0]
    if not xs:
        return None
    return math.exp(sum(math.log(x) for x in xs) / len(xs))


def compile_total(r):
    """Return ``compile_ms + codegen_ms`` for row *r*.

    A missing or unparsable ``codegen_ms`` counts as 0.0; a missing or
    unparsable ``compile_ms`` makes the whole result None.
    """
    cm = fnum(r.get("compile_ms"))
    cg = fnum(r.get("codegen_ms")) or 0.0
    return (cm + cg) if cm is not None else None


def _parse_args(argv):
    """Parse command-line arguments into a settings dict.

    Keys: ``csv_path`` (last positional argument, or None), ``base_tool``
    (or None), ``base_opt`` (default "0") and ``baseline_csv`` (default
    DEFAULT_BASELINE_CSV, None after ``--no-baseline``). Exits with an error
    message when a value-taking flag is the last argument.
    """
    opts = {
        "csv_path": None,
        "base_tool": None,
        "base_opt": "0",
        "baseline_csv": DEFAULT_BASELINE_CSV,
    }
    i = 0
    while i < len(argv):
        arg = argv[i]
        if arg in _VALUE_FLAGS:
            # Without this check the dangling flag would be taken as the CSV
            # path and reported as "CSV not found: --base-tool".
            if i + 1 >= len(argv):
                sys.exit(f"compare: {arg} requires a value")
            opts[_VALUE_FLAGS[arg]] = argv[i + 1]
            i += 2
        elif arg == "--no-baseline":
            opts["baseline_csv"] = None
            i += 1
        else:
            opts["csv_path"] = arg
            i += 1
    return opts


def _read_ok_rows(path):
    """Return the rows of the CSV at *path* whose ``status`` is "OK".

    An empty file yields an empty list. Exits with an error message when the
    header lacks any of the columns in _REQUIRED_COLUMNS.
    """
    with open(path, newline="") as f:
        reader = csv.DictReader(f)
        fieldnames = reader.fieldnames
        if fieldnames is None:
            # Empty file: no header and no rows.
            return []
        missing = [c for c in _REQUIRED_COLUMNS if c not in fieldnames]
        if missing:
            sys.exit(
                f"compare: {path}: missing CSV column(s): {', '.join(missing)}"
            )
        return [r for r in reader if r["status"] == "OK"]


def main():
    """Print per-benchmark and geomean comparisons against the base tool.

    Reads options from ``sys.argv``, loads the results CSV (default:
    ``<repo root>/build/bench/opt/results.csv``), merges the cached baseline
    CSV unless disabled, and writes the tables to stdout. Exits with an error
    message when the CSV is missing, has no OK rows, or has no rows for the
    chosen base tool/opt.
    """
    opts = _parse_args(sys.argv[1:])
    csv_path = opts["csv_path"]
    base_opt = opts["base_opt"]
    baseline_csv = opts["baseline_csv"]

    if csv_path is None:
        root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        csv_path = os.path.join(root, "build", "bench", "opt", "results.csv")

    if not os.path.exists(csv_path):
        sys.exit(f"compare: CSV not found: {csv_path}")

    ok = _read_ok_rows(csv_path)

    # Benches measured by this run (typically kit-only); used to scope output.
    result_benches = {r["bench"] for r in ok}

    # Merge cached baseline timings (gcc/clang/MIR) unless disabled. Rows already
    # present in the fresh CSV win, so an explicit baseline run still overrides.
    if (
        baseline_csv
        and os.path.exists(baseline_csv)
        and os.path.abspath(baseline_csv) != os.path.abspath(csv_path)
    ):
        seen = {(r["tool"], r["opt"], r["bench"]) for r in ok}
        for r in _read_ok_rows(baseline_csv):
            key = (r["tool"], r["opt"], r["bench"])
            if key not in seen:
                ok.append(r)
                seen.add(key)

    if not ok:
        sys.exit("compare: no OK rows in CSV")

    # Find base tool: an explicit --base-tool wins, otherwise the first tool
    # whose name starts with "gcc-15", then "gcc".
    base_tool = opts["base_tool"]
    if not base_tool:
        for prefix in ("gcc-15", "gcc"):
            base_tool = next(
                (r["tool"] for r in ok if r["tool"].startswith(prefix)), None
            )
            if base_tool:
                break
    if not base_tool:
        sys.exit("compare: no gcc tool found; use --base-tool TOOL")

    # Baseline map: bench -> (compile_total_ms, runtime_ms)
    baseline = {}
    for r in ok:
        if r["tool"] == base_tool and r["opt"] == base_opt:
            baseline[r["bench"]] = (compile_total(r), fnum(r.get("runtime_ms")))

    if not baseline:
        sys.exit(f"compare: no rows for tool={base_tool} opt={base_opt}")

    idx = {(r["tool"], r["opt"], r["bench"]): r for r in ok}
    # Only report benches this run covered (and that the baseline has). When the
    # CSV is itself a baseline dump (no separate result benches), show them all.
    shown = (result_benches & set(baseline)) if result_benches else set(baseline)
    all_benches = sorted(shown) or sorted(baseline)
    all_opts = sorted(
        {r["opt"] for r in ok},
        key=lambda x: (int(x) if x.isdigit() else 99, x),
    )

    # Show order: base row first, then kit/kit-run/mir-c2m at each opt level
    show = [(base_tool, base_opt)]
    seen = {(base_tool, base_opt)}
    for t in ("kit", "kit-run", "mir-c2m"):
        for o in all_opts:
            key = (t, o)
            if key not in seen and any((t, o, b) in idx for b in all_benches):
                show.append(key)
                seen.add(key)

    print(f"kit/mir vs {base_tool} -O{base_opt} [{os.path.basename(csv_path)}]")
    print()

    # Per-benchmark table
    bench_label = (
        all_benches[0] if len(all_benches) == 1 else f"{len(all_benches)} benches"
    )
    print(f"Per-benchmark: {bench_label}")
    print()
    hdr = (
        f"{'bench':<16} {'tool':<12} opt "
        f"{'compile_ms':>10} {'compile vs base':>16} "
        f"{'runtime_ms':>10} {'runtime vs base':>16}"
    )
    print(hdr)
    print("-" * len(hdr))

    for bench in all_benches:
        bcm, brt = baseline[bench]
        first = True
        for tool, opt in show:
            r = idx.get((tool, opt, bench))
            if r is None:
                continue
            ct = compile_total(r)
            rt = fnum(r.get("runtime_ms"))
            is_base = tool == base_tool and opt == base_opt
            cl = "baseline" if is_base else speedup_label(compare(ct, bcm))
            rl = "baseline" if is_base else speedup_label(compare(rt, brt))
            # Print the bench name only on the first row of its group.
            bench_col = bench if first else ""
            first = False
            ct_s = f"{ct:.1f}" if ct is not None else "NA"
            rt_s = f"{rt:.1f}" if rt is not None else "NA"
            print(
                f"{bench_col:<16} {tool:<12} O{opt:<2} "
                f"{ct_s:>10} {cl:>16} "
                f"{rt_s:>10} {rl:>16}"
            )
        print()

    # A geomean over a single bench would just repeat the table above.
    if len(all_benches) <= 1:
        return

    # Geomean summary
    print(f"Geomean ({len(all_benches)} benches)")
    print()
    hdr2 = f"{'tool':<12} opt {'compile speedup':>16} {'runtime speedup':>16}"
    print(hdr2)
    print("-" * len(hdr2))
    for tool, opt in show:
        crs, rrs = [], []
        for bench in all_benches:
            bcm, brt = baseline.get(bench, (None, None))
            r = idx.get((tool, opt, bench))
            if r is None:
                continue
            s = compare(compile_total(r), bcm)
            if s is not None:
                crs.append(s)
            s = compare(fnum(r.get("runtime_ms")), brt)
            if s is not None:
                rrs.append(s)
        is_base = tool == base_tool and opt == base_opt
        cl = "baseline" if is_base else speedup_label(geo(crs))
        rl = "baseline" if is_base else speedup_label(geo(rrs))
        print(f"{tool:<12} O{opt:<2} {cl:>16} {rl:>16}")


if __name__ == "__main__":
    main()