kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit ed6d5b24fc8b242a6b5ba8405673c0b5b1836d2d
parent 9182c95cbe31e38578e3f0fcdc239722ce7eb349
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Thu, 14 May 2026 19:11:55 -0700

opt: add MIR-style live range model

Diffstat:
Mdoc/MIR_RA_REPORT.md | 38+++++++++++++++++++-------------------
Msrc/opt/opt.c | 11+++++++++--
Msrc/opt/opt.h | 34++++++++++++++++++++++++++++++++++
Msrc/opt/pass_live.c | 327+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mtest/opt/opt_test.c | 74++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mtest/opt/phase0_guardrails.sh | 4++++
6 files changed, 467 insertions(+), 21 deletions(-)

diff --git a/doc/MIR_RA_REPORT.md b/doc/MIR_RA_REPORT.md @@ -559,28 +559,28 @@ Exit criteria: ### Phase 2: Live Range Model -- [ ] Define `OptLiveRange` and `OptLiveRangeSet`. -- [ ] Build ranges from block `live_out` with a backward block walk. -- [ ] Track per-value: - - [ ] first range - - [ ] live length - - [ ] frequency/spill cost - - [ ] live-across-call information -- [ ] Add compressed point numbering. -- [ ] Add whole-block span representation or an equivalent compact model. -- [ ] Add range dump output. -- [ ] Add range metrics: - - [ ] total ranges - - [ ] total points before compression - - [ ] total points after compression - - [ ] max ranges per value - - [ ] max live length +- [x] Define `OptLiveRange` and `OptLiveRangeSet`. +- [x] Build ranges from block `live_out` with a backward block walk. +- [x] Track per-value: + - [x] first range + - [x] live length + - [x] frequency/spill cost + - [x] live-across-call information +- [x] Add compressed point numbering. +- [x] Add whole-block span representation or an equivalent compact model. +- [x] Add range dump output. +- [x] Add range metrics: + - [x] total ranges + - [x] total points before compression + - [x] total points after compression + - [x] max ranges per value + - [x] max live length Exit criteria: -- [ ] Range dumps match expected lifetimes on small hand-written cases. -- [ ] Range construction works without using `Block.live_after`. -- [ ] Current allocator still works while range data is built side by side. +- [x] Range dumps match expected lifetimes on small hand-written cases. +- [x] Range construction works without using `Block.live_after`. +- [x] Current allocator still works while range data is built side by side. ### Phase 3: Range-Based Assignment diff --git a/src/opt/opt.c b/src/opt/opt.c @@ -1408,8 +1408,15 @@ static void w_func_end(CGTarget* t) { metrics_count(o->c, "opt.live.active_words", live.active_words); metrics_count(o->c, "opt.live.block_bytes", live.block_bytes); metrics_count(o->c, "opt.live.set_bit_scans", live.set_bit_scans); - metrics_count(o->c, "opt.ranges", 0); - metrics_count(o->c, "opt.range_points", 0); + OptLiveRangeSet ranges; + opt_live_ranges_build(o->f, &live, &ranges); + metrics_count(o->c, "opt.ranges", ranges.nranges); + metrics_count(o->c, "opt.range_points", ranges.point_count); + metrics_count(o->c, "opt.range_raw_points", ranges.raw_point_count); + metrics_count(o->c, "opt.range_max_per_val", ranges.max_ranges_per_val); + metrics_count(o->c, "opt.range_max_length", ranges.max_live_length); + metrics_count(o->c, "opt.range_whole_block_spans", + ranges.whole_block_spans); metrics_count(o->c, "opt.conflict_bytes", 0); metrics_scope_end(o->c, "opt.live_blocks.pre_dde"); metrics_scope_begin(o->c, "opt.dead_def_elim"); diff --git a/src/opt/opt.h b/src/opt/opt.h @@ -91,6 +91,38 @@ typedef struct OptLiveInfo { OptBlockLive* blocks; } OptLiveInfo; +#define OPT_RANGE_NONE ((u32)~0u) + +typedef struct OptLiveRange { + Val val; + u32 start; + u32 end; + u32 next; + u32 block; + u8 whole_block; + u8 pad[3]; +} OptLiveRange; + +typedef struct OptLiveRangeSet { + Arena* arena; + Func* f; + OptLiveRange* ranges; + u32 nranges; + u32 cap; + u32* first_range_by_val; + u32* live_length_by_val; + u32* use_freq_by_val; + u32* def_freq_by_val; + u32* live_block_freq_by_val; + u32* live_across_call_freq_by_val; + u32* spill_cost_by_val; + u32 point_count; + u32 raw_point_count; + u32 max_ranges_per_val; + u32 max_live_length; + u32 whole_block_spans; +} OptLiveRangeSet; + typedef void (*OptBitsetIterFn)(Val, void*); void opt_bitset_clear(OptBitset*); @@ -104,6 +136,8 @@ void opt_bitset_iter_set(const OptBitset*, OptBitsetIterFn, void* arg); void opt_live_blocks(Func*, OptLiveInfo*); void opt_live_dump_blocks(Func*, const OptLiveInfo*, Writer*); +void opt_live_ranges_build(Func*, const OptLiveInfo*, OptLiveRangeSet*); +void opt_live_dump_ranges(Func*, const OptLiveRangeSet*, Writer*); void opt_live_info(Func*); void opt_coalesce(Func*); void opt_regalloc(Func*, int allow_live_range_split); diff --git a/src/opt/pass_live.c b/src/opt/pass_live.c @@ -1,4 +1,5 @@ #include <stdio.h> +#include <stdlib.h> #include <string.h> #include "core/arena.h" @@ -331,3 +332,329 @@ void opt_live_dump_blocks(Func* f, const OptLiveInfo* live, Writer* w) { dump_set(w, "out", &live->blocks[b].live_out); } } + +typedef struct RangeBuildCtx { + Func* f; + OptLiveRangeSet* ranges; + u32* last_range_by_val; + u32* open_end_by_val; + u32* raw_points; + u32 nraw_points; + u32 raw_points_cap; +} RangeBuildCtx; + +typedef struct RangeBitsCtx { + OptBitset* use; + OptBitset* def; +} RangeBitsCtx; + +static void range_collect_bits(Func* f, Inst* in, Operand* op, int is_def, + void* arg) { + (void)in; + RangeBitsCtx* c = (RangeBitsCtx*)arg; + if (op->kind != OPK_REG) return; + Val v = (Val)op->v.reg; + if (v == VAL_NONE || v >= f->nvals) return; + if (is_def) + opt_bitset_set(c->def, v); + else + opt_bitset_set(c->use, v); +} + +static void range_push_raw_point(RangeBuildCtx* c, u32 p) { + if (c->nraw_points == c->raw_points_cap) { + u32 ncap = c->raw_points_cap ? c->raw_points_cap * 2u : 64u; + u32* np = arena_array(c->f->arena, u32, ncap); + if (c->raw_points) + memcpy(np, c->raw_points, sizeof(c->raw_points[0]) * c->nraw_points); + c->raw_points = np; + c->raw_points_cap = ncap; + } + c->raw_points[c->nraw_points++] = p; +} + +static void range_append(RangeBuildCtx* c, Val v, u32 start, u32 end, u32 block, + int whole_block) { + if (v == VAL_NONE || v >= c->f->nvals) return; + if (end <= start) end = start + 1u; + OptLiveRangeSet* ranges = c->ranges; + if (ranges->nranges == ranges->cap) { + u32 ncap = ranges->cap ? ranges->cap * 2u : 64u; + OptLiveRange* nr = arena_array(c->f->arena, OptLiveRange, ncap); + if (ranges->ranges) + memcpy(nr, ranges->ranges, sizeof(ranges->ranges[0]) * ranges->nranges); + ranges->ranges = nr; + ranges->cap = ncap; + } + u32 idx = ranges->nranges++; + OptLiveRange* r = &ranges->ranges[idx]; + memset(r, 0, sizeof *r); + r->val = v; + r->start = start; + r->end = end; + r->next = OPT_RANGE_NONE; + r->block = block; + r->whole_block = whole_block ? 1u : 0u; + if (ranges->first_range_by_val[v] == OPT_RANGE_NONE) { + ranges->first_range_by_val[v] = idx; + } else { + ranges->ranges[c->last_range_by_val[v]].next = idx; + } + c->last_range_by_val[v] = idx; + range_push_raw_point(c, start); + range_push_raw_point(c, end); + if (whole_block) ++ranges->whole_block_spans; +} + +typedef struct RangeOpenCtx { + RangeBuildCtx* build; + u32 raw_end; +} RangeOpenCtx; + +static void range_open_at_end(Val v, void* arg) { + RangeOpenCtx* c = (RangeOpenCtx*)arg; + if (v < c->build->f->nvals) c->build->open_end_by_val[v] = c->raw_end; +} + +typedef struct RangeCloseCtx { + RangeBuildCtx* build; + u32 start; + u32 block; +} RangeCloseCtx; + +static void range_close_def(Val v, void* arg) { + RangeCloseCtx* c = (RangeCloseCtx*)arg; + RangeBuildCtx* b = c->build; + if (v >= b->f->nvals) return; + if (b->open_end_by_val[v] != OPT_RANGE_NONE) { + range_append(b, v, c->start, b->open_end_by_val[v], c->block, 0); + b->open_end_by_val[v] = OPT_RANGE_NONE; + } else { + range_append(b, v, c->start, c->start + 1u, c->block, 0); + } + b->ranges->def_freq_by_val[v] += b->f->blocks[c->block].frequency; +} + +typedef struct RangeUseCtx { + RangeBuildCtx* build; + u32 end; + u32 block; +} RangeUseCtx; + +static void range_open_use(Val v, void* arg) { + RangeUseCtx* c = (RangeUseCtx*)arg; + RangeBuildCtx* b = c->build; + if (v >= b->f->nvals) return; + if (b->open_end_by_val[v] == OPT_RANGE_NONE) b->open_end_by_val[v] = c->end; + b->ranges->use_freq_by_val[v] += b->f->blocks[c->block].frequency; +} + +typedef struct RangeBlockFreqCtx { + OptLiveRangeSet* ranges; + u32 freq; +} RangeBlockFreqCtx; + +static void range_add_block_freq(Val v, void* arg) { + RangeBlockFreqCtx* c = (RangeBlockFreqCtx*)arg; + c->ranges->live_block_freq_by_val[v] += c->freq; +} + +typedef struct RangeCallCtx { + Func* f; + OptLiveRangeSet* ranges; + const OptBitset* def; + u32 freq; +} RangeCallCtx; + +static void range_add_live_across_call(Val v, void* arg) { + RangeCallCtx* c = (RangeCallCtx*)arg; + if (v == VAL_NONE || v >= c->f->nvals) return; + if (opt_bitset_has(c->def, v)) return; + c->ranges->live_across_call_freq_by_val[v] += c->freq; +} + +static int u32_cmp(const void* a, const void* b) { + u32 av = *(const u32*)a; + u32 bv = *(const u32*)b; + return (av > bv) - (av < bv); +} + +static u32 range_point_index(const u32* points, u32 npoints, u32 raw) { + u32 lo = 0; + u32 hi = npoints; + while (lo < hi) { + u32 mid = lo + (hi - lo) / 2u; + if (points[mid] < raw) + lo = mid + 1u; + else + hi = mid; + } + return lo; +} + +static void range_compress_points(OptLiveRangeSet* ranges, + RangeBuildCtx* build) { + if (build->nraw_points == 0) return; + qsort(build->raw_points, build->nraw_points, sizeof(build->raw_points[0]), + u32_cmp); + u32 unique = 0; + for (u32 i = 0; i < build->nraw_points; ++i) { + if (unique == 0 || build->raw_points[i] != build->raw_points[unique - 1u]) + build->raw_points[unique++] = build->raw_points[i]; + } + ranges->point_count = unique; + for (u32 i = 0; i < ranges->nranges; ++i) { + OptLiveRange* r = &ranges->ranges[i]; + r->start = range_point_index(build->raw_points, unique, r->start); + r->end = range_point_index(build->raw_points, unique, r->end); + if (r->end <= r->start) r->end = r->start + 1u; + u32 len = r->end - r->start; + ranges->live_length_by_val[r->val] += len; + if (ranges->max_live_length < len) ranges->max_live_length = len; + } +} + +void opt_live_ranges_build(Func* f, const OptLiveInfo* live, + OptLiveRangeSet* ranges) { + memset(ranges, 0, sizeof *ranges); + if (!f || !live) return; + ranges->arena = f->arena; + ranges->f = f; + ranges->first_range_by_val = + arena_array(f->arena, u32, f->nvals ? f->nvals : 1u); + ranges->live_length_by_val = + arena_zarray(f->arena, u32, f->nvals ? f->nvals : 1u); + ranges->use_freq_by_val = + arena_zarray(f->arena, u32, f->nvals ? f->nvals : 1u); + ranges->def_freq_by_val = + arena_zarray(f->arena, u32, f->nvals ? f->nvals : 1u); + ranges->live_block_freq_by_val = + arena_zarray(f->arena, u32, f->nvals ? f->nvals : 1u); + ranges->live_across_call_freq_by_val = + arena_zarray(f->arena, u32, f->nvals ? f->nvals : 1u); + ranges->spill_cost_by_val = + arena_zarray(f->arena, u32, f->nvals ? f->nvals : 1u); + for (Val v = 0; v < f->nvals; ++v) + ranges->first_range_by_val[v] = OPT_RANGE_NONE; + + RangeBuildCtx build; + memset(&build, 0, sizeof build); + build.f = f; + build.ranges = ranges; + build.last_range_by_val = + arena_array(f->arena, u32, f->nvals ? f->nvals : 1u); + build.open_end_by_val = arena_array(f->arena, u32, f->nvals ? f->nvals : 1u); + for (Val v = 0; v < f->nvals; ++v) { + build.last_range_by_val[v] = OPT_RANGE_NONE; + build.open_end_by_val[v] = OPT_RANGE_NONE; + } + + u32* block_base = arena_array(f->arena, u32, f->nblocks ? f->nblocks : 1u); + u32 raw = 0; + for (u32 b = 0; b < f->nblocks; ++b) { + block_base[b] = raw; + raw += f->blocks[b].ninsts ? f->blocks[b].ninsts : 1u; + } + ranges->raw_point_count = raw + 1u; + + OptBitset use; + OptBitset def; + OptBitset live_after; + OptBitset block_live; + opt_bitset_init(f->arena, &use, live->words); + opt_bitset_init(f->arena, &def, live->words); + opt_bitset_init(f->arena, &live_after, live->words); + opt_bitset_init(f->arena, &block_live, live->words); + + for (u32 b = 0; b < f->nblocks; ++b) { + Block* bl = &f->blocks[b]; + const OptBlockLive* lb = &live->blocks[b]; + for (Val v = 0; v < f->nvals; ++v) + build.open_end_by_val[v] = OPT_RANGE_NONE; + u32 raw_start = block_base[b]; + u32 raw_end = raw_start + (bl->ninsts ? bl->ninsts : 1u); + RangeOpenCtx open_ctx = {&build, raw_end}; + opt_bitset_iter_set(&lb->live_out, range_open_at_end, &open_ctx); + opt_bitset_copy(&live_after, &lb->live_out); + + RangeBlockFreqCtx freq_ctx = {ranges, bl->frequency}; + opt_bitset_copy(&block_live, &lb->live_in); + opt_bitset_union(&block_live, &lb->live_out); + opt_bitset_iter_set(&block_live, range_add_block_freq, &freq_ctx); + + for (u32 ri = bl->ninsts; ri > 0; --ri) { + u32 i = ri - 1u; + Inst* in = &bl->insts[i]; + opt_bitset_clear(&use); + opt_bitset_clear(&def); + RangeBitsCtx bits = {&use, &def}; + live_walk_inst_operands(f, in, range_collect_bits, &bits); + + if ((IROp)in->op == IR_CALL) { + RangeCallCtx call_ctx = {f, ranges, &def, bl->frequency}; + opt_bitset_iter_set(&live_after, range_add_live_across_call, &call_ctx); + } + + RangeCloseCtx close_ctx = {&build, raw_start + i, b}; + opt_bitset_iter_set(&def, range_close_def, &close_ctx); + RangeUseCtx use_ctx = {&build, raw_start + i + 1u, b}; + opt_bitset_iter_set(&use, range_open_use, &use_ctx); + for (u32 wi = 0; wi < live->words; ++wi) + live_after.words[wi] = + (live_after.words[wi] & ~def.words[wi]) | use.words[wi]; + opt_bitset_trim(&live_after); + } + + for (Val v = 1; v < f->nvals; ++v) { + if (build.open_end_by_val[v] == OPT_RANGE_NONE) continue; + int whole_block = build.open_end_by_val[v] == raw_end && + !opt_bitset_has(&lb->live_use, v) && + !opt_bitset_has(&lb->live_def, v); + range_append(&build, v, raw_start, build.open_end_by_val[v], b, + whole_block); + } + } + + range_compress_points(ranges, &build); + + for (Val v = 1; v < f->nvals; ++v) { + u32 nranges = 0; + for (u32 r = ranges->first_range_by_val[v]; r != OPT_RANGE_NONE; + r = ranges->ranges[r].next) + ++nranges; + if (ranges->max_ranges_per_val < nranges) + ranges->max_ranges_per_val = nranges; + ranges->spill_cost_by_val[v] = (ranges->use_freq_by_val[v] * 2u) + + ranges->def_freq_by_val[v] + + ranges->live_across_call_freq_by_val[v] + + ranges->live_block_freq_by_val[v]; + } +} + +void opt_live_dump_ranges(Func* f, const OptLiveRangeSet* ranges, Writer* w) { + (void)f; + if (!ranges || !w) return; + char buf[160]; + snprintf(buf, sizeof buf, + "ranges total=%u points=%u raw_points=%u whole_block=%u\n", + (unsigned)ranges->nranges, (unsigned)ranges->point_count, + (unsigned)ranges->raw_point_count, + (unsigned)ranges->whole_block_spans); + dump_write(w, buf); + for (Val v = 1; v < ranges->f->nvals; ++v) { + if (ranges->first_range_by_val[v] == OPT_RANGE_NONE) continue; + snprintf(buf, sizeof buf, "v%u len=%u spill=%u:", (unsigned)v, + (unsigned)ranges->live_length_by_val[v], + (unsigned)ranges->spill_cost_by_val[v]); + dump_write(w, buf); + for (u32 r = ranges->first_range_by_val[v]; r != OPT_RANGE_NONE; + r = ranges->ranges[r].next) { + const OptLiveRange* lr = &ranges->ranges[r]; + snprintf(buf, sizeof buf, " [%u,%u)b%u%s", (unsigned)lr->start, + (unsigned)lr->end, (unsigned)lr->block, + lr->whole_block ? "*" : ""); + dump_write(w, buf); + } + dump_write(w, "\n"); + } +} diff --git a/test/opt/opt_test.c b/test/opt/opt_test.c @@ -330,6 +330,14 @@ static int count_op(Func* f, IROp op) { return n; } +static u32 live_range_count_for(const OptLiveRangeSet* ranges, Val v) { + u32 n = 0; + for (u32 r = ranges->first_range_by_val[v]; r != OPT_RANGE_NONE; + r = ranges->ranges[r].next) + ++n; + return n; +} + /* ============================================================ * MockCGTarget — provides register coordination so opt_machinize * and opt_emit can query backend policy without hard-coding arch @@ -612,6 +620,71 @@ static void opt_block_liveness_phase1(void) { tc_fini(&tc); } +static void opt_live_ranges_phase2(void) { + TestCtx tc; + tc_init(&tc); + Func* f = new_func(&tc); + u32 entry = f->entry; + u32 header = ir_block_new(f); + u32 body = ir_block_new(f); + u32 exit = ir_block_new(f); + ir_note_emit(f, header); + ir_note_emit(f, body); + ir_note_emit(f, exit); + Val loop_v = add_val(f, tc.i32); + Val call_live = add_val(f, tc.i32); + Val out = add_val(f, tc.i32); + + emit_load_imm(f, entry, loop_v, tc.i32, 1); + emit_load_imm(f, entry, call_live, tc.i32, 2); + emit_br_to(f, entry, header); + emit_test_branch(f, header, body, exit, tc.i32); + emit_call_void(f, body); + emit_binop(f, body, out, loop_v, call_live, tc.i32); + emit_br_to(f, body, header); + emit_ret_val(f, exit, call_live, tc.i32); + + opt_build_cfg(f); + opt_build_loop_tree(f); + OptLiveInfo live; + opt_live_blocks(f, &live); + OptLiveRangeSet ranges; + opt_live_ranges_build(f, &live, &ranges); + + EXPECT(ranges.first_range_by_val[loop_v] != OPT_RANGE_NONE, + "loop value should have a live range"); + EXPECT(ranges.first_range_by_val[call_live] != OPT_RANGE_NONE, + "call-live value should have a live range"); + EXPECT(live_range_count_for(&ranges, loop_v) >= 2, + "loop-carried value should have ranges in multiple blocks"); + EXPECT(ranges.live_length_by_val[loop_v] != 0, + "loop value should record live length"); + EXPECT(ranges.spill_cost_by_val[loop_v] != 0, + "loop value should record spill cost"); + EXPECT(ranges.live_across_call_freq_by_val[call_live] != 0, + "call-live value should record live-across-call frequency"); + EXPECT(ranges.point_count != 0, "range builder should compress points"); + EXPECT(ranges.point_count <= ranges.raw_point_count, + "compressed point count should not exceed raw point count"); + EXPECT(ranges.whole_block_spans != 0, + "range builder should record whole-block spans"); + EXPECT(ranges.max_ranges_per_val >= live_range_count_for(&ranges, loop_v), + "range metrics should record max ranges per value"); + EXPECT(f->val_conflicts == NULL, + "range construction should not allocate conflict matrix"); + + CfreeWriter* w = cfree_writer_mem(&g_heap); + opt_live_dump_ranges(f, &ranges, w); + size_t len = 0; + const unsigned char* bytes = cfree_writer_mem_bytes(w, &len); + EXPECT(bytes_contains(bytes, len, "ranges total="), + "range dump should include summary"); + EXPECT(bytes_contains(bytes, len, "v"), + "range dump should include value rows"); + cfree_writer_close(w); + tc_fini(&tc); +} + static void opt_live_after_linear(void) { TestCtx tc; tc_init(&tc); @@ -1997,6 +2070,7 @@ int main(void) { opt_loop_tree_does_not_mutate_cfg(); opt_liveness_branch(); opt_block_liveness_phase1(); + opt_live_ranges_phase2(); opt_live_after_linear(); opt_interference_branch_disjoint(); opt_interference_def_live_out(); diff --git a/test/opt/phase0_guardrails.sh b/test/opt/phase0_guardrails.sh @@ -93,6 +93,10 @@ check_metrics() { opt.live.active_words \ opt.ranges \ opt.range_points \ + opt.range_raw_points \ + opt.range_max_per_val \ + opt.range_max_length \ + opt.range_whole_block_spans \ opt.alloc.used_loc_words \ opt.alloc.spills \ opt.rewrite.reloads \