commit ed6d5b24fc8b242a6b5ba8405673c0b5b1836d2d
parent 9182c95cbe31e38578e3f0fcdc239722ce7eb349
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Thu, 14 May 2026 19:11:55 -0700
opt: add MIR-style live range model
Diffstat:
6 files changed, 467 insertions(+), 21 deletions(-)
diff --git a/doc/MIR_RA_REPORT.md b/doc/MIR_RA_REPORT.md
@@ -559,28 +559,28 @@ Exit criteria:
### Phase 2: Live Range Model
-- [ ] Define `OptLiveRange` and `OptLiveRangeSet`.
-- [ ] Build ranges from block `live_out` with a backward block walk.
-- [ ] Track per-value:
- - [ ] first range
- - [ ] live length
- - [ ] frequency/spill cost
- - [ ] live-across-call information
-- [ ] Add compressed point numbering.
-- [ ] Add whole-block span representation or an equivalent compact model.
-- [ ] Add range dump output.
-- [ ] Add range metrics:
- - [ ] total ranges
- - [ ] total points before compression
- - [ ] total points after compression
- - [ ] max ranges per value
- - [ ] max live length
+- [x] Define `OptLiveRange` and `OptLiveRangeSet`.
+- [x] Build ranges from block `live_out` with a backward block walk.
+- [x] Track per-value:
+ - [x] first range
+ - [x] live length
+ - [x] frequency/spill cost
+ - [x] live-across-call information
+- [x] Add compressed point numbering.
+- [x] Add whole-block span representation or an equivalent compact model.
+- [x] Add range dump output.
+- [x] Add range metrics:
+ - [x] total ranges
+ - [x] total points before compression
+ - [x] total points after compression
+ - [x] max ranges per value
+ - [x] max live length
Exit criteria:
-- [ ] Range dumps match expected lifetimes on small hand-written cases.
-- [ ] Range construction works without using `Block.live_after`.
-- [ ] Current allocator still works while range data is built side by side.
+- [x] Range dumps match expected lifetimes on small hand-written cases.
+- [x] Range construction works without using `Block.live_after`.
+- [x] Current allocator still works while range data is built side by side.
### Phase 3: Range-Based Assignment
diff --git a/src/opt/opt.c b/src/opt/opt.c
@@ -1408,8 +1408,15 @@ static void w_func_end(CGTarget* t) {
metrics_count(o->c, "opt.live.active_words", live.active_words);
metrics_count(o->c, "opt.live.block_bytes", live.block_bytes);
metrics_count(o->c, "opt.live.set_bit_scans", live.set_bit_scans);
- metrics_count(o->c, "opt.ranges", 0);
- metrics_count(o->c, "opt.range_points", 0);
+ OptLiveRangeSet ranges;
+ opt_live_ranges_build(o->f, &live, &ranges);
+ metrics_count(o->c, "opt.ranges", ranges.nranges);
+ metrics_count(o->c, "opt.range_points", ranges.point_count);
+ metrics_count(o->c, "opt.range_raw_points", ranges.raw_point_count);
+ metrics_count(o->c, "opt.range_max_per_val", ranges.max_ranges_per_val);
+ metrics_count(o->c, "opt.range_max_length", ranges.max_live_length);
+ metrics_count(o->c, "opt.range_whole_block_spans",
+ ranges.whole_block_spans);
metrics_count(o->c, "opt.conflict_bytes", 0);
metrics_scope_end(o->c, "opt.live_blocks.pre_dde");
metrics_scope_begin(o->c, "opt.dead_def_elim");
diff --git a/src/opt/opt.h b/src/opt/opt.h
@@ -91,6 +91,38 @@ typedef struct OptLiveInfo {
OptBlockLive* blocks;
} OptLiveInfo;
+#define OPT_RANGE_NONE ((u32)~0u)
+
+typedef struct OptLiveRange {
+ Val val;
+ u32 start;
+ u32 end;
+ u32 next;
+ u32 block;
+ u8 whole_block;
+ u8 pad[3];
+} OptLiveRange;
+
+typedef struct OptLiveRangeSet {
+ Arena* arena;
+ Func* f;
+ OptLiveRange* ranges;
+ u32 nranges;
+ u32 cap;
+ u32* first_range_by_val;
+ u32* live_length_by_val;
+ u32* use_freq_by_val;
+ u32* def_freq_by_val;
+ u32* live_block_freq_by_val;
+ u32* live_across_call_freq_by_val;
+ u32* spill_cost_by_val;
+ u32 point_count;
+ u32 raw_point_count;
+ u32 max_ranges_per_val;
+ u32 max_live_length;
+ u32 whole_block_spans;
+} OptLiveRangeSet;
+
typedef void (*OptBitsetIterFn)(Val, void*);
void opt_bitset_clear(OptBitset*);
@@ -104,6 +136,8 @@ void opt_bitset_iter_set(const OptBitset*, OptBitsetIterFn, void* arg);
void opt_live_blocks(Func*, OptLiveInfo*);
void opt_live_dump_blocks(Func*, const OptLiveInfo*, Writer*);
+void opt_live_ranges_build(Func*, const OptLiveInfo*, OptLiveRangeSet*);
+void opt_live_dump_ranges(Func*, const OptLiveRangeSet*, Writer*);
void opt_live_info(Func*);
void opt_coalesce(Func*);
void opt_regalloc(Func*, int allow_live_range_split);
diff --git a/src/opt/pass_live.c b/src/opt/pass_live.c
@@ -1,4 +1,5 @@
#include <stdio.h>
+#include <stdlib.h>
#include <string.h>
#include "core/arena.h"
@@ -331,3 +332,329 @@ void opt_live_dump_blocks(Func* f, const OptLiveInfo* live, Writer* w) {
dump_set(w, "out", &live->blocks[b].live_out);
}
}
+
+typedef struct RangeBuildCtx {
+ Func* f;
+ OptLiveRangeSet* ranges;
+ u32* last_range_by_val;
+ u32* open_end_by_val;
+ u32* raw_points;
+ u32 nraw_points;
+ u32 raw_points_cap;
+} RangeBuildCtx;
+
+typedef struct RangeBitsCtx {
+ OptBitset* use;
+ OptBitset* def;
+} RangeBitsCtx;
+
+static void range_collect_bits(Func* f, Inst* in, Operand* op, int is_def,
+ void* arg) {
+ (void)in;
+ RangeBitsCtx* c = (RangeBitsCtx*)arg;
+ if (op->kind != OPK_REG) return;
+ Val v = (Val)op->v.reg;
+ if (v == VAL_NONE || v >= f->nvals) return;
+ if (is_def)
+ opt_bitset_set(c->def, v);
+ else
+ opt_bitset_set(c->use, v);
+}
+
+static void range_push_raw_point(RangeBuildCtx* c, u32 p) {
+ if (c->nraw_points == c->raw_points_cap) {
+ u32 ncap = c->raw_points_cap ? c->raw_points_cap * 2u : 64u;
+ u32* np = arena_array(c->f->arena, u32, ncap);
+ if (c->raw_points)
+ memcpy(np, c->raw_points, sizeof(c->raw_points[0]) * c->nraw_points);
+ c->raw_points = np;
+ c->raw_points_cap = ncap;
+ }
+ c->raw_points[c->nraw_points++] = p;
+}
+
+static void range_append(RangeBuildCtx* c, Val v, u32 start, u32 end, u32 block,
+ int whole_block) {
+ if (v == VAL_NONE || v >= c->f->nvals) return;
+ if (end <= start) end = start + 1u;
+ OptLiveRangeSet* ranges = c->ranges;
+ if (ranges->nranges == ranges->cap) {
+ u32 ncap = ranges->cap ? ranges->cap * 2u : 64u;
+ OptLiveRange* nr = arena_array(c->f->arena, OptLiveRange, ncap);
+ if (ranges->ranges)
+ memcpy(nr, ranges->ranges, sizeof(ranges->ranges[0]) * ranges->nranges);
+ ranges->ranges = nr;
+ ranges->cap = ncap;
+ }
+ u32 idx = ranges->nranges++;
+ OptLiveRange* r = &ranges->ranges[idx];
+ memset(r, 0, sizeof *r);
+ r->val = v;
+ r->start = start;
+ r->end = end;
+ r->next = OPT_RANGE_NONE;
+ r->block = block;
+ r->whole_block = whole_block ? 1u : 0u;
+ if (ranges->first_range_by_val[v] == OPT_RANGE_NONE) {
+ ranges->first_range_by_val[v] = idx;
+ } else {
+ ranges->ranges[c->last_range_by_val[v]].next = idx;
+ }
+ c->last_range_by_val[v] = idx;
+ range_push_raw_point(c, start);
+ range_push_raw_point(c, end);
+ if (whole_block) ++ranges->whole_block_spans;
+}
+
+typedef struct RangeOpenCtx {
+ RangeBuildCtx* build;
+ u32 raw_end;
+} RangeOpenCtx;
+
+static void range_open_at_end(Val v, void* arg) {
+ RangeOpenCtx* c = (RangeOpenCtx*)arg;
+ if (v < c->build->f->nvals) c->build->open_end_by_val[v] = c->raw_end;
+}
+
+typedef struct RangeCloseCtx {
+ RangeBuildCtx* build;
+ u32 start;
+ u32 block;
+} RangeCloseCtx;
+
+static void range_close_def(Val v, void* arg) {
+ RangeCloseCtx* c = (RangeCloseCtx*)arg;
+ RangeBuildCtx* b = c->build;
+ if (v >= b->f->nvals) return;
+ if (b->open_end_by_val[v] != OPT_RANGE_NONE) {
+ range_append(b, v, c->start, b->open_end_by_val[v], c->block, 0);
+ b->open_end_by_val[v] = OPT_RANGE_NONE;
+ } else {
+ range_append(b, v, c->start, c->start + 1u, c->block, 0);
+ }
+ b->ranges->def_freq_by_val[v] += b->f->blocks[c->block].frequency;
+}
+
+typedef struct RangeUseCtx {
+ RangeBuildCtx* build;
+ u32 end;
+ u32 block;
+} RangeUseCtx;
+
+static void range_open_use(Val v, void* arg) {
+ RangeUseCtx* c = (RangeUseCtx*)arg;
+ RangeBuildCtx* b = c->build;
+ if (v >= b->f->nvals) return;
+ if (b->open_end_by_val[v] == OPT_RANGE_NONE) b->open_end_by_val[v] = c->end;
+ b->ranges->use_freq_by_val[v] += b->f->blocks[c->block].frequency;
+}
+
+typedef struct RangeBlockFreqCtx {
+ OptLiveRangeSet* ranges;
+ u32 freq;
+} RangeBlockFreqCtx;
+
+static void range_add_block_freq(Val v, void* arg) {
+ RangeBlockFreqCtx* c = (RangeBlockFreqCtx*)arg;
+ c->ranges->live_block_freq_by_val[v] += c->freq;
+}
+
+typedef struct RangeCallCtx {
+ Func* f;
+ OptLiveRangeSet* ranges;
+ const OptBitset* def;
+ u32 freq;
+} RangeCallCtx;
+
+static void range_add_live_across_call(Val v, void* arg) {
+ RangeCallCtx* c = (RangeCallCtx*)arg;
+ if (v == VAL_NONE || v >= c->f->nvals) return;
+ if (opt_bitset_has(c->def, v)) return;
+ c->ranges->live_across_call_freq_by_val[v] += c->freq;
+}
+
+static int u32_cmp(const void* a, const void* b) {
+ u32 av = *(const u32*)a;
+ u32 bv = *(const u32*)b;
+ return (av > bv) - (av < bv);
+}
+
+static u32 range_point_index(const u32* points, u32 npoints, u32 raw) {
+ u32 lo = 0;
+ u32 hi = npoints;
+ while (lo < hi) {
+ u32 mid = lo + (hi - lo) / 2u;
+ if (points[mid] < raw)
+ lo = mid + 1u;
+ else
+ hi = mid;
+ }
+ return lo;
+}
+
+static void range_compress_points(OptLiveRangeSet* ranges,
+ RangeBuildCtx* build) {
+ if (build->nraw_points == 0) return;
+ qsort(build->raw_points, build->nraw_points, sizeof(build->raw_points[0]),
+ u32_cmp);
+ u32 unique = 0;
+ for (u32 i = 0; i < build->nraw_points; ++i) {
+ if (unique == 0 || build->raw_points[i] != build->raw_points[unique - 1u])
+ build->raw_points[unique++] = build->raw_points[i];
+ }
+ ranges->point_count = unique;
+ for (u32 i = 0; i < ranges->nranges; ++i) {
+ OptLiveRange* r = &ranges->ranges[i];
+ r->start = range_point_index(build->raw_points, unique, r->start);
+ r->end = range_point_index(build->raw_points, unique, r->end);
+ if (r->end <= r->start) r->end = r->start + 1u;
+ u32 len = r->end - r->start;
+ ranges->live_length_by_val[r->val] += len;
+ if (ranges->max_live_length < len) ranges->max_live_length = len;
+ }
+}
+
+void opt_live_ranges_build(Func* f, const OptLiveInfo* live,
+ OptLiveRangeSet* ranges) {
+ memset(ranges, 0, sizeof *ranges);
+ if (!f || !live) return;
+ ranges->arena = f->arena;
+ ranges->f = f;
+ ranges->first_range_by_val =
+ arena_array(f->arena, u32, f->nvals ? f->nvals : 1u);
+ ranges->live_length_by_val =
+ arena_zarray(f->arena, u32, f->nvals ? f->nvals : 1u);
+ ranges->use_freq_by_val =
+ arena_zarray(f->arena, u32, f->nvals ? f->nvals : 1u);
+ ranges->def_freq_by_val =
+ arena_zarray(f->arena, u32, f->nvals ? f->nvals : 1u);
+ ranges->live_block_freq_by_val =
+ arena_zarray(f->arena, u32, f->nvals ? f->nvals : 1u);
+ ranges->live_across_call_freq_by_val =
+ arena_zarray(f->arena, u32, f->nvals ? f->nvals : 1u);
+ ranges->spill_cost_by_val =
+ arena_zarray(f->arena, u32, f->nvals ? f->nvals : 1u);
+ for (Val v = 0; v < f->nvals; ++v)
+ ranges->first_range_by_val[v] = OPT_RANGE_NONE;
+
+ RangeBuildCtx build;
+ memset(&build, 0, sizeof build);
+ build.f = f;
+ build.ranges = ranges;
+ build.last_range_by_val =
+ arena_array(f->arena, u32, f->nvals ? f->nvals : 1u);
+ build.open_end_by_val = arena_array(f->arena, u32, f->nvals ? f->nvals : 1u);
+ for (Val v = 0; v < f->nvals; ++v) {
+ build.last_range_by_val[v] = OPT_RANGE_NONE;
+ build.open_end_by_val[v] = OPT_RANGE_NONE;
+ }
+
+ u32* block_base = arena_array(f->arena, u32, f->nblocks ? f->nblocks : 1u);
+ u32 raw = 0;
+ for (u32 b = 0; b < f->nblocks; ++b) {
+ block_base[b] = raw;
+ raw += f->blocks[b].ninsts ? f->blocks[b].ninsts : 1u;
+ }
+ ranges->raw_point_count = raw + 1u;
+
+ OptBitset use;
+ OptBitset def;
+ OptBitset live_after;
+ OptBitset block_live;
+ opt_bitset_init(f->arena, &use, live->words);
+ opt_bitset_init(f->arena, &def, live->words);
+ opt_bitset_init(f->arena, &live_after, live->words);
+ opt_bitset_init(f->arena, &block_live, live->words);
+
+ for (u32 b = 0; b < f->nblocks; ++b) {
+ Block* bl = &f->blocks[b];
+ const OptBlockLive* lb = &live->blocks[b];
+ for (Val v = 0; v < f->nvals; ++v)
+ build.open_end_by_val[v] = OPT_RANGE_NONE;
+ u32 raw_start = block_base[b];
+ u32 raw_end = raw_start + (bl->ninsts ? bl->ninsts : 1u);
+ RangeOpenCtx open_ctx = {&build, raw_end};
+ opt_bitset_iter_set(&lb->live_out, range_open_at_end, &open_ctx);
+ opt_bitset_copy(&live_after, &lb->live_out);
+
+ RangeBlockFreqCtx freq_ctx = {ranges, bl->frequency};
+ opt_bitset_copy(&block_live, &lb->live_in);
+ opt_bitset_union(&block_live, &lb->live_out);
+ opt_bitset_iter_set(&block_live, range_add_block_freq, &freq_ctx);
+
+ for (u32 ri = bl->ninsts; ri > 0; --ri) {
+ u32 i = ri - 1u;
+ Inst* in = &bl->insts[i];
+ opt_bitset_clear(&use);
+ opt_bitset_clear(&def);
+ RangeBitsCtx bits = {&use, &def};
+ live_walk_inst_operands(f, in, range_collect_bits, &bits);
+
+ if ((IROp)in->op == IR_CALL) {
+ RangeCallCtx call_ctx = {f, ranges, &def, bl->frequency};
+ opt_bitset_iter_set(&live_after, range_add_live_across_call, &call_ctx);
+ }
+
+ RangeCloseCtx close_ctx = {&build, raw_start + i, b};
+ opt_bitset_iter_set(&def, range_close_def, &close_ctx);
+ RangeUseCtx use_ctx = {&build, raw_start + i + 1u, b};
+ opt_bitset_iter_set(&use, range_open_use, &use_ctx);
+ for (u32 wi = 0; wi < live->words; ++wi)
+ live_after.words[wi] =
+ (live_after.words[wi] & ~def.words[wi]) | use.words[wi];
+ opt_bitset_trim(&live_after);
+ }
+
+ for (Val v = 1; v < f->nvals; ++v) {
+ if (build.open_end_by_val[v] == OPT_RANGE_NONE) continue;
+ int whole_block = build.open_end_by_val[v] == raw_end &&
+ !opt_bitset_has(&lb->live_use, v) &&
+ !opt_bitset_has(&lb->live_def, v);
+ range_append(&build, v, raw_start, build.open_end_by_val[v], b,
+ whole_block);
+ }
+ }
+
+ range_compress_points(ranges, &build);
+
+ for (Val v = 1; v < f->nvals; ++v) {
+ u32 nranges = 0;
+ for (u32 r = ranges->first_range_by_val[v]; r != OPT_RANGE_NONE;
+ r = ranges->ranges[r].next)
+ ++nranges;
+ if (ranges->max_ranges_per_val < nranges)
+ ranges->max_ranges_per_val = nranges;
+ ranges->spill_cost_by_val[v] = (ranges->use_freq_by_val[v] * 2u) +
+ ranges->def_freq_by_val[v] +
+ ranges->live_across_call_freq_by_val[v] +
+ ranges->live_block_freq_by_val[v];
+ }
+}
+
+void opt_live_dump_ranges(Func* f, const OptLiveRangeSet* ranges, Writer* w) {
+ (void)f;
+ if (!ranges || !w) return;
+ char buf[160];
+ snprintf(buf, sizeof buf,
+ "ranges total=%u points=%u raw_points=%u whole_block=%u\n",
+ (unsigned)ranges->nranges, (unsigned)ranges->point_count,
+ (unsigned)ranges->raw_point_count,
+ (unsigned)ranges->whole_block_spans);
+ dump_write(w, buf);
+ for (Val v = 1; v < ranges->f->nvals; ++v) {
+ if (ranges->first_range_by_val[v] == OPT_RANGE_NONE) continue;
+ snprintf(buf, sizeof buf, "v%u len=%u spill=%u:", (unsigned)v,
+ (unsigned)ranges->live_length_by_val[v],
+ (unsigned)ranges->spill_cost_by_val[v]);
+ dump_write(w, buf);
+ for (u32 r = ranges->first_range_by_val[v]; r != OPT_RANGE_NONE;
+ r = ranges->ranges[r].next) {
+ const OptLiveRange* lr = &ranges->ranges[r];
+ snprintf(buf, sizeof buf, " [%u,%u)b%u%s", (unsigned)lr->start,
+ (unsigned)lr->end, (unsigned)lr->block,
+ lr->whole_block ? "*" : "");
+ dump_write(w, buf);
+ }
+ dump_write(w, "\n");
+ }
+}
diff --git a/test/opt/opt_test.c b/test/opt/opt_test.c
@@ -330,6 +330,14 @@ static int count_op(Func* f, IROp op) {
return n;
}
+static u32 live_range_count_for(const OptLiveRangeSet* ranges, Val v) {
+ u32 n = 0;
+ for (u32 r = ranges->first_range_by_val[v]; r != OPT_RANGE_NONE;
+ r = ranges->ranges[r].next)
+ ++n;
+ return n;
+}
+
/* ============================================================
* MockCGTarget — provides register coordination so opt_machinize
* and opt_emit can query backend policy without hard-coding arch
@@ -612,6 +620,71 @@ static void opt_block_liveness_phase1(void) {
tc_fini(&tc);
}
+static void opt_live_ranges_phase2(void) {
+ TestCtx tc;
+ tc_init(&tc);
+ Func* f = new_func(&tc);
+ u32 entry = f->entry;
+ u32 header = ir_block_new(f);
+ u32 body = ir_block_new(f);
+ u32 exit = ir_block_new(f);
+ ir_note_emit(f, header);
+ ir_note_emit(f, body);
+ ir_note_emit(f, exit);
+ Val loop_v = add_val(f, tc.i32);
+ Val call_live = add_val(f, tc.i32);
+ Val out = add_val(f, tc.i32);
+
+ emit_load_imm(f, entry, loop_v, tc.i32, 1);
+ emit_load_imm(f, entry, call_live, tc.i32, 2);
+ emit_br_to(f, entry, header);
+ emit_test_branch(f, header, body, exit, tc.i32);
+ emit_call_void(f, body);
+ emit_binop(f, body, out, loop_v, call_live, tc.i32);
+ emit_br_to(f, body, header);
+ emit_ret_val(f, exit, call_live, tc.i32);
+
+ opt_build_cfg(f);
+ opt_build_loop_tree(f);
+ OptLiveInfo live;
+ opt_live_blocks(f, &live);
+ OptLiveRangeSet ranges;
+ opt_live_ranges_build(f, &live, &ranges);
+
+ EXPECT(ranges.first_range_by_val[loop_v] != OPT_RANGE_NONE,
+ "loop value should have a live range");
+ EXPECT(ranges.first_range_by_val[call_live] != OPT_RANGE_NONE,
+ "call-live value should have a live range");
+ EXPECT(live_range_count_for(&ranges, loop_v) >= 2,
+ "loop-carried value should have ranges in multiple blocks");
+ EXPECT(ranges.live_length_by_val[loop_v] != 0,
+ "loop value should record live length");
+ EXPECT(ranges.spill_cost_by_val[loop_v] != 0,
+ "loop value should record spill cost");
+ EXPECT(ranges.live_across_call_freq_by_val[call_live] != 0,
+ "call-live value should record live-across-call frequency");
+ EXPECT(ranges.point_count != 0, "range builder should compress points");
+ EXPECT(ranges.point_count <= ranges.raw_point_count,
+ "compressed point count should not exceed raw point count");
+ EXPECT(ranges.whole_block_spans != 0,
+ "range builder should record whole-block spans");
+ EXPECT(ranges.max_ranges_per_val >= live_range_count_for(&ranges, loop_v),
+ "range metrics should record max ranges per value");
+ EXPECT(f->val_conflicts == NULL,
+ "range construction should not allocate conflict matrix");
+
+ CfreeWriter* w = cfree_writer_mem(&g_heap);
+ opt_live_dump_ranges(f, &ranges, w);
+ size_t len = 0;
+ const unsigned char* bytes = cfree_writer_mem_bytes(w, &len);
+ EXPECT(bytes_contains(bytes, len, "ranges total="),
+ "range dump should include summary");
+ EXPECT(bytes_contains(bytes, len, "v"),
+ "range dump should include value rows");
+ cfree_writer_close(w);
+ tc_fini(&tc);
+}
+
static void opt_live_after_linear(void) {
TestCtx tc;
tc_init(&tc);
@@ -1997,6 +2070,7 @@ int main(void) {
opt_loop_tree_does_not_mutate_cfg();
opt_liveness_branch();
opt_block_liveness_phase1();
+ opt_live_ranges_phase2();
opt_live_after_linear();
opt_interference_branch_disjoint();
opt_interference_def_live_out();
diff --git a/test/opt/phase0_guardrails.sh b/test/opt/phase0_guardrails.sh
@@ -93,6 +93,10 @@ check_metrics() {
opt.live.active_words \
opt.ranges \
opt.range_points \
+ opt.range_raw_points \
+ opt.range_max_per_val \
+ opt.range_max_length \
+ opt.range_whole_block_spans \
opt.alloc.used_loc_words \
opt.alloc.spills \
opt.rewrite.reloads \