kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

pass_live.c (29308B)


      1 #include <stdlib.h>
      2 #include <string.h>
      3 
      4 #include "core/arena.h"
      5 #include "core/slice.h"
      6 #include "core/strbuf.h"
      7 #include "opt/opt.h"
      8 #include "opt/opt_internal.h"
      9 
     10 static u32 opt_bit_words(u32 nregs) { return (nregs + 63u) / 64u; }
     11 
     12 static void opt_bitset_init(Arena* arena, OptBitset* bs, u32 nwords) {
     13   memset(bs, 0, sizeof *bs);
     14   bs->arena = arena;
     15   bs->nwords = nwords;
     16   bs->words = arena_zarray(arena, u64, nwords ? nwords : 1u);
     17 }
     18 
     19 static int opt_bitset_grow(OptBitset* bs, u32 need_words) {
     20   if (!bs || need_words <= bs->nwords) return 1;
     21   if (!bs->arena) return 0;
     22   u32 nwords = bs->nwords ? bs->nwords : 1u;
     23   while (nwords < need_words) nwords *= 2u;
     24   u64* words = arena_zarray(bs->arena, u64, nwords);
     25   if (bs->words && bs->nwords)
     26     memcpy(words, bs->words, sizeof(words[0]) * bs->nwords);
     27   bs->words = words;
     28   bs->nwords = nwords;
     29   return 1;
     30 }
     31 
     32 static void opt_bitset_trim(OptBitset* bs) {
     33   u32 n = bs->nwords;
     34   while (n && bs->words[n - 1u] == 0) --n;
     35   bs->active_words = n;
     36 }
     37 
     38 void opt_bitset_clear(OptBitset* bs) {
     39   if (!bs || !bs->words) return;
     40   for (u32 i = 0; i < bs->active_words; ++i) bs->words[i] = 0;
     41   bs->active_words = 0;
     42 }
     43 
     44 void opt_bitset_set(OptBitset* bs, PReg r) {
     45   if (!bs) return;
     46   u32 w = r / 64u;
     47   if (w >= bs->nwords && !opt_bitset_grow(bs, w + 1u)) return;
     48   bs->words[w] |= 1ull << (r % 64u);
     49   if (bs->active_words <= w) bs->active_words = w + 1u;
     50 }
     51 
     52 void opt_bitset_clear_bit(OptBitset* bs, PReg r) {
     53   if (!bs || !bs->words) return;
     54   u32 w = r / 64u;
     55   if (w >= bs->active_words) return;
     56   bs->words[w] &= ~(1ull << (r % 64u));
     57   if (w + 1u == bs->active_words && bs->words[w] == 0) opt_bitset_trim(bs);
     58 }
     59 
     60 int opt_bitset_has(const OptBitset* bs, PReg r) {
     61   if (!bs || !bs->words) return 0;
     62   u32 w = r / 64u;
     63   if (w >= bs->active_words) return 0;
     64   return (bs->words[w] & (1ull << (r % 64u))) != 0;
     65 }
     66 
     67 int opt_bitset_copy(OptBitset* dst, const OptBitset* src) {
     68   int changed = 0;
     69   u32 n;
     70   u32 old_active;
     71   if (!dst || !src || !src->words) return 0;
     72   if (src->active_words > dst->nwords &&
     73       !opt_bitset_grow(dst, src->active_words))
     74     return 0;
     75   old_active = dst->active_words;
     76   n = src->active_words;
     77   for (u32 i = 0; i < n; ++i) {
     78     changed |= dst->words[i] != src->words[i];
     79     dst->words[i] = src->words[i];
     80   }
     81   for (u32 i = n; i < old_active; ++i) {
     82     changed |= dst->words[i] != 0;
     83     dst->words[i] = 0;
     84   }
     85   dst->active_words = n;
     86   if (n && dst->words[n - 1u] == 0) opt_bitset_trim(dst);
     87   return changed;
     88 }
     89 
     90 int opt_bitset_union(OptBitset* dst, const OptBitset* src) {
     91   int changed = 0;
     92   u32 n;
     93   if (!dst || !src || !src->words) return 0;
     94   if (src->active_words > dst->nwords &&
     95       !opt_bitset_grow(dst, src->active_words))
     96     return 0;
     97   n = src->active_words;
     98   for (u32 i = 0; i < n; ++i) {
     99     u64 old = dst->words[i];
    100     dst->words[i] |= src->words[i];
    101     changed |= dst->words[i] != old;
    102   }
    103   if (changed) opt_bitset_trim(dst);
    104   return changed;
    105 }
    106 
    107 int opt_bitset_union_and_not(OptBitset* dst, const OptBitset* src,
    108                              const OptBitset* not_bits) {
    109   int changed = 0;
    110   u32 n;
    111   if (!dst || !src || !src->words) return 0;
    112   if (src->active_words > dst->nwords &&
    113       !opt_bitset_grow(dst, src->active_words))
    114     return 0;
    115   n = src->active_words;
    116   for (u32 i = 0; i < n; ++i) {
    117     u64 mask =
    118         (not_bits && i < not_bits->active_words) ? not_bits->words[i] : 0;
    119     u64 old = dst->words[i];
    120     dst->words[i] |= src->words[i] & ~mask;
    121     changed |= dst->words[i] != old;
    122   }
    123   if (changed) opt_bitset_trim(dst);
    124   return changed;
    125 }
    126 
    127 void opt_bitset_iter_set(const OptBitset* bs, OptBitsetIterFn fn, void* arg) {
    128   if (!bs || !bs->words || !fn) return;
    129   for (u32 w = 0; w < bs->active_words; ++w) {
    130     u64 bits = bs->words[w];
    131     while (bits) {
    132       u32 bit = (u32)__builtin_ctzll(bits);
    133       fn(w * 64u + bit, arg);
    134       bits &= bits - 1u;
    135     }
    136   }
    137 }
    138 
    139 typedef struct LiveUseDefCtx {
    140   OptBitset* use;
    141   OptBitset* def;
    142 } LiveUseDefCtx;
    143 
    144 static void live_collect_use_def(Func* f, Inst* in, Operand* op, int is_def,
    145                                  void* arg) {
    146   (void)in;
    147   LiveUseDefCtx* c = (LiveUseDefCtx*)arg;
    148   if (op->kind != OPK_REG) return;
    149   PReg r = (PReg)op->v.reg;
    150   if (r == PREG_NONE || r == 0 || r >= opt_reg_count(f)) return;
    151   if (is_def) {
    152     opt_bitset_set(c->def, r);
    153   } else if (!opt_bitset_has(c->def, r)) {
    154     opt_bitset_set(c->use, r);
    155   }
    156 }
    157 
    158 static void live_count_bit(PReg r, void* arg) {
    159   (void)r;
    160   ++*(u64*)arg;
    161 }
    162 
    163 static u64 live_count_set_bits(const OptBitset* bs) {
    164   u64 n = 0;
    165   opt_bitset_iter_set(bs, live_count_bit, &n);
    166   return n;
    167 }
    168 
    169 static void live_metric_clear(OptLiveInfo* live, OptBitset* bs) {
    170   u32 touched = bs ? bs->active_words : 0;
    171   if (live) {
    172     u64 old = live->bitset_words_touched;
    173     live->bitset_words_touched = old + touched;
    174   }
    175   opt_bitset_clear(bs);
    176 }
    177 
    178 static int live_metric_copy(OptLiveInfo* live, OptBitset* dst,
    179                             const OptBitset* src) {
    180   u32 n = 0;
    181   if (dst && src) {
    182     n = src->active_words;
    183     if (dst->active_words > n) n += dst->active_words - n;
    184   }
    185   if (live) live->bitset_words_touched += n;
    186   return opt_bitset_copy(dst, src);
    187 }
    188 
    189 static int live_metric_union(OptLiveInfo* live, OptBitset* dst,
    190                              const OptBitset* src) {
    191   u32 n = 0;
    192   if (dst && src) n = src->active_words;
    193   if (live) live->bitset_words_touched += n;
    194   return opt_bitset_union(dst, src);
    195 }
    196 
    197 static int live_metric_union_and_not(OptLiveInfo* live, OptBitset* dst,
    198                                      const OptBitset* src,
    199                                      const OptBitset* not_bits) {
    200   u32 n = 0;
    201   (void)not_bits;
    202   if (dst && src) n = src->active_words;
    203   if (live) live->bitset_words_touched += n;
    204   return opt_bitset_union_and_not(dst, src, not_bits);
    205 }
    206 
    207 static void live_recompute_metrics(OptLiveInfo* live) {
    208   live->active_words = 0;
    209   live->block_bytes = 0;
    210   live->set_bit_scans = 0;
    211   for (u32 b = 0; b < live->f->nblocks; ++b) {
    212     OptBlockLive* bl = &live->blocks[b];
    213     live->active_words += bl->live_in.active_words;
    214     live->active_words += bl->live_out.active_words;
    215     live->active_words += bl->live_use.active_words;
    216     live->active_words += bl->live_def.active_words;
    217     live->set_bit_scans += live_count_set_bits(&bl->live_in);
    218     live->set_bit_scans += live_count_set_bits(&bl->live_out);
    219     live->set_bit_scans += live_count_set_bits(&bl->live_use);
    220     live->set_bit_scans += live_count_set_bits(&bl->live_def);
    221   }
    222   live->block_bytes = live->active_words * (u64)sizeof(u64);
    223 }
    224 
    225 static void live_worklist_push(u32* worklist, u8* queued, u32* n, u32 b) {
    226   if (queued[b]) return;
    227   queued[b] = 1;
    228   worklist[(*n)++] = b;
    229 }
    230 
    231 void opt_live_blocks(Func* f, OptLiveInfo* live) {
    232   memset(live, 0, sizeof *live);
    233   live->arena = f->arena;
    234   live->f = f;
    235   live->words = opt_bit_words(opt_reg_count(f));
    236   f->opt_live_words = (u16)live->words;
    237   live->blocks =
    238       arena_zarray(f->arena, OptBlockLive, f->nblocks ? f->nblocks : 1u);
    239 
    240   for (u32 b = 0; b < f->nblocks; ++b) {
    241     Block* bl = &f->blocks[b];
    242     OptBlockLive* lb = &live->blocks[b];
    243     opt_bitset_init(f->arena, &lb->live_in, 0);
    244     opt_bitset_init(f->arena, &lb->live_out, 0);
    245     opt_bitset_init(f->arena, &lb->live_use, 0);
    246     opt_bitset_init(f->arena, &lb->live_def, 0);
    247 
    248     LiveUseDefCtx ctx;
    249     ctx.use = &lb->live_use;
    250     ctx.def = &lb->live_def;
    251     for (u32 i = 0; i < bl->ninsts; ++i)
    252       opt_walk_inst_operands(f, &bl->insts[i], live_collect_use_def, &ctx);
    253   }
    254 
    255   OptBitset new_out;
    256   OptBitset tmp;
    257   opt_bitset_init(f->arena, &new_out, 0);
    258   opt_bitset_init(f->arena, &tmp, 0);
    259 
    260   u32* worklist = arena_array(f->arena, u32, f->nblocks ? f->nblocks : 1u);
    261   u8* queued = arena_zarray(f->arena, u8, f->nblocks ? f->nblocks : 1u);
    262   u32 nwork = 0;
    263   for (u32 bi = f->nblocks; bi > 0; --bi)
    264     live_worklist_push(worklist, queued, &nwork, bi - 1u);
    265 
    266   while (nwork) {
    267     u32 b = worklist[--nwork];
    268     queued[b] = 0;
    269     Block* bl = &f->blocks[b];
    270     OptBlockLive* lb = &live->blocks[b];
    271     ++live->dataflow_iterations;
    272     ++live->dataflow_block_visits;
    273     live_metric_clear(live, &new_out);
    274     live_metric_clear(live, &tmp);
    275     for (u32 s = 0; s < bl->nsucc; ++s) {
    276       u32 t = bl->succ[s];
    277       if (t < f->nblocks)
    278         live_metric_union(live, &new_out, &live->blocks[t].live_in);
    279     }
    280     live_metric_copy(live, &tmp, &lb->live_use);
    281     live_metric_union_and_not(live, &tmp, &new_out, &lb->live_def);
    282     (void)live_metric_copy(live, &lb->live_out, &new_out);
    283     if (live_metric_copy(live, &lb->live_in, &tmp)) {
    284       for (u32 p = 0; p < bl->npreds; ++p) {
    285         u32 pred = bl->preds[p];
    286         if (pred < f->nblocks)
    287           live_worklist_push(worklist, queued, &nwork, pred);
    288       }
    289     }
    290   }
    291 
    292   live_recompute_metrics(live);
    293 }
    294 
    295 static void dump_write(Writer* w, const char* s) {
    296   kit_writer_write(w, s, slice_from_cstr(s).len);
    297 }
    298 
    299 static void dump_sb(Writer* w, const StrBuf* sb) {
    300   kit_writer_write(w, strbuf_cstr(sb), strbuf_len(sb));
    301 }
    302 
    303 static void dump_bit(PReg r, void* arg) {
    304   Writer* w = (Writer*)arg;
    305   char buf[32];
    306   StrBuf sb;
    307   strbuf_init(&sb, buf, sizeof buf);
    308   strbuf_puts(&sb, " r");
    309   strbuf_put_u64(&sb, (u64)(unsigned)r);
    310   dump_sb(w, &sb);
    311 }
    312 
    313 static void dump_set(Writer* w, const char* name, const OptBitset* bs) {
    314   dump_write(w, "  ");
    315   dump_write(w, name);
    316   dump_write(w, ":");
    317   opt_bitset_iter_set(bs, dump_bit, w);
    318   dump_write(w, "\n");
    319 }
    320 
    321 void opt_live_dump_blocks(Func* f, const OptLiveInfo* live, Writer* w) {
    322   (void)f;
    323   if (!live || !w) return;
    324   for (u32 b = 0; b < live->f->nblocks; ++b) {
    325     char buf[64];
    326     StrBuf sb;
    327     strbuf_init(&sb, buf, sizeof buf);
    328     strbuf_puts(&sb, "block ");
    329     strbuf_put_u64(&sb, (u64)(unsigned)b);
    330     strbuf_putc(&sb, '\n');
    331     dump_sb(w, &sb);
    332     dump_set(w, "use", &live->blocks[b].live_use);
    333     dump_set(w, "def", &live->blocks[b].live_def);
    334     dump_set(w, "in", &live->blocks[b].live_in);
    335     dump_set(w, "out", &live->blocks[b].live_out);
    336   }
    337 }
    338 
    339 typedef struct RangeBuildCtx {
    340   Func* f;
    341   OptLiveRangeSet* ranges;
    342   u32* last_range_by_preg;
    343   u32* open_end_by_preg;
    344   u32* open_gen_by_preg;
    345   u32 open_gen;
    346   PReg* open_pregs;
    347   u32 nopen_pregs;
    348   u32 open_pregs_cap;
    349   PReg* range_pregs;
    350   u32 nrange_pregs;
    351   u32 range_pregs_cap;
    352   u32* raw_points;
    353   u32 nraw_points;
    354   u32 raw_points_cap;
    355 } RangeBuildCtx;
    356 
    357 typedef struct RangeInstRefs {
    358   PReg* uses;
    359   PReg* defs;
    360   u32 nuses;
    361   u32 ndefs;
    362   u32 use_cap;
    363   u32 def_cap;
    364   u32* use_mark;
    365   u32* def_mark;
    366   u32 gen;
    367 } RangeInstRefs;
    368 
    369 typedef struct RangeLivePregs {
    370   Func* f;
    371   PReg* regs;
    372   u32 nregs;
    373   u32 cap;
    374   u32* pos_by_preg;
    375 } RangeLivePregs;
    376 
    377 static void range_refs_init(Func* f, RangeInstRefs* refs) {
    378   memset(refs, 0, sizeof *refs);
    379   refs->use_mark =
    380       arena_zarray(f->arena, u32, opt_reg_count(f) ? opt_reg_count(f) : 1u);
    381   refs->def_mark =
    382       arena_zarray(f->arena, u32, opt_reg_count(f) ? opt_reg_count(f) : 1u);
    383   refs->gen = 1;
    384 }
    385 
    386 static void range_refs_reset(Func* f, RangeInstRefs* refs) {
    387   refs->nuses = 0;
    388   refs->ndefs = 0;
    389   ++refs->gen;
    390   if (refs->gen) return;
    391   memset(
    392       refs->use_mark, 0,
    393       sizeof(refs->use_mark[0]) * (opt_reg_count(f) ? opt_reg_count(f) : 1u));
    394   memset(
    395       refs->def_mark, 0,
    396       sizeof(refs->def_mark[0]) * (opt_reg_count(f) ? opt_reg_count(f) : 1u));
    397   refs->gen = 1;
    398 }
    399 
    400 static void range_refs_push_use(Func* f, RangeInstRefs* refs, PReg r) {
    401   if (refs->use_mark[r] == refs->gen) return;
    402   refs->use_mark[r] = refs->gen;
    403   if (refs->nuses == refs->use_cap) {
    404     u32 ncap = refs->use_cap ? refs->use_cap * 2u : 8u;
    405     PReg* nr = arena_array(f->arena, PReg, ncap);
    406     if (refs->uses) memcpy(nr, refs->uses, sizeof(refs->uses[0]) * refs->nuses);
    407     refs->uses = nr;
    408     refs->use_cap = ncap;
    409   }
    410   refs->uses[refs->nuses++] = r;
    411 }
    412 
    413 static void range_refs_push_def(Func* f, RangeInstRefs* refs, PReg r) {
    414   if (refs->def_mark[r] == refs->gen) return;
    415   refs->def_mark[r] = refs->gen;
    416   if (refs->ndefs == refs->def_cap) {
    417     u32 ncap = refs->def_cap ? refs->def_cap * 2u : 4u;
    418     PReg* nr = arena_array(f->arena, PReg, ncap);
    419     if (refs->defs) memcpy(nr, refs->defs, sizeof(refs->defs[0]) * refs->ndefs);
    420     refs->defs = nr;
    421     refs->def_cap = ncap;
    422   }
    423   refs->defs[refs->ndefs++] = r;
    424 }
    425 
    426 static int range_refs_has_def(const RangeInstRefs* refs, PReg r) {
    427   return refs->def_mark[r] == refs->gen;
    428 }
    429 
    430 static void range_live_pregs_init(Func* f, RangeLivePregs* live) {
    431   memset(live, 0, sizeof *live);
    432   live->f = f;
    433   live->pos_by_preg =
    434       arena_zarray(f->arena, u32, opt_reg_count(f) ? opt_reg_count(f) : 1u);
    435 }
    436 
    437 static void range_live_pregs_clear(RangeLivePregs* live) {
    438   for (u32 i = 0; i < live->nregs; ++i) live->pos_by_preg[live->regs[i]] = 0;
    439   live->nregs = 0;
    440 }
    441 
    442 static void range_live_pregs_add(RangeLivePregs* live, PReg r) {
    443   Func* f = live->f;
    444   if (r == PREG_NONE || r == 0 || r >= opt_reg_count(f)) return;
    445   if (live->pos_by_preg[r]) return;
    446   if (live->nregs == live->cap) {
    447     u32 ncap = live->cap ? live->cap * 2u : 64u;
    448     PReg* nr = arena_array(f->arena, PReg, ncap);
    449     if (live->regs) memcpy(nr, live->regs, sizeof(live->regs[0]) * live->nregs);
    450     live->regs = nr;
    451     live->cap = ncap;
    452   }
    453   live->regs[live->nregs] = r;
    454   live->pos_by_preg[r] = live->nregs + 1u;
    455   ++live->nregs;
    456 }
    457 
    458 static void range_live_pregs_remove(RangeLivePregs* live, PReg r) {
    459   if (r == PREG_NONE || r == 0 || r >= opt_reg_count(live->f)) return;
    460   u32 pos = live->pos_by_preg[r];
    461   if (!pos) return;
    462   u32 idx = pos - 1u;
    463   PReg last = live->regs[live->nregs - 1u];
    464   live->regs[idx] = last;
    465   live->pos_by_preg[last] = idx + 1u;
    466   live->pos_by_preg[r] = 0;
    467   --live->nregs;
    468 }
    469 
    470 static void range_live_pregs_add_bit(PReg r, void* arg) {
    471   range_live_pregs_add((RangeLivePregs*)arg, r);
    472 }
    473 
    474 static void range_collect_bits(Func* f, Inst* in, Operand* op, int is_def,
    475                                void* arg) {
    476   (void)in;
    477   RangeInstRefs* refs = (RangeInstRefs*)arg;
    478   if (op->kind != OPK_REG) return;
    479   PReg r = (PReg)op->v.reg;
    480   if (r == PREG_NONE || r == 0 || r >= opt_reg_count(f)) return;
    481   if (is_def)
    482     range_refs_push_def(f, refs, r);
    483   else
    484     range_refs_push_use(f, refs, r);
    485 }
    486 
    487 static void range_push_raw_point(RangeBuildCtx* c, u32 p) {
    488   if (c->nraw_points == c->raw_points_cap) {
    489     u32 ncap = c->raw_points_cap ? c->raw_points_cap * 2u : 64u;
    490     u32* np = arena_array(c->f->arena, u32, ncap);
    491     if (c->raw_points)
    492       memcpy(np, c->raw_points, sizeof(c->raw_points[0]) * c->nraw_points);
    493     c->raw_points = np;
    494     c->raw_points_cap = ncap;
    495   }
    496   c->raw_points[c->nraw_points++] = p;
    497 }
    498 
    499 static void range_push_open_preg(RangeBuildCtx* c, PReg r) {
    500   if (c->nopen_pregs == c->open_pregs_cap) {
    501     u32 ncap = c->open_pregs_cap ? c->open_pregs_cap * 2u : 64u;
    502     PReg* nr = arena_array(c->f->arena, PReg, ncap);
    503     if (c->open_pregs)
    504       memcpy(nr, c->open_pregs, sizeof(c->open_pregs[0]) * c->nopen_pregs);
    505     c->open_pregs = nr;
    506     c->open_pregs_cap = ncap;
    507   }
    508   c->open_pregs[c->nopen_pregs++] = r;
    509 }
    510 
    511 static void range_push_range_preg(RangeBuildCtx* c, PReg r) {
    512   if (c->nrange_pregs == c->range_pregs_cap) {
    513     u32 ncap = c->range_pregs_cap ? c->range_pregs_cap * 2u : 64u;
    514     PReg* nr = arena_array(c->f->arena, PReg, ncap);
    515     if (c->range_pregs)
    516       memcpy(nr, c->range_pregs, sizeof(c->range_pregs[0]) * c->nrange_pregs);
    517     c->range_pregs = nr;
    518     c->range_pregs_cap = ncap;
    519   }
    520   c->range_pregs[c->nrange_pregs++] = r;
    521 }
    522 
    523 static void range_block_reset(RangeBuildCtx* c) {
    524   c->nopen_pregs = 0;
    525   ++c->open_gen;
    526   if (c->open_gen) return;
    527   memset(c->open_gen_by_preg, 0,
    528          sizeof(c->open_gen_by_preg[0]) *
    529              (opt_reg_count(c->f) ? opt_reg_count(c->f) : 1u));
    530   c->open_gen = 1;
    531 }
    532 
    533 static int range_is_open(const RangeBuildCtx* c, PReg r) {
    534   return r < opt_reg_count(c->f) && c->open_gen_by_preg[r] == c->open_gen;
    535 }
    536 
    537 static u32 range_open_end(const RangeBuildCtx* c, PReg r) {
    538   return range_is_open(c, r) ? c->open_end_by_preg[r] : OPT_RANGE_NONE;
    539 }
    540 
    541 static void range_set_open_end(RangeBuildCtx* c, PReg r, u32 end) {
    542   if (r == PREG_NONE || r == 0 || r >= opt_reg_count(c->f)) return;
    543   if (!range_is_open(c, r)) {
    544     c->open_gen_by_preg[r] = c->open_gen;
    545     range_push_open_preg(c, r);
    546   }
    547   c->open_end_by_preg[r] = end;
    548 }
    549 
    550 static void range_close_open_end(RangeBuildCtx* c, PReg r) {
    551   if (r == PREG_NONE || r == 0 || r >= opt_reg_count(c->f)) return;
    552   c->open_gen_by_preg[r] = 0;
    553 }
    554 
    555 static void range_append(RangeBuildCtx* c, PReg r, u32 start, u32 end,
    556                          u32 block, int whole_block) {
    557   if (r == PREG_NONE || r == 0 || r >= opt_reg_count(c->f)) return;
    558   if (end <= start) end = start + 1u;
    559   OptLiveRangeSet* ranges = c->ranges;
    560   if (ranges->nranges == ranges->cap) {
    561     u32 ncap = ranges->cap ? ranges->cap * 2u : 64u;
    562     OptLiveRange* nr = arena_array(c->f->arena, OptLiveRange, ncap);
    563     if (ranges->ranges)
    564       memcpy(nr, ranges->ranges, sizeof(ranges->ranges[0]) * ranges->nranges);
    565     ranges->ranges = nr;
    566     ranges->cap = ncap;
    567   }
    568   u32 idx = ranges->nranges++;
    569   OptLiveRange* lr = &ranges->ranges[idx];
    570   memset(lr, 0, sizeof *lr);
    571   lr->preg = r;
    572   lr->start = start;
    573   lr->end = end;
    574   lr->raw_start = start;
    575   lr->raw_end = end;
    576   lr->next = OPT_RANGE_NONE;
    577   lr->block = block;
    578   lr->whole_block = whole_block ? 1u : 0u;
    579   if (ranges->first_range_by_preg[r] == OPT_RANGE_NONE) {
    580     ranges->first_range_by_preg[r] = idx;
    581     range_push_range_preg(c, r);
    582   } else {
    583     ranges->ranges[c->last_range_by_preg[r]].next = idx;
    584   }
    585   c->last_range_by_preg[r] = idx;
    586   range_push_raw_point(c, start);
    587   range_push_raw_point(c, end);
    588   if (whole_block) ++ranges->whole_block_spans;
    589 }
    590 
    591 typedef struct RangeOpenCtx {
    592   RangeBuildCtx* build;
    593   u32 raw_end;
    594 } RangeOpenCtx;
    595 
    596 static void range_open_at_end(PReg r, void* arg) {
    597   RangeOpenCtx* c = (RangeOpenCtx*)arg;
    598   range_set_open_end(c->build, r, c->raw_end);
    599 }
    600 
    601 typedef struct RangeCloseCtx {
    602   RangeBuildCtx* build;
    603   u32 start;
    604   u32 block;
    605 } RangeCloseCtx;
    606 
    607 static void range_close_def(PReg r, void* arg) {
    608   RangeCloseCtx* c = (RangeCloseCtx*)arg;
    609   RangeBuildCtx* b = c->build;
    610   if (r == PREG_NONE || r == 0 || r >= opt_reg_count(b->f)) return;
    611   u32 open_end = range_open_end(b, r);
    612   if (open_end != OPT_RANGE_NONE) {
    613     range_append(b, r, c->start, open_end, c->block, 0);
    614     range_close_open_end(b, r);
    615   } else {
    616     range_append(b, r, c->start, c->start + 1u, c->block, 0);
    617   }
    618   b->ranges->def_freq_by_preg[r] += b->f->blocks[c->block].frequency;
    619 }
    620 
    621 typedef struct RangeUseCtx {
    622   RangeBuildCtx* build;
    623   u32 end;
    624   u32 block;
    625 } RangeUseCtx;
    626 
    627 static void range_open_use(PReg r, void* arg) {
    628   RangeUseCtx* c = (RangeUseCtx*)arg;
    629   RangeBuildCtx* b = c->build;
    630   if (r == PREG_NONE || r == 0 || r >= opt_reg_count(b->f)) return;
    631   if (!range_is_open(b, r)) range_set_open_end(b, r, c->end);
    632   b->ranges->use_freq_by_preg[r] += b->f->blocks[c->block].frequency;
    633 }
    634 
    635 typedef struct RangeBlockFreqCtx {
    636   OptLiveRangeSet* ranges;
    637   u32 freq;
    638 } RangeBlockFreqCtx;
    639 
    640 static void range_add_block_freq(PReg r, void* arg) {
    641   RangeBlockFreqCtx* c = (RangeBlockFreqCtx*)arg;
    642   c->ranges->live_block_freq_by_preg[r] += c->freq;
    643 }
    644 
    645 typedef struct RangeCallCtx {
    646   Func* f;
    647   OptLiveRangeSet* ranges;
    648   const RangeInstRefs* refs;
    649   u32 freq;
    650 } RangeCallCtx;
    651 
    652 static void range_add_live_across_call(PReg r, void* arg) {
    653   RangeCallCtx* c = (RangeCallCtx*)arg;
    654   if (r == PREG_NONE || r == 0 || r >= opt_reg_count(c->f)) return;
    655   if (range_refs_has_def(c->refs, r)) return;
    656   c->ranges->live_across_call_freq_by_preg[r] += c->freq;
    657 }
    658 
    659 static void range_live_pregs_update_before(RangeLivePregs* live,
    660                                            const RangeInstRefs* refs) {
    661   for (u32 i = 0; i < refs->ndefs; ++i)
    662     range_live_pregs_remove(live, refs->defs[i]);
    663   for (u32 i = 0; i < refs->nuses; ++i)
    664     range_live_pregs_add(live, refs->uses[i]);
    665 }
    666 
    667 static int u32_cmp(const void* a, const void* b) {
    668   u32 av = *(const u32*)a;
    669   u32 bv = *(const u32*)b;
    670   return (av > bv) - (av < bv);
    671 }
    672 
    673 static u32 range_point_index(const u32* points, u32 npoints, u32 raw) {
    674   u32 lo = 0;
    675   u32 hi = npoints;
    676   while (lo < hi) {
    677     u32 mid = lo + (hi - lo) / 2u;
    678     if (points[mid] < raw)
    679       lo = mid + 1u;
    680     else
    681       hi = mid;
    682   }
    683   return lo;
    684 }
    685 
    686 static void range_compress_points(OptLiveRangeSet* ranges,
    687                                   RangeBuildCtx* build) {
    688   if (build->nraw_points == 0) return;
    689   qsort(build->raw_points, build->nraw_points, sizeof(build->raw_points[0]),
    690         u32_cmp);
    691   u32 unique = 0;
    692   for (u32 i = 0; i < build->nraw_points; ++i) {
    693     if (unique == 0 || build->raw_points[i] != build->raw_points[unique - 1u])
    694       build->raw_points[unique++] = build->raw_points[i];
    695   }
    696   ranges->point_count = unique;
    697   for (u32 i = 0; i < ranges->nranges; ++i) {
    698     OptLiveRange* r = &ranges->ranges[i];
    699     r->start = range_point_index(build->raw_points, unique, r->start);
    700     r->end = range_point_index(build->raw_points, unique, r->end);
    701     if (r->end <= r->start) r->end = r->start + 1u;
    702     u32 len = r->end - r->start;
    703     ranges->range_point_visits += len;
    704     ranges->live_length_by_preg[r->preg] += len;
    705     if (ranges->max_live_length < len) ranges->max_live_length = len;
    706   }
    707 }
    708 
    709 void opt_live_ranges_build(Func* f, const OptLiveInfo* live,
    710                            OptLiveRangeSet* ranges) {
    711   memset(ranges, 0, sizeof *ranges);
    712   if (!f || !live) return;
    713   ranges->arena = f->arena;
    714   ranges->f = f;
    715   ranges->first_range_by_preg =
    716       arena_array(f->arena, u32, opt_reg_count(f) ? opt_reg_count(f) : 1u);
    717   ranges->live_length_by_preg =
    718       arena_zarray(f->arena, u32, opt_reg_count(f) ? opt_reg_count(f) : 1u);
    719   ranges->use_freq_by_preg =
    720       arena_zarray(f->arena, u32, opt_reg_count(f) ? opt_reg_count(f) : 1u);
    721   ranges->def_freq_by_preg =
    722       arena_zarray(f->arena, u32, opt_reg_count(f) ? opt_reg_count(f) : 1u);
    723   ranges->live_block_freq_by_preg =
    724       arena_zarray(f->arena, u32, opt_reg_count(f) ? opt_reg_count(f) : 1u);
    725   ranges->live_across_call_freq_by_preg =
    726       arena_zarray(f->arena, u32, opt_reg_count(f) ? opt_reg_count(f) : 1u);
    727   ranges->spill_cost_by_preg =
    728       arena_zarray(f->arena, u32, opt_reg_count(f) ? opt_reg_count(f) : 1u);
    729   memset(ranges->first_range_by_preg, 0xff,
    730          sizeof(ranges->first_range_by_preg[0]) *
    731              (opt_reg_count(f) ? opt_reg_count(f) : 1u));
    732 
    733   RangeBuildCtx build;
    734   memset(&build, 0, sizeof build);
    735   build.f = f;
    736   build.ranges = ranges;
    737   build.last_range_by_preg =
    738       arena_array(f->arena, u32, opt_reg_count(f) ? opt_reg_count(f) : 1u);
    739   build.open_end_by_preg =
    740       arena_array(f->arena, u32, opt_reg_count(f) ? opt_reg_count(f) : 1u);
    741   build.open_gen_by_preg =
    742       arena_zarray(f->arena, u32, opt_reg_count(f) ? opt_reg_count(f) : 1u);
    743   build.open_gen = 1;
    744   memset(build.last_range_by_preg, 0xff,
    745          sizeof(build.last_range_by_preg[0]) *
    746              (opt_reg_count(f) ? opt_reg_count(f) : 1u));
    747 
    748   u32* block_base = arena_array(f->arena, u32, f->nblocks ? f->nblocks : 1u);
    749   u32 raw = 0;
    750   for (u32 b = 0; b < f->nblocks; ++b) {
    751     block_base[b] = raw;
    752     raw += f->blocks[b].ninsts ? f->blocks[b].ninsts : 1u;
    753   }
    754   ranges->raw_point_count = raw + 1u;
    755 
    756   OptBitset block_live;
    757   opt_bitset_init(f->arena, &block_live, live->words);
    758   RangeInstRefs refs;
    759   range_refs_init(f, &refs);
    760   RangeLivePregs live_pregs;
    761   range_live_pregs_init(f, &live_pregs);
    762 
    763   for (u32 b = 0; b < f->nblocks; ++b) {
    764     Block* bl = &f->blocks[b];
    765     const OptBlockLive* lb = &live->blocks[b];
    766     range_block_reset(&build);
    767     range_live_pregs_clear(&live_pregs);
    768     u32 raw_start = block_base[b];
    769     u32 raw_end = raw_start + (bl->ninsts ? bl->ninsts : 1u);
    770     RangeOpenCtx open_ctx = {&build, raw_end};
    771     ranges->live_words_touched += lb->live_out.active_words;
    772     opt_bitset_iter_set(&lb->live_out, range_open_at_end, &open_ctx);
    773     opt_bitset_iter_set(&lb->live_out, range_live_pregs_add_bit, &live_pregs);
    774 
    775     RangeBlockFreqCtx freq_ctx = {ranges, bl->frequency};
    776     ranges->live_words_touched += block_live.nwords < lb->live_in.active_words
    777                                       ? block_live.nwords
    778                                       : lb->live_in.active_words;
    779     if (block_live.active_words > lb->live_in.active_words)
    780       ranges->live_words_touched +=
    781           block_live.active_words - lb->live_in.active_words;
    782     opt_bitset_copy(&block_live, &lb->live_in);
    783     ranges->live_words_touched += block_live.nwords < lb->live_out.active_words
    784                                       ? block_live.nwords
    785                                       : lb->live_out.active_words;
    786     opt_bitset_union(&block_live, &lb->live_out);
    787     ranges->live_words_touched += block_live.active_words;
    788     opt_bitset_iter_set(&block_live, range_add_block_freq, &freq_ctx);
    789 
    790     for (u32 ri = bl->ninsts; ri > 0; --ri) {
    791       u32 i = ri - 1u;
    792       Inst* in = &bl->insts[i];
    793       range_refs_reset(f, &refs);
    794       opt_walk_inst_operands(f, in, range_collect_bits, &refs);
    795 
    796       if ((IROp)in->op == IR_CALL) {
    797         RangeCallCtx call_ctx = {f, ranges, &refs, bl->frequency};
    798         ranges->live_words_touched += live_pregs.nregs;
    799         for (u32 k = 0; k < live_pregs.nregs; ++k)
    800           range_add_live_across_call(live_pregs.regs[k], &call_ctx);
    801       }
    802 
    803       /* Incoming parameters are all delivered simultaneously by the ABI at
    804        * function entry, so their values are mutually live there even though the
    805        * param_decl markers are sequenced. Anchor every param_decl def at the
    806        * entry block's base point so the params interfere with one another; this
    807        * stops the allocator from coalescing a dead param (e.g. an unused arg)
    808        * into a live param's incoming register, which the entry-bind parallel
    809        * copy could not then resolve (two binds targeting one register). */
    810       u32 def_pos = ((IROp)in->op == IR_PARAM_DECL) ? raw_start : raw_start + i;
    811       RangeCloseCtx close_ctx = {&build, def_pos, b};
    812       for (u32 k = 0; k < refs.ndefs; ++k)
    813         range_close_def(refs.defs[k], &close_ctx);
    814       RangeUseCtx use_ctx = {&build, raw_start + i + 1u, b};
    815       for (u32 k = 0; k < refs.nuses; ++k)
    816         range_open_use(refs.uses[k], &use_ctx);
    817       range_live_pregs_update_before(&live_pregs, &refs);
    818     }
    819 
    820     for (u32 oi = 0; oi < build.nopen_pregs; ++oi) {
    821       PReg r = build.open_pregs[oi];
    822       if (!range_is_open(&build, r)) continue;
    823       u32 open_end = range_open_end(&build, r);
    824       if (open_end == OPT_RANGE_NONE) continue;
    825       int whole_block = open_end == raw_end &&
    826                         !opt_bitset_has(&lb->live_use, r) &&
    827                         !opt_bitset_has(&lb->live_def, r);
    828       range_append(&build, r, raw_start, open_end, b, whole_block);
    829       range_close_open_end(&build, r);
    830     }
    831     ranges->preg_scans += build.nopen_pregs;
    832   }
    833 
    834   range_compress_points(ranges, &build);
    835 
    836   for (u32 vi = 0; vi < build.nrange_pregs; ++vi) {
    837     PReg r = build.range_pregs[vi];
    838     u32 nranges = 0;
    839     for (u32 ri = ranges->first_range_by_preg[r]; ri != OPT_RANGE_NONE;
    840          ri = ranges->ranges[ri].next)
    841       ++nranges;
    842     if (ranges->max_ranges_per_preg < nranges)
    843       ranges->max_ranges_per_preg = nranges;
    844     ranges->spill_cost_by_preg[r] = (ranges->use_freq_by_preg[r] * 2u) +
    845                                     ranges->def_freq_by_preg[r] +
    846                                     ranges->live_across_call_freq_by_preg[r] +
    847                                     ranges->live_block_freq_by_preg[r];
    848   }
    849   ranges->preg_scans += build.nrange_pregs;
    850 }
    851 
    852 void opt_live_dump_ranges(Func* f, const OptLiveRangeSet* ranges, Writer* w) {
    853   (void)f;
    854   if (!ranges || !w) return;
    855   char buf[160];
    856   StrBuf sb;
    857   strbuf_init(&sb, buf, sizeof buf);
    858   strbuf_puts(&sb, "ranges total=");
    859   strbuf_put_u64(&sb, (u64)(unsigned)ranges->nranges);
    860   strbuf_puts(&sb, " points=");
    861   strbuf_put_u64(&sb, (u64)(unsigned)ranges->point_count);
    862   strbuf_puts(&sb, " raw_points=");
    863   strbuf_put_u64(&sb, (u64)(unsigned)ranges->raw_point_count);
    864   strbuf_puts(&sb, " whole_block=");
    865   strbuf_put_u64(&sb, (u64)(unsigned)ranges->whole_block_spans);
    866   strbuf_putc(&sb, '\n');
    867   dump_sb(w, &sb);
    868   for (PReg r = 1; r < opt_reg_count(ranges->f); ++r) {
    869     if (ranges->first_range_by_preg[r] == OPT_RANGE_NONE) continue;
    870     strbuf_reset(&sb);
    871     strbuf_putc(&sb, 'r');
    872     strbuf_put_u64(&sb, (u64)(unsigned)r);
    873     strbuf_puts(&sb, " len=");
    874     strbuf_put_u64(&sb, (u64)(unsigned)ranges->live_length_by_preg[r]);
    875     strbuf_puts(&sb, " spill=");
    876     strbuf_put_u64(&sb, (u64)(unsigned)ranges->spill_cost_by_preg[r]);
    877     strbuf_putc(&sb, ':');
    878     dump_sb(w, &sb);
    879     for (u32 ri = ranges->first_range_by_preg[r]; ri != OPT_RANGE_NONE;
    880          ri = ranges->ranges[ri].next) {
    881       const OptLiveRange* lr = &ranges->ranges[ri];
    882       strbuf_reset(&sb);
    883       strbuf_puts(&sb, " [");
    884       strbuf_put_u64(&sb, (u64)(unsigned)lr->start);
    885       strbuf_putc(&sb, ',');
    886       strbuf_put_u64(&sb, (u64)(unsigned)lr->end);
    887       strbuf_puts(&sb, ")b");
    888       strbuf_put_u64(&sb, (u64)(unsigned)lr->block);
    889       if (lr->whole_block) strbuf_putc(&sb, '*');
    890       dump_sb(w, &sb);
    891     }
    892     dump_write(w, "\n");
    893   }
    894 }