kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

pass_coalesce.c (12611B)


      1 #include <stdlib.h>
      2 #include <string.h>
      3 
      4 #include "core/arena.h"
      5 #include "core/metrics.h"
      6 #include "opt/opt_internal.h"
      7 
      8 typedef struct CoalesceMove {
      9   PReg dst;
     10   PReg src;
     11   u32 weight;
     12 } CoalesceMove;
     13 
     14 typedef struct CoalesceCtx {
     15   Func* f;
     16   const OptLiveRangeSet* ranges;
     17   PReg* related;
     18   u32 nrelated;
     19   u32 related_cap;
     20   u32* related_index;
     21   u64* conflicts;
     22   u64* unit_conflicts;
     23   u32 conflict_words;
     24 } CoalesceCtx;
     25 
     26 static PReg coalesce_find(Func* f, PReg v) {
     27   if (!f->opt_coalesce_parent || v == PREG_NONE || v >= opt_reg_count(f))
     28     return v;
     29   PReg p = (PReg)f->opt_coalesce_parent[v];
     30   while (p != f->opt_coalesce_parent[p]) p = (PReg)f->opt_coalesce_parent[p];
     31   while (v != p) {
     32     PReg n = (PReg)f->opt_coalesce_parent[v];
     33     f->opt_coalesce_parent[v] = p;
     34     v = n;
     35   }
     36   return p;
     37 }
     38 
     39 static void coalesce_add_related(CoalesceCtx* c, PReg v) {
     40   Func* f = c->f;
     41   if (!opt_reg_valid(f, v)) return;
     42   if (c->related_index[v] != OPT_RANGE_NONE) return;
     43   if (c->nrelated == c->related_cap) {
     44     u32 ncap = c->related_cap ? c->related_cap * 2u : 32u;
     45     PReg* nr = arena_array(f->arena, PReg, ncap);
     46     if (c->related) memcpy(nr, c->related, sizeof(c->related[0]) * c->nrelated);
     47     c->related = nr;
     48     c->related_cap = ncap;
     49   }
     50   c->related_index[v] = c->nrelated;
     51   c->related[c->nrelated++] = v;
     52 }
     53 
     54 int opt_ranges_overlap_kind(const OptLiveRangeSet* ranges, PReg a, PReg b);
     55 static int ranges_overlap_kind(const OptLiveRangeSet* ranges, PReg a, PReg b) {
     56   return opt_ranges_overlap_kind(ranges, a, b);
     57 }
     58 int opt_ranges_overlap_kind(const OptLiveRangeSet* ranges, PReg a, PReg b) {
     59   /* Returns 0 (no overlap), 1 (a single unit-length overlap), or 2 (real
     60    * conflict: an overlap longer than one point, or two or more disjoint
     61    * unit-length overlaps).
     62    *
     63    * A single unit-length overlap is the natural conflict of a move
     64    * `dst = COPY src`: at the def point of dst, src is still live for one
     65    * point. `group_conflicts` allows that single unit conflict to permit
     66    * coalescing the move. Two or more unit overlaps at distinct points imply
     67    * dst has multiple non-SSA defs whose live ranges each clip src — those
     68    * are real conflicts and must block coalescing, otherwise we'd allocate
     69    * dst into src's hard register and clobber src at the extra def. */
     70   /* Overlap must be measured on the *raw* (pre-compression) point numbers,
     71    * not the compressed start/end. range_compress_points only keeps points
     72    * that are some range's boundary, so an interior instruction point shared
     73    * by two live values can be dropped — collapsing a genuine multi-point
     74    * overlap down to a single compressed point and masquerading as the benign
     75    * unit-overlap of a coalescable move. The benign move/swap pattern
     76    * (`dst = COPY src`, or `sub x0, x21, x0`) is genuinely one *raw* point
     77    * wide: src is used and dst is defined at the same instruction. A value
     78    * that stays live across the def of an unrelated value spans two or more
     79    * raw points and is a real conflict. */
     80   u32 unit_count = 0;
     81   for (u32 ar = ranges->first_range_by_preg[a]; ar != OPT_RANGE_NONE;
     82        ar = ranges->ranges[ar].next) {
     83     const OptLiveRange* ra = &ranges->ranges[ar];
     84     for (u32 br = ranges->first_range_by_preg[b]; br != OPT_RANGE_NONE;
     85          br = ranges->ranges[br].next) {
     86       const OptLiveRange* rb = &ranges->ranges[br];
     87       if (ra->raw_start < rb->raw_end && rb->raw_start < ra->raw_end) {
     88         u32 start =
     89             ra->raw_start > rb->raw_start ? ra->raw_start : rb->raw_start;
     90         u32 end = ra->raw_end < rb->raw_end ? ra->raw_end : rb->raw_end;
     91         if (end > start + 1u) return 2;
     92         if (++unit_count > 1u) return 2;
     93       }
     94     }
     95   }
     96   return unit_count ? 1 : 0;
     97 }
     98 
     99 static void coalesce_set_conflict_bit(u64* bits, u32 nrelated, u32 a, u32 b) {
    100   if (!bits) return;
    101   if (a == b) return;
    102   u32 x = a < b ? a : b;
    103   u32 y = a < b ? b : a;
    104   u32 bit = x * nrelated + y;
    105   bits[bit / 64u] |= 1ull << (bit % 64u);
    106 }
    107 
    108 static void coalesce_set_conflict(CoalesceCtx* c, u32 a, u32 b, int unit) {
    109   coalesce_set_conflict_bit(c->conflicts, c->nrelated, a, b);
    110   if (unit) coalesce_set_conflict_bit(c->unit_conflicts, c->nrelated, a, b);
    111 }
    112 
    113 static int coalesce_has_bit(const CoalesceCtx* c, const u64* bits, PReg a,
    114                             PReg b) {
    115   if (!bits || a >= opt_reg_count(c->f) || b >= opt_reg_count(c->f)) return 1;
    116   u32 ai = c->related_index[a];
    117   u32 bi = c->related_index[b];
    118   if (ai == OPT_RANGE_NONE || bi == OPT_RANGE_NONE) return 0;
    119   if (ai == bi) return 0;
    120   u32 x = ai < bi ? ai : bi;
    121   u32 y = ai < bi ? bi : ai;
    122   u32 bit = x * c->nrelated + y;
    123   return (bits[bit / 64u] & (1ull << (bit % 64u))) != 0;
    124 }
    125 
    126 static int coalesce_has_conflict(const CoalesceCtx* c, PReg a, PReg b) {
    127   return coalesce_has_bit(c, c->conflicts, a, b);
    128 }
    129 
    130 static int coalesce_has_unit_conflict(const CoalesceCtx* c, PReg a, PReg b) {
    131   return coalesce_has_bit(c, c->unit_conflicts, a, b);
    132 }
    133 
    134 static int group_conflicts(const CoalesceCtx* c, PReg ra, PReg rb, PReg allow_a,
    135                            PReg allow_b) {
    136   for (u32 i = 0; i < c->nrelated; ++i) {
    137     PReg a = c->related[i];
    138     if (coalesce_find(c->f, a) != ra) continue;
    139     for (u32 j = 0; j < c->nrelated; ++j) {
    140       PReg b = c->related[j];
    141       if (coalesce_find(c->f, b) != rb) continue;
    142       if (((a == allow_a && b == allow_b) || (a == allow_b && b == allow_a)) &&
    143           coalesce_has_unit_conflict(c, a, b))
    144         continue;
    145       if (coalesce_has_conflict(c, a, b)) return 1;
    146     }
    147   }
    148   return 0;
    149 }
    150 
    151 static int hard_reg_possible(Func* f, u8 cls, u32 forbidden, u32 allowed) {
    152   for (u32 i = 0; i < f->opt_hard_reg_count[cls]; ++i) {
    153     Reg r = f->opt_hard_regs[cls][i];
    154     if (r >= 32) continue;
    155     if (allowed && (allowed & (1u << r)) == 0) continue;
    156     if ((forbidden & (1u << r)) == 0) return 1;
    157   }
    158   if (allowed) {
    159     for (Reg r = 0; r < 32; ++r) {
    160       if ((allowed & (1u << r)) == 0) continue;
    161       if (forbidden & (1u << r)) continue;
    162       int in_hard = 0;
    163       for (u32 i = 0; i < f->opt_hard_reg_count[cls]; ++i) {
    164         if (f->opt_hard_regs[cls][i] == r) {
    165           in_hard = 1;
    166           break;
    167         }
    168       }
    169       if (in_hard) continue;
    170       for (u32 i = 0; i < f->opt_phys_reg_count[cls]; ++i) {
    171         const CGPhysRegInfo* pi = &f->opt_phys_regs[cls][i];
    172         if (pi->reg == r && (pi->flags & CG_REG_RESERVED) == 0) return 1;
    173       }
    174     }
    175     return 0;
    176   }
    177   return f->opt_hard_reg_count[cls] == 0;
    178 }
    179 
    180 static int group_constraints_compatible(const CoalesceCtx* c, PReg ra,
    181                                         PReg rb) {
    182   Func* f = c->f;
    183   u8 cls = opt_reg_cls(f, ra);
    184   KitCgTypeId type = opt_reg_type(f, ra);
    185   i32 tied = -1;
    186   u32 forbidden = 0;
    187   u32 allowed = 0;
    188   for (PReg v = 1; v < opt_reg_count(f); ++v) {
    189     PReg r = coalesce_find(f, v);
    190     if (r != ra && r != rb) continue;
    191     if (opt_reg_cls(f, v) != cls || opt_reg_type(f, v) != type) return 0;
    192     const OptPRegInfo* vi = &f->preg_info[v];
    193     forbidden |= vi->forbidden_hard_regs;
    194     if (vi->allowed_hard_regs) {
    195       if (allowed) {
    196         allowed &= vi->allowed_hard_regs;
    197         if (!allowed) return 0;
    198       } else {
    199         allowed = vi->allowed_hard_regs;
    200       }
    201     }
    202     if (vi->tied_hard_reg >= 0) {
    203       if (tied >= 0 && tied != vi->tied_hard_reg) return 0;
    204       tied = vi->tied_hard_reg;
    205     }
    206   }
    207   if (tied >= 0 && tied < 32 && (forbidden & (1u << (Reg)tied))) return 0;
    208   if (tied >= 0 && tied < 32 && allowed && (allowed & (1u << (Reg)tied)) == 0)
    209     return 0;
    210   return hard_reg_possible(f, cls, forbidden, allowed);
    211 }
    212 
    213 static void coalesce_union(Func* f, PReg a, PReg b) {
    214   PReg ra = coalesce_find(f, a);
    215   PReg rb = coalesce_find(f, b);
    216   if (ra == rb) return;
    217   u32 as = f->opt_coalesce_size[ra];
    218   u32 bs = f->opt_coalesce_size[rb];
    219   if (as < bs || (as == bs && rb < ra)) {
    220     PReg t = ra;
    221     ra = rb;
    222     rb = t;
    223   }
    224   f->opt_coalesce_parent[rb] = ra;
    225   f->opt_coalesce_size[ra] += f->opt_coalesce_size[rb];
    226 }
    227 
    228 static int move_higher(const CoalesceMove* a, const CoalesceMove* b) {
    229   if (a->weight != b->weight) return a->weight > b->weight;
    230   if (a->dst != b->dst) return a->dst < b->dst;
    231   return a->src < b->src;
    232 }
    233 
    234 static int move_cmp(const void* va, const void* vb) {
    235   const CoalesceMove* a = (const CoalesceMove*)va;
    236   const CoalesceMove* b = (const CoalesceMove*)vb;
    237   if (move_higher(a, b)) return -1;
    238   if (move_higher(b, a)) return 1;
    239   return 0;
    240 }
    241 
    242 static int collect_move(Func* f, const OptLiveRangeSet* ranges, Inst* in,
    243                         u32 block, CoalesceMove* out) {
    244   if ((IROp)in->op != IR_COPY || in->nopnds < 2) return 0;
    245   if (in->flags & IRF_NO_COALESCE) return 0;
    246   if (in->opnds[0].kind != OPK_REG || in->opnds[1].kind != OPK_REG) return 0;
    247   PReg dst = (PReg)in->opnds[0].v.reg;
    248   PReg src = (PReg)in->opnds[1].v.reg;
    249   if (!opt_reg_valid(f, dst) || !opt_reg_valid(f, src) || dst == src) return 0;
    250   if (ranges->first_range_by_preg[dst] == OPT_RANGE_NONE ||
    251       ranges->first_range_by_preg[src] == OPT_RANGE_NONE)
    252     return 0;
    253   if (opt_reg_cls(f, dst) != opt_reg_cls(f, src)) return 0;
    254   if (opt_reg_type(f, dst) != opt_reg_type(f, src)) return 0;
    255   out->dst = dst;
    256   out->src = src;
    257   out->weight = f->blocks[block].frequency ? f->blocks[block].frequency : 1u;
    258   return 1;
    259 }
    260 
    261 void opt_coalesce_ranges(Func* f, const OptLiveRangeSet* ranges) {
    262   if (!f || !ranges || !f->preg_info) return;
    263   u32 nregs = opt_reg_count(f);
    264   f->opt_coalesce_moves_seen = 0;
    265   f->opt_coalesce_candidates = 0;
    266   f->opt_coalesce_conflicts = 0;
    267   f->opt_coalesce_merge_attempts = 0;
    268   f->opt_coalesce_merges = 0;
    269   f->opt_coalesce_parent = arena_array(f->arena, u32, nregs ? nregs : 1u);
    270   f->opt_coalesce_size = arena_array(f->arena, u32, nregs ? nregs : 1u);
    271   for (PReg v = 0; v < nregs; ++v) {
    272     f->opt_coalesce_parent[v] = v;
    273     f->opt_coalesce_size[v] = 1;
    274   }
    275 
    276   CoalesceMove* moves = NULL;
    277   u32 nmoves = 0;
    278   u32 move_cap = 0;
    279   CoalesceCtx ctx;
    280   memset(&ctx, 0, sizeof ctx);
    281   ctx.f = f;
    282   ctx.ranges = ranges;
    283   ctx.related_index = arena_array(f->arena, u32, nregs ? nregs : 1u);
    284   memset(ctx.related_index, 0xff,
    285          sizeof(ctx.related_index[0]) * (nregs ? nregs : 1u));
    286 
    287   for (u32 b = 0; b < f->nblocks; ++b) {
    288     Block* bl = &f->blocks[b];
    289     for (u32 i = 0; i < bl->ninsts; ++i) {
    290       if ((IROp)bl->insts[i].op == IR_COPY) ++f->opt_coalesce_moves_seen;
    291       CoalesceMove m;
    292       if (!collect_move(f, ranges, &bl->insts[i], b, &m)) continue;
    293       if (nmoves == move_cap) {
    294         u32 ncap = move_cap ? move_cap * 2u : 32u;
    295         CoalesceMove* nv = arena_array(f->arena, CoalesceMove, ncap);
    296         if (moves) memcpy(nv, moves, sizeof(moves[0]) * nmoves);
    297         moves = nv;
    298         move_cap = ncap;
    299       }
    300       moves[nmoves++] = m;
    301       coalesce_add_related(&ctx, m.dst);
    302       coalesce_add_related(&ctx, m.src);
    303     }
    304   }
    305   f->opt_coalesce_candidates = nmoves;
    306   if (!nmoves || ctx.nrelated < 2) goto metrics;
    307 
    308   ctx.conflict_words = (ctx.nrelated * ctx.nrelated + 63u) / 64u;
    309   ctx.conflicts =
    310       arena_zarray(f->arena, u64, ctx.conflict_words ? ctx.conflict_words : 1u);
    311   ctx.unit_conflicts =
    312       arena_zarray(f->arena, u64, ctx.conflict_words ? ctx.conflict_words : 1u);
    313   for (u32 i = 0; i < ctx.nrelated; ++i) {
    314     for (u32 j = i + 1u; j < ctx.nrelated; ++j) {
    315       int kind = ranges_overlap_kind(ranges, ctx.related[i], ctx.related[j]);
    316       if (kind) {
    317         coalesce_set_conflict(&ctx, i, j, kind == 1);
    318         ++f->opt_coalesce_conflicts;
    319       }
    320     }
    321   }
    322 
    323   qsort(moves, nmoves, sizeof(moves[0]), move_cmp);
    324   for (u32 i = 0; i < nmoves; ++i) {
    325     PReg ra = coalesce_find(f, moves[i].dst);
    326     PReg rb = coalesce_find(f, moves[i].src);
    327     if (ra == rb) continue;
    328     ++f->opt_coalesce_merge_attempts;
    329     if (group_conflicts(&ctx, ra, rb, moves[i].dst, moves[i].src)) continue;
    330     if (!group_constraints_compatible(&ctx, ra, rb)) continue;
    331     coalesce_union(f, ra, rb);
    332     ++f->opt_coalesce_merges;
    333   }
    334 
    335 metrics:
    336   metrics_count(f->c, "opt.coalesce.moves_seen", f->opt_coalesce_moves_seen);
    337   metrics_count(f->c, "opt.coalesce.candidates", f->opt_coalesce_candidates);
    338   metrics_count(f->c, "opt.coalesce.conflicts", f->opt_coalesce_conflicts);
    339   metrics_count(f->c, "opt.coalesce.merge_attempts",
    340                 f->opt_coalesce_merge_attempts);
    341   metrics_count(f->c, "opt.coalesce.merges", f->opt_coalesce_merges);
    342 }
    343 
    344 void opt_coalesce(Func* f) {
    345   if (!f) return;
    346   OptLiveInfo live;
    347   opt_live_blocks(f, &live);
    348   OptLiveRangeSet ranges;
    349   opt_live_ranges_build(f, &live, &ranges);
    350   opt_coalesce_ranges(f, &ranges);
    351 }