pass_coalesce.c (12611B)
1 #include <stdlib.h> 2 #include <string.h> 3 4 #include "core/arena.h" 5 #include "core/metrics.h" 6 #include "opt/opt_internal.h" 7 8 typedef struct CoalesceMove { 9 PReg dst; 10 PReg src; 11 u32 weight; 12 } CoalesceMove; 13 14 typedef struct CoalesceCtx { 15 Func* f; 16 const OptLiveRangeSet* ranges; 17 PReg* related; 18 u32 nrelated; 19 u32 related_cap; 20 u32* related_index; 21 u64* conflicts; 22 u64* unit_conflicts; 23 u32 conflict_words; 24 } CoalesceCtx; 25 26 static PReg coalesce_find(Func* f, PReg v) { 27 if (!f->opt_coalesce_parent || v == PREG_NONE || v >= opt_reg_count(f)) 28 return v; 29 PReg p = (PReg)f->opt_coalesce_parent[v]; 30 while (p != f->opt_coalesce_parent[p]) p = (PReg)f->opt_coalesce_parent[p]; 31 while (v != p) { 32 PReg n = (PReg)f->opt_coalesce_parent[v]; 33 f->opt_coalesce_parent[v] = p; 34 v = n; 35 } 36 return p; 37 } 38 39 static void coalesce_add_related(CoalesceCtx* c, PReg v) { 40 Func* f = c->f; 41 if (!opt_reg_valid(f, v)) return; 42 if (c->related_index[v] != OPT_RANGE_NONE) return; 43 if (c->nrelated == c->related_cap) { 44 u32 ncap = c->related_cap ? c->related_cap * 2u : 32u; 45 PReg* nr = arena_array(f->arena, PReg, ncap); 46 if (c->related) memcpy(nr, c->related, sizeof(c->related[0]) * c->nrelated); 47 c->related = nr; 48 c->related_cap = ncap; 49 } 50 c->related_index[v] = c->nrelated; 51 c->related[c->nrelated++] = v; 52 } 53 54 int opt_ranges_overlap_kind(const OptLiveRangeSet* ranges, PReg a, PReg b); 55 static int ranges_overlap_kind(const OptLiveRangeSet* ranges, PReg a, PReg b) { 56 return opt_ranges_overlap_kind(ranges, a, b); 57 } 58 int opt_ranges_overlap_kind(const OptLiveRangeSet* ranges, PReg a, PReg b) { 59 /* Returns 0 (no overlap), 1 (a single unit-length overlap), or 2 (real 60 * conflict: an overlap longer than one point, or two or more disjoint 61 * unit-length overlaps). 62 * 63 * A single unit-length overlap is the natural conflict of a move 64 * `dst = COPY src`: at the def point of dst, src is still live for one 65 * point. `group_conflicts` allows that single unit conflict to permit 66 * coalescing the move. Two or more unit overlaps at distinct points imply 67 * dst has multiple non-SSA defs whose live ranges each clip src — those 68 * are real conflicts and must block coalescing, otherwise we'd allocate 69 * dst into src's hard register and clobber src at the extra def. */ 70 /* Overlap must be measured on the *raw* (pre-compression) point numbers, 71 * not the compressed start/end. range_compress_points only keeps points 72 * that are some range's boundary, so an interior instruction point shared 73 * by two live values can be dropped — collapsing a genuine multi-point 74 * overlap down to a single compressed point and masquerading as the benign 75 * unit-overlap of a coalescable move. The benign move/swap pattern 76 * (`dst = COPY src`, or `sub x0, x21, x0`) is genuinely one *raw* point 77 * wide: src is used and dst is defined at the same instruction. A value 78 * that stays live across the def of an unrelated value spans two or more 79 * raw points and is a real conflict. */ 80 u32 unit_count = 0; 81 for (u32 ar = ranges->first_range_by_preg[a]; ar != OPT_RANGE_NONE; 82 ar = ranges->ranges[ar].next) { 83 const OptLiveRange* ra = &ranges->ranges[ar]; 84 for (u32 br = ranges->first_range_by_preg[b]; br != OPT_RANGE_NONE; 85 br = ranges->ranges[br].next) { 86 const OptLiveRange* rb = &ranges->ranges[br]; 87 if (ra->raw_start < rb->raw_end && rb->raw_start < ra->raw_end) { 88 u32 start = 89 ra->raw_start > rb->raw_start ? ra->raw_start : rb->raw_start; 90 u32 end = ra->raw_end < rb->raw_end ? ra->raw_end : rb->raw_end; 91 if (end > start + 1u) return 2; 92 if (++unit_count > 1u) return 2; 93 } 94 } 95 } 96 return unit_count ? 1 : 0; 97 } 98 99 static void coalesce_set_conflict_bit(u64* bits, u32 nrelated, u32 a, u32 b) { 100 if (!bits) return; 101 if (a == b) return; 102 u32 x = a < b ? a : b; 103 u32 y = a < b ? b : a; 104 u32 bit = x * nrelated + y; 105 bits[bit / 64u] |= 1ull << (bit % 64u); 106 } 107 108 static void coalesce_set_conflict(CoalesceCtx* c, u32 a, u32 b, int unit) { 109 coalesce_set_conflict_bit(c->conflicts, c->nrelated, a, b); 110 if (unit) coalesce_set_conflict_bit(c->unit_conflicts, c->nrelated, a, b); 111 } 112 113 static int coalesce_has_bit(const CoalesceCtx* c, const u64* bits, PReg a, 114 PReg b) { 115 if (!bits || a >= opt_reg_count(c->f) || b >= opt_reg_count(c->f)) return 1; 116 u32 ai = c->related_index[a]; 117 u32 bi = c->related_index[b]; 118 if (ai == OPT_RANGE_NONE || bi == OPT_RANGE_NONE) return 0; 119 if (ai == bi) return 0; 120 u32 x = ai < bi ? ai : bi; 121 u32 y = ai < bi ? bi : ai; 122 u32 bit = x * c->nrelated + y; 123 return (bits[bit / 64u] & (1ull << (bit % 64u))) != 0; 124 } 125 126 static int coalesce_has_conflict(const CoalesceCtx* c, PReg a, PReg b) { 127 return coalesce_has_bit(c, c->conflicts, a, b); 128 } 129 130 static int coalesce_has_unit_conflict(const CoalesceCtx* c, PReg a, PReg b) { 131 return coalesce_has_bit(c, c->unit_conflicts, a, b); 132 } 133 134 static int group_conflicts(const CoalesceCtx* c, PReg ra, PReg rb, PReg allow_a, 135 PReg allow_b) { 136 for (u32 i = 0; i < c->nrelated; ++i) { 137 PReg a = c->related[i]; 138 if (coalesce_find(c->f, a) != ra) continue; 139 for (u32 j = 0; j < c->nrelated; ++j) { 140 PReg b = c->related[j]; 141 if (coalesce_find(c->f, b) != rb) continue; 142 if (((a == allow_a && b == allow_b) || (a == allow_b && b == allow_a)) && 143 coalesce_has_unit_conflict(c, a, b)) 144 continue; 145 if (coalesce_has_conflict(c, a, b)) return 1; 146 } 147 } 148 return 0; 149 } 150 151 static int hard_reg_possible(Func* f, u8 cls, u32 forbidden, u32 allowed) { 152 for (u32 i = 0; i < f->opt_hard_reg_count[cls]; ++i) { 153 Reg r = f->opt_hard_regs[cls][i]; 154 if (r >= 32) continue; 155 if (allowed && (allowed & (1u << r)) == 0) continue; 156 if ((forbidden & (1u << r)) == 0) return 1; 157 } 158 if (allowed) { 159 for (Reg r = 0; r < 32; ++r) { 160 if ((allowed & (1u << r)) == 0) continue; 161 if (forbidden & (1u << r)) continue; 162 int in_hard = 0; 163 for (u32 i = 0; i < f->opt_hard_reg_count[cls]; ++i) { 164 if (f->opt_hard_regs[cls][i] == r) { 165 in_hard = 1; 166 break; 167 } 168 } 169 if (in_hard) continue; 170 for (u32 i = 0; i < f->opt_phys_reg_count[cls]; ++i) { 171 const CGPhysRegInfo* pi = &f->opt_phys_regs[cls][i]; 172 if (pi->reg == r && (pi->flags & CG_REG_RESERVED) == 0) return 1; 173 } 174 } 175 return 0; 176 } 177 return f->opt_hard_reg_count[cls] == 0; 178 } 179 180 static int group_constraints_compatible(const CoalesceCtx* c, PReg ra, 181 PReg rb) { 182 Func* f = c->f; 183 u8 cls = opt_reg_cls(f, ra); 184 KitCgTypeId type = opt_reg_type(f, ra); 185 i32 tied = -1; 186 u32 forbidden = 0; 187 u32 allowed = 0; 188 for (PReg v = 1; v < opt_reg_count(f); ++v) { 189 PReg r = coalesce_find(f, v); 190 if (r != ra && r != rb) continue; 191 if (opt_reg_cls(f, v) != cls || opt_reg_type(f, v) != type) return 0; 192 const OptPRegInfo* vi = &f->preg_info[v]; 193 forbidden |= vi->forbidden_hard_regs; 194 if (vi->allowed_hard_regs) { 195 if (allowed) { 196 allowed &= vi->allowed_hard_regs; 197 if (!allowed) return 0; 198 } else { 199 allowed = vi->allowed_hard_regs; 200 } 201 } 202 if (vi->tied_hard_reg >= 0) { 203 if (tied >= 0 && tied != vi->tied_hard_reg) return 0; 204 tied = vi->tied_hard_reg; 205 } 206 } 207 if (tied >= 0 && tied < 32 && (forbidden & (1u << (Reg)tied))) return 0; 208 if (tied >= 0 && tied < 32 && allowed && (allowed & (1u << (Reg)tied)) == 0) 209 return 0; 210 return hard_reg_possible(f, cls, forbidden, allowed); 211 } 212 213 static void coalesce_union(Func* f, PReg a, PReg b) { 214 PReg ra = coalesce_find(f, a); 215 PReg rb = coalesce_find(f, b); 216 if (ra == rb) return; 217 u32 as = f->opt_coalesce_size[ra]; 218 u32 bs = f->opt_coalesce_size[rb]; 219 if (as < bs || (as == bs && rb < ra)) { 220 PReg t = ra; 221 ra = rb; 222 rb = t; 223 } 224 f->opt_coalesce_parent[rb] = ra; 225 f->opt_coalesce_size[ra] += f->opt_coalesce_size[rb]; 226 } 227 228 static int move_higher(const CoalesceMove* a, const CoalesceMove* b) { 229 if (a->weight != b->weight) return a->weight > b->weight; 230 if (a->dst != b->dst) return a->dst < b->dst; 231 return a->src < b->src; 232 } 233 234 static int move_cmp(const void* va, const void* vb) { 235 const CoalesceMove* a = (const CoalesceMove*)va; 236 const CoalesceMove* b = (const CoalesceMove*)vb; 237 if (move_higher(a, b)) return -1; 238 if (move_higher(b, a)) return 1; 239 return 0; 240 } 241 242 static int collect_move(Func* f, const OptLiveRangeSet* ranges, Inst* in, 243 u32 block, CoalesceMove* out) { 244 if ((IROp)in->op != IR_COPY || in->nopnds < 2) return 0; 245 if (in->flags & IRF_NO_COALESCE) return 0; 246 if (in->opnds[0].kind != OPK_REG || in->opnds[1].kind != OPK_REG) return 0; 247 PReg dst = (PReg)in->opnds[0].v.reg; 248 PReg src = (PReg)in->opnds[1].v.reg; 249 if (!opt_reg_valid(f, dst) || !opt_reg_valid(f, src) || dst == src) return 0; 250 if (ranges->first_range_by_preg[dst] == OPT_RANGE_NONE || 251 ranges->first_range_by_preg[src] == OPT_RANGE_NONE) 252 return 0; 253 if (opt_reg_cls(f, dst) != opt_reg_cls(f, src)) return 0; 254 if (opt_reg_type(f, dst) != opt_reg_type(f, src)) return 0; 255 out->dst = dst; 256 out->src = src; 257 out->weight = f->blocks[block].frequency ? f->blocks[block].frequency : 1u; 258 return 1; 259 } 260 261 void opt_coalesce_ranges(Func* f, const OptLiveRangeSet* ranges) { 262 if (!f || !ranges || !f->preg_info) return; 263 u32 nregs = opt_reg_count(f); 264 f->opt_coalesce_moves_seen = 0; 265 f->opt_coalesce_candidates = 0; 266 f->opt_coalesce_conflicts = 0; 267 f->opt_coalesce_merge_attempts = 0; 268 f->opt_coalesce_merges = 0; 269 f->opt_coalesce_parent = arena_array(f->arena, u32, nregs ? nregs : 1u); 270 f->opt_coalesce_size = arena_array(f->arena, u32, nregs ? nregs : 1u); 271 for (PReg v = 0; v < nregs; ++v) { 272 f->opt_coalesce_parent[v] = v; 273 f->opt_coalesce_size[v] = 1; 274 } 275 276 CoalesceMove* moves = NULL; 277 u32 nmoves = 0; 278 u32 move_cap = 0; 279 CoalesceCtx ctx; 280 memset(&ctx, 0, sizeof ctx); 281 ctx.f = f; 282 ctx.ranges = ranges; 283 ctx.related_index = arena_array(f->arena, u32, nregs ? nregs : 1u); 284 memset(ctx.related_index, 0xff, 285 sizeof(ctx.related_index[0]) * (nregs ? nregs : 1u)); 286 287 for (u32 b = 0; b < f->nblocks; ++b) { 288 Block* bl = &f->blocks[b]; 289 for (u32 i = 0; i < bl->ninsts; ++i) { 290 if ((IROp)bl->insts[i].op == IR_COPY) ++f->opt_coalesce_moves_seen; 291 CoalesceMove m; 292 if (!collect_move(f, ranges, &bl->insts[i], b, &m)) continue; 293 if (nmoves == move_cap) { 294 u32 ncap = move_cap ? move_cap * 2u : 32u; 295 CoalesceMove* nv = arena_array(f->arena, CoalesceMove, ncap); 296 if (moves) memcpy(nv, moves, sizeof(moves[0]) * nmoves); 297 moves = nv; 298 move_cap = ncap; 299 } 300 moves[nmoves++] = m; 301 coalesce_add_related(&ctx, m.dst); 302 coalesce_add_related(&ctx, m.src); 303 } 304 } 305 f->opt_coalesce_candidates = nmoves; 306 if (!nmoves || ctx.nrelated < 2) goto metrics; 307 308 ctx.conflict_words = (ctx.nrelated * ctx.nrelated + 63u) / 64u; 309 ctx.conflicts = 310 arena_zarray(f->arena, u64, ctx.conflict_words ? ctx.conflict_words : 1u); 311 ctx.unit_conflicts = 312 arena_zarray(f->arena, u64, ctx.conflict_words ? ctx.conflict_words : 1u); 313 for (u32 i = 0; i < ctx.nrelated; ++i) { 314 for (u32 j = i + 1u; j < ctx.nrelated; ++j) { 315 int kind = ranges_overlap_kind(ranges, ctx.related[i], ctx.related[j]); 316 if (kind) { 317 coalesce_set_conflict(&ctx, i, j, kind == 1); 318 ++f->opt_coalesce_conflicts; 319 } 320 } 321 } 322 323 qsort(moves, nmoves, sizeof(moves[0]), move_cmp); 324 for (u32 i = 0; i < nmoves; ++i) { 325 PReg ra = coalesce_find(f, moves[i].dst); 326 PReg rb = coalesce_find(f, moves[i].src); 327 if (ra == rb) continue; 328 ++f->opt_coalesce_merge_attempts; 329 if (group_conflicts(&ctx, ra, rb, moves[i].dst, moves[i].src)) continue; 330 if (!group_constraints_compatible(&ctx, ra, rb)) continue; 331 coalesce_union(f, ra, rb); 332 ++f->opt_coalesce_merges; 333 } 334 335 metrics: 336 metrics_count(f->c, "opt.coalesce.moves_seen", f->opt_coalesce_moves_seen); 337 metrics_count(f->c, "opt.coalesce.candidates", f->opt_coalesce_candidates); 338 metrics_count(f->c, "opt.coalesce.conflicts", f->opt_coalesce_conflicts); 339 metrics_count(f->c, "opt.coalesce.merge_attempts", 340 f->opt_coalesce_merge_attempts); 341 metrics_count(f->c, "opt.coalesce.merges", f->opt_coalesce_merges); 342 } 343 344 void opt_coalesce(Func* f) { 345 if (!f) return; 346 OptLiveInfo live; 347 opt_live_blocks(f, &live); 348 OptLiveRangeSet ranges; 349 opt_live_ranges_build(f, &live, &ranges); 350 opt_coalesce_ranges(f, &ranges); 351 }