pass_live.c (29308B)
1 #include <stdlib.h> 2 #include <string.h> 3 4 #include "core/arena.h" 5 #include "core/slice.h" 6 #include "core/strbuf.h" 7 #include "opt/opt.h" 8 #include "opt/opt_internal.h" 9 10 static u32 opt_bit_words(u32 nregs) { return (nregs + 63u) / 64u; } 11 12 static void opt_bitset_init(Arena* arena, OptBitset* bs, u32 nwords) { 13 memset(bs, 0, sizeof *bs); 14 bs->arena = arena; 15 bs->nwords = nwords; 16 bs->words = arena_zarray(arena, u64, nwords ? nwords : 1u); 17 } 18 19 static int opt_bitset_grow(OptBitset* bs, u32 need_words) { 20 if (!bs || need_words <= bs->nwords) return 1; 21 if (!bs->arena) return 0; 22 u32 nwords = bs->nwords ? bs->nwords : 1u; 23 while (nwords < need_words) nwords *= 2u; 24 u64* words = arena_zarray(bs->arena, u64, nwords); 25 if (bs->words && bs->nwords) 26 memcpy(words, bs->words, sizeof(words[0]) * bs->nwords); 27 bs->words = words; 28 bs->nwords = nwords; 29 return 1; 30 } 31 32 static void opt_bitset_trim(OptBitset* bs) { 33 u32 n = bs->nwords; 34 while (n && bs->words[n - 1u] == 0) --n; 35 bs->active_words = n; 36 } 37 38 void opt_bitset_clear(OptBitset* bs) { 39 if (!bs || !bs->words) return; 40 for (u32 i = 0; i < bs->active_words; ++i) bs->words[i] = 0; 41 bs->active_words = 0; 42 } 43 44 void opt_bitset_set(OptBitset* bs, PReg r) { 45 if (!bs) return; 46 u32 w = r / 64u; 47 if (w >= bs->nwords && !opt_bitset_grow(bs, w + 1u)) return; 48 bs->words[w] |= 1ull << (r % 64u); 49 if (bs->active_words <= w) bs->active_words = w + 1u; 50 } 51 52 void opt_bitset_clear_bit(OptBitset* bs, PReg r) { 53 if (!bs || !bs->words) return; 54 u32 w = r / 64u; 55 if (w >= bs->active_words) return; 56 bs->words[w] &= ~(1ull << (r % 64u)); 57 if (w + 1u == bs->active_words && bs->words[w] == 0) opt_bitset_trim(bs); 58 } 59 60 int opt_bitset_has(const OptBitset* bs, PReg r) { 61 if (!bs || !bs->words) return 0; 62 u32 w = r / 64u; 63 if (w >= bs->active_words) return 0; 64 return (bs->words[w] & (1ull << (r % 64u))) != 0; 65 } 66 67 int opt_bitset_copy(OptBitset* dst, const OptBitset* src) { 68 int changed = 0; 69 u32 n; 70 u32 old_active; 71 if (!dst || !src || !src->words) return 0; 72 if (src->active_words > dst->nwords && 73 !opt_bitset_grow(dst, src->active_words)) 74 return 0; 75 old_active = dst->active_words; 76 n = src->active_words; 77 for (u32 i = 0; i < n; ++i) { 78 changed |= dst->words[i] != src->words[i]; 79 dst->words[i] = src->words[i]; 80 } 81 for (u32 i = n; i < old_active; ++i) { 82 changed |= dst->words[i] != 0; 83 dst->words[i] = 0; 84 } 85 dst->active_words = n; 86 if (n && dst->words[n - 1u] == 0) opt_bitset_trim(dst); 87 return changed; 88 } 89 90 int opt_bitset_union(OptBitset* dst, const OptBitset* src) { 91 int changed = 0; 92 u32 n; 93 if (!dst || !src || !src->words) return 0; 94 if (src->active_words > dst->nwords && 95 !opt_bitset_grow(dst, src->active_words)) 96 return 0; 97 n = src->active_words; 98 for (u32 i = 0; i < n; ++i) { 99 u64 old = dst->words[i]; 100 dst->words[i] |= src->words[i]; 101 changed |= dst->words[i] != old; 102 } 103 if (changed) opt_bitset_trim(dst); 104 return changed; 105 } 106 107 int opt_bitset_union_and_not(OptBitset* dst, const OptBitset* src, 108 const OptBitset* not_bits) { 109 int changed = 0; 110 u32 n; 111 if (!dst || !src || !src->words) return 0; 112 if (src->active_words > dst->nwords && 113 !opt_bitset_grow(dst, src->active_words)) 114 return 0; 115 n = src->active_words; 116 for (u32 i = 0; i < n; ++i) { 117 u64 mask = 118 (not_bits && i < not_bits->active_words) ? not_bits->words[i] : 0; 119 u64 old = dst->words[i]; 120 dst->words[i] |= src->words[i] & ~mask; 121 changed |= dst->words[i] != old; 122 } 123 if (changed) opt_bitset_trim(dst); 124 return changed; 125 } 126 127 void opt_bitset_iter_set(const OptBitset* bs, OptBitsetIterFn fn, void* arg) { 128 if (!bs || !bs->words || !fn) return; 129 for (u32 w = 0; w < bs->active_words; ++w) { 130 u64 bits = bs->words[w]; 131 while (bits) { 132 u32 bit = (u32)__builtin_ctzll(bits); 133 fn(w * 64u + bit, arg); 134 bits &= bits - 1u; 135 } 136 } 137 } 138 139 typedef struct LiveUseDefCtx { 140 OptBitset* use; 141 OptBitset* def; 142 } LiveUseDefCtx; 143 144 static void live_collect_use_def(Func* f, Inst* in, Operand* op, int is_def, 145 void* arg) { 146 (void)in; 147 LiveUseDefCtx* c = (LiveUseDefCtx*)arg; 148 if (op->kind != OPK_REG) return; 149 PReg r = (PReg)op->v.reg; 150 if (r == PREG_NONE || r == 0 || r >= opt_reg_count(f)) return; 151 if (is_def) { 152 opt_bitset_set(c->def, r); 153 } else if (!opt_bitset_has(c->def, r)) { 154 opt_bitset_set(c->use, r); 155 } 156 } 157 158 static void live_count_bit(PReg r, void* arg) { 159 (void)r; 160 ++*(u64*)arg; 161 } 162 163 static u64 live_count_set_bits(const OptBitset* bs) { 164 u64 n = 0; 165 opt_bitset_iter_set(bs, live_count_bit, &n); 166 return n; 167 } 168 169 static void live_metric_clear(OptLiveInfo* live, OptBitset* bs) { 170 u32 touched = bs ? bs->active_words : 0; 171 if (live) { 172 u64 old = live->bitset_words_touched; 173 live->bitset_words_touched = old + touched; 174 } 175 opt_bitset_clear(bs); 176 } 177 178 static int live_metric_copy(OptLiveInfo* live, OptBitset* dst, 179 const OptBitset* src) { 180 u32 n = 0; 181 if (dst && src) { 182 n = src->active_words; 183 if (dst->active_words > n) n += dst->active_words - n; 184 } 185 if (live) live->bitset_words_touched += n; 186 return opt_bitset_copy(dst, src); 187 } 188 189 static int live_metric_union(OptLiveInfo* live, OptBitset* dst, 190 const OptBitset* src) { 191 u32 n = 0; 192 if (dst && src) n = src->active_words; 193 if (live) live->bitset_words_touched += n; 194 return opt_bitset_union(dst, src); 195 } 196 197 static int live_metric_union_and_not(OptLiveInfo* live, OptBitset* dst, 198 const OptBitset* src, 199 const OptBitset* not_bits) { 200 u32 n = 0; 201 (void)not_bits; 202 if (dst && src) n = src->active_words; 203 if (live) live->bitset_words_touched += n; 204 return opt_bitset_union_and_not(dst, src, not_bits); 205 } 206 207 static void live_recompute_metrics(OptLiveInfo* live) { 208 live->active_words = 0; 209 live->block_bytes = 0; 210 live->set_bit_scans = 0; 211 for (u32 b = 0; b < live->f->nblocks; ++b) { 212 OptBlockLive* bl = &live->blocks[b]; 213 live->active_words += bl->live_in.active_words; 214 live->active_words += bl->live_out.active_words; 215 live->active_words += bl->live_use.active_words; 216 live->active_words += bl->live_def.active_words; 217 live->set_bit_scans += live_count_set_bits(&bl->live_in); 218 live->set_bit_scans += live_count_set_bits(&bl->live_out); 219 live->set_bit_scans += live_count_set_bits(&bl->live_use); 220 live->set_bit_scans += live_count_set_bits(&bl->live_def); 221 } 222 live->block_bytes = live->active_words * (u64)sizeof(u64); 223 } 224 225 static void live_worklist_push(u32* worklist, u8* queued, u32* n, u32 b) { 226 if (queued[b]) return; 227 queued[b] = 1; 228 worklist[(*n)++] = b; 229 } 230 231 void opt_live_blocks(Func* f, OptLiveInfo* live) { 232 memset(live, 0, sizeof *live); 233 live->arena = f->arena; 234 live->f = f; 235 live->words = opt_bit_words(opt_reg_count(f)); 236 f->opt_live_words = (u16)live->words; 237 live->blocks = 238 arena_zarray(f->arena, OptBlockLive, f->nblocks ? f->nblocks : 1u); 239 240 for (u32 b = 0; b < f->nblocks; ++b) { 241 Block* bl = &f->blocks[b]; 242 OptBlockLive* lb = &live->blocks[b]; 243 opt_bitset_init(f->arena, &lb->live_in, 0); 244 opt_bitset_init(f->arena, &lb->live_out, 0); 245 opt_bitset_init(f->arena, &lb->live_use, 0); 246 opt_bitset_init(f->arena, &lb->live_def, 0); 247 248 LiveUseDefCtx ctx; 249 ctx.use = &lb->live_use; 250 ctx.def = &lb->live_def; 251 for (u32 i = 0; i < bl->ninsts; ++i) 252 opt_walk_inst_operands(f, &bl->insts[i], live_collect_use_def, &ctx); 253 } 254 255 OptBitset new_out; 256 OptBitset tmp; 257 opt_bitset_init(f->arena, &new_out, 0); 258 opt_bitset_init(f->arena, &tmp, 0); 259 260 u32* worklist = arena_array(f->arena, u32, f->nblocks ? f->nblocks : 1u); 261 u8* queued = arena_zarray(f->arena, u8, f->nblocks ? f->nblocks : 1u); 262 u32 nwork = 0; 263 for (u32 bi = f->nblocks; bi > 0; --bi) 264 live_worklist_push(worklist, queued, &nwork, bi - 1u); 265 266 while (nwork) { 267 u32 b = worklist[--nwork]; 268 queued[b] = 0; 269 Block* bl = &f->blocks[b]; 270 OptBlockLive* lb = &live->blocks[b]; 271 ++live->dataflow_iterations; 272 ++live->dataflow_block_visits; 273 live_metric_clear(live, &new_out); 274 live_metric_clear(live, &tmp); 275 for (u32 s = 0; s < bl->nsucc; ++s) { 276 u32 t = bl->succ[s]; 277 if (t < f->nblocks) 278 live_metric_union(live, &new_out, &live->blocks[t].live_in); 279 } 280 live_metric_copy(live, &tmp, &lb->live_use); 281 live_metric_union_and_not(live, &tmp, &new_out, &lb->live_def); 282 (void)live_metric_copy(live, &lb->live_out, &new_out); 283 if (live_metric_copy(live, &lb->live_in, &tmp)) { 284 for (u32 p = 0; p < bl->npreds; ++p) { 285 u32 pred = bl->preds[p]; 286 if (pred < f->nblocks) 287 live_worklist_push(worklist, queued, &nwork, pred); 288 } 289 } 290 } 291 292 live_recompute_metrics(live); 293 } 294 295 static void dump_write(Writer* w, const char* s) { 296 kit_writer_write(w, s, slice_from_cstr(s).len); 297 } 298 299 static void dump_sb(Writer* w, const StrBuf* sb) { 300 kit_writer_write(w, strbuf_cstr(sb), strbuf_len(sb)); 301 } 302 303 static void dump_bit(PReg r, void* arg) { 304 Writer* w = (Writer*)arg; 305 char buf[32]; 306 StrBuf sb; 307 strbuf_init(&sb, buf, sizeof buf); 308 strbuf_puts(&sb, " r"); 309 strbuf_put_u64(&sb, (u64)(unsigned)r); 310 dump_sb(w, &sb); 311 } 312 313 static void dump_set(Writer* w, const char* name, const OptBitset* bs) { 314 dump_write(w, " "); 315 dump_write(w, name); 316 dump_write(w, ":"); 317 opt_bitset_iter_set(bs, dump_bit, w); 318 dump_write(w, "\n"); 319 } 320 321 void opt_live_dump_blocks(Func* f, const OptLiveInfo* live, Writer* w) { 322 (void)f; 323 if (!live || !w) return; 324 for (u32 b = 0; b < live->f->nblocks; ++b) { 325 char buf[64]; 326 StrBuf sb; 327 strbuf_init(&sb, buf, sizeof buf); 328 strbuf_puts(&sb, "block "); 329 strbuf_put_u64(&sb, (u64)(unsigned)b); 330 strbuf_putc(&sb, '\n'); 331 dump_sb(w, &sb); 332 dump_set(w, "use", &live->blocks[b].live_use); 333 dump_set(w, "def", &live->blocks[b].live_def); 334 dump_set(w, "in", &live->blocks[b].live_in); 335 dump_set(w, "out", &live->blocks[b].live_out); 336 } 337 } 338 339 typedef struct RangeBuildCtx { 340 Func* f; 341 OptLiveRangeSet* ranges; 342 u32* last_range_by_preg; 343 u32* open_end_by_preg; 344 u32* open_gen_by_preg; 345 u32 open_gen; 346 PReg* open_pregs; 347 u32 nopen_pregs; 348 u32 open_pregs_cap; 349 PReg* range_pregs; 350 u32 nrange_pregs; 351 u32 range_pregs_cap; 352 u32* raw_points; 353 u32 nraw_points; 354 u32 raw_points_cap; 355 } RangeBuildCtx; 356 357 typedef struct RangeInstRefs { 358 PReg* uses; 359 PReg* defs; 360 u32 nuses; 361 u32 ndefs; 362 u32 use_cap; 363 u32 def_cap; 364 u32* use_mark; 365 u32* def_mark; 366 u32 gen; 367 } RangeInstRefs; 368 369 typedef struct RangeLivePregs { 370 Func* f; 371 PReg* regs; 372 u32 nregs; 373 u32 cap; 374 u32* pos_by_preg; 375 } RangeLivePregs; 376 377 static void range_refs_init(Func* f, RangeInstRefs* refs) { 378 memset(refs, 0, sizeof *refs); 379 refs->use_mark = 380 arena_zarray(f->arena, u32, opt_reg_count(f) ? opt_reg_count(f) : 1u); 381 refs->def_mark = 382 arena_zarray(f->arena, u32, opt_reg_count(f) ? opt_reg_count(f) : 1u); 383 refs->gen = 1; 384 } 385 386 static void range_refs_reset(Func* f, RangeInstRefs* refs) { 387 refs->nuses = 0; 388 refs->ndefs = 0; 389 ++refs->gen; 390 if (refs->gen) return; 391 memset( 392 refs->use_mark, 0, 393 sizeof(refs->use_mark[0]) * (opt_reg_count(f) ? opt_reg_count(f) : 1u)); 394 memset( 395 refs->def_mark, 0, 396 sizeof(refs->def_mark[0]) * (opt_reg_count(f) ? opt_reg_count(f) : 1u)); 397 refs->gen = 1; 398 } 399 400 static void range_refs_push_use(Func* f, RangeInstRefs* refs, PReg r) { 401 if (refs->use_mark[r] == refs->gen) return; 402 refs->use_mark[r] = refs->gen; 403 if (refs->nuses == refs->use_cap) { 404 u32 ncap = refs->use_cap ? refs->use_cap * 2u : 8u; 405 PReg* nr = arena_array(f->arena, PReg, ncap); 406 if (refs->uses) memcpy(nr, refs->uses, sizeof(refs->uses[0]) * refs->nuses); 407 refs->uses = nr; 408 refs->use_cap = ncap; 409 } 410 refs->uses[refs->nuses++] = r; 411 } 412 413 static void range_refs_push_def(Func* f, RangeInstRefs* refs, PReg r) { 414 if (refs->def_mark[r] == refs->gen) return; 415 refs->def_mark[r] = refs->gen; 416 if (refs->ndefs == refs->def_cap) { 417 u32 ncap = refs->def_cap ? refs->def_cap * 2u : 4u; 418 PReg* nr = arena_array(f->arena, PReg, ncap); 419 if (refs->defs) memcpy(nr, refs->defs, sizeof(refs->defs[0]) * refs->ndefs); 420 refs->defs = nr; 421 refs->def_cap = ncap; 422 } 423 refs->defs[refs->ndefs++] = r; 424 } 425 426 static int range_refs_has_def(const RangeInstRefs* refs, PReg r) { 427 return refs->def_mark[r] == refs->gen; 428 } 429 430 static void range_live_pregs_init(Func* f, RangeLivePregs* live) { 431 memset(live, 0, sizeof *live); 432 live->f = f; 433 live->pos_by_preg = 434 arena_zarray(f->arena, u32, opt_reg_count(f) ? opt_reg_count(f) : 1u); 435 } 436 437 static void range_live_pregs_clear(RangeLivePregs* live) { 438 for (u32 i = 0; i < live->nregs; ++i) live->pos_by_preg[live->regs[i]] = 0; 439 live->nregs = 0; 440 } 441 442 static void range_live_pregs_add(RangeLivePregs* live, PReg r) { 443 Func* f = live->f; 444 if (r == PREG_NONE || r == 0 || r >= opt_reg_count(f)) return; 445 if (live->pos_by_preg[r]) return; 446 if (live->nregs == live->cap) { 447 u32 ncap = live->cap ? live->cap * 2u : 64u; 448 PReg* nr = arena_array(f->arena, PReg, ncap); 449 if (live->regs) memcpy(nr, live->regs, sizeof(live->regs[0]) * live->nregs); 450 live->regs = nr; 451 live->cap = ncap; 452 } 453 live->regs[live->nregs] = r; 454 live->pos_by_preg[r] = live->nregs + 1u; 455 ++live->nregs; 456 } 457 458 static void range_live_pregs_remove(RangeLivePregs* live, PReg r) { 459 if (r == PREG_NONE || r == 0 || r >= opt_reg_count(live->f)) return; 460 u32 pos = live->pos_by_preg[r]; 461 if (!pos) return; 462 u32 idx = pos - 1u; 463 PReg last = live->regs[live->nregs - 1u]; 464 live->regs[idx] = last; 465 live->pos_by_preg[last] = idx + 1u; 466 live->pos_by_preg[r] = 0; 467 --live->nregs; 468 } 469 470 static void range_live_pregs_add_bit(PReg r, void* arg) { 471 range_live_pregs_add((RangeLivePregs*)arg, r); 472 } 473 474 static void range_collect_bits(Func* f, Inst* in, Operand* op, int is_def, 475 void* arg) { 476 (void)in; 477 RangeInstRefs* refs = (RangeInstRefs*)arg; 478 if (op->kind != OPK_REG) return; 479 PReg r = (PReg)op->v.reg; 480 if (r == PREG_NONE || r == 0 || r >= opt_reg_count(f)) return; 481 if (is_def) 482 range_refs_push_def(f, refs, r); 483 else 484 range_refs_push_use(f, refs, r); 485 } 486 487 static void range_push_raw_point(RangeBuildCtx* c, u32 p) { 488 if (c->nraw_points == c->raw_points_cap) { 489 u32 ncap = c->raw_points_cap ? c->raw_points_cap * 2u : 64u; 490 u32* np = arena_array(c->f->arena, u32, ncap); 491 if (c->raw_points) 492 memcpy(np, c->raw_points, sizeof(c->raw_points[0]) * c->nraw_points); 493 c->raw_points = np; 494 c->raw_points_cap = ncap; 495 } 496 c->raw_points[c->nraw_points++] = p; 497 } 498 499 static void range_push_open_preg(RangeBuildCtx* c, PReg r) { 500 if (c->nopen_pregs == c->open_pregs_cap) { 501 u32 ncap = c->open_pregs_cap ? c->open_pregs_cap * 2u : 64u; 502 PReg* nr = arena_array(c->f->arena, PReg, ncap); 503 if (c->open_pregs) 504 memcpy(nr, c->open_pregs, sizeof(c->open_pregs[0]) * c->nopen_pregs); 505 c->open_pregs = nr; 506 c->open_pregs_cap = ncap; 507 } 508 c->open_pregs[c->nopen_pregs++] = r; 509 } 510 511 static void range_push_range_preg(RangeBuildCtx* c, PReg r) { 512 if (c->nrange_pregs == c->range_pregs_cap) { 513 u32 ncap = c->range_pregs_cap ? c->range_pregs_cap * 2u : 64u; 514 PReg* nr = arena_array(c->f->arena, PReg, ncap); 515 if (c->range_pregs) 516 memcpy(nr, c->range_pregs, sizeof(c->range_pregs[0]) * c->nrange_pregs); 517 c->range_pregs = nr; 518 c->range_pregs_cap = ncap; 519 } 520 c->range_pregs[c->nrange_pregs++] = r; 521 } 522 523 static void range_block_reset(RangeBuildCtx* c) { 524 c->nopen_pregs = 0; 525 ++c->open_gen; 526 if (c->open_gen) return; 527 memset(c->open_gen_by_preg, 0, 528 sizeof(c->open_gen_by_preg[0]) * 529 (opt_reg_count(c->f) ? opt_reg_count(c->f) : 1u)); 530 c->open_gen = 1; 531 } 532 533 static int range_is_open(const RangeBuildCtx* c, PReg r) { 534 return r < opt_reg_count(c->f) && c->open_gen_by_preg[r] == c->open_gen; 535 } 536 537 static u32 range_open_end(const RangeBuildCtx* c, PReg r) { 538 return range_is_open(c, r) ? c->open_end_by_preg[r] : OPT_RANGE_NONE; 539 } 540 541 static void range_set_open_end(RangeBuildCtx* c, PReg r, u32 end) { 542 if (r == PREG_NONE || r == 0 || r >= opt_reg_count(c->f)) return; 543 if (!range_is_open(c, r)) { 544 c->open_gen_by_preg[r] = c->open_gen; 545 range_push_open_preg(c, r); 546 } 547 c->open_end_by_preg[r] = end; 548 } 549 550 static void range_close_open_end(RangeBuildCtx* c, PReg r) { 551 if (r == PREG_NONE || r == 0 || r >= opt_reg_count(c->f)) return; 552 c->open_gen_by_preg[r] = 0; 553 } 554 555 static void range_append(RangeBuildCtx* c, PReg r, u32 start, u32 end, 556 u32 block, int whole_block) { 557 if (r == PREG_NONE || r == 0 || r >= opt_reg_count(c->f)) return; 558 if (end <= start) end = start + 1u; 559 OptLiveRangeSet* ranges = c->ranges; 560 if (ranges->nranges == ranges->cap) { 561 u32 ncap = ranges->cap ? ranges->cap * 2u : 64u; 562 OptLiveRange* nr = arena_array(c->f->arena, OptLiveRange, ncap); 563 if (ranges->ranges) 564 memcpy(nr, ranges->ranges, sizeof(ranges->ranges[0]) * ranges->nranges); 565 ranges->ranges = nr; 566 ranges->cap = ncap; 567 } 568 u32 idx = ranges->nranges++; 569 OptLiveRange* lr = &ranges->ranges[idx]; 570 memset(lr, 0, sizeof *lr); 571 lr->preg = r; 572 lr->start = start; 573 lr->end = end; 574 lr->raw_start = start; 575 lr->raw_end = end; 576 lr->next = OPT_RANGE_NONE; 577 lr->block = block; 578 lr->whole_block = whole_block ? 1u : 0u; 579 if (ranges->first_range_by_preg[r] == OPT_RANGE_NONE) { 580 ranges->first_range_by_preg[r] = idx; 581 range_push_range_preg(c, r); 582 } else { 583 ranges->ranges[c->last_range_by_preg[r]].next = idx; 584 } 585 c->last_range_by_preg[r] = idx; 586 range_push_raw_point(c, start); 587 range_push_raw_point(c, end); 588 if (whole_block) ++ranges->whole_block_spans; 589 } 590 591 typedef struct RangeOpenCtx { 592 RangeBuildCtx* build; 593 u32 raw_end; 594 } RangeOpenCtx; 595 596 static void range_open_at_end(PReg r, void* arg) { 597 RangeOpenCtx* c = (RangeOpenCtx*)arg; 598 range_set_open_end(c->build, r, c->raw_end); 599 } 600 601 typedef struct RangeCloseCtx { 602 RangeBuildCtx* build; 603 u32 start; 604 u32 block; 605 } RangeCloseCtx; 606 607 static void range_close_def(PReg r, void* arg) { 608 RangeCloseCtx* c = (RangeCloseCtx*)arg; 609 RangeBuildCtx* b = c->build; 610 if (r == PREG_NONE || r == 0 || r >= opt_reg_count(b->f)) return; 611 u32 open_end = range_open_end(b, r); 612 if (open_end != OPT_RANGE_NONE) { 613 range_append(b, r, c->start, open_end, c->block, 0); 614 range_close_open_end(b, r); 615 } else { 616 range_append(b, r, c->start, c->start + 1u, c->block, 0); 617 } 618 b->ranges->def_freq_by_preg[r] += b->f->blocks[c->block].frequency; 619 } 620 621 typedef struct RangeUseCtx { 622 RangeBuildCtx* build; 623 u32 end; 624 u32 block; 625 } RangeUseCtx; 626 627 static void range_open_use(PReg r, void* arg) { 628 RangeUseCtx* c = (RangeUseCtx*)arg; 629 RangeBuildCtx* b = c->build; 630 if (r == PREG_NONE || r == 0 || r >= opt_reg_count(b->f)) return; 631 if (!range_is_open(b, r)) range_set_open_end(b, r, c->end); 632 b->ranges->use_freq_by_preg[r] += b->f->blocks[c->block].frequency; 633 } 634 635 typedef struct RangeBlockFreqCtx { 636 OptLiveRangeSet* ranges; 637 u32 freq; 638 } RangeBlockFreqCtx; 639 640 static void range_add_block_freq(PReg r, void* arg) { 641 RangeBlockFreqCtx* c = (RangeBlockFreqCtx*)arg; 642 c->ranges->live_block_freq_by_preg[r] += c->freq; 643 } 644 645 typedef struct RangeCallCtx { 646 Func* f; 647 OptLiveRangeSet* ranges; 648 const RangeInstRefs* refs; 649 u32 freq; 650 } RangeCallCtx; 651 652 static void range_add_live_across_call(PReg r, void* arg) { 653 RangeCallCtx* c = (RangeCallCtx*)arg; 654 if (r == PREG_NONE || r == 0 || r >= opt_reg_count(c->f)) return; 655 if (range_refs_has_def(c->refs, r)) return; 656 c->ranges->live_across_call_freq_by_preg[r] += c->freq; 657 } 658 659 static void range_live_pregs_update_before(RangeLivePregs* live, 660 const RangeInstRefs* refs) { 661 for (u32 i = 0; i < refs->ndefs; ++i) 662 range_live_pregs_remove(live, refs->defs[i]); 663 for (u32 i = 0; i < refs->nuses; ++i) 664 range_live_pregs_add(live, refs->uses[i]); 665 } 666 667 static int u32_cmp(const void* a, const void* b) { 668 u32 av = *(const u32*)a; 669 u32 bv = *(const u32*)b; 670 return (av > bv) - (av < bv); 671 } 672 673 static u32 range_point_index(const u32* points, u32 npoints, u32 raw) { 674 u32 lo = 0; 675 u32 hi = npoints; 676 while (lo < hi) { 677 u32 mid = lo + (hi - lo) / 2u; 678 if (points[mid] < raw) 679 lo = mid + 1u; 680 else 681 hi = mid; 682 } 683 return lo; 684 } 685 686 static void range_compress_points(OptLiveRangeSet* ranges, 687 RangeBuildCtx* build) { 688 if (build->nraw_points == 0) return; 689 qsort(build->raw_points, build->nraw_points, sizeof(build->raw_points[0]), 690 u32_cmp); 691 u32 unique = 0; 692 for (u32 i = 0; i < build->nraw_points; ++i) { 693 if (unique == 0 || build->raw_points[i] != build->raw_points[unique - 1u]) 694 build->raw_points[unique++] = build->raw_points[i]; 695 } 696 ranges->point_count = unique; 697 for (u32 i = 0; i < ranges->nranges; ++i) { 698 OptLiveRange* r = &ranges->ranges[i]; 699 r->start = range_point_index(build->raw_points, unique, r->start); 700 r->end = range_point_index(build->raw_points, unique, r->end); 701 if (r->end <= r->start) r->end = r->start + 1u; 702 u32 len = r->end - r->start; 703 ranges->range_point_visits += len; 704 ranges->live_length_by_preg[r->preg] += len; 705 if (ranges->max_live_length < len) ranges->max_live_length = len; 706 } 707 } 708 709 void opt_live_ranges_build(Func* f, const OptLiveInfo* live, 710 OptLiveRangeSet* ranges) { 711 memset(ranges, 0, sizeof *ranges); 712 if (!f || !live) return; 713 ranges->arena = f->arena; 714 ranges->f = f; 715 ranges->first_range_by_preg = 716 arena_array(f->arena, u32, opt_reg_count(f) ? opt_reg_count(f) : 1u); 717 ranges->live_length_by_preg = 718 arena_zarray(f->arena, u32, opt_reg_count(f) ? opt_reg_count(f) : 1u); 719 ranges->use_freq_by_preg = 720 arena_zarray(f->arena, u32, opt_reg_count(f) ? opt_reg_count(f) : 1u); 721 ranges->def_freq_by_preg = 722 arena_zarray(f->arena, u32, opt_reg_count(f) ? opt_reg_count(f) : 1u); 723 ranges->live_block_freq_by_preg = 724 arena_zarray(f->arena, u32, opt_reg_count(f) ? opt_reg_count(f) : 1u); 725 ranges->live_across_call_freq_by_preg = 726 arena_zarray(f->arena, u32, opt_reg_count(f) ? opt_reg_count(f) : 1u); 727 ranges->spill_cost_by_preg = 728 arena_zarray(f->arena, u32, opt_reg_count(f) ? opt_reg_count(f) : 1u); 729 memset(ranges->first_range_by_preg, 0xff, 730 sizeof(ranges->first_range_by_preg[0]) * 731 (opt_reg_count(f) ? opt_reg_count(f) : 1u)); 732 733 RangeBuildCtx build; 734 memset(&build, 0, sizeof build); 735 build.f = f; 736 build.ranges = ranges; 737 build.last_range_by_preg = 738 arena_array(f->arena, u32, opt_reg_count(f) ? opt_reg_count(f) : 1u); 739 build.open_end_by_preg = 740 arena_array(f->arena, u32, opt_reg_count(f) ? opt_reg_count(f) : 1u); 741 build.open_gen_by_preg = 742 arena_zarray(f->arena, u32, opt_reg_count(f) ? opt_reg_count(f) : 1u); 743 build.open_gen = 1; 744 memset(build.last_range_by_preg, 0xff, 745 sizeof(build.last_range_by_preg[0]) * 746 (opt_reg_count(f) ? opt_reg_count(f) : 1u)); 747 748 u32* block_base = arena_array(f->arena, u32, f->nblocks ? f->nblocks : 1u); 749 u32 raw = 0; 750 for (u32 b = 0; b < f->nblocks; ++b) { 751 block_base[b] = raw; 752 raw += f->blocks[b].ninsts ? f->blocks[b].ninsts : 1u; 753 } 754 ranges->raw_point_count = raw + 1u; 755 756 OptBitset block_live; 757 opt_bitset_init(f->arena, &block_live, live->words); 758 RangeInstRefs refs; 759 range_refs_init(f, &refs); 760 RangeLivePregs live_pregs; 761 range_live_pregs_init(f, &live_pregs); 762 763 for (u32 b = 0; b < f->nblocks; ++b) { 764 Block* bl = &f->blocks[b]; 765 const OptBlockLive* lb = &live->blocks[b]; 766 range_block_reset(&build); 767 range_live_pregs_clear(&live_pregs); 768 u32 raw_start = block_base[b]; 769 u32 raw_end = raw_start + (bl->ninsts ? bl->ninsts : 1u); 770 RangeOpenCtx open_ctx = {&build, raw_end}; 771 ranges->live_words_touched += lb->live_out.active_words; 772 opt_bitset_iter_set(&lb->live_out, range_open_at_end, &open_ctx); 773 opt_bitset_iter_set(&lb->live_out, range_live_pregs_add_bit, &live_pregs); 774 775 RangeBlockFreqCtx freq_ctx = {ranges, bl->frequency}; 776 ranges->live_words_touched += block_live.nwords < lb->live_in.active_words 777 ? block_live.nwords 778 : lb->live_in.active_words; 779 if (block_live.active_words > lb->live_in.active_words) 780 ranges->live_words_touched += 781 block_live.active_words - lb->live_in.active_words; 782 opt_bitset_copy(&block_live, &lb->live_in); 783 ranges->live_words_touched += block_live.nwords < lb->live_out.active_words 784 ? block_live.nwords 785 : lb->live_out.active_words; 786 opt_bitset_union(&block_live, &lb->live_out); 787 ranges->live_words_touched += block_live.active_words; 788 opt_bitset_iter_set(&block_live, range_add_block_freq, &freq_ctx); 789 790 for (u32 ri = bl->ninsts; ri > 0; --ri) { 791 u32 i = ri - 1u; 792 Inst* in = &bl->insts[i]; 793 range_refs_reset(f, &refs); 794 opt_walk_inst_operands(f, in, range_collect_bits, &refs); 795 796 if ((IROp)in->op == IR_CALL) { 797 RangeCallCtx call_ctx = {f, ranges, &refs, bl->frequency}; 798 ranges->live_words_touched += live_pregs.nregs; 799 for (u32 k = 0; k < live_pregs.nregs; ++k) 800 range_add_live_across_call(live_pregs.regs[k], &call_ctx); 801 } 802 803 /* Incoming parameters are all delivered simultaneously by the ABI at 804 * function entry, so their values are mutually live there even though the 805 * param_decl markers are sequenced. Anchor every param_decl def at the 806 * entry block's base point so the params interfere with one another; this 807 * stops the allocator from coalescing a dead param (e.g. an unused arg) 808 * into a live param's incoming register, which the entry-bind parallel 809 * copy could not then resolve (two binds targeting one register). */ 810 u32 def_pos = ((IROp)in->op == IR_PARAM_DECL) ? raw_start : raw_start + i; 811 RangeCloseCtx close_ctx = {&build, def_pos, b}; 812 for (u32 k = 0; k < refs.ndefs; ++k) 813 range_close_def(refs.defs[k], &close_ctx); 814 RangeUseCtx use_ctx = {&build, raw_start + i + 1u, b}; 815 for (u32 k = 0; k < refs.nuses; ++k) 816 range_open_use(refs.uses[k], &use_ctx); 817 range_live_pregs_update_before(&live_pregs, &refs); 818 } 819 820 for (u32 oi = 0; oi < build.nopen_pregs; ++oi) { 821 PReg r = build.open_pregs[oi]; 822 if (!range_is_open(&build, r)) continue; 823 u32 open_end = range_open_end(&build, r); 824 if (open_end == OPT_RANGE_NONE) continue; 825 int whole_block = open_end == raw_end && 826 !opt_bitset_has(&lb->live_use, r) && 827 !opt_bitset_has(&lb->live_def, r); 828 range_append(&build, r, raw_start, open_end, b, whole_block); 829 range_close_open_end(&build, r); 830 } 831 ranges->preg_scans += build.nopen_pregs; 832 } 833 834 range_compress_points(ranges, &build); 835 836 for (u32 vi = 0; vi < build.nrange_pregs; ++vi) { 837 PReg r = build.range_pregs[vi]; 838 u32 nranges = 0; 839 for (u32 ri = ranges->first_range_by_preg[r]; ri != OPT_RANGE_NONE; 840 ri = ranges->ranges[ri].next) 841 ++nranges; 842 if (ranges->max_ranges_per_preg < nranges) 843 ranges->max_ranges_per_preg = nranges; 844 ranges->spill_cost_by_preg[r] = (ranges->use_freq_by_preg[r] * 2u) + 845 ranges->def_freq_by_preg[r] + 846 ranges->live_across_call_freq_by_preg[r] + 847 ranges->live_block_freq_by_preg[r]; 848 } 849 ranges->preg_scans += build.nrange_pregs; 850 } 851 852 void opt_live_dump_ranges(Func* f, const OptLiveRangeSet* ranges, Writer* w) { 853 (void)f; 854 if (!ranges || !w) return; 855 char buf[160]; 856 StrBuf sb; 857 strbuf_init(&sb, buf, sizeof buf); 858 strbuf_puts(&sb, "ranges total="); 859 strbuf_put_u64(&sb, (u64)(unsigned)ranges->nranges); 860 strbuf_puts(&sb, " points="); 861 strbuf_put_u64(&sb, (u64)(unsigned)ranges->point_count); 862 strbuf_puts(&sb, " raw_points="); 863 strbuf_put_u64(&sb, (u64)(unsigned)ranges->raw_point_count); 864 strbuf_puts(&sb, " whole_block="); 865 strbuf_put_u64(&sb, (u64)(unsigned)ranges->whole_block_spans); 866 strbuf_putc(&sb, '\n'); 867 dump_sb(w, &sb); 868 for (PReg r = 1; r < opt_reg_count(ranges->f); ++r) { 869 if (ranges->first_range_by_preg[r] == OPT_RANGE_NONE) continue; 870 strbuf_reset(&sb); 871 strbuf_putc(&sb, 'r'); 872 strbuf_put_u64(&sb, (u64)(unsigned)r); 873 strbuf_puts(&sb, " len="); 874 strbuf_put_u64(&sb, (u64)(unsigned)ranges->live_length_by_preg[r]); 875 strbuf_puts(&sb, " spill="); 876 strbuf_put_u64(&sb, (u64)(unsigned)ranges->spill_cost_by_preg[r]); 877 strbuf_putc(&sb, ':'); 878 dump_sb(w, &sb); 879 for (u32 ri = ranges->first_range_by_preg[r]; ri != OPT_RANGE_NONE; 880 ri = ranges->ranges[ri].next) { 881 const OptLiveRange* lr = &ranges->ranges[ri]; 882 strbuf_reset(&sb); 883 strbuf_puts(&sb, " ["); 884 strbuf_put_u64(&sb, (u64)(unsigned)lr->start); 885 strbuf_putc(&sb, ','); 886 strbuf_put_u64(&sb, (u64)(unsigned)lr->end); 887 strbuf_puts(&sb, ")b"); 888 strbuf_put_u64(&sb, (u64)(unsigned)lr->block); 889 if (lr->whole_block) strbuf_putc(&sb, '*'); 890 dump_sb(w, &sb); 891 } 892 dump_write(w, "\n"); 893 } 894 }