pass_ssa.c (34028B)
1 /* pass_ssa.c - mem2reg SSA construction and phi destruction for O2. */ 2 3 #include <string.h> 4 5 #include "core/arena.h" 6 #include "core/core.h" 7 #include "core/slice.h" 8 #include "core/strbuf.h" 9 #include "opt/opt_internal.h" 10 11 typedef struct SlotStack { 12 Val* vals; 13 u32 n; 14 u32 cap; 15 } SlotStack; 16 17 typedef struct RenameCtx { 18 Func* f; 19 OptAnalysis* analysis; 20 u8* promoted; 21 Val* repl; 22 u32 repl_cap; 23 SlotStack* stacks; 24 } RenameCtx; 25 26 typedef struct EdgeMove { 27 Val dst; 28 Val src; 29 KitCgTypeId type; 30 u8 cls; 31 } EdgeMove; 32 33 typedef struct RegRenameCtx { 34 Func* f; 35 OptAnalysis* analysis; 36 u32 old_nregs; 37 SlotStack* stacks; 38 } RegRenameCtx; 39 40 static u8 ssa_type_class(Func* f, KitCgTypeId ty) { 41 return opt_value_reg_class(f->c, ty); 42 } 43 44 static u32 opnd_slot_id(const Operand* op) { 45 if (!op || op->kind != OPK_LOCAL) return 0; 46 return (u32)op->v.frame_slot; 47 } 48 49 static int base_slot_promotable(const Func* f, u32 slot_id) { 50 if (slot_id == 0 || slot_id > f->nframe_slots) return 0; 51 const IRFrameSlot* s = &f->frame_slots[slot_id - 1u]; 52 if (s->kind != FS_LOCAL) return 0; 53 if (s->flags & (FSF_ADDR_TAKEN | FSF_VOLATILE)) return 0; 54 return 1; 55 } 56 57 static int operand_has_slot(const Operand* op, u32 slot_id) { 58 return op && op->kind == OPK_LOCAL && opnd_slot_id(op) == slot_id; 59 } 60 61 static int abivalue_has_slot(const CGABIValue* v, u32 slot_id) { 62 if (!v) return 0; 63 if (operand_has_slot(&v->storage, slot_id)) return 1; 64 for (u32 i = 0; i < v->nparts; ++i) 65 if (operand_has_slot(&v->parts[i].op, slot_id)) return 1; 66 return 0; 67 } 68 69 static int aux_has_slot(const Inst* in, u32 slot_id) { 70 switch ((IROp)in->op) { 71 case IR_CALL: { 72 IRCallAux* aux = (IRCallAux*)in->extra.aux; 73 if (!aux) return 0; 74 if (aux->use_plan_replay) { 75 if (operand_has_slot(&aux->plan.callee, slot_id)) return 1; 76 for (u32 i = 0; i < aux->plan.nargs; ++i) 77 if (operand_has_slot(&aux->plan.args[i].src, slot_id)) return 1; 78 for (u32 i = 0; i < aux->plan.nrets; ++i) 79 if (operand_has_slot(&aux->plan.rets[i].dst, slot_id)) return 1; 80 } else { 81 if (operand_has_slot(&aux->desc.callee, slot_id)) return 1; 82 for (u32 i = 0; i < aux->desc.nargs; ++i) 83 if (abivalue_has_slot(&aux->desc.args[i], slot_id)) return 1; 84 if (abivalue_has_slot(&aux->desc.ret, slot_id)) return 1; 85 } 86 break; 87 } 88 case IR_RET: { 89 IRRetAux* aux = (IRRetAux*)in->extra.aux; 90 return aux && aux->present && abivalue_has_slot(&aux->val, slot_id); 91 } 92 case IR_SCOPE_BEGIN: 93 return 0; 94 case IR_ASM_BLOCK: { 95 IRAsmAux* aux = (IRAsmAux*)in->extra.aux; 96 if (!aux) return 0; 97 for (u32 i = 0; i < aux->nin; ++i) 98 if (operand_has_slot(&aux->in_ops[i], slot_id)) return 1; 99 for (u32 i = 0; i < aux->nout; ++i) 100 if (operand_has_slot(&aux->out_ops[i], slot_id)) return 1; 101 break; 102 } 103 case IR_INTRINSIC: { 104 IRIntrinAux* aux = (IRIntrinAux*)in->extra.aux; 105 if (!aux) return 0; 106 for (u32 i = 0; i < aux->narg; ++i) 107 if (operand_has_slot(&aux->args[i], slot_id)) return 1; 108 for (u32 i = 0; i < aux->ndst; ++i) 109 if (operand_has_slot(&aux->dsts[i], slot_id)) return 1; 110 break; 111 } 112 default: 113 break; 114 } 115 return 0; 116 } 117 118 static int slot_access_promotable(const Func* f, const Inst* in, u32 slot_id) { 119 if ((IROp)in->op == IR_LOAD) { 120 if (in->nopnds < 2 || opnd_slot_id(&in->opnds[1]) != slot_id) return 1; 121 if (in->opnds[0].kind != OPK_REG || opt_mem_observable(&in->extra.mem)) 122 return 0; 123 /* Post-EA cg layer can produce LOAD opnds[1]=OPK_LOCAL(slot) with an 124 * access type that differs from the slot's declared type (e.g. a 125 * sub-word read for type-punning). mem2reg would silently lose those 126 * bits, so block promotion when the access type does not match the 127 * slot's declared type. */ 128 const IRFrameSlot* s = &f->frame_slots[slot_id - 1u]; 129 KitCgTypeId at = in->extra.mem.type; 130 if (at && at != s->type) return 0; 131 if (in->opnds[0].type && in->opnds[0].type != s->type) return 0; 132 return 1; 133 } 134 if ((IROp)in->op == IR_STORE) { 135 if (in->nopnds < 2 || opnd_slot_id(&in->opnds[0]) != slot_id) return 1; 136 if (opt_mem_observable(&in->extra.mem)) return 0; 137 if (in->opnds[1].kind != OPK_REG && in->opnds[1].kind != OPK_IMM) return 0; 138 const IRFrameSlot* s = &f->frame_slots[slot_id - 1u]; 139 KitCgTypeId at = in->extra.mem.type; 140 if (at && at != s->type) return 0; 141 if (in->opnds[1].type && in->opnds[1].type != s->type) return 0; 142 return 1; 143 } 144 for (u32 i = 0; i < in->nopnds; ++i) 145 if (opnd_slot_id(&in->opnds[i]) == slot_id) return 0; 146 if (aux_has_slot(in, slot_id)) return 0; 147 return 1; 148 } 149 150 static u8* find_promoted_slots(Func* f) { 151 u8* promoted = arena_zarray(f->arena, u8, f->nframe_slots + 1u); 152 for (u32 sid = 1; sid <= f->nframe_slots; ++sid) { 153 if (!base_slot_promotable(f, sid)) continue; 154 promoted[sid] = 1; 155 } 156 for (u32 b = 0; b < f->nblocks; ++b) { 157 Block* bl = &f->blocks[b]; 158 for (u32 i = 0; i < bl->ninsts; ++i) { 159 Inst* in = &bl->insts[i]; 160 for (u32 sid = 1; sid <= f->nframe_slots; ++sid) { 161 if (promoted[sid] && !slot_access_promotable(f, in, sid)) 162 promoted[sid] = 0; 163 } 164 } 165 } 166 return promoted; 167 } 168 169 static void stack_push(Arena* a, SlotStack* s, Val v) { 170 if (s->n == s->cap) { 171 u32 ncap = s->cap ? s->cap * 2u : 4u; 172 Val* vals = arena_array(a, Val, ncap); 173 if (s->vals) memcpy(vals, s->vals, sizeof(vals[0]) * s->n); 174 s->vals = vals; 175 s->cap = ncap; 176 } 177 s->vals[s->n++] = v; 178 } 179 180 static Val stack_top(const SlotStack* s) { 181 return s && s->n ? s->vals[s->n - 1u] : VAL_NONE; 182 } 183 184 static int reg_in_defs(const Inst* in, Reg r) { 185 if (!in || r == (Reg)REG_NONE) return 0; 186 if (in->def == (Val)r) return 1; 187 for (u32 i = 0; i < in->ndefs; ++i) 188 if (in->defs[i] == (Val)r) return 1; 189 return 0; 190 } 191 192 static u8* find_reg_def_blocks(Func* f, u32 old_nregs) { 193 u8* def_blocks = arena_zarray(f->arena, u8, old_nregs * f->nblocks); 194 for (u32 b = 0; b < f->nblocks; ++b) { 195 Block* bl = &f->blocks[b]; 196 for (u32 i = 0; i < bl->ninsts; ++i) { 197 Inst* in = &bl->insts[i]; 198 if (in->def != VAL_NONE && in->def < old_nregs) 199 def_blocks[in->def * f->nblocks + b] = 1; 200 for (u32 d = 0; d < in->ndefs; ++d) { 201 Val v = in->defs[d]; 202 if (v != VAL_NONE && v < old_nregs) def_blocks[v * f->nblocks + b] = 1; 203 } 204 } 205 } 206 return def_blocks; 207 } 208 209 static void compute_reg_phi_sites(Func* f, OptAnalysis* a, 210 const OptLiveInfo* live, u32 old_nregs, 211 u8* needs_phi) { 212 u8* def_blocks = find_reg_def_blocks(f, old_nregs); 213 u32* work = arena_array(f->arena, u32, f->nblocks ? f->nblocks : 1u); 214 u8* queued = arena_zarray(f->arena, u8, f->nblocks ? f->nblocks : 1u); 215 216 for (u32 r = 1; r < old_nregs; ++r) { 217 if (!f->preg_type[r]) continue; 218 memset(queued, 0, f->nblocks); 219 u32 wn = 0; 220 for (u32 b = 0; b < f->nblocks; ++b) { 221 if (!def_blocks[r * f->nblocks + b]) continue; 222 queued[b] = 1; 223 work[wn++] = b; 224 } 225 while (wn) { 226 u32 x = work[--wn]; 227 OptBlockList* df = &a->dom_frontier[x]; 228 for (u32 i = 0; i < df->n; ++i) { 229 u32 y = df->items[i]; 230 if (needs_phi[r * f->nblocks + y]) continue; 231 if (live && !opt_bitset_has(&live->blocks[y].live_in, (Val)r)) continue; 232 needs_phi[r * f->nblocks + y] = 1; 233 if (!queued[y]) { 234 queued[y] = 1; 235 work[wn++] = y; 236 } 237 } 238 } 239 } 240 } 241 242 static void insert_reg_phis(Func* f, u32 b, u32 old_nregs, 243 const u8* needs_phi) { 244 Block* bl = &f->blocks[b]; 245 u32 nphi = 0; 246 for (u32 r = 1; r < old_nregs; ++r) 247 if (needs_phi[r * f->nblocks + b]) ++nphi; 248 if (!nphi) return; 249 250 u32 old_nvals = f->nvals; 251 Inst* insts = arena_zarray(f->arena, Inst, bl->ninsts + nphi); 252 u32 w = 0; 253 for (u32 r = 1; r < old_nregs; ++r) { 254 if (!needs_phi[r * f->nblocks + b]) continue; 255 Inst* in = &insts[w++]; 256 in->op = IR_PHI; 257 ir_assign_inst_id(f, in); 258 in->type = f->preg_type[r]; 259 in->def = ir_alloc_val(f, f->preg_type[r], f->preg_cls[r]); 260 f->val_def_block[in->def] = b; 261 f->val_def_inst[in->def] = w - 1u; 262 IRPhiAux* aux = arena_znew(f->arena, IRPhiAux); 263 aux->reg_id = r; 264 aux->npreds = bl->npreds; 265 if (bl->npreds) { 266 aux->pred_blocks = arena_array(f->arena, u32, bl->npreds); 267 aux->pred_vals = arena_zarray(f->arena, Val, bl->npreds); 268 memcpy(aux->pred_blocks, bl->preds, sizeof(u32) * bl->npreds); 269 } 270 in->extra.aux = aux; 271 } 272 if (bl->ninsts) memcpy(insts + nphi, bl->insts, sizeof(Inst) * bl->ninsts); 273 bl->insts = insts; 274 bl->ninsts += nphi; 275 bl->cap = bl->ninsts; 276 for (Val v = 1; v < old_nvals; ++v) 277 if (f->val_def_block[v] == b) f->val_def_inst[v] += nphi; 278 } 279 280 static Val reg_stack_top(RegRenameCtx* ctx, Reg r) { 281 if (r == (Reg)REG_NONE || (u32)r >= ctx->old_nregs) return VAL_NONE; 282 return stack_top(&ctx->stacks[(u32)r]); 283 } 284 285 static void reg_replace_use(RegRenameCtx* ctx, Operand* op) { 286 if (!op) return; 287 if (op->kind == OPK_REG) { 288 Reg r = op->v.reg; 289 Val v = reg_stack_top(ctx, r); 290 if (v == VAL_NONE) return; 291 op->v.reg = (Reg)v; 292 op->type = ctx->f->val_type[v]; 293 op->cls = ctx->f->val_cls[v]; 294 } else if (op->kind == OPK_INDIRECT) { 295 Reg r = op->v.ind.base; 296 Val v = reg_stack_top(ctx, r); 297 if (v != VAL_NONE) op->v.ind.base = (Reg)v; 298 if (op->v.ind.index != (Reg)REG_NONE) { 299 Reg ri = op->v.ind.index; 300 Val vi = reg_stack_top(ctx, ri); 301 if (vi != VAL_NONE) op->v.ind.index = (Reg)vi; 302 } 303 } 304 } 305 306 static void reg_replace_abivalue_uses(RegRenameCtx* ctx, CGABIValue* v) { 307 if (!v) return; 308 reg_replace_use(ctx, &v->storage); 309 for (u32 i = 0; i < v->nparts; ++i) 310 reg_replace_use(ctx, (Operand*)&v->parts[i].op); 311 } 312 313 static Val reg_define_operand(RegRenameCtx* ctx, u32 b, u32 i, Inst* in, 314 Operand* op, u32 def_index, u32* pushed) { 315 if (!op || op->kind != OPK_REG) return VAL_NONE; 316 Reg r = op->v.reg; 317 if (r == (Reg)REG_NONE || (u32)r >= ctx->old_nregs) return VAL_NONE; 318 KitCgTypeId ty = op->type ? op->type : ctx->f->preg_type[(u32)r]; 319 u8 cls = op->cls; 320 Val v = ir_alloc_val(ctx->f, ty, cls); 321 op->v.reg = (Reg)v; 322 op->type = ty; 323 op->cls = cls; 324 if (def_index == 0 && in->def == (Val)r) in->def = v; 325 if (def_index < in->ndefs && in->defs[def_index] == (Val)r) 326 in->defs[def_index] = v; 327 ctx->f->val_def_block[v] = b; 328 ctx->f->val_def_inst[v] = i; 329 stack_push(ctx->f->arena, &ctx->stacks[(u32)r], v); 330 pushed[(u32)r]++; 331 return v; 332 } 333 334 static u32 reg_def_index(const Inst* in, Reg r, u32 ordinal) { 335 if (!in || !in->ndefs) return 0; 336 u32 seen = 0; 337 for (u32 i = 0; i < in->ndefs; ++i) { 338 if (in->defs[i] != (Val)r) continue; 339 if (seen == ordinal) return i; 340 ++seen; 341 } 342 return 0; 343 } 344 345 static void reg_replace_inst_uses(RegRenameCtx* ctx, Inst* in) { 346 for (u32 o = 0; o < in->nopnds; ++o) { 347 Operand* op = &in->opnds[o]; 348 int is_def = 0; 349 if (op->kind == OPK_REG) { 350 if ((IROp)in->op == IR_ATOMIC_CAS) 351 is_def = (o == 0 || o == 1) && reg_in_defs(in, op->v.reg); 352 else 353 is_def = o == 0 && reg_in_defs(in, op->v.reg); 354 } 355 if (!is_def) reg_replace_use(ctx, op); 356 } 357 358 switch ((IROp)in->op) { 359 case IR_CALL: { 360 IRCallAux* aux = (IRCallAux*)in->extra.aux; 361 if (!aux) break; 362 if (aux->use_plan_replay) { 363 reg_replace_use(ctx, &aux->plan.callee); 364 for (u32 i = 0; i < aux->plan.nargs; ++i) 365 reg_replace_use(ctx, &aux->plan.args[i].src); 366 } else { 367 reg_replace_use(ctx, &aux->desc.callee); 368 for (u32 i = 0; i < aux->desc.nargs; ++i) 369 reg_replace_abivalue_uses(ctx, (CGABIValue*)&aux->desc.args[i]); 370 } 371 break; 372 } 373 case IR_RET: { 374 IRRetAux* aux = (IRRetAux*)in->extra.aux; 375 if (aux && aux->present) reg_replace_abivalue_uses(ctx, &aux->val); 376 break; 377 } 378 case IR_SCOPE_BEGIN: 379 break; 380 case IR_ASM_BLOCK: { 381 IRAsmAux* aux = (IRAsmAux*)in->extra.aux; 382 if (!aux) break; 383 for (u32 i = 0; i < aux->nin; ++i) reg_replace_use(ctx, &aux->in_ops[i]); 384 break; 385 } 386 case IR_INTRINSIC: { 387 IRIntrinAux* aux = (IRIntrinAux*)in->extra.aux; 388 if (!aux) break; 389 for (u32 i = 0; i < aux->narg; ++i) reg_replace_use(ctx, &aux->args[i]); 390 break; 391 } 392 default: 393 break; 394 } 395 } 396 397 static void reg_define_abivalue(RegRenameCtx* ctx, u32 b, u32 i, Inst* in, 398 CGABIValue* v, u32* pushed) { 399 if (!v) return; 400 if (v->storage.kind == OPK_REG && reg_in_defs(in, v->storage.v.reg)) 401 reg_define_operand(ctx, b, i, in, &v->storage, 0, pushed); 402 for (u32 p = 0; p < v->nparts; ++p) { 403 Operand* op = (Operand*)&v->parts[p].op; 404 if (op->kind == OPK_REG && reg_in_defs(in, op->v.reg)) 405 reg_define_operand(ctx, b, i, in, op, p, pushed); 406 } 407 } 408 409 static void reg_define_inst_defs(RegRenameCtx* ctx, u32 b, u32 i, Inst* in, 410 u32* pushed) { 411 switch ((IROp)in->op) { 412 case IR_CALL: { 413 IRCallAux* aux = (IRCallAux*)in->extra.aux; 414 if (!aux) break; 415 if (aux->use_plan_replay) { 416 for (u32 r = 0; r < aux->plan.nrets; ++r) { 417 if (aux->plan.rets[r].dst.kind == OPK_REG && 418 reg_in_defs(in, aux->plan.rets[r].dst.v.reg)) 419 reg_define_operand(ctx, b, i, in, &aux->plan.rets[r].dst, r, 420 pushed); 421 } 422 } else { 423 reg_define_abivalue(ctx, b, i, in, &aux->desc.ret, pushed); 424 } 425 break; 426 } 427 case IR_ATOMIC_CAS: 428 if (in->nopnds >= 1 && in->opnds[0].kind == OPK_REG) 429 reg_define_operand(ctx, b, i, in, &in->opnds[0], 0, pushed); 430 if (in->nopnds >= 2 && in->opnds[1].kind == OPK_REG) 431 reg_define_operand(ctx, b, i, in, &in->opnds[1], 1, pushed); 432 break; 433 case IR_ASM_BLOCK: { 434 IRAsmAux* aux = (IRAsmAux*)in->extra.aux; 435 if (!aux) break; 436 for (u32 o = 0; o < aux->nout; ++o) { 437 if (aux->out_ops[o].kind != OPK_REG) continue; 438 reg_define_operand(ctx, b, i, in, &aux->out_ops[o], 439 reg_def_index(in, aux->out_ops[o].v.reg, o), pushed); 440 } 441 break; 442 } 443 case IR_INTRINSIC: { 444 IRIntrinAux* aux = (IRIntrinAux*)in->extra.aux; 445 if (!aux) break; 446 for (u32 d = 0; d < aux->ndst; ++d) { 447 if (aux->dsts[d].kind != OPK_REG) continue; 448 reg_define_operand(ctx, b, i, in, &aux->dsts[d], d, pushed); 449 if (aux->result_vals) aux->result_vals[d] = (Val)aux->dsts[d].v.reg; 450 } 451 break; 452 } 453 default: 454 if (in->nopnds && in->opnds[0].kind == OPK_REG && 455 reg_in_defs(in, in->opnds[0].v.reg)) 456 reg_define_operand(ctx, b, i, in, &in->opnds[0], 0, pushed); 457 break; 458 } 459 } 460 461 static void reg_rename_block(RegRenameCtx* ctx, u32 b) { 462 Func* f = ctx->f; 463 Block* bl = &f->blocks[b]; 464 u32* pushed = arena_zarray(f->arena, u32, ctx->old_nregs); 465 466 for (u32 i = 0; i < bl->ninsts; ++i) { 467 Inst* in = &bl->insts[i]; 468 if ((IROp)in->op != IR_PHI) break; 469 IRPhiAux* aux = (IRPhiAux*)in->extra.aux; 470 if (!aux || !aux->reg_id || aux->reg_id >= ctx->old_nregs) continue; 471 stack_push(f->arena, &ctx->stacks[aux->reg_id], in->def); 472 pushed[aux->reg_id]++; 473 } 474 475 for (u32 i = 0; i < bl->ninsts; ++i) { 476 Inst* in = &bl->insts[i]; 477 if ((IROp)in->op == IR_PHI) continue; 478 reg_replace_inst_uses(ctx, in); 479 reg_define_inst_defs(ctx, b, i, in, pushed); 480 } 481 482 for (u32 s = 0; s < bl->nsucc; ++s) { 483 u32 succ = bl->succ[s]; 484 if (succ >= f->nblocks) continue; 485 Block* sb = &f->blocks[succ]; 486 u32 pred_idx = OPT_USE_NONE; 487 for (u32 p = 0; p < sb->npreds; ++p) { 488 if (sb->preds[p] == b) { 489 pred_idx = p; 490 break; 491 } 492 } 493 if (pred_idx == OPT_USE_NONE) continue; 494 for (u32 i = 0; i < sb->ninsts; ++i) { 495 Inst* phi = &sb->insts[i]; 496 if ((IROp)phi->op != IR_PHI) break; 497 IRPhiAux* aux = (IRPhiAux*)phi->extra.aux; 498 if (!aux || !aux->reg_id || aux->reg_id >= ctx->old_nregs) continue; 499 aux->pred_vals[pred_idx] = reg_stack_top(ctx, (Reg)aux->reg_id); 500 } 501 } 502 503 OptBlockList* children = &ctx->analysis->dom_children[b]; 504 for (u32 i = 0; i < children->n; ++i) 505 reg_rename_block(ctx, children->items[i]); 506 507 for (u32 r = 1; r < ctx->old_nregs; ++r) { 508 while (pushed[r]--) { 509 if (ctx->stacks[r].n) --ctx->stacks[r].n; 510 } 511 } 512 } 513 514 void opt_build_reg_ssa(Func* f) { 515 if (!f || f->nblocks == 0 || f->npregs <= 1) { 516 if (f) opt_rebuild_def_use(f); 517 return; 518 } 519 opt_analysis_invalidate(f, OPT_ANALYSIS_DEF_USE); 520 521 u32 old_nregs = f->npregs; 522 OptAnalysis a; 523 opt_analysis_build_order(f, &a); 524 opt_analysis_build_dominators(f, &a); 525 opt_analysis_build_dom_frontier(f, &a); 526 527 OptLiveInfo live; 528 opt_live_blocks(f, &live); 529 u8* needs_phi = arena_zarray(f->arena, u8, old_nregs * f->nblocks); 530 compute_reg_phi_sites(f, &a, &live, old_nregs, needs_phi); 531 for (u32 b = 0; b < f->nblocks; ++b) 532 insert_reg_phis(f, b, old_nregs, needs_phi); 533 534 RegRenameCtx ctx; 535 memset(&ctx, 0, sizeof ctx); 536 ctx.f = f; 537 ctx.analysis = &a; 538 ctx.old_nregs = old_nregs; 539 ctx.stacks = arena_zarray(f->arena, SlotStack, old_nregs); 540 reg_rename_block(&ctx, f->entry); 541 f->opt_reg_ssa = 1; 542 opt_rebuild_def_use(f); 543 } 544 545 static Val resolve_repl(const RenameCtx* ctx, Val v) { 546 while (v != VAL_NONE && v < ctx->repl_cap && ctx->repl[v] != VAL_NONE && 547 ctx->repl[v] != v) { 548 v = ctx->repl[v]; 549 } 550 return v; 551 } 552 553 static void replace_use(Func* f, Inst* in, Operand* op, int is_def, void* arg) { 554 (void)in; 555 RenameCtx* ctx = (RenameCtx*)arg; 556 if (is_def || op->kind != OPK_REG) return; 557 Val old = (Val)op->v.reg; 558 if (old == VAL_NONE || old >= f->nvals) return; 559 Val repl = resolve_repl(ctx, old); 560 if (repl != VAL_NONE && repl != old) { 561 op->v.reg = (Reg)repl; 562 op->type = f->val_type[repl]; 563 op->cls = f->val_cls[repl]; 564 } 565 } 566 567 static void insert_phis(Func* f, u32 b, const u8* needs_phi) { 568 Block* bl = &f->blocks[b]; 569 u32 nphi = 0; 570 for (u32 sid = 1; sid <= f->nframe_slots; ++sid) 571 if (needs_phi[sid * f->nblocks + b]) ++nphi; 572 if (!nphi) return; 573 574 u32 old_nvals = f->nvals; 575 Inst* insts = arena_zarray(f->arena, Inst, bl->ninsts + nphi); 576 u32 w = 0; 577 for (u32 sid = 1; sid <= f->nframe_slots; ++sid) { 578 if (!needs_phi[sid * f->nblocks + b]) continue; 579 const IRFrameSlot* slot = &f->frame_slots[sid - 1u]; 580 Inst* in = &insts[w++]; 581 in->op = IR_PHI; 582 ir_assign_inst_id(f, in); 583 in->type = slot->type; 584 in->def = ir_alloc_val(f, slot->type, ssa_type_class(f, slot->type)); 585 f->val_def_block[in->def] = b; 586 f->val_def_inst[in->def] = w - 1u; 587 IRPhiAux* aux = arena_znew(f->arena, IRPhiAux); 588 aux->slot_id = sid; 589 aux->npreds = bl->npreds; 590 if (bl->npreds) { 591 aux->pred_blocks = arena_array(f->arena, u32, bl->npreds); 592 aux->pred_vals = arena_zarray(f->arena, Val, bl->npreds); 593 memcpy(aux->pred_blocks, bl->preds, sizeof(u32) * bl->npreds); 594 } 595 in->extra.aux = aux; 596 } 597 if (bl->ninsts) memcpy(insts + nphi, bl->insts, sizeof(Inst) * bl->ninsts); 598 bl->insts = insts; 599 bl->ninsts += nphi; 600 bl->cap = bl->ninsts; 601 for (Val v = 1; v < old_nvals; ++v) 602 if (f->val_def_block[v] == b) f->val_def_inst[v] += nphi; 603 } 604 605 static void mark_def_blocks(Func* f, const u8* promoted, u8* def_blocks) { 606 for (u32 b = 0; b < f->nblocks; ++b) { 607 Block* bl = &f->blocks[b]; 608 for (u32 i = 0; i < bl->ninsts; ++i) { 609 Inst* in = &bl->insts[i]; 610 if ((IROp)in->op != IR_STORE || in->nopnds < 2) continue; 611 u32 sid = opnd_slot_id(&in->opnds[0]); 612 if (sid && promoted[sid]) def_blocks[sid * f->nblocks + b] = 1; 613 } 614 } 615 } 616 617 static void compute_phi_sites(Func* f, OptAnalysis* a, const u8* promoted, 618 u8* needs_phi) { 619 u8* def_blocks = 620 arena_zarray(f->arena, u8, (f->nframe_slots + 1u) * f->nblocks); 621 mark_def_blocks(f, promoted, def_blocks); 622 u32* work = arena_array(f->arena, u32, f->nblocks ? f->nblocks : 1u); 623 u8* queued = arena_zarray(f->arena, u8, f->nblocks ? f->nblocks : 1u); 624 625 for (u32 sid = 1; sid <= f->nframe_slots; ++sid) { 626 if (!promoted[sid]) continue; 627 memset(queued, 0, f->nblocks); 628 u32 wn = 0; 629 for (u32 b = 0; b < f->nblocks; ++b) { 630 if (!def_blocks[sid * f->nblocks + b]) continue; 631 queued[b] = 1; 632 work[wn++] = b; 633 } 634 while (wn) { 635 u32 x = work[--wn]; 636 OptBlockList* df = &a->dom_frontier[x]; 637 for (u32 i = 0; i < df->n; ++i) { 638 u32 y = df->items[i]; 639 if (needs_phi[sid * f->nblocks + y]) continue; 640 needs_phi[sid * f->nblocks + y] = 1; 641 if (!queued[y]) { 642 queued[y] = 1; 643 work[wn++] = y; 644 } 645 } 646 } 647 } 648 } 649 650 static void rewrite_store_immediate(RenameCtx* ctx, Inst* in, u32 b, u32 i, 651 u32 sid) { 652 Operand src = in->opnds[1]; 653 Val v = ir_alloc_val(ctx->f, src.type, src.cls); 654 Operand* opnds = arena_array(ctx->f->arena, Operand, 1); 655 opnds[0] = src; 656 opnds[0].kind = OPK_REG; 657 opnds[0].v.reg = (Reg)v; 658 in->op = IR_LOAD_IMM; 659 in->type = src.type; 660 in->def = v; 661 in->opnds = opnds; 662 in->nopnds = 1; 663 in->extra.imm = src.v.imm; 664 ctx->f->val_def_block[v] = b; 665 ctx->f->val_def_inst[v] = i; 666 stack_push(ctx->f->arena, &ctx->stacks[sid], v); 667 } 668 669 static void rename_block(RenameCtx* ctx, u32 b) { 670 Func* f = ctx->f; 671 Block* bl = &f->blocks[b]; 672 u32* pushed = arena_zarray(f->arena, u32, f->nframe_slots + 1u); 673 674 for (u32 i = 0; i < bl->ninsts; ++i) { 675 Inst* in = &bl->insts[i]; 676 if ((IROp)in->op != IR_PHI) break; 677 IRPhiAux* aux = (IRPhiAux*)in->extra.aux; 678 if (!aux || !aux->slot_id || !ctx->promoted[aux->slot_id]) continue; 679 stack_push(f->arena, &ctx->stacks[aux->slot_id], in->def); 680 pushed[aux->slot_id]++; 681 } 682 683 for (u32 i = 0; i < bl->ninsts; ++i) { 684 Inst* in = &bl->insts[i]; 685 if ((IROp)in->op == IR_PHI) continue; 686 if ((IROp)in->op == IR_STORE && in->nopnds >= 2) { 687 u32 sid = opnd_slot_id(&in->opnds[0]); 688 if (sid && ctx->promoted[sid]) { 689 Operand src = in->opnds[1]; 690 if (src.kind == OPK_REG) { 691 stack_push(f->arena, &ctx->stacks[sid], 692 resolve_repl(ctx, (Val)src.v.reg)); 693 in->op = IR_NOP; 694 in->nopnds = 0; 695 in->opnds = NULL; 696 in->def = VAL_NONE; 697 } else { 698 rewrite_store_immediate(ctx, in, b, i, sid); 699 } 700 pushed[sid]++; 701 continue; 702 } 703 } 704 if ((IROp)in->op == IR_LOAD && in->nopnds >= 2) { 705 u32 sid = opnd_slot_id(&in->opnds[1]); 706 if (sid && ctx->promoted[sid] && in->def != VAL_NONE) { 707 Val cur = stack_top(&ctx->stacks[sid]); 708 if (cur == VAL_NONE) continue; 709 if (in->def < ctx->repl_cap) 710 ctx->repl[in->def] = resolve_repl(ctx, cur); 711 in->op = IR_NOP; 712 in->nopnds = 0; 713 in->opnds = NULL; 714 in->def = VAL_NONE; 715 continue; 716 } 717 } 718 opt_walk_inst_operands(f, in, replace_use, ctx); 719 } 720 721 for (u32 s = 0; s < bl->nsucc; ++s) { 722 u32 succ = bl->succ[s]; 723 if (succ >= f->nblocks) continue; 724 Block* sb = &f->blocks[succ]; 725 u32 pred_idx = OPT_USE_NONE; 726 for (u32 p = 0; p < sb->npreds; ++p) { 727 if (sb->preds[p] == b) { 728 pred_idx = p; 729 break; 730 } 731 } 732 if (pred_idx == OPT_USE_NONE) continue; 733 for (u32 i = 0; i < sb->ninsts; ++i) { 734 Inst* phi = &sb->insts[i]; 735 if ((IROp)phi->op != IR_PHI) break; 736 IRPhiAux* aux = (IRPhiAux*)phi->extra.aux; 737 if (!aux || !aux->slot_id || !ctx->promoted[aux->slot_id]) continue; 738 aux->pred_vals[pred_idx] = 739 resolve_repl(ctx, stack_top(&ctx->stacks[aux->slot_id])); 740 } 741 } 742 743 OptBlockList* children = &ctx->analysis->dom_children[b]; 744 for (u32 i = 0; i < children->n; ++i) rename_block(ctx, children->items[i]); 745 746 for (u32 sid = 1; sid <= f->nframe_slots; ++sid) { 747 while (pushed[sid]--) { 748 if (ctx->stacks[sid].n) --ctx->stacks[sid].n; 749 } 750 } 751 } 752 753 static void replace_phi_inputs(Func* f, RenameCtx* ctx) { 754 for (u32 b = 0; b < f->nblocks; ++b) { 755 Block* bl = &f->blocks[b]; 756 for (u32 i = 0; i < bl->ninsts; ++i) { 757 Inst* in = &bl->insts[i]; 758 if ((IROp)in->op != IR_PHI) break; 759 IRPhiAux* aux = (IRPhiAux*)in->extra.aux; 760 if (!aux) continue; 761 for (u32 p = 0; p < aux->npreds; ++p) 762 aux->pred_vals[p] = resolve_repl(ctx, aux->pred_vals[p]); 763 } 764 } 765 } 766 767 void opt_build_ssa(Func* f) { 768 if (f) opt_analysis_invalidate(f, OPT_ANALYSIS_DEF_USE); 769 if (!f || f->nblocks == 0 || f->nframe_slots == 0) { 770 if (f) opt_rebuild_def_use(f); 771 return; 772 } 773 774 OptAnalysis a; 775 opt_analysis_build_order(f, &a); 776 opt_analysis_build_dominators(f, &a); 777 opt_analysis_build_dom_frontier(f, &a); 778 779 u8* promoted = find_promoted_slots(f); 780 u8* needs_phi = 781 arena_zarray(f->arena, u8, (f->nframe_slots + 1u) * f->nblocks); 782 compute_phi_sites(f, &a, promoted, needs_phi); 783 for (u32 b = 0; b < f->nblocks; ++b) insert_phis(f, b, needs_phi); 784 785 RenameCtx ctx; 786 memset(&ctx, 0, sizeof ctx); 787 ctx.f = f; 788 ctx.analysis = &a; 789 ctx.promoted = promoted; 790 ctx.repl_cap = f->vals_cap ? f->vals_cap : f->nvals; 791 ctx.repl = arena_zarray(f->arena, Val, ctx.repl_cap ? ctx.repl_cap : 1u); 792 ctx.stacks = arena_zarray(f->arena, SlotStack, f->nframe_slots + 1u); 793 rename_block(&ctx, f->entry); 794 795 for (u32 b = 0; b < f->nblocks; ++b) { 796 Block* bl = &f->blocks[b]; 797 for (u32 i = 0; i < bl->ninsts; ++i) 798 opt_walk_inst_operands(f, &bl->insts[i], replace_use, &ctx); 799 } 800 replace_phi_inputs(f, &ctx); 801 opt_rebuild_def_use(f); 802 } 803 804 static int ssa_is_terminator(const Inst* in) { 805 switch ((IROp)in->op) { 806 case IR_BR: 807 case IR_CONDBR: 808 case IR_CMP_BRANCH: 809 case IR_SWITCH: 810 case IR_INDIRECT_BRANCH: 811 case IR_RET: 812 case IR_UNREACHABLE: 813 case IR_BREAK_TO: 814 case IR_CONTINUE_TO: 815 return 1; 816 case IR_INTRINSIC: { 817 IRIntrinAux* aux = (IRIntrinAux*)in->extra.aux; 818 return aux && (aux->kind == INTRIN_LONGJMP || aux->kind == INTRIN_TRAP); 819 } 820 default: 821 return 0; 822 } 823 } 824 825 static Inst make_copy_inst(Func* f, Val dst, Val src, KitCgTypeId ty, u8 cls) { 826 Inst in; 827 memset(&in, 0, sizeof in); 828 in.op = IR_COPY; 829 in.flags = IRF_NO_COALESCE; 830 ir_assign_inst_id(f, &in); 831 in.type = ty; 832 in.def = dst; 833 in.nopnds = 2; 834 in.opnds = arena_array(f->arena, Operand, 2); 835 in.opnds[0].kind = OPK_REG; 836 in.opnds[0].type = ty; 837 in.opnds[0].cls = cls; 838 in.opnds[0].v.reg = (Reg)dst; 839 in.opnds[1].kind = OPK_REG; 840 in.opnds[1].type = ty; 841 in.opnds[1].cls = cls; 842 in.opnds[1].v.reg = (Reg)src; 843 return in; 844 } 845 846 static void insert_edge_moves(Func* f, u32 b, const EdgeMove* moves, u32 n) { 847 if (!n) return; 848 Block* bl = &f->blocks[b]; 849 u32 term = bl->ninsts && ssa_is_terminator(&bl->insts[bl->ninsts - 1u]); 850 u32 insert_at = bl->ninsts - term; 851 u32 extra = n == 1 ? 1u : n * 2u; 852 Inst* insts = arena_zarray(f->arena, Inst, bl->ninsts + extra); 853 if (insert_at) memcpy(insts, bl->insts, sizeof(Inst) * insert_at); 854 u32 w = insert_at; 855 if (n == 1) { 856 insts[w++] = make_copy_inst(f, moves[0].dst, moves[0].src, moves[0].type, 857 moves[0].cls); 858 } else { 859 Val* temps = arena_array(f->arena, Val, n); 860 for (u32 i = 0; i < n; ++i) { 861 temps[i] = ir_alloc_val(f, moves[i].type, moves[i].cls); 862 f->val_def_block[temps[i]] = b; 863 f->val_def_inst[temps[i]] = w; 864 insts[w++] = make_copy_inst(f, temps[i], moves[i].src, moves[i].type, 865 moves[i].cls); 866 } 867 for (u32 i = 0; i < n; ++i) { 868 insts[w] = make_copy_inst(f, moves[i].dst, temps[i], moves[i].type, 869 moves[i].cls); 870 f->val_def_block[moves[i].dst] = b; 871 f->val_def_inst[moves[i].dst] = w; 872 ++w; 873 } 874 } 875 if (term) insts[w++] = bl->insts[bl->ninsts - 1u]; 876 bl->insts = insts; 877 bl->ninsts = w; 878 bl->cap = w; 879 if (n == 1) { 880 f->val_def_block[moves[0].dst] = b; 881 f->val_def_inst[moves[0].dst] = insert_at; 882 } 883 } 884 885 static void realign_phi_preds(Func* f) { 886 for (u32 b = 0; b < f->nblocks; ++b) { 887 Block* bl = &f->blocks[b]; 888 for (u32 i = 0; i < bl->ninsts; ++i) { 889 Inst* phi = &bl->insts[i]; 890 if ((IROp)phi->op != IR_PHI) break; 891 IRPhiAux* aux = (IRPhiAux*)phi->extra.aux; 892 if (!aux || aux->npreds == bl->npreds) { 893 if (!aux) continue; 894 } 895 u32 old_n = aux->npreds; 896 u32* old_blocks = aux->pred_blocks; 897 Val* old_vals = aux->pred_vals; 898 u32* pred_blocks = 899 bl->npreds ? arena_array(f->arena, u32, bl->npreds) : NULL; 900 Val* pred_vals = 901 bl->npreds ? arena_zarray(f->arena, Val, bl->npreds) : NULL; 902 for (u32 p = 0; p < bl->npreds; ++p) { 903 pred_blocks[p] = bl->preds[p]; 904 pred_vals[p] = phi->def; 905 for (u32 old = 0; old < old_n; ++old) { 906 if (old_blocks && old_blocks[old] == bl->preds[p]) { 907 pred_vals[p] = old_vals ? old_vals[old] : VAL_NONE; 908 break; 909 } 910 } 911 } 912 aux->npreds = bl->npreds; 913 aux->pred_blocks = pred_blocks; 914 aux->pred_vals = pred_vals; 915 } 916 } 917 } 918 919 void opt_make_conventional_ssa(Func* f) { 920 if (!f) return; 921 opt_analysis_invalidate(f, OPT_ANALYSIS_CFG | OPT_ANALYSIS_DEF_USE | 922 OPT_ANALYSIS_DOM | OPT_ANALYSIS_LOOP); 923 for (u32 b = 0; b < f->nblocks; ++b) { 924 Block* bl = &f->blocks[b]; 925 if (!bl->ninsts || (IROp)bl->insts[0].op != IR_PHI) continue; 926 for (u32 p = 0; p < bl->npreds; ++p) { 927 u32 pred = bl->preds[p]; 928 EdgeMove* moves = arena_array(f->arena, EdgeMove, bl->ninsts); 929 u32 n = 0; 930 for (u32 i = 0; i < bl->ninsts; ++i) { 931 Inst* phi = &bl->insts[i]; 932 if ((IROp)phi->op != IR_PHI) break; 933 IRPhiAux* aux = (IRPhiAux*)phi->extra.aux; 934 if (!aux || p >= aux->npreds) continue; 935 Val src = aux->pred_vals[p]; 936 if (src == VAL_NONE || src == phi->def) continue; 937 moves[n].dst = phi->def; 938 moves[n].src = src; 939 moves[n].type = phi->type; 940 moves[n].cls = f->val_cls[phi->def]; 941 ++n; 942 } 943 if (!n) continue; 944 u32 target = pred; 945 if (pred >= f->nblocks) continue; 946 if (f->blocks[pred].nsucc != 1) target = opt_split_edge(f, pred, b); 947 insert_edge_moves(f, target, moves, n); 948 } 949 } 950 opt_build_cfg(f); 951 realign_phi_preds(f); 952 opt_rebuild_def_use(f); 953 } 954 955 void opt_undo_ssa(Func* f) { 956 if (!f) return; 957 opt_analysis_invalidate(f, OPT_ANALYSIS_DEF_USE); 958 for (u32 b = 0; b < f->nblocks; ++b) { 959 Block* bl = &f->blocks[b]; 960 u32 w = 0; 961 for (u32 i = 0; i < bl->ninsts; ++i) { 962 if ((IROp)bl->insts[i].op == IR_PHI) continue; 963 bl->insts[w++] = bl->insts[i]; 964 } 965 bl->ninsts = w; 966 } 967 opt_rebuild_def_use(f); 968 } 969 970 static void ssa_dump_write(Writer* w, const char* s) { 971 kit_writer_write(w, s, slice_from_cstr(s).len); 972 } 973 974 static void ssa_dump_sb(Writer* w, const StrBuf* sb) { 975 kit_writer_write(w, strbuf_cstr(sb), strbuf_len(sb)); 976 } 977 978 typedef struct SsaDumpUseCtx { 979 Writer* w; 980 int any; 981 } SsaDumpUseCtx; 982 983 static void ssa_dump_use(Func* f, Inst* in, Operand* op, int is_def, 984 void* arg) { 985 (void)f; 986 (void)in; 987 if (is_def || op->kind != OPK_REG) return; 988 SsaDumpUseCtx* ctx = (SsaDumpUseCtx*)arg; 989 char buf[32]; 990 StrBuf sb; 991 strbuf_init(&sb, buf, sizeof buf); 992 if (ctx->any) strbuf_putc(&sb, ','); 993 strbuf_putc(&sb, 'v'); 994 strbuf_put_u64(&sb, (u64)(unsigned)op->v.reg); 995 ssa_dump_sb(ctx->w, &sb); 996 ctx->any = 1; 997 } 998 999 void opt_ssa_dump(Func* f, Writer* w) { 1000 if (!f || !w) return; 1001 opt_rebuild_def_use(f); 1002 char buf[160]; 1003 StrBuf sb; 1004 strbuf_init(&sb, buf, sizeof buf); 1005 strbuf_puts(&sb, "ssa blocks="); 1006 strbuf_put_u64(&sb, (u64)(unsigned)f->nblocks); 1007 strbuf_puts(&sb, " vals="); 1008 strbuf_put_u64(&sb, (u64)(unsigned)f->nvals); 1009 strbuf_puts(&sb, " uses="); 1010 strbuf_put_u64(&sb, (u64)(unsigned)f->opt_nuses); 1011 strbuf_putc(&sb, '\n'); 1012 ssa_dump_sb(w, &sb); 1013 for (u32 b = 0; b < f->nblocks; ++b) { 1014 Block* bl = &f->blocks[b]; 1015 strbuf_reset(&sb); 1016 strbuf_puts(&sb, "block "); 1017 strbuf_put_u64(&sb, (u64)(unsigned)b); 1018 strbuf_puts(&sb, " preds="); 1019 strbuf_put_u64(&sb, (u64)(unsigned)bl->npreds); 1020 strbuf_puts(&sb, " succs="); 1021 strbuf_put_u64(&sb, (u64)(unsigned)bl->nsucc); 1022 strbuf_putc(&sb, '\n'); 1023 ssa_dump_sb(w, &sb); 1024 for (u32 i = 0; i < bl->ninsts; ++i) { 1025 Inst* in = &bl->insts[i]; 1026 strbuf_reset(&sb); 1027 strbuf_puts(&sb, " i"); 1028 strbuf_put_u64(&sb, (u64)(unsigned)in->id); 1029 strbuf_puts(&sb, " op="); 1030 strbuf_put_u64(&sb, (u64)(unsigned)in->op); 1031 ssa_dump_sb(w, &sb); 1032 if (in->def != VAL_NONE) { 1033 strbuf_reset(&sb); 1034 strbuf_puts(&sb, " def=v"); 1035 strbuf_put_u64(&sb, (u64)(unsigned)in->def); 1036 ssa_dump_sb(w, &sb); 1037 } 1038 if ((IROp)in->op == IR_PHI) { 1039 IRPhiAux* aux = (IRPhiAux*)in->extra.aux; 1040 strbuf_reset(&sb); 1041 strbuf_puts(&sb, " phi slot="); 1042 strbuf_put_u64(&sb, aux ? (u64)(unsigned)aux->slot_id : 0u); 1043 strbuf_puts(&sb, " preds="); 1044 ssa_dump_sb(w, &sb); 1045 if (aux) { 1046 for (u32 p = 0; p < aux->npreds; ++p) { 1047 strbuf_reset(&sb); 1048 if (p) strbuf_putc(&sb, ','); 1049 strbuf_putc(&sb, 'b'); 1050 strbuf_put_u64(&sb, (u64)(unsigned)aux->pred_blocks[p]); 1051 strbuf_puts(&sb, ":v"); 1052 strbuf_put_u64(&sb, (u64)(unsigned)aux->pred_vals[p]); 1053 ssa_dump_sb(w, &sb); 1054 } 1055 } 1056 } else { 1057 SsaDumpUseCtx ctx; 1058 ctx.w = w; 1059 ctx.any = 0; 1060 ssa_dump_write(w, " uses="); 1061 opt_walk_inst_operands(f, in, ssa_dump_use, &ctx); 1062 if (!ctx.any) ssa_dump_write(w, "-"); 1063 } 1064 ssa_dump_write(w, "\n"); 1065 } 1066 } 1067 }