pass_inline.c (25369B)
1 #include <string.h> 2 3 #include "core/arena.h" 4 #include "core/core.h" 5 #include "core/metrics.h" 6 #include "opt/opt_internal.h" 7 8 typedef struct InlineMap { 9 Func* caller; 10 Func* callee; 11 PReg* preg; 12 u32 npreg; 13 FrameSlot* slot; 14 u32 nslot; 15 u32* block; 16 u32 nblock; 17 } InlineMap; 18 19 typedef struct InstVec { 20 Func* f; 21 Inst* v; 22 u32 n; 23 u32 cap; 24 } InstVec; 25 26 typedef struct InlineOrderCtx { 27 FuncSet* fs; 28 u8* temp; 29 u8* done; 30 Func** order; 31 u32 norder; 32 } InlineOrderCtx; 33 34 #define INLINE_NORMAL_COST_LIMIT 20u 35 #define INLINE_HINT_COST_LIMIT 40u 36 #define INLINE_ABS_GROWTH_LIMIT 64u 37 #define INLINE_HINT_ABS_GROWTH_LIMIT 128u 38 39 /* Streaming O1 tiny-inline: a much smaller budget than the whole-program 40 * inliner. DEFAULT/HINT callees must fit under this cost; ALWAYS bypasses it. 41 * Bounded by max passes since an inlined straightline body never introduces a 42 * new call site. */ 43 #define INLINE_TINY_COST_LIMIT 8u 44 #define TINY_INLINE_MAX_PASSES 4u 45 46 static u32 func_inline_cost(Func* f) { 47 u32 cost = 0; 48 if (!f) return 0; 49 for (u32 b = 0; b < f->nblocks; ++b) { 50 Block* bl = &f->blocks[b]; 51 for (u32 i = 0; i < bl->ninsts; ++i) { 52 switch ((IROp)bl->insts[i].op) { 53 case IR_NOP: 54 case IR_PARAM_DECL: 55 case IR_BR: 56 case IR_RET: 57 break; 58 default: 59 ++cost; 60 break; 61 } 62 } 63 } 64 return cost; 65 } 66 67 static void instvec_push(InstVec* iv, const Inst* in) { 68 if (iv->n == iv->cap) { 69 u32 ncap = iv->cap ? iv->cap * 2u : 16u; 70 Inst* nv = arena_array(iv->f->arena, Inst, ncap); 71 if (iv->v) memcpy(nv, iv->v, sizeof(Inst) * iv->n); 72 iv->v = nv; 73 iv->cap = ncap; 74 } 75 iv->v[iv->n++] = *in; 76 } 77 78 static Func* funcset_find(FuncSet* fs, ObjSymId sym) { 79 if (!fs || sym == OBJ_SYM_NONE) return NULL; 80 for (u32 i = 0; i < fs->nfuncs; ++i) 81 if (fs->funcs[i] && fs->funcs[i]->name == sym) return fs->funcs[i]; 82 return NULL; 83 } 84 85 static int funcset_index(FuncSet* fs, Func* f) { 86 if (!fs || !f) return -1; 87 for (u32 i = 0; i < fs->nfuncs; ++i) 88 if (fs->funcs[i] == f) return (int)i; 89 return -1; 90 } 91 92 static Func* direct_callee(FuncSet* fs, const Inst* in) { 93 if (!in || (IROp)in->op != IR_CALL) return NULL; 94 IRCallAux* aux = (IRCallAux*)in->extra.aux; 95 if (!aux || aux->use_plan_replay) return NULL; 96 if (aux->desc.flags & CG_CALL_TAIL) return NULL; 97 if (aux->desc.callee.kind != OPK_GLOBAL) return NULL; 98 if (aux->desc.callee.v.global.addend != 0) return NULL; 99 return funcset_find(fs, aux->desc.callee.v.global.sym); 100 } 101 102 static KitCgInlinePolicy call_inline_policy(const Inst* in) { 103 if (!in || (IROp)in->op != IR_CALL) return KIT_CG_INLINE_DEFAULT; 104 IRCallAux* aux = (IRCallAux*)in->extra.aux; 105 return aux ? aux->desc.inline_policy : KIT_CG_INLINE_DEFAULT; 106 } 107 108 static KitCgInlinePolicy effective_inline_policy(const Inst* in, 109 const Func* callee) { 110 KitCgInlinePolicy call_policy = call_inline_policy(in); 111 KitCgInlinePolicy callee_policy = 112 callee ? callee->desc.inline_policy : KIT_CG_INLINE_DEFAULT; 113 if (call_policy == KIT_CG_INLINE_NEVER || 114 callee_policy == KIT_CG_INLINE_NEVER) 115 return KIT_CG_INLINE_NEVER; 116 if (call_policy == KIT_CG_INLINE_ALWAYS || 117 callee_policy == KIT_CG_INLINE_ALWAYS) 118 return KIT_CG_INLINE_ALWAYS; 119 if (call_policy == KIT_CG_INLINE_HINT || callee_policy == KIT_CG_INLINE_HINT) 120 return KIT_CG_INLINE_HINT; 121 return KIT_CG_INLINE_DEFAULT; 122 } 123 124 static int func_reaches(FuncSet* fs, Func* from, Func* target, u8* seen) { 125 int idx = funcset_index(fs, from); 126 if (idx < 0) return 0; 127 if (seen[idx]) return 0; 128 seen[idx] = 1; 129 for (u32 b = 0; b < from->nblocks; ++b) { 130 Block* bl = &from->blocks[b]; 131 for (u32 i = 0; i < bl->ninsts; ++i) { 132 Func* callee = direct_callee(fs, &bl->insts[i]); 133 if (!callee) continue; 134 if (callee == target) return 1; 135 if (func_reaches(fs, callee, target, seen)) return 1; 136 } 137 } 138 return 0; 139 } 140 141 static int recursive_or_scc(FuncSet* fs, Func* caller, Func* callee) { 142 if (caller == callee) return 1; 143 u8* seen = arena_zarray(fs->arena, u8, fs->nfuncs ? fs->nfuncs : 1u); 144 return func_reaches(fs, callee, caller, seen); 145 } 146 147 static int op_supported_in_straightline_inline(IROp op) { 148 switch (op) { 149 case IR_NOP: 150 case IR_PARAM_DECL: 151 case IR_LOAD_IMM: 152 case IR_LOAD_CONST: 153 case IR_COPY: 154 case IR_LOAD: 155 case IR_STORE: 156 case IR_BINOP: 157 case IR_UNOP: 158 case IR_CMP: 159 case IR_CONVERT: 160 case IR_BR: 161 case IR_CONDBR: 162 case IR_CMP_BRANCH: 163 case IR_RET: 164 return 1; 165 default: 166 return 0; 167 } 168 } 169 170 static int callee_inline_shape(Func* callee, KitCgInlinePolicy policy, 171 u32* cost_out) { 172 if (!callee || callee->opt_reg_ssa || callee->opt_rewritten) return 0; 173 if (kit_cg_type_func_is_variadic((KitCompiler*)callee->c, callee->type)) { 174 metrics_count(callee->c, "opt.inline.refuse_shape_variadic", 1); 175 return 0; 176 } 177 if (callee->entry >= callee->nblocks) { 178 metrics_count(callee->c, "opt.inline.refuse_shape_entry", 1); 179 return 0; 180 } 181 182 u32 nret = 0; 183 u32 cost = 0; 184 for (u32 b = 0; b < callee->nblocks; ++b) { 185 Block* bl = &callee->blocks[b]; 186 for (u32 i = 0; i < bl->ninsts; ++i) { 187 Inst* in = &bl->insts[i]; 188 IROp op = (IROp)in->op; 189 if (!op_supported_in_straightline_inline(op)) { 190 metrics_count(callee->c, "opt.inline.refuse_shape_op", 1); 191 return 0; 192 } 193 if (op == IR_RET) { 194 ++nret; 195 if (i + 1u != bl->ninsts) { 196 metrics_count(callee->c, "opt.inline.refuse_shape_ret_pos", 1); 197 return 0; 198 } 199 continue; 200 } 201 if (op != IR_NOP && op != IR_PARAM_DECL && op != IR_BR) ++cost; 202 } 203 } 204 if (nret == 0) { 205 metrics_count(callee->c, "opt.inline.refuse_shape_ret_count", 1); 206 return 0; 207 } 208 if (nret > 1) cost += (nret - 1u) * 4u; 209 u32 cost_limit = policy == KIT_CG_INLINE_ALWAYS ? 0xffffffffu 210 : policy == KIT_CG_INLINE_HINT ? INLINE_HINT_COST_LIMIT 211 : INLINE_NORMAL_COST_LIMIT; 212 if (cost > cost_limit) { 213 metrics_count(callee->c, "opt.inline.refuse_shape_budget", 1); 214 return 0; 215 } 216 if (cost_out) *cost_out = cost; 217 return 1; 218 } 219 220 static PReg map_preg(InlineMap* m, PReg r) { 221 if (r == PREG_NONE || r == 0 || r >= m->npreg) return r; 222 return m->preg[r] ? m->preg[r] : r; 223 } 224 225 static FrameSlot map_slot(InlineMap* m, FrameSlot s) { 226 if (s == FRAME_SLOT_NONE || s >= m->nslot) return s; 227 return m->slot[s] ? m->slot[s] : s; 228 } 229 230 static u32 map_block(InlineMap* m, u32 b) { 231 if (b >= m->nblock) return b; 232 return m->block[b] != 0xffffffffu ? m->block[b] : b; 233 } 234 235 static void map_mem(InlineMap* m, MemAccess* mem) { 236 if (!mem) return; 237 if (mem->alias.kind == ALIAS_LOCAL && mem->alias.v.local_id > 0) { 238 FrameSlot old = (FrameSlot)mem->alias.v.local_id; 239 mem->alias.v.local_id = (i32)map_slot(m, old); 240 } 241 } 242 243 static Operand map_operand(InlineMap* m, Operand op) { 244 switch ((OptOperandKind)op.kind) { 245 case OPK_REG: 246 op.v.reg = map_preg(m, (PReg)op.v.reg); 247 break; 248 case OPK_LOCAL: 249 op.v.frame_slot = map_slot(m, op.v.frame_slot); 250 break; 251 case OPK_INDIRECT: 252 op.v.ind.base = map_preg(m, (PReg)op.v.ind.base); 253 if (op.v.ind.index != (Reg)REG_NONE) 254 op.v.ind.index = map_preg(m, (PReg)op.v.ind.index); 255 break; 256 default: 257 break; 258 } 259 return op; 260 } 261 262 static int build_inline_map(InlineMap* m, Func* caller, Func* callee) { 263 memset(m, 0, sizeof *m); 264 m->caller = caller; 265 m->callee = callee; 266 m->npreg = callee->npregs; 267 m->nslot = callee->nframe_slots + 1u; 268 m->nblock = callee->nblocks; 269 m->preg = arena_zarray(caller->arena, PReg, m->npreg ? m->npreg : 1u); 270 m->slot = arena_zarray(caller->arena, FrameSlot, m->nslot ? m->nslot : 1u); 271 m->block = arena_array(caller->arena, u32, m->nblock ? m->nblock : 1u); 272 for (u32 b = 0; b < m->nblock; ++b) m->block[b] = 0xffffffffu; 273 274 for (FrameSlot s = 1; s <= callee->nframe_slots; ++s) { 275 IRFrameSlot* old = &callee->frame_slots[s - 1u]; 276 FrameSlotDesc d; 277 memset(&d, 0, sizeof d); 278 d.type = old->type; 279 d.name = old->name; 280 d.loc = old->loc; 281 d.size = old->size; 282 d.align = old->align; 283 d.kind = old->kind == FS_PARAM ? FS_LOCAL : old->kind; 284 d.flags = old->flags; 285 m->slot[s] = ir_frame_slot_new(caller, &d); 286 } 287 288 for (PReg r = 1; r < callee->npregs; ++r) { 289 m->preg[r] = 290 ir_alloc_preg(caller, callee->preg_type[r], callee->preg_cls[r]); 291 } 292 for (u32 b = 0; b < callee->nblocks; ++b) m->block[b] = ir_block_new(caller); 293 return 1; 294 } 295 296 static Inst make_inst(Func* f, IROp op, SrcLoc loc) { 297 Inst in; 298 memset(&in, 0, sizeof in); 299 in.op = (u16)op; 300 in.id = ir_inst_id_new(f); 301 in.loc = loc; 302 return in; 303 } 304 305 static Operand local_operand(FrameSlot s, KitCgTypeId ty) { 306 Operand op; 307 memset(&op, 0, sizeof op); 308 op.kind = OPK_LOCAL; 309 op.cls = RC_INT; 310 op.type = ty; 311 op.v.frame_slot = s; 312 return op; 313 } 314 315 static MemAccess param_mem(const IRParam* p, FrameSlot s) { 316 MemAccess m; 317 memset(&m, 0, sizeof m); 318 m.type = p->type; 319 m.size = p->size; 320 m.align = p->align; 321 m.alias.kind = ALIAS_LOCAL; 322 m.alias.v.local_id = (i32)s; 323 return m; 324 } 325 326 static int append_param_materialization(InstVec* out, InlineMap* m, 327 const Inst* call) { 328 IRCallAux* aux = (IRCallAux*)call->extra.aux; 329 Func* caller = m->caller; 330 Func* callee = m->callee; 331 if (!aux || aux->desc.nargs != callee->nparams) return 0; 332 333 for (u32 i = 0; i < callee->nparams; ++i) { 334 IRParam* p = &callee->params[i]; 335 const CGABIValue* av = &aux->desc.args[i]; 336 if (av->nparts != 0) return 0; 337 Operand src = av->storage; 338 if (src.kind != OPK_REG && src.kind != OPK_IMM) return 0; 339 if (p->storage.kind == CG_LOCAL_STORAGE_REG) { 340 PReg dst_r = map_preg(m, (PReg)p->storage.v.reg); 341 Operand dst; 342 memset(&dst, 0, sizeof dst); 343 dst.kind = OPK_REG; 344 dst.cls = opt_reg_cls(caller, dst_r); 345 dst.type = p->type; 346 dst.v.reg = dst_r; 347 Inst in = make_inst(caller, src.kind == OPK_IMM ? IR_LOAD_IMM : IR_COPY, 348 call->loc); 349 in.type = p->type; 350 in.def = (Val)dst_r; 351 in.opnds = 352 arena_array(caller->arena, Operand, src.kind == OPK_IMM ? 1u : 2u); 353 in.opnds[0] = dst; 354 in.nopnds = src.kind == OPK_IMM ? 1u : 2u; 355 if (src.kind == OPK_IMM) { 356 in.extra.imm = src.v.imm; 357 } else { 358 in.opnds[1] = src; 359 } 360 instvec_push(out, &in); 361 } else { 362 FrameSlot dst_s = map_slot(m, p->storage.v.frame_slot); 363 Inst in = make_inst(caller, IR_STORE, call->loc); 364 in.opnds = arena_array(caller->arena, Operand, 2); 365 in.opnds[0] = local_operand(dst_s, p->type); 366 in.opnds[1] = src; 367 in.nopnds = 2; 368 in.extra.mem = param_mem(p, dst_s); 369 instvec_push(out, &in); 370 } 371 } 372 return 1; 373 } 374 375 static Inst clone_inst(InlineMap* m, const Inst* src) { 376 Func* caller = m->caller; 377 Inst dst = *src; 378 dst.id = ir_inst_id_new(caller); 379 dst.def = src->def != VAL_NONE ? (Val)map_preg(m, (PReg)src->def) : VAL_NONE; 380 if (src->ndefs) { 381 dst.defs = arena_array(caller->arena, Val, src->ndefs); 382 for (u32 i = 0; i < src->ndefs; ++i) { 383 dst.defs[i] = src->defs[i] != VAL_NONE 384 ? (Val)map_preg(m, (PReg)src->defs[i]) 385 : VAL_NONE; 386 } 387 } 388 if (src->nopnds) { 389 dst.opnds = arena_array(caller->arena, Operand, src->nopnds); 390 for (u32 i = 0; i < src->nopnds; ++i) 391 dst.opnds[i] = map_operand(m, src->opnds[i]); 392 } 393 if ((IROp)dst.op == IR_LOAD || (IROp)dst.op == IR_STORE) { 394 map_mem(m, &dst.extra.mem); 395 } 396 return dst; 397 } 398 399 static void clone_block_succs(InlineMap* m, Block* dst, const Block* src) { 400 ir_block_set_nsucc(m->caller, dst->id, src->nsucc); 401 for (u32 s = 0; s < src->nsucc; ++s) 402 dst->succ[s] = map_block(m, src->succ[s]); 403 } 404 405 static int append_return_materialization(InstVec* out, InlineMap* m, 406 const Inst* call, const Inst* ret) { 407 IRCallAux* call_aux = (IRCallAux*)call->extra.aux; 408 IRRetAux* ret_aux = (IRRetAux*)ret->extra.aux; 409 Func* caller = m->caller; 410 if (!call_aux) return 0; 411 if (!ret_aux || !ret_aux->present) return 1; 412 if (ret_aux->val.nparts != 0 || call_aux->desc.ret.nparts != 0) return 0; 413 Operand dst = call_aux->desc.ret.storage; 414 Operand src = ret_aux->val.storage; 415 if (dst.kind == OPK_IMM) return 1; 416 if (dst.kind != OPK_REG) return 0; 417 src = map_operand(m, src); 418 if (src.kind != OPK_REG && src.kind != OPK_IMM) return 0; 419 420 Inst in = 421 make_inst(caller, src.kind == OPK_IMM ? IR_LOAD_IMM : IR_COPY, call->loc); 422 in.type = dst.type; 423 in.def = (Val)dst.v.reg; 424 in.opnds = arena_array(caller->arena, Operand, src.kind == OPK_IMM ? 1u : 2u); 425 in.opnds[0] = dst; 426 in.nopnds = src.kind == OPK_IMM ? 1u : 2u; 427 if (src.kind == OPK_IMM) { 428 in.extra.imm = src.v.imm; 429 } else { 430 in.opnds[1] = src; 431 } 432 instvec_push(out, &in); 433 return 1; 434 } 435 436 static Inst make_branch(Func* f, SrcLoc loc) { 437 return make_inst(f, IR_BR, loc); 438 } 439 440 static void emit_order_push_unique(u32* out, u32* nout, u32 b) { 441 for (u32 i = 0; i < *nout; ++i) 442 if (out[i] == b) return; 443 out[(*nout)++] = b; 444 } 445 446 static void inline_rebuild_emit_order(Func* caller, u32 anchor, InlineMap* m, 447 u32 cont) { 448 u32 cap = 449 caller->emit_order_n + m->callee->emit_order_n + m->callee->nblocks + 2u; 450 u32* order = arena_array(caller->arena, u32, cap ? cap : 1u); 451 u32 norder = 0; 452 int inserted = 0; 453 for (u32 i = 0; i < caller->emit_order_n; ++i) { 454 u32 b = caller->emit_order[i]; 455 emit_order_push_unique(order, &norder, b); 456 if (b != anchor) continue; 457 for (u32 j = 0; j < m->callee->emit_order_n; ++j) 458 emit_order_push_unique(order, &norder, 459 map_block(m, m->callee->emit_order[j])); 460 for (u32 cb = 0; cb < m->callee->nblocks; ++cb) 461 emit_order_push_unique(order, &norder, map_block(m, cb)); 462 emit_order_push_unique(order, &norder, cont); 463 inserted = 1; 464 } 465 if (!inserted) { 466 emit_order_push_unique(order, &norder, anchor); 467 for (u32 j = 0; j < m->callee->emit_order_n; ++j) 468 emit_order_push_unique(order, &norder, 469 map_block(m, m->callee->emit_order[j])); 470 for (u32 cb = 0; cb < m->callee->nblocks; ++cb) 471 emit_order_push_unique(order, &norder, map_block(m, cb)); 472 emit_order_push_unique(order, &norder, cont); 473 } 474 caller->emit_order = order; 475 caller->emit_order_n = norder; 476 caller->emit_order_cap = cap; 477 } 478 479 static int inline_rewrite_supported(Func* callee, const Inst* call) { 480 IRCallAux* call_aux = (IRCallAux*)call->extra.aux; 481 if (!call_aux || call_aux->desc.nargs != callee->nparams) return 0; 482 for (u32 i = 0; i < callee->nparams; ++i) { 483 const CGABIValue* av = &call_aux->desc.args[i]; 484 if (av->nparts != 0) return 0; 485 if (av->storage.kind != OPK_REG && av->storage.kind != OPK_IMM) return 0; 486 } 487 488 for (u32 b = 0; b < callee->nblocks; ++b) { 489 Block* bl = &callee->blocks[b]; 490 for (u32 i = 0; i < bl->ninsts; ++i) { 491 Inst* in = &bl->insts[i]; 492 if ((IROp)in->op != IR_RET) continue; 493 IRRetAux* ret_aux = (IRRetAux*)in->extra.aux; 494 if (!ret_aux || !ret_aux->present) continue; 495 if (ret_aux->val.nparts != 0 || call_aux->desc.ret.nparts != 0) return 0; 496 if (call_aux->desc.ret.storage.kind == OPK_IMM) continue; 497 if (call_aux->desc.ret.storage.kind != OPK_REG) return 0; 498 if (ret_aux->val.storage.kind != OPK_REG && 499 ret_aux->val.storage.kind != OPK_IMM) 500 return 0; 501 } 502 } 503 return 1; 504 } 505 506 static int inline_call_site(Func* caller, u32 block_idx, u32 inst_idx, 507 Func* callee) { 508 Block* old_bl = &caller->blocks[block_idx]; 509 Inst* old_insts = old_bl->insts; 510 u32 old_ninsts = old_bl->ninsts; 511 u32 old_nsucc = old_bl->nsucc; 512 u32* old_succ = arena_array(caller->arena, u32, old_nsucc ? old_nsucc : 1u); 513 for (u32 s = 0; s < old_nsucc; ++s) old_succ[s] = old_bl->succ[s]; 514 Inst call = old_insts[inst_idx]; 515 InlineMap map; 516 if (!build_inline_map(&map, caller, callee)) return 0; 517 u32 cont = ir_block_new(caller); 518 519 InstVec pre; 520 memset(&pre, 0, sizeof pre); 521 pre.f = caller; 522 for (u32 i = 0; i < inst_idx; ++i) instvec_push(&pre, &old_insts[i]); 523 if (!append_param_materialization(&pre, &map, &call)) return 0; 524 Inst br = make_branch(caller, call.loc); 525 instvec_push(&pre, &br); 526 527 Block* pre_bl = &caller->blocks[block_idx]; 528 pre_bl->insts = pre.v; 529 pre_bl->ninsts = pre.n; 530 pre_bl->cap = pre.cap; 531 ir_block_set_nsucc(caller, block_idx, 1); 532 pre_bl = &caller->blocks[block_idx]; 533 pre_bl->succ[0] = map_block(&map, callee->entry); 534 535 Block* cont_bl = &caller->blocks[cont]; 536 InstVec cont_out; 537 memset(&cont_out, 0, sizeof cont_out); 538 cont_out.f = caller; 539 for (u32 i = inst_idx + 1u; i < old_ninsts; ++i) 540 instvec_push(&cont_out, &old_insts[i]); 541 cont_bl->insts = cont_out.v; 542 cont_bl->ninsts = cont_out.n; 543 cont_bl->cap = cont_out.cap; 544 ir_block_set_nsucc(caller, cont, old_nsucc); 545 cont_bl = &caller->blocks[cont]; 546 for (u32 s = 0; s < old_nsucc; ++s) cont_bl->succ[s] = old_succ[s]; 547 548 for (u32 b = 0; b < callee->nblocks; ++b) { 549 Block* src_bl = &callee->blocks[b]; 550 u32 dst_b = map_block(&map, b); 551 Block* dst_bl = &caller->blocks[dst_b]; 552 InstVec body; 553 memset(&body, 0, sizeof body); 554 body.f = caller; 555 for (u32 i = 0; i < src_bl->ninsts; ++i) { 556 Inst* src = &src_bl->insts[i]; 557 switch ((IROp)src->op) { 558 case IR_NOP: 559 case IR_PARAM_DECL: 560 break; 561 case IR_RET: { 562 if (!append_return_materialization(&body, &map, &call, src)) return 0; 563 Inst ret_br = make_branch(caller, src->loc); 564 instvec_push(&body, &ret_br); 565 break; 566 } 567 default: { 568 Inst cloned = clone_inst(&map, src); 569 instvec_push(&body, &cloned); 570 break; 571 } 572 } 573 } 574 dst_bl = &caller->blocks[dst_b]; 575 dst_bl->insts = body.v; 576 dst_bl->ninsts = body.n; 577 dst_bl->cap = body.cap; 578 if (body.n && (IROp)body.v[body.n - 1u].op == IR_BR && src_bl->ninsts && 579 (IROp)src_bl->insts[src_bl->ninsts - 1u].op == IR_RET) { 580 ir_block_set_nsucc(caller, dst_b, 1); 581 caller->blocks[dst_b].succ[0] = cont; 582 } else { 583 clone_block_succs(&map, &caller->blocks[dst_b], src_bl); 584 } 585 } 586 587 inline_rebuild_emit_order(caller, block_idx, &map, cont); 588 opt_analysis_invalidate(caller, OPT_ANALYSIS_CFG | OPT_ANALYSIS_DEF_USE | 589 OPT_ANALYSIS_DOM | OPT_ANALYSIS_LOOP); 590 return 1; 591 } 592 593 static int caller_growth_ok(FuncSet* fs, Func* caller, const u32* base_cost, 594 u32 callee_cost, KitCgInlinePolicy policy) { 595 if (policy == KIT_CG_INLINE_ALWAYS) return 1; 596 int idx = funcset_index(fs, caller); 597 u32 base = idx >= 0 ? base_cost[idx] : func_inline_cost(caller); 598 u32 growth_limit = base / 4u; 599 if (growth_limit < INLINE_NORMAL_COST_LIMIT) 600 growth_limit = INLINE_NORMAL_COST_LIMIT; 601 if (policy == KIT_CG_INLINE_HINT) { 602 growth_limit *= 2u; 603 if (growth_limit < INLINE_HINT_COST_LIMIT) 604 growth_limit = INLINE_HINT_COST_LIMIT; 605 if (growth_limit > INLINE_HINT_ABS_GROWTH_LIMIT) 606 growth_limit = INLINE_HINT_ABS_GROWTH_LIMIT; 607 } else if (growth_limit > INLINE_ABS_GROWTH_LIMIT) { 608 growth_limit = INLINE_ABS_GROWTH_LIMIT; 609 } 610 return func_inline_cost(caller) + callee_cost <= base + growth_limit; 611 } 612 613 static int try_inline_call(FuncSet* fs, Func* caller, u32 b, u32 i, 614 const u32* base_cost) { 615 Inst* in = &caller->blocks[b].insts[i]; 616 Func* callee = direct_callee(fs, in); 617 u32 cost = 0; 618 KitCgInlinePolicy policy; 619 if (!callee) return 0; 620 policy = effective_inline_policy(in, callee); 621 metrics_count(caller->c, "opt.inline.candidates", 1); 622 if (policy == KIT_CG_INLINE_NEVER) { 623 metrics_count(caller->c, "opt.inline.refuse_policy", 1); 624 return 0; 625 } 626 if (recursive_or_scc(fs, caller, callee)) { 627 metrics_count(caller->c, "opt.inline.refuse_scc", 1); 628 return 0; 629 } 630 if (!callee_inline_shape(callee, policy, &cost)) { 631 metrics_count(caller->c, "opt.inline.refuse_shape", 1); 632 return 0; 633 } 634 if (!caller_growth_ok(fs, caller, base_cost, cost, policy)) { 635 metrics_count(caller->c, "opt.inline.refuse_growth", 1); 636 return 0; 637 } 638 if (!inline_rewrite_supported(callee, in)) { 639 metrics_count(caller->c, "opt.inline.refuse_rewrite_shape", 1); 640 return 0; 641 } 642 if (!inline_call_site(caller, b, i, callee)) { 643 metrics_count(caller->c, "opt.inline.refuse_rewrite", 1); 644 return 0; 645 } 646 metrics_count(caller->c, "opt.inline.inlined", 1); 647 return 1; 648 } 649 650 static void inline_order_visit(InlineOrderCtx* ctx, Func* f) { 651 int idx = funcset_index(ctx->fs, f); 652 if (idx < 0 || ctx->done[idx]) return; 653 if (ctx->temp[idx]) return; 654 ctx->temp[idx] = 1; 655 for (u32 b = 0; b < f->nblocks; ++b) { 656 Block* bl = &f->blocks[b]; 657 for (u32 i = 0; i < bl->ninsts; ++i) { 658 Func* callee = direct_callee(ctx->fs, &bl->insts[i]); 659 if (callee) inline_order_visit(ctx, callee); 660 } 661 } 662 ctx->temp[idx] = 0; 663 ctx->done[idx] = 1; 664 ctx->order[ctx->norder++] = f; 665 } 666 667 void opt_inline(FuncSet* fs, int max_iters) { 668 if (!fs || fs->nfuncs == 0 || max_iters <= 0) return; 669 if (max_iters > 4) max_iters = 4; 670 u32* base_cost = arena_array(fs->arena, u32, fs->nfuncs); 671 for (u32 i = 0; i < fs->nfuncs; ++i) 672 base_cost[i] = func_inline_cost(fs->funcs[i]); 673 for (int iter = 0; iter < max_iters; ++iter) { 674 int changed = 0; 675 InlineOrderCtx ctx; 676 memset(&ctx, 0, sizeof ctx); 677 ctx.fs = fs; 678 ctx.temp = arena_zarray(fs->arena, u8, fs->nfuncs); 679 ctx.done = arena_zarray(fs->arena, u8, fs->nfuncs); 680 ctx.order = arena_array(fs->arena, Func*, fs->nfuncs); 681 for (u32 fidx = 0; fidx < fs->nfuncs; ++fidx) 682 inline_order_visit(&ctx, fs->funcs[fidx]); 683 684 for (u32 fidx = 0; fidx < ctx.norder; ++fidx) { 685 Func* caller = ctx.order[fidx]; 686 if (!caller || caller->opt_reg_ssa || caller->opt_rewritten) continue; 687 for (u32 b = 0; b < caller->nblocks; ++b) { 688 Block* bl = &caller->blocks[b]; 689 for (u32 i = 0; i < bl->ninsts; ++i) { 690 if ((IROp)bl->insts[i].op != IR_CALL) continue; 691 if (try_inline_call(fs, caller, b, i, base_cost)) { 692 changed = 1; 693 bl = &caller->blocks[b]; 694 } 695 } 696 } 697 } 698 if (!changed) break; 699 } 700 } 701 702 /* Streaming single-caller variant for the O1 pipeline. Same gates as 703 * try_inline_call, minus the whole-program growth check (which needs a 704 * base_cost array the streaming path lacks), plus the tiny cost cap. */ 705 static int try_tiny_inline_call(FuncSet* fs, Func* caller, u32 b, u32 i) { 706 Inst* in = &caller->blocks[b].insts[i]; 707 Func* callee = direct_callee(fs, in); 708 u32 cost = 0; 709 KitCgInlinePolicy policy; 710 if (!callee) return 0; 711 policy = effective_inline_policy(in, callee); 712 metrics_count(caller->c, "opt.tiny_inline.candidates", 1); 713 if (policy == KIT_CG_INLINE_NEVER) { 714 metrics_count(caller->c, "opt.tiny_inline.refuse_policy", 1); 715 return 0; 716 } 717 if (recursive_or_scc(fs, caller, callee)) { 718 metrics_count(caller->c, "opt.tiny_inline.refuse_scc", 1); 719 return 0; 720 } 721 if (!callee_inline_shape(callee, policy, &cost)) { 722 metrics_count(caller->c, "opt.tiny_inline.refuse_shape", 1); 723 return 0; 724 } 725 if (policy != KIT_CG_INLINE_ALWAYS && cost > INLINE_TINY_COST_LIMIT) { 726 metrics_count(caller->c, "opt.tiny_inline.refuse_budget", 1); 727 return 0; 728 } 729 if (!inline_rewrite_supported(callee, in)) { 730 metrics_count(caller->c, "opt.tiny_inline.refuse_rewrite_shape", 1); 731 return 0; 732 } 733 if (!inline_call_site(caller, b, i, callee)) { 734 metrics_count(caller->c, "opt.tiny_inline.refuse_rewrite", 1); 735 return 0; 736 } 737 metrics_count(caller->c, "opt.tiny_inline.inlined", 1); 738 return 1; 739 } 740 741 int opt_try_tiny_inline(Func* caller, OptInlineCalleeLookup lookup, void* ctx) { 742 int total = 0; 743 if (!caller || !lookup || caller->opt_reg_ssa || caller->opt_rewritten) 744 return 0; 745 for (u32 pass = 0; pass < TINY_INLINE_MAX_PASSES; ++pass) { 746 int changed = 0; 747 for (u32 b = 0; b < caller->nblocks && !changed; ++b) { 748 Block* bl = &caller->blocks[b]; 749 for (u32 i = 0; i < bl->ninsts; ++i) { 750 Inst* in = &bl->insts[i]; 751 if ((IROp)in->op != IR_CALL) continue; 752 IRCallAux* aux = (IRCallAux*)in->extra.aux; 753 if (!aux || aux->desc.callee.kind != OPK_GLOBAL) continue; 754 Func* callee = lookup(ctx, aux->desc.callee.v.global.sym); 755 if (!callee) continue; 756 /* Ad-hoc 2-element FuncSet so the existing gates (direct_callee, 757 * recursive_or_scc) resolve the callee by symbol. */ 758 Func* funcs[2] = {caller, callee}; 759 FuncSet fs; 760 memset(&fs, 0, sizeof fs); 761 fs.c = caller->c; 762 fs.arena = caller->arena; 763 fs.funcs = funcs; 764 fs.nfuncs = (caller == callee) ? 1u : 2u; 765 fs.cap = fs.nfuncs; 766 if (try_tiny_inline_call(&fs, caller, b, i)) { 767 ++total; 768 changed = 1; /* indices invalidated by the split; restart scan */ 769 break; 770 } 771 } 772 } 773 if (!changed) break; 774 } 775 return total; 776 }