kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

opt.c (38045B)


      1 #include <kit/config.h>
      2 #include <string.h>
      3 
      4 #include "abi/abi.h"
      5 #include "cg/ir.h"
      6 #include "cg/ir_recorder.h"
      7 #include "cg/native_asm.h"
      8 #include "cg/native_direct_target.h"
      9 #include "cg/type.h"
     10 #include "core/arena.h"
     11 #include "core/core.h"
     12 #include "core/diag.h"
     13 #include "core/hashmap.h"
     14 #include "core/metrics.h"
     15 #include "core/pool.h"
     16 #include "core/slice.h"
     17 #include "core/strbuf.h"
     18 #include "debug/debug.h"
     19 #include "obj/symresolve.h"
     20 #include "opt/opt_internal.h"
     21 
     22 #undef Operand
     23 #undef CGCallDesc
     24 #undef CGFuncDesc
     25 #undef CGParamDesc
     26 #undef CGScopeDesc
     27 
     28 /* Fixpoint bound for the whole-program inliner. opt_inline internally clamps to
     29  * 4; an inlined straightline body introduces no new call sites, so a small
     30  * bound converges. */
     31 #define OPT_WHOLE_PROGRAM_INLINE_ITERS 4
     32 
     33 typedef struct OptImpl {
     34   Compiler* c;
     35   CgTarget* target;
     36   NativeTarget* native;
     37   int level;
     38   /* Whole-program (LTO) mode: defer all per-function emission to finalize so
     39    * the module-wide sweep can GC dead symbols and run cross-function inlining
     40    * over the full reachable set. Enabled whenever the optimizer runs (-O1 and
     41    * above; see opt_cgtarget_new). The ARM64 path already defers
     42    * unconditionally; this generalizes that to every arch and adds the inliner.
     43    */
     44   int whole_program;
     45   CgFinishPolicy finish_policy;
     46   Writer* dump_writer;
     47   /* Registry of functions recorded so far, for tiny-inline callee lookup.
     48    * `lowered_cache` is parallel to `cg_by_sym`: a lazily re-lowered
     49    * pre-machinize Func for the callee, reused across all call sites. All on
     50    * c->tu (persist for the whole translation unit). */
     51   CgIrFunc** cg_by_sym;
     52   Func** lowered_cache;
     53   u32 ncg;
     54   u32 cg_cap;
     55 } OptImpl;
     56 
     57 HASHMAP_DEFINE(OptFuncIndex, ObjSymId, u32, hash_u32);
     58 
     59 /* A symbol whose definition can be replaced at link time must not have its body
     60  * inlined — the inlined copy would defeat the override. Weak definitions are
     61  * interposable in every output kind, so they are never safe to inline. (The
     62  * broader default-visibility interposition under -shared is governed by the
     63  * preserved set; see doc/plan/LTO.md §5/§9.) */
     64 static int opt_cg_func_interposable(OptImpl* o, const CgIrFunc* cg) {
     65   const ObjSym* s;
     66   if (!cg || cg->desc.sym == OBJ_SYM_NONE) return 0;
     67   if (cg->desc.sym_bind == SB_WEAK) return 1;
     68   s = obj_symbol_get(o->target->obj, cg->desc.sym);
     69   return s && s->bind == SB_WEAK;
     70 }
     71 
     72 /* Lower a recorded function to the pre-machinize Func used by the inliners, and
     73  * mark interposable definitions INLINE_NEVER so neither the streaming
     74  * tiny-inliner nor the whole-program inliner fuses their bodies into callers.
     75  * Marking the callee's policy is honored by effective_inline_policy in both. */
     76 static Func* opt_lower_for_inline(OptImpl* o, const CgIrFunc* cg) {
     77   Func* f = opt_func_from_cg_ir(o->c, cg);
     78   if (f && opt_cg_func_interposable(o, cg))
     79     f->desc.inline_policy = KIT_CG_INLINE_NEVER;
     80   return f;
     81 }
     82 
     83 /* Lazily re-lower (and cache) the pre-machinize Func for a recorded callee
     84  * symbol. Returns NULL for forward-defined callees not yet recorded. */
     85 static Func* opt_tiny_callee_lookup(void* ctx, ObjSymId sym) {
     86   OptImpl* o = (OptImpl*)ctx;
     87   for (u32 i = 0; i < o->ncg; ++i) {
     88     if (o->cg_by_sym[i]->desc.sym != sym) continue;
     89     if (!o->lowered_cache[i])
     90       o->lowered_cache[i] = opt_lower_for_inline(o, o->cg_by_sym[i]);
     91     return o->lowered_cache[i];
     92   }
     93   return NULL;
     94 }
     95 
     96 static void opt_registry_add(OptImpl* o, CgIrFunc* f) {
     97   if (o->ncg == o->cg_cap) {
     98     u32 ncap = o->cg_cap ? o->cg_cap * 2u : 16u;
     99     CgIrFunc** ncg = arena_array(o->c->tu, CgIrFunc*, ncap);
    100     Func** nlc = arena_zarray(o->c->tu, Func*, ncap);
    101     if (o->ncg) {
    102       memcpy(ncg, o->cg_by_sym, sizeof(ncg[0]) * o->ncg);
    103       memcpy(nlc, o->lowered_cache, sizeof(nlc[0]) * o->ncg);
    104     }
    105     o->cg_by_sym = ncg;
    106     o->lowered_cache = nlc;
    107     o->cg_cap = ncap;
    108   }
    109   o->cg_by_sym[o->ncg++] = f;
    110 }
    111 
    112 static void opt_dbg_dump(OptImpl* o, Func* f, const char* tag) {
    113   const char* s = kit_debug_getenv("KIT_DUMP");
    114   KitWriter* w = NULL;
    115   size_t len = 0;
    116   const uint8_t* bytes;
    117   if (!s) return;
    118   if (strcmp(s, "1") != 0 && strcmp(s, tag) != 0) return;
    119   kit_writer_mem(o->c->ctx->heap, &w);
    120   opt_ir_dump(f, w);
    121   bytes = kit_writer_mem_bytes(w, &len);
    122   compiler_panic(o->c, f->desc.loc, "DUMP %s:\n%.*s", tag, (int)len,
    123                  (const char*)bytes);
    124 }
    125 
    126 /* CFG-prep prefix shared by the streaming and whole-program pipelines: lower's
    127  * raw blocks -> built CFG -> jump cleanup -> rebuilt CFG -> local simplify. In
    128  * whole-program mode this runs on every reachable function before opt_inline
    129  * sees the FuncSet, so the inliner observes the same block shape the streaming
    130  * path does. */
    131 static void opt_o1_native_prepare(OptImpl* o, Func* f) {
    132   if (!o->native)
    133     compiler_panic(o->c, f ? f->desc.loc : (SrcLoc){0, 0, 0},
    134                    "O1 optimizer requires a native target");
    135   opt_dbg_dump(o, f, "entry");
    136 
    137   metrics_count(o->c, "opt.funcs", 1);
    138   metrics_count(o->c, "opt.blocks", f->nblocks);
    139   metrics_count(o->c, "opt.pregs", f->npregs);
    140 
    141   metrics_scope_begin(o->c, "opt.cfg.build_1");
    142   opt_build_cfg(f);
    143   metrics_scope_end(o->c, "opt.cfg.build_1");
    144   metrics_scope_begin(o->c, "opt.cfg.jump_cleanup_cfg");
    145   opt_jump_cleanup(f, OPT_JUMP_CLEANUP_CFG);
    146   metrics_scope_end(o->c, "opt.cfg.jump_cleanup_cfg");
    147   metrics_scope_begin(o->c, "opt.cfg.build_2");
    148   opt_build_cfg(f);
    149   metrics_scope_end(o->c, "opt.cfg.build_2");
    150   metrics_scope_begin(o->c, "opt.cfg.simplify_local");
    151   opt_simplify_local(f);
    152   metrics_scope_end(o->c, "opt.cfg.simplify_local");
    153 }
    154 
    155 /* The machinize-through-emit suffix. `cfg_dirty` is set by the whole-program
    156  * path: opt_inline mutated the caller's blocks in place and left CFG analysis
    157  * stale, so the CFG must be rebuilt before tiny-inline/verify/machinize. The
    158  * streaming path passes 0 (prepare just built it). */
    159 static void opt_o1_native_finish(OptImpl* o, Func* f, int cfg_dirty) {
    160   OptLiveInfo live;
    161   OptLiveInfo regalloc_live;
    162 
    163   if (cfg_dirty) {
    164     /* opt_inline maintains succ + emit_order only; rebuild preds/CFG and merge
    165      * the BR-glue chains back into straight-line blocks (build_cfg ->
    166      * jump_cleanup -> build_cfg) before any pass that needs the analysis. */
    167     opt_build_cfg(f);
    168     opt_jump_cleanup(f, OPT_JUMP_CLEANUP_CFG);
    169     opt_build_cfg(f);
    170   }
    171 
    172   metrics_scope_begin(o->c, "opt.o1.tiny_inline");
    173   int inlined = opt_try_tiny_inline(f, opt_tiny_callee_lookup, o);
    174   metrics_scope_end(o->c, "opt.o1.tiny_inline");
    175   if (inlined) {
    176     /* inline_call_site invalidated CFG and left preds stale (it maintains
    177      * succ + emit_order only); rebuilding is required before verify/machinize.
    178      * Then merge the BR-glue chain (pre -> body -> cont) back into
    179      * straight-line blocks so regalloc sees no artificial boundaries, mirroring
    180      * the prologue's build_cfg -> jump_cleanup -> build_cfg idiom. */
    181     opt_build_cfg(f);
    182     opt_jump_cleanup(f, OPT_JUMP_CLEANUP_CFG);
    183     opt_build_cfg(f);
    184   }
    185 
    186   metrics_scope_begin(o->c, "opt.cfg.verify");
    187   opt_verify(f, "lowering-cfg");
    188   metrics_scope_end(o->c, "opt.cfg.verify");
    189 
    190   metrics_scope_begin(o->c, "opt.machinize");
    191   opt_machinize_native(f, o->native);
    192   metrics_scope_end(o->c, "opt.machinize");
    193   metrics_scope_begin(o->c, "opt.machinize.verify");
    194   opt_verify(f, "lowering-machinize");
    195   metrics_scope_end(o->c, "opt.machinize.verify");
    196 
    197   metrics_scope_begin(o->c, "opt.o1.addr_xform_pregs");
    198   opt_addr_xform_pregs(f);
    199   metrics_scope_end(o->c, "opt.o1.addr_xform_pregs");
    200   metrics_scope_begin(o->c, "opt.o1.addr_xform.verify");
    201   opt_verify(f, "o1-addr-xform");
    202   metrics_scope_end(o->c, "opt.o1.addr_xform.verify");
    203   metrics_scope_begin(o->c, "opt.o1.promote_scalar_locals");
    204   opt_promote_scalar_locals(f);
    205   metrics_scope_end(o->c, "opt.o1.promote_scalar_locals");
    206   metrics_scope_begin(o->c, "opt.o1.promote_scalar.verify");
    207   opt_verify(f, "o1-promote-scalar");
    208   metrics_scope_end(o->c, "opt.o1.promote_scalar.verify");
    209   metrics_scope_begin(o->c, "opt.o1.addr_of_global_cse");
    210   opt_addr_of_global_cse(f);
    211   metrics_scope_end(o->c, "opt.o1.addr_of_global_cse");
    212   metrics_scope_begin(o->c, "opt.o1.addr_of_global.verify");
    213   opt_verify(f, "o1-addr-global-cse");
    214   metrics_scope_end(o->c, "opt.o1.addr_of_global.verify");
    215 
    216   metrics_scope_begin(o->c, "opt.build_loop_tree");
    217   opt_build_loop_tree(f);
    218   metrics_scope_end(o->c, "opt.build_loop_tree");
    219   metrics_scope_begin(o->c, "opt.o1.lower_loop_imm");
    220   opt_lower_loop_imm_operands(f, o->native);
    221   metrics_scope_end(o->c, "opt.o1.lower_loop_imm");
    222   opt_verify(f, "o1-lower-loop-imm");
    223   metrics_scope_begin(o->c, "opt.o1.hoist_loop_consts");
    224   opt_hoist_loop_consts(f);
    225   metrics_scope_end(o->c, "opt.o1.hoist_loop_consts");
    226   opt_verify(f, "o1-hoist-loop-consts");
    227 
    228   metrics_scope_begin(o->c, "opt.live_blocks.pre_dde");
    229   memset(&live, 0, sizeof live);
    230   opt_live_blocks(f, &live);
    231   metrics_count(o->c, "opt.live_words", f->opt_live_words);
    232   metrics_scope_end(o->c, "opt.live_blocks.pre_dde");
    233   metrics_scope_begin(o->c, "opt.dead_def_elim");
    234   opt_dead_def_elim_with_live(f, &live);
    235   metrics_scope_end(o->c, "opt.dead_def_elim");
    236 
    237   metrics_scope_begin(o->c, "opt.regalloc");
    238   memset(&regalloc_live, 0, sizeof regalloc_live);
    239   opt_regalloc_locations(f, 0, &regalloc_live);
    240   metrics_scope_end(o->c, "opt.regalloc");
    241   metrics_scope_begin(o->c, "opt.regalloc.verify");
    242   opt_analysis_invalidate(f, OPT_ANALYSIS_DEF_USE);
    243   opt_verify(f, "post-regalloc");
    244   metrics_scope_end(o->c, "opt.regalloc.verify");
    245 
    246   metrics_scope_begin(o->c, "opt.lower_mir");
    247   opt_lower_to_mir(f, &regalloc_live);
    248   metrics_scope_end(o->c, "opt.lower_mir");
    249   metrics_scope_begin(o->c, "opt.lower_mir.verify");
    250   opt_mir_verify(f, "lower-mir");
    251   metrics_scope_end(o->c, "opt.lower_mir.verify");
    252   metrics_scope_begin(o->c, "opt.combine");
    253   opt_mir_combine(f);
    254   metrics_scope_end(o->c, "opt.combine");
    255   metrics_scope_begin(o->c, "opt.combine.verify");
    256   opt_mir_verify(f, "post-mir-combine");
    257   metrics_scope_end(o->c, "opt.combine.verify");
    258   metrics_scope_begin(o->c, "opt.dce");
    259   opt_mir_dce(f);
    260   metrics_scope_end(o->c, "opt.dce");
    261   metrics_scope_begin(o->c, "opt.dce.verify");
    262   opt_mir_verify(f, "post-mir-dce");
    263   metrics_scope_end(o->c, "opt.dce.verify");
    264   metrics_scope_begin(o->c, "opt.post_ra.jump_cleanup_cfg");
    265   opt_mir_jump_cleanup(f, OPT_JUMP_CLEANUP_CFG);
    266   metrics_scope_end(o->c, "opt.post_ra.jump_cleanup_cfg");
    267   metrics_scope_begin(o->c, "opt.post_ra.build_cfg");
    268   opt_mir_build_cfg(f);
    269   metrics_scope_end(o->c, "opt.post_ra.build_cfg");
    270   metrics_scope_begin(o->c, "opt.post_ra.verify");
    271   opt_mir_verify(f, "post-mir-jump-cfg");
    272   metrics_scope_end(o->c, "opt.post_ra.verify");
    273   metrics_scope_begin(o->c, "opt.post_ra.jump_cleanup_layout");
    274   opt_mir_jump_cleanup(f, OPT_JUMP_CLEANUP_LAYOUT);
    275   metrics_scope_end(o->c, "opt.post_ra.jump_cleanup_layout");
    276 
    277   opt_dbg_dump(o, f, "pre-emit");
    278   metrics_scope_begin(o->c, "opt.emit");
    279   if (o->native->mc && o->native->mc->debug)
    280     debug_func_select(o->native->mc->debug, f->desc.sym);
    281   opt_emit_native(o->c, f, o->native);
    282   if (o->native->mc && o->native->mc->debug)
    283     debug_func_end(o->native->mc->debug);
    284   metrics_scope_end(o->c, "opt.emit");
    285 }
    286 
    287 /* Streaming pipeline for one function: prepare + finish, back to back. Used by
    288  * the eager per-function path (x64/rv64 below -O2) and by the ARM64/-O2 sweep
    289  * for functions with no cross-function inlining to do. */
    290 static void opt_run_o1_native(OptImpl* o, Func* f) {
    291   metrics_scope_begin(o->c, "opt.o1.total");
    292   opt_o1_native_prepare(o, f);
    293   opt_o1_native_finish(o, f, /*cfg_dirty=*/0);
    294   metrics_scope_end(o->c, "opt.o1.total");
    295 }
    296 
    297 /* Sibling of opt_run_o1_native for the threaded interpreter. Runs the maximal
    298  * target-independent subset and stops before opt_machinize_native: no regalloc,
    299  * no MIR, no native emission. The result is a Func with virtual PRegs that the
    300  * interpreter loader consumes directly. opt_verify is intentionally omitted —
    301  * it is a debug aid and some checks assume the machinize-completed shape. */
    302 Func* opt_run_o1_interp(Compiler* c, const CgIrFunc* cg) {
    303   Func* f;
    304   OptLiveInfo live;
    305   metrics_scope_begin(c, "opt.interp.total");
    306   f = opt_func_from_cg_ir(c, cg);
    307   opt_build_cfg(f);
    308   opt_jump_cleanup(f, OPT_JUMP_CLEANUP_CFG);
    309   opt_build_cfg(f);
    310   opt_simplify_local(f);
    311   /* Target-independent local/escape optimizations. These run after machinize
    312    * in the native pipeline but depend only on the PReg/frame-slot view, not on
    313    * physical-register pools, so they are safe here and shrink the work the
    314    * interpreter does (promote scalar locals to PRegs, fold addr-of-local,
    315    * CSE addr-of-global). */
    316   opt_addr_xform_pregs(f);
    317   opt_promote_scalar_locals(f);
    318   opt_addr_of_global_cse(f);
    319   opt_build_loop_tree(f);
    320   memset(&live, 0, sizeof live);
    321   opt_live_blocks(f, &live);
    322   opt_dead_def_elim_with_live(f, &live);
    323   metrics_scope_end(c, "opt.interp.total");
    324   {
    325     if (kit_debug_getenv("KIT_DUMP_INTERP")) {
    326       KitWriter* w = NULL;
    327       size_t len = 0;
    328       const uint8_t* bytes;
    329       kit_writer_mem(c->ctx->heap, &w);
    330       opt_ir_dump(f, w);
    331       bytes = kit_writer_mem_bytes(w, &len);
    332       diag_emit(c->ctx->diag, KIT_DIAG_NOTE, (SrcLoc){0, 0, 0},
    333                 "INTERP IR:\n%.*s", (int)len, (const char*)bytes);
    334     }
    335   }
    336   return f;
    337 }
    338 
    339 #if KIT_INTERP_ENABLED
    340 /* Defined in src/interp/lower.c. Lowers a post-opt_run_o1_interp Func into the
    341  * program's bytecode and registers it by symbol (ObjSymId, for internal calls)
    342  * and by unmangled C name (for entry lookup). Declared here (rather than
    343  * including the interp header into opt) to keep the dependency one-way. */
    344 void interp_capture_func(void* program, Func* f, ObjSymId sym, const char* name,
    345                          u32 name_len, const ObjBuilder* obj);
    346 
    347 static void opt_maybe_capture_interp(OptImpl* o, const CgIrFunc* cg) {
    348   ObjSymId sym;
    349   Slice name;
    350   if (!o->c->interp_sink) return;
    351   sym = cg->desc.sym;
    352   name =
    353       (sym != OBJ_SYM_NONE)
    354           ? pool_slice(o->c->global, obj_symbol_get(o->target->obj, sym)->name)
    355           : SLICE_NULL;
    356   interp_capture_func(o->c->interp_sink, opt_run_o1_interp(o->c, cg), sym,
    357                       name.s, (u32)name.len, o->target->obj);
    358 }
    359 #else
    360 static void opt_maybe_capture_interp(OptImpl* o, const CgIrFunc* cg) {
    361   (void)o;
    362   (void)cg;
    363 }
    364 #endif
    365 
    366 static void opt_dbg_dump_cg(OptImpl* o, const CgIrFunc* f) {
    367   KitWriter* w = NULL;
    368   size_t len = 0;
    369   const uint8_t* bytes;
    370   if (!kit_debug_getenv("KIT_DUMPCG")) return;
    371   kit_writer_mem(o->c->ctx->heap, &w);
    372   cg_ir_func_dump(f, w);
    373   bytes = kit_writer_mem_bytes(w, &len);
    374   compiler_panic(o->c, f->desc.loc, "CGIR:\n%.*s", (int)len,
    375                  (const char*)bytes);
    376 }
    377 
    378 static void opt_on_func(void* user, CgIrFunc* cg_func) {
    379   OptImpl* o = (OptImpl*)user;
    380   Func* f;
    381   /* Register before lowering so later callers can resolve this (complete)
    382    * function as a tiny-inline callee. */
    383   opt_registry_add(o, cg_func);
    384   opt_dbg_dump_cg(o, cg_func);
    385   /* The dump writer renders the semantic CG IR tape — the IR as recorded,
    386    * before lowering to the optimizer's CFG form. */
    387   if (o->dump_writer) cg_ir_func_dump(cg_func, o->dump_writer);
    388   /* Defer emission to the finalize sweep whenever whole-program mode is on —
    389    * the same path for every arch. The sweep does GC + cross-function inlining
    390    * over the full reachable set. The eager emit below is the fallback for a
    391    * (currently unreachable) non-whole-program configuration. */
    392   if (o->whole_program) return;
    393   metrics_scope_begin(o->c, "opt.o1.cg_ir_lower");
    394   f = opt_func_from_cg_ir(o->c, cg_func);
    395   metrics_scope_end(o->c, "opt.o1.cg_ir_lower");
    396   opt_run_o1_native(o, f);
    397   opt_maybe_capture_interp(o, cg_func);
    398 }
    399 
    400 static int opt_module_has_asm(const CgIrModule* module) {
    401   if (!module) return 0;
    402   if (module->nfile_scope_asms) return 1;
    403   for (u32 i = 0; i < module->nfuncs; ++i) {
    404     const CgIrFunc* f = module->funcs[i];
    405     if (!f || f->removed) continue;
    406     for (u32 k = 0; k < f->ninsts; ++k)
    407       if (f->insts[k].op == CG_IR_ASM_BLOCK) return 1;
    408   }
    409   return 0;
    410 }
    411 
    412 static int opt_sym_in_preserved_section(OptImpl* o, ObjSymId sym,
    413                                         const ObjSym* s) {
    414   const Section* sec;
    415   const ObjAtom* atom;
    416   ObjAtomId aid;
    417   if (!o || !s || s->section_id == OBJ_SEC_NONE) return 0;
    418   sec = obj_section_get(o->target->obj, s->section_id);
    419   if (sec && ((sec->flags & SF_RETAIN) || sec->sem == SSEM_INIT_ARRAY ||
    420               sec->sem == SSEM_FINI_ARRAY || sec->sem == SSEM_PREINIT_ARRAY))
    421     return 1;
    422   aid = obj_atom_find_symbol(o->target->obj, sym);
    423   atom = obj_atom_get(o->target->obj, aid);
    424   return atom && (atom->flags & OBJ_ATOM_RETAIN);
    425 }
    426 
    427 static void opt_build_preserved_set(OptImpl* o, ObjSymSet* preserved) {
    428   for (u32 i = 0; i < o->finish_policy.npreserved_symbols; ++i) {
    429     ObjSymId sym = o->finish_policy.preserved_symbols[i];
    430     if (sym != OBJ_SYM_NONE) (void)ObjSymSet_set(preserved, sym, 1);
    431   }
    432 }
    433 
    434 static int opt_sym_must_stay_external(OptImpl* o, int module_has_asm,
    435                                       const ObjSymSet* preserved, ObjSymId sym,
    436                                       const ObjSym* s) {
    437   if (!s || s->removed) return 1;
    438   if (s->bind == SB_LOCAL) return 1;
    439   if (o->finish_policy.output_kind != KIT_CG_OUTPUT_EXECUTABLE) return 1;
    440   if (o->finish_policy.interposition_policy ==
    441       KIT_CG_INTERPOSITION_DEFAULT_VISIBILITY)
    442     return 1;
    443   if (ObjSymSet_get(preserved, sym)) return 1;
    444   if (s->bind == SB_WEAK) return 1;
    445   if (s->kind == SK_IFUNC) return 1;
    446   if (s->flags & KIT_CG_SYM_USED) return 1;
    447   if (module_has_asm) return 1;
    448   if (opt_sym_in_preserved_section(o, sym, s)) return 1;
    449   return 0;
    450 }
    451 
    452 static int opt_sym_internalizable(const ObjSym* s) {
    453   if (!s || s->removed || s->bind == SB_LOCAL) return 0;
    454   if (!symresolve_sym_is_def(s)) return 0;
    455   switch ((SymKind)s->kind) {
    456     case SK_FUNC:
    457     case SK_OBJ:
    458     case SK_TLS:
    459       return 1;
    460     default:
    461       return 0;
    462   }
    463 }
    464 
    465 static void opt_internalize_non_preserved(OptImpl* o, const CgIrModule* module,
    466                                           const ObjSymSet* preserved) {
    467   ObjSymIter* it;
    468   ObjSymEntry ent;
    469   int module_has_asm;
    470   if (!o || !o->target || !o->target->obj) return;
    471   if (o->finish_policy.output_kind != KIT_CG_OUTPUT_EXECUTABLE) return;
    472   module_has_asm = opt_module_has_asm(module);
    473   it = obj_symiter_new(o->target->obj);
    474   while (it && obj_symiter_next(it, &ent)) {
    475     const ObjSym* s = ent.sym;
    476     if (!opt_sym_internalizable(s)) continue;
    477     if (opt_sym_must_stay_external(o, module_has_asm, preserved, ent.id, s))
    478       continue;
    479     obj_symbol_set_bind(o->target->obj, ent.id, SB_LOCAL);
    480     obj_symbol_set_vis(o->target->obj, ent.id, SV_HIDDEN);
    481   }
    482   if (it) obj_symiter_free(it);
    483 }
    484 
    485 static int opt_func_is_root(OptImpl* o, const CgIrFunc* f) {
    486   const ObjSym* s;
    487   if (!f || f->removed || f->desc.sym == OBJ_SYM_NONE) return 0;
    488   s = obj_symbol_get(o->target->obj, f->desc.sym);
    489   if (!s || s->removed) return 0;
    490   if (s->bind != SB_LOCAL) return 1;
    491   if (s->flags & KIT_CG_SYM_USED) return 1;
    492   return 0;
    493 }
    494 
    495 static SymAttrs opt_func_sym_attrs(OptImpl* o, const CgIrFunc* f) {
    496   SymAttrs a;
    497   const ObjSym* s;
    498   memset(&a, 0, sizeof a);
    499   if (!f) return a;
    500   s = obj_symbol_get(o->target->obj, f->desc.sym);
    501   a.bind = f->desc.sym_bind ? f->desc.sym_bind : (s ? s->bind : SB_GLOBAL);
    502   a.kind = f->desc.sym_kind ? f->desc.sym_kind : SK_FUNC;
    503   a.size = s ? s->size : 0;
    504   a.common_align = 0;
    505   a.in_comdat = 0;
    506   return a;
    507 }
    508 
    509 static void opt_resolve_duplicate_funcs(OptImpl* o, const CgIrModule* module,
    510                                         OptFuncIndex* index) {
    511   for (u32 i = 0; i < module->nfuncs; ++i) {
    512     CgIrFunc* incoming = module->funcs[i];
    513     u32* existing_idx;
    514     if (!incoming || incoming->removed || incoming->desc.sym == OBJ_SYM_NONE)
    515       continue;
    516     existing_idx = OptFuncIndex_get(index, incoming->desc.sym);
    517     if (!existing_idx) {
    518       (void)OptFuncIndex_set(index, incoming->desc.sym, i);
    519       continue;
    520     }
    521     {
    522       CgIrFunc* existing = module->funcs[*existing_idx];
    523       SymMergeResult mr = symresolve_merge(opt_func_sym_attrs(o, existing),
    524                                            opt_func_sym_attrs(o, incoming));
    525       switch (mr.kind) {
    526         case SYM_MERGE_REPLACE:
    527           if (existing) existing->removed = 1;
    528           (void)OptFuncIndex_set(index, incoming->desc.sym, i);
    529           if (incoming->desc.sym_bind)
    530             obj_symbol_set_bind(o->target->obj, incoming->desc.sym,
    531                                 (SymBind)incoming->desc.sym_bind);
    532           break;
    533         case SYM_MERGE_KEEP_EXISTING:
    534         case SYM_MERGE_COMDAT_DISCARD:
    535           incoming->removed = 1;
    536           if (existing && existing->desc.sym_bind)
    537             obj_symbol_set_bind(o->target->obj, incoming->desc.sym,
    538                                 (SymBind)existing->desc.sym_bind);
    539           break;
    540         case SYM_MERGE_COMMON:
    541           incoming->removed = 1;
    542           break;
    543         case SYM_MERGE_ODR_ERROR:
    544           compiler_panic(o->c, incoming->desc.loc,
    545                          "duplicate definition of symbol");
    546       }
    547     }
    548   }
    549 }
    550 
    551 static void opt_mark_func(u8* reachable, u8* queued, u32* queue, u32* qtail,
    552                           u32 idx) {
    553   if (reachable[idx]) return;
    554   reachable[idx] = 1;
    555   if (!queued[idx]) {
    556     queued[idx] = 1;
    557     queue[(*qtail)++] = idx;
    558   }
    559 }
    560 
    561 static void opt_mark_sym(OptFuncIndex* index, u8* reachable, u8* queued,
    562                          u32* queue, u32* qtail, ObjSymId sym) {
    563   u32* slot;
    564   if (sym == OBJ_SYM_NONE) return;
    565   slot = OptFuncIndex_get(index, sym);
    566   if (slot) opt_mark_func(reachable, queued, queue, qtail, *slot);
    567 }
    568 
    569 static void opt_mark_symset(OptFuncIndex* index, u8* reachable, u8* queued,
    570                             u32* queue, u32* qtail, const ObjSymSet* refs) {
    571   if (!refs || !refs->cap) return;
    572   for (u32 i = 0; i < refs->cap; ++i) {
    573     ObjSymId sym = refs->slots[i].k;
    574     if (sym != OBJ_SYM_NONE)
    575       opt_mark_sym(index, reachable, queued, queue, qtail, sym);
    576   }
    577 }
    578 
    579 static int opt_sym_is_relocatable_data(const ObjSym* s) {
    580   if (!s || s->removed || s->section_id == OBJ_SEC_NONE) return 0;
    581   return s->kind == SK_OBJ || s->kind == SK_TLS || s->kind == SK_COMMON;
    582 }
    583 
    584 static int opt_reloc_inside_sym(const Reloc* r, const ObjSym* s) {
    585   u64 begin, end;
    586   if (!r || r->removed || !opt_sym_is_relocatable_data(s)) return 0;
    587   if (r->section_id != s->section_id) return 0;
    588   begin = s->value;
    589   end = begin + s->size;
    590   if (s->size == 0) end = begin + 1u;
    591   return (u64)r->offset >= begin && (u64)r->offset < end;
    592 }
    593 
    594 static void opt_enqueue_data_sym(OptImpl* o, ObjSymSet* seen, ObjSymId* queue,
    595                                  u32* qtail, ObjSymId sym) {
    596   const ObjSym* s;
    597   if (sym == OBJ_SYM_NONE || ObjSymSet_get(seen, sym)) return;
    598   s = obj_symbol_get(o->target->obj, sym);
    599   if (!opt_sym_is_relocatable_data(s)) return;
    600   (void)ObjSymSet_set(seen, sym, 1);
    601   queue[(*qtail)++] = sym;
    602 }
    603 
    604 static void opt_enqueue_data_symset(OptImpl* o, ObjSymSet* seen,
    605                                     ObjSymId* queue, u32* qtail,
    606                                     const ObjSymSet* refs) {
    607   if (!refs || !refs->cap) return;
    608   for (u32 i = 0; i < refs->cap; ++i) {
    609     ObjSymId sym = refs->slots[i].k;
    610     if (sym != OBJ_SYM_NONE) opt_enqueue_data_sym(o, seen, queue, qtail, sym);
    611   }
    612 }
    613 
    614 static void opt_mark_data_reloc_graph(OptImpl* o, OptFuncIndex* index,
    615                                       u8* reachable, u8* queued,
    616                                       u32* func_queue, u32* func_qtail,
    617                                       ObjSymSet* data_seen,
    618                                       ObjSymId* data_queue, u32* data_qhead,
    619                                       u32* data_qtail) {
    620   u32 nrel = obj_reloc_total(o->target->obj);
    621   while (*data_qhead < *data_qtail) {
    622     ObjSymId data_sym = data_queue[(*data_qhead)++];
    623     const ObjSym* data = obj_symbol_get(o->target->obj, data_sym);
    624     if (!opt_sym_is_relocatable_data(data)) continue;
    625     for (u32 i = 0; i < nrel; ++i) {
    626       const Reloc* r = obj_reloc_at(o->target->obj, i);
    627       if (!opt_reloc_inside_sym(r, data)) continue;
    628       opt_mark_sym(index, reachable, queued, func_queue, func_qtail, r->sym);
    629       opt_enqueue_data_sym(o, data_seen, data_queue, data_qtail, r->sym);
    630     }
    631   }
    632 }
    633 
    634 static ObjSymId opt_data_reloc_exported_root_sym(OptImpl* o, const Reloc* r) {
    635   ObjSymIter* it;
    636   ObjSymEntry ent;
    637   const Section* sec;
    638   if (!r || r->removed || r->section_id == OBJ_SEC_NONE) return OBJ_SYM_NONE;
    639   sec = obj_section_get(o->target->obj, r->section_id);
    640   if (!sec || sec->removed || sec->kind == SEC_TEXT) return OBJ_SYM_NONE;
    641   it = obj_symiter_new(o->target->obj);
    642   if (!it) return OBJ_SYM_NONE;
    643   while (obj_symiter_next(it, &ent)) {
    644     const ObjSym* s = ent.sym;
    645     if (!opt_reloc_inside_sym(r, s)) continue;
    646     if (sec->flags & SF_RETAIN) {
    647       obj_symiter_free(it);
    648       return ent.id;
    649     }
    650     if (s->bind == SB_LOCAL && !(s->flags & KIT_CG_SYM_USED)) continue;
    651     obj_symiter_free(it);
    652     return ent.id;
    653   }
    654   obj_symiter_free(it);
    655   return OBJ_SYM_NONE;
    656 }
    657 
    658 static void opt_root_exported_data_relocs(OptImpl* o, ObjSymSet* data_seen,
    659                                           ObjSymId* data_queue,
    660                                           u32* data_qtail) {
    661   u32 nrel = obj_reloc_total(o->target->obj);
    662   for (u32 i = 0; i < nrel; ++i) {
    663     const Reloc* r = obj_reloc_at(o->target->obj, i);
    664     ObjSymId sym = opt_data_reloc_exported_root_sym(o, r);
    665     if (sym != OBJ_SYM_NONE)
    666       opt_enqueue_data_sym(o, data_seen, data_queue, data_qtail, sym);
    667   }
    668 }
    669 
    670 static void opt_root_aliases(OptImpl* o, const CgIrModule* module,
    671                              OptFuncIndex* index, u8* reachable, u8* queued,
    672                              u32* queue, u32* qtail) {
    673   for (u32 i = 0; module && i < module->naliases; ++i) {
    674     const CgIrAlias* a = &module->aliases[i];
    675     const ObjSym* s = obj_symbol_get(o->target->obj, a->alias_sym);
    676     if (!s || s->removed) continue;
    677     if (s->bind != SB_LOCAL || (s->flags & KIT_CG_SYM_USED))
    678       opt_mark_sym(index, reachable, queued, queue, qtail, a->target_sym);
    679   }
    680 }
    681 
    682 static void opt_refresh_or_prune_aliases(OptImpl* o, const CgIrModule* module,
    683                                          OptFuncIndex* index,
    684                                          const u8* reachable) {
    685   for (u32 i = 0; module && i < module->naliases; ++i) {
    686     const CgIrAlias* a = &module->aliases[i];
    687     const ObjSym* ts;
    688     const ObjSym* as;
    689     u32* target_idx = OptFuncIndex_get(index, a->target_sym);
    690     if (!target_idx || !reachable[*target_idx]) {
    691       as = obj_symbol_get(o->target->obj, a->alias_sym);
    692       if (as && as->bind == SB_LOCAL)
    693         obj_symbol_remove(o->target->obj, a->alias_sym);
    694       continue;
    695     }
    696     ts = obj_symbol_get(o->target->obj, a->target_sym);
    697     if (ts && !ts->removed && ts->section_id != OBJ_SEC_NONE)
    698       obj_symbol_define(o->target->obj, a->alias_sym, ts->section_id, ts->value,
    699                         ts->size);
    700   }
    701 }
    702 
    703 static void opt_prune_debug(OptImpl* o) {
    704   if (o->native && o->native->mc && o->native->mc->debug)
    705     debug_prune_removed_funcs(o->native->mc->debug);
    706 }
    707 
    708 /* Whole-module finalize: seed roots, walk the call/use + data-reloc graph,
    709  * remove unreachable local symbols, then lower + optimize + emit only the live
    710  * set. Arch-independent — the ARM64 path has always finalized this way; -O2 now
    711  * routes every arch through here (see opt_on_finalize). When `do_inline` is set
    712  * the live functions are lowered into a FuncSet and run through the
    713  * whole-program inliner before the per-function machinize/emit suffix. */
    714 static void opt_whole_module_finalize(OptImpl* o, const CgIrModule* module,
    715                                       int do_inline) {
    716   OptFuncIndex index;
    717   ObjSymSet preserved;
    718   ObjSymSet data_seen;
    719   u8* reachable;
    720   u8* queued;
    721   u32* queue;
    722   ObjSymId* data_queue;
    723   u32 qhead = 0;
    724   u32 qtail = 0;
    725   u32 data_qhead = 0;
    726   u32 data_qtail = 0;
    727   u32 nsym = 1;
    728   if (!module || !module->nfuncs) return;
    729   OptFuncIndex_init_cap(&index, o->c->ctx->heap, 0);
    730   ObjSymSet_init_cap(&preserved, o->c->ctx->heap, 0);
    731   ObjSymSet_init_cap(&data_seen, o->c->ctx->heap, 0);
    732   reachable = arena_zarray(o->c->tu, u8, module->nfuncs);
    733   queued = arena_zarray(o->c->tu, u8, module->nfuncs);
    734   queue = arena_array(o->c->tu, u32, module->nfuncs);
    735   {
    736     ObjSymIter* it = obj_symiter_new(o->target->obj);
    737     ObjSymEntry ent;
    738     while (it && obj_symiter_next(it, &ent)) ++nsym;
    739     if (it) obj_symiter_free(it);
    740   }
    741   data_queue = arena_array(o->c->tu, ObjSymId, nsym);
    742   opt_resolve_duplicate_funcs(o, module, &index);
    743   opt_build_preserved_set(o, &preserved);
    744   opt_internalize_non_preserved(o, module, &preserved);
    745   for (u32 i = 0; i < module->nfuncs; ++i) {
    746     if (module->funcs[i] && module->funcs[i]->removed) continue;
    747     if (module->nfile_scope_asms || opt_func_is_root(o, module->funcs[i]))
    748       opt_mark_func(reachable, queued, queue, &qtail, i);
    749   }
    750   opt_root_aliases(o, module, &index, reachable, queued, queue, &qtail);
    751   opt_root_exported_data_relocs(o, &data_seen, data_queue, &data_qtail);
    752   opt_mark_data_reloc_graph(o, &index, reachable, queued, queue, &qtail,
    753                             &data_seen, data_queue, &data_qhead, &data_qtail);
    754   while (qhead < qtail) {
    755     CgIrFunc* f = module->funcs[queue[qhead++]];
    756     opt_mark_symset(&index, reachable, queued, queue, &qtail, &f->call_refs);
    757     opt_mark_symset(&index, reachable, queued, queue, &qtail, &f->global_refs);
    758     opt_enqueue_data_symset(o, &data_seen, data_queue, &data_qtail,
    759                             &f->global_refs);
    760     opt_mark_data_reloc_graph(o, &index, reachable, queued, queue, &qtail,
    761                               &data_seen, data_queue, &data_qhead, &data_qtail);
    762   }
    763   for (u32 i = 0; i < module->nfuncs; ++i) {
    764     CgIrFunc* cg_func = module->funcs[i];
    765     if (cg_func && cg_func->removed) continue;
    766     if (reachable[i]) continue;
    767     if (cg_func && cg_func->desc.sym != OBJ_SYM_NONE) {
    768       const ObjSym* s = obj_symbol_get(o->target->obj, cg_func->desc.sym);
    769       if (s && s->bind == SB_LOCAL)
    770         obj_symbol_remove(o->target->obj, cg_func->desc.sym);
    771     }
    772   }
    773   opt_prune_debug(o);
    774   if (!do_inline) {
    775     /* Streaming emit: lower and run the full per-function pipeline in place.
    776      * Preserves the historical ARM64 -O1 behavior exactly. */
    777     for (u32 i = 0; i < module->nfuncs; ++i) {
    778       Func* f;
    779       if (!reachable[i]) continue;
    780       metrics_scope_begin(o->c, "opt.o1.cg_ir_lower");
    781       f = opt_func_from_cg_ir(o->c, module->funcs[i]);
    782       metrics_scope_end(o->c, "opt.o1.cg_ir_lower");
    783       opt_run_o1_native(o, f);
    784       opt_maybe_capture_interp(o, module->funcs[i]);
    785     }
    786   } else {
    787     /* Whole-program inline: lower + CFG-prep every live function into one
    788      * FuncSet so the inliner can resolve direct callees by symbol across the
    789      * module, inline under the growth-gated cost model, then run the
    790      * machinize/emit suffix on each (cfg_dirty=1 because opt_inline left the
    791      * caller CFGs stale). Functions and their source CgIrFuncs are tracked in
    792      * parallel so the interp-capture re-lowers the right body. */
    793     FuncSet fs;
    794     CgIrFunc** cg_srcs;
    795     u32 nlive = 0;
    796     for (u32 i = 0; i < module->nfuncs; ++i)
    797       if (reachable[i] && module->funcs[i] && !module->funcs[i]->removed)
    798         ++nlive;
    799     memset(&fs, 0, sizeof fs);
    800     fs.c = o->c;
    801     fs.arena = o->c->tu;
    802     fs.funcs = arena_array(o->c->tu, Func*, nlive ? nlive : 1u);
    803     fs.cap = nlive;
    804     cg_srcs = arena_array(o->c->tu, CgIrFunc*, nlive ? nlive : 1u);
    805     for (u32 i = 0; i < module->nfuncs; ++i) {
    806       Func* f;
    807       if (!reachable[i] || !module->funcs[i] || module->funcs[i]->removed)
    808         continue;
    809       metrics_scope_begin(o->c, "opt.o1.cg_ir_lower");
    810       f = opt_lower_for_inline(o, module->funcs[i]);
    811       metrics_scope_end(o->c, "opt.o1.cg_ir_lower");
    812       opt_o1_native_prepare(o, f);
    813       cg_srcs[fs.nfuncs] = module->funcs[i];
    814       fs.funcs[fs.nfuncs++] = f;
    815     }
    816     metrics_scope_begin(o->c, "opt.inline.total");
    817     opt_inline(&fs, OPT_WHOLE_PROGRAM_INLINE_ITERS);
    818     metrics_scope_end(o->c, "opt.inline.total");
    819     for (u32 k = 0; k < fs.nfuncs; ++k) {
    820       metrics_scope_begin(o->c, "opt.o1.total");
    821       opt_o1_native_finish(o, fs.funcs[k], /*cfg_dirty=*/1);
    822       metrics_scope_end(o->c, "opt.o1.total");
    823       opt_maybe_capture_interp(o, cg_srcs[k]);
    824     }
    825   }
    826   opt_refresh_or_prune_aliases(o, module, &index, reachable);
    827   ObjSymSet_fini(&data_seen);
    828   ObjSymSet_fini(&preserved);
    829   OptFuncIndex_fini(&index);
    830 }
    831 
    832 static void opt_on_finalize(void* user, const CgIrModule* module) {
    833   OptImpl* o = (OptImpl*)user;
    834   /* File-scope asm blocks are captured during recording (no live target then)
    835    * and replayed here, before finalize. Emission order relative to functions
    836    * does not matter: each block selects its own sections (.data/.text/...). */
    837   if (o->native && o->native->file_scope_asm && module) {
    838     for (u32 i = 0; i < module->nfile_scope_asms; ++i)
    839       o->native->file_scope_asm(o->native, module->file_scope_asms[i].src,
    840                                 module->file_scope_asms[i].len);
    841   }
    842   /* Whole-program mode finalizes through the module sweep — one path for every
    843    * arch: GC + cross-function inlining over the full reachable set. If it were
    844    * off, every arch would have emitted eagerly in opt_on_func and the sweep
    845    * would find nothing, so it is skipped. */
    846   if (o->whole_program)
    847     opt_whole_module_finalize(o, module, /*do_inline=*/o->whole_program);
    848   if (o->native && o->native->finalize) o->native->finalize(o->native);
    849 }
    850 
    851 static void opt_on_destroy(void* user) {
    852   OptImpl* o = (OptImpl*)user;
    853   if (o->native && o->native->destroy) o->native->destroy(o->native);
    854   (void)o;
    855 }
    856 
    857 static int opt_on_local_static_data_begin(void* user,
    858                                           const CGLocalStaticDataDesc* desc) {
    859   (void)desc;
    860   OptImpl* o = (OptImpl*)user;
    861   return o && o->target && o->target->local_static_data_begin &&
    862          o->target->local_static_data_write &&
    863          o->target->local_static_data_label_addr &&
    864          o->target->local_static_data_end;
    865 }
    866 
    867 static const char* opt_on_tail_call_unrealizable_reason(
    868     void* user, const struct CGFuncDesc* caller, const CGCallDesc* call) {
    869   OptImpl* o = (OptImpl*)user;
    870   int callee_va = 0, caller_va = 0;
    871   u32 callee_nparams = 0;
    872   u32 outgoing, incoming;
    873   if (!o || !o->native || !o->native->signature_stack_bytes) return NULL;
    874   /* A realized sibling call reuses the area this function received for its own
    875    * incoming stack arguments to hold the callee's outgoing stack arguments, so
    876    * the callee's must fit. If they don't, the tail isn't realizable and CG
    877    * falls back to an ordinary call + return. */
    878   outgoing = o->native->signature_stack_bytes(o->native, call->fn_type,
    879                                               &callee_va, &callee_nparams);
    880   incoming = o->native->signature_stack_bytes(o->native, caller->fn_type,
    881                                               &caller_va, NULL);
    882   /* A variadic caller's incoming layout includes the register save / overflow
    883    * area, which isn't a simple reusable parameter slab — don't realize. And if
    884    * the call actually passes variadic (beyond-fixed) arguments, their stack
    885    * footprint isn't captured by the fixed-parameter sizing above, so be
    886    * conservative and fall back. A variadic callee invoked with only its fixed
    887    * arguments (e.g. `musttail sink(x)`) is fine and uses the byte comparison.
    888    */
    889   if (caller_va) return "variadic caller cannot host a sibling call";
    890   if (callee_va && call->nargs > callee_nparams)
    891     return "variadic tail call arguments are not realizable as a sibling call";
    892   if (outgoing > incoming)
    893     return "tail call stack arguments exceed the caller's parameter area";
    894   return NULL;
    895 }
    896 
    897 static int opt_on_asm_is_reg_constraint(void* user, const char* constraint) {
    898   OptImpl* o = (OptImpl*)user;
    899   return native_asm_constraint_is_reg(o ? o->native : NULL, constraint);
    900 }
    901 
    902 CgTarget* opt_cgtarget_new(Compiler* c, CgTarget* target, int level) {
    903   if (!target)
    904     compiler_panic(c, (SrcLoc){0, 0, 0}, "opt_cgtarget_new: target is NULL");
    905   if (level < 1)
    906     compiler_panic(c, (SrcLoc){0, 0, 0},
    907                    "opt_cgtarget_new: level %d out of range", level);
    908   OptImpl* o = arena_znew(c->tu, OptImpl);
    909   o->c = c;
    910   o->target = target;
    911   o->native = native_direct_target_native(target);
    912   o->level = level;
    913   /* Whenever the optimizer is engaged (-O1 and above) we run whole-program
    914    * optimization: deferred emission plus the module-wide reachability sweep and
    915    * cross-function inliner. The optimizer recorder only exists at level >= 1
    916    * (see kit_cg_begin), so this is effectively "on whenever optimizing".
    917    * -O0 uses the single-pass direct target and never reaches this code. */
    918   o->whole_program = (level >= 1) ? 1 : 0;
    919 
    920   CgIrRecorderConfig cfg;
    921   memset(&cfg, 0, sizeof cfg);
    922   cfg.func_recorded = opt_on_func;
    923   cfg.finalize = opt_on_finalize;
    924   cfg.destroy = opt_on_destroy;
    925   cfg.local_static_data_begin = opt_on_local_static_data_begin;
    926   cfg.tail_call_unrealizable_reason = opt_on_tail_call_unrealizable_reason;
    927   cfg.asm_is_reg_constraint = opt_on_asm_is_reg_constraint;
    928   cfg.user = o;
    929   return cg_ir_recorder_new(c, target->obj, &cfg);
    930 }
    931 
    932 void opt_set_dump_writer(CgTarget* t, Writer* w) {
    933   CgIrRecorder* rec = cg_ir_recorder_from_target(t);
    934   OptImpl* o = rec ? (OptImpl*)cg_ir_recorder_user(rec) : NULL;
    935   if (o) o->dump_writer = w;
    936 }
    937 
    938 void opt_set_finish_policy(CgTarget* t, const CgFinishPolicy* policy) {
    939   CgIrRecorder* rec = cg_ir_recorder_from_target(t);
    940   OptImpl* o = rec ? (OptImpl*)cg_ir_recorder_user(rec) : NULL;
    941   if (!o) return;
    942   memset(&o->finish_policy, 0, sizeof(o->finish_policy));
    943   if (policy) o->finish_policy = *policy;
    944 }