kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

pass_jump.c (20540B)


      1 #include <string.h>
      2 
      3 #include "opt/opt_internal.h"
      4 
      5 #define BLOCK_NONE ((u32)~0u)
      6 
      7 typedef struct JumpCleanupCtx {
      8   Func* f;
      9   u32* emit_index_by_block;
     10   u32* forwarded_target_by_block;
     11   u32* forward_path;
     12   u8* has_label_addr_ref;
     13 } JumpCleanupCtx;
     14 
     15 static u32 emit_order_index(const JumpCleanupCtx* c, u32 block) {
     16   if (block >= c->f->nblocks) return BLOCK_NONE;
     17   return c->emit_index_by_block[block];
     18 }
     19 
     20 static u32 next_emit_block(const JumpCleanupCtx* c, u32 block) {
     21   u32 idx = emit_order_index(c, block);
     22   if (idx == BLOCK_NONE || idx + 1u >= c->f->emit_order_n) return BLOCK_NONE;
     23   return c->f->emit_order[idx + 1u];
     24 }
     25 
     26 static void refresh_emit_index_range(JumpCleanupCtx* c, u32 first, u32 last) {
     27   Func* f = c->f;
     28   if (!f || first >= f->emit_order_n) return;
     29   if (last >= f->emit_order_n) last = f->emit_order_n - 1u;
     30   for (u32 i = first; i <= last; ++i) {
     31     u32 b = f->emit_order[i];
     32     if (b < f->nblocks) c->emit_index_by_block[b] = i;
     33   }
     34 }
     35 
     36 static void mark_label_addr_refs(Func* f, u8* refs) {
     37   if (!f || !refs) return;
     38   for (u32 b = 0; b < f->nblocks; ++b) {
     39     Block* bl = &f->blocks[b];
     40     for (u32 i = 0; i < bl->ninsts; ++i) {
     41       Inst* in = &bl->insts[i];
     42       switch ((IROp)in->op) {
     43         case IR_LOAD_LABEL_ADDR:
     44           if ((u32)in->extra.imm < f->nblocks) refs[(u32)in->extra.imm] = 1;
     45           break;
     46         case IR_LOCAL_STATIC_DATA_LABEL_ADDR: {
     47           CgIrLocalStaticLabelAux* aux =
     48               (CgIrLocalStaticLabelAux*)in->extra.aux;
     49           if (aux && (u32)aux->target < f->nblocks) refs[(u32)aux->target] = 1;
     50           break;
     51         }
     52         default:
     53           break;
     54       }
     55     }
     56   }
     57 }
     58 
     59 static int block_has_label_addr_ref(const JumpCleanupCtx* c, u32 block) {
     60   if (!c || block >= c->f->nblocks) return 0;
     61   return c->has_label_addr_ref && c->has_label_addr_ref[block];
     62 }
     63 
     64 static JumpCleanupCtx jump_cleanup_ctx(Func* f) {
     65   JumpCleanupCtx c;
     66   c.f = f;
     67   c.emit_index_by_block =
     68       arena_array(f->arena, u32, f->nblocks ? f->nblocks : 1u);
     69   c.forwarded_target_by_block =
     70       arena_array(f->arena, u32, f->nblocks ? f->nblocks : 1u);
     71   c.forward_path = arena_array(f->arena, u32, f->nblocks ? f->nblocks : 1u);
     72   c.has_label_addr_ref =
     73       arena_zarray(f->arena, u8, f->nblocks ? f->nblocks : 1u);
     74   for (u32 b = 0; b < f->nblocks; ++b) c.emit_index_by_block[b] = BLOCK_NONE;
     75   for (u32 b = 0; b < f->nblocks; ++b)
     76     c.forwarded_target_by_block[b] = BLOCK_NONE;
     77   for (u32 i = 0; i < f->emit_order_n; ++i) {
     78     u32 b = f->emit_order[i];
     79     if (b < f->nblocks) c.emit_index_by_block[b] = i;
     80   }
     81   mark_label_addr_refs(f, c.has_label_addr_ref);
     82   return c;
     83 }
     84 
     85 static int single_jump_block(const JumpCleanupCtx* c, u32 block,
     86                              u32* target_out) {
     87   Func* f = c->f;
     88   if (block >= f->nblocks) return 0;
     89   Block* bl = &f->blocks[block];
     90   if (block_has_label_addr_ref(c, block)) return 0;
     91   if (bl->ninsts != 1 || bl->nsucc != 1) return 0;
     92   if ((IROp)bl->insts[0].op != IR_BR) return 0;
     93   if (target_out) *target_out = bl->succ[0];
     94   return 1;
     95 }
     96 
     97 static int empty_fallthrough_block(const JumpCleanupCtx* c, u32 block,
     98                                    u32* target_out) {
     99   Func* f = c->f;
    100   if (block >= f->nblocks || block == f->entry) return 0;
    101   Block* bl = &f->blocks[block];
    102   if (block_has_label_addr_ref(c, block)) return 0;
    103   if (bl->ninsts != 0 || bl->nsucc != 0) return 0;
    104   u32 idx = emit_order_index(c, block);
    105   if (idx == BLOCK_NONE || idx + 1u >= f->emit_order_n) return 0;
    106   u32 next = f->emit_order[idx + 1u];
    107   if (next >= f->nblocks || next == block) return 0;
    108   if (target_out) *target_out = next;
    109   return 1;
    110 }
    111 
    112 static int passthrough_succ_block(const JumpCleanupCtx* c, u32 block,
    113                                   u32* target_out) {
    114   Func* f = c->f;
    115   if (block >= f->nblocks || block == f->entry) return 0;
    116   Block* bl = &f->blocks[block];
    117   if (block_has_label_addr_ref(c, block)) return 0;
    118   if (bl->nsucc != 1) return 0;
    119   if (bl->succ[0] == block) return 0;
    120   if (bl->ninsts != 0) {
    121     if (bl->ninsts != 1) return 0;
    122     switch ((IROp)bl->insts[0].op) {
    123       case IR_NOP:
    124       case IR_SCOPE_BEGIN:
    125       case IR_SCOPE_END:
    126         break;
    127       default:
    128         return 0;
    129     }
    130   }
    131   if (target_out) *target_out = bl->succ[0];
    132   return 1;
    133 }
    134 
    135 static u32 forward_jump_target_ex(JumpCleanupCtx* c, u32 target,
    136                                   int allow_empty_fallthrough) {
    137   u32 cur = target;
    138   Func* f = c->f;
    139   u32 npath = 0;
    140   u32 result = target;
    141   for (u32 step = 0; step < f->nblocks; ++step) {
    142     u32 next = BLOCK_NONE;
    143     if (cur >= f->nblocks) {
    144       result = cur;
    145       break;
    146     }
    147     if (c->forwarded_target_by_block[cur] != BLOCK_NONE) {
    148       result = c->forwarded_target_by_block[cur];
    149       break;
    150     }
    151     if (!single_jump_block(c, cur, &next) &&
    152         (!allow_empty_fallthrough ||
    153          (!passthrough_succ_block(c, cur, &next) &&
    154           !empty_fallthrough_block(c, cur, &next)))) {
    155       result = cur;
    156       c->forwarded_target_by_block[cur] = result;
    157       break;
    158     }
    159     c->forward_path[npath++] = cur;
    160     if (next == cur) {
    161       result = target;
    162       break;
    163     }
    164     cur = next;
    165     if (step + 1u == f->nblocks) result = target;
    166   }
    167   for (u32 i = 0; i < npath; ++i)
    168     c->forwarded_target_by_block[c->forward_path[i]] = result;
    169   return result;
    170 }
    171 
    172 static u32 forward_jump_target(JumpCleanupCtx* c, u32 target) {
    173   return forward_jump_target_ex(c, target, 0);
    174 }
    175 
    176 static int invert_cmp(CmpOp op, CmpOp* out) {
    177   switch (op) {
    178     case CMP_EQ:
    179       *out = CMP_NE;
    180       return 1;
    181     case CMP_NE:
    182       *out = CMP_EQ;
    183       return 1;
    184     case CMP_LT_S:
    185       *out = CMP_GE_S;
    186       return 1;
    187     case CMP_LE_S:
    188       *out = CMP_GT_S;
    189       return 1;
    190     case CMP_GT_S:
    191       *out = CMP_LE_S;
    192       return 1;
    193     case CMP_GE_S:
    194       *out = CMP_LT_S;
    195       return 1;
    196     case CMP_LT_U:
    197       *out = CMP_GE_U;
    198       return 1;
    199     case CMP_LE_U:
    200       *out = CMP_GT_U;
    201       return 1;
    202     case CMP_GT_U:
    203       *out = CMP_LE_U;
    204       return 1;
    205     case CMP_GE_U:
    206       *out = CMP_LT_U;
    207       return 1;
    208     /* FP: negation flips ordered<->unordered (the NaN outcome flips too) as
    209      * well as negating the relation. Kept byte-for-byte in sync with
    210      * api_invert_cmp (src/cg/fold.c); see that function for the rationale. */
    211     case CMP_OEQ_F:
    212       *out = CMP_UNE_F;
    213       return 1;
    214     case CMP_ONE_F:
    215       *out = CMP_UEQ_F;
    216       return 1;
    217     case CMP_OLT_F:
    218       *out = CMP_UGE_F;
    219       return 1;
    220     case CMP_OLE_F:
    221       *out = CMP_UGT_F;
    222       return 1;
    223     case CMP_OGT_F:
    224       *out = CMP_ULE_F;
    225       return 1;
    226     case CMP_OGE_F:
    227       *out = CMP_ULT_F;
    228       return 1;
    229     case CMP_UEQ_F:
    230       *out = CMP_ONE_F;
    231       return 1;
    232     case CMP_UNE_F:
    233       *out = CMP_OEQ_F;
    234       return 1;
    235     case CMP_ULT_F:
    236       *out = CMP_OGE_F;
    237       return 1;
    238     case CMP_ULE_F:
    239       *out = CMP_OGT_F;
    240       return 1;
    241     case CMP_UGT_F:
    242       *out = CMP_OLE_F;
    243       return 1;
    244     case CMP_UGE_F:
    245       *out = CMP_OLT_F;
    246       return 1;
    247     default:
    248       return 0;
    249   }
    250 }
    251 
    252 static int block_has_only_pred(const Func* f, u32 block, u32 pred) {
    253   if (block >= f->nblocks) return 0;
    254   const Block* bl = &f->blocks[block];
    255   return bl->npreds == 1 && bl->preds && bl->preds[0] == pred;
    256 }
    257 
    258 static void cleanup_branch_targets(JumpCleanupCtx* c) {
    259   Func* f = c->f;
    260   for (u32 b = 0; b < f->nblocks; ++b) {
    261     Block* bl = &f->blocks[b];
    262     u32 nsucc = 0;
    263     if (!bl->ninsts) continue;
    264     IROp op = (IROp)bl->insts[bl->ninsts - 1u].op;
    265     switch (op) {
    266       case IR_BR:
    267         nsucc = bl->nsucc;
    268         break;
    269       case IR_CMP_BRANCH:
    270       case IR_CONDBR:
    271         nsucc = bl->nsucc ? 1u : 0u;
    272         break;
    273       default:
    274         continue;
    275     }
    276     for (u32 s = 0; s < nsucc; ++s) {
    277       u32 target = bl->succ[s];
    278       u32 forwarded = forward_jump_target_ex(c, target, 1);
    279       if (forwarded < f->nblocks) bl->succ[s] = forwarded;
    280     }
    281   }
    282 }
    283 
    284 static void cleanup_invert_jump_fallthrough(JumpCleanupCtx* c) {
    285   Func* f = c->f;
    286   for (u32 b = 0; b < f->nblocks; ++b) {
    287     Block* bl = &f->blocks[b];
    288     if (!bl->ninsts || bl->nsucc != 2) continue;
    289     Inst* last = &bl->insts[bl->ninsts - 1u];
    290     if ((IROp)last->op != IR_CMP_BRANCH) continue;
    291 
    292     u32 taken = bl->succ[0];
    293     u32 fallthrough = bl->succ[1];
    294     u32 next = next_emit_block(c, b);
    295     u32 fallthrough_idx = emit_order_index(c, fallthrough);
    296     u32 jump_target = BLOCK_NONE;
    297     CmpOp inverted;
    298     if (next != fallthrough) continue;
    299     if (fallthrough_idx == BLOCK_NONE ||
    300         fallthrough_idx + 1u >= f->emit_order_n)
    301       continue;
    302     if (f->emit_order[fallthrough_idx + 1u] != taken) continue;
    303     if (!block_has_only_pred(f, fallthrough, b)) continue;
    304     if (!single_jump_block(c, fallthrough, &jump_target)) continue;
    305     jump_target = forward_jump_target(c, jump_target);
    306     if (jump_target >= f->nblocks) continue;
    307     if (!invert_cmp((CmpOp)last->extra.imm, &inverted)) continue;
    308 
    309     last->extra.imm = inverted;
    310     bl->succ[0] = jump_target;
    311     bl->succ[1] = taken;
    312   }
    313 }
    314 
    315 static void cleanup_invert_taken_fallthrough(JumpCleanupCtx* c) {
    316   Func* f = c->f;
    317   for (u32 b = 0; b < f->nblocks; ++b) {
    318     Block* bl = &f->blocks[b];
    319     if (!bl->ninsts || bl->nsucc != 2) continue;
    320     Inst* last = &bl->insts[bl->ninsts - 1u];
    321     if ((IROp)last->op != IR_CMP_BRANCH) continue;
    322 
    323     u32 taken = bl->succ[0];
    324     u32 fallthrough = bl->succ[1];
    325     if (next_emit_block(c, b) != taken) continue;
    326     CmpOp inverted;
    327     if (!invert_cmp((CmpOp)last->extra.imm, &inverted)) continue;
    328 
    329     last->extra.imm = inverted;
    330     bl->succ[0] = fallthrough;
    331     bl->succ[1] = taken;
    332   }
    333 }
    334 
    335 /* Greedy chain-extension reorder: pick the entry block, then repeatedly
    336  * extend the current chain to its preferred unvisited successor. For
    337  * conditional branches that's `succ[1]` (fallthrough); for IR_BR or any
    338  * 1-succ block it's `succ[0]`. When the chain stalls, start a new one from
    339  * the lowest unvisited block id. After this pass, every chained pair `(p,
    340  * n)` is adjacent in emit_order, so the existing branch-invert and
    341  * trailing-branch-strip cleanups can collapse the jump.
    342  *
    343  * kit's frontend loop lowering puts `inc` between `head` and `body`
    344  * (because `continue` jumps to `inc`), forcing `b body` and `b inc` per
    345  * iteration. Without reordering the trailing-branch strip can't fire —
    346  * `body`'s next block in original order is `exit`, not `inc`. After
    347  * reordering, body→inc is adjacent and the back-edge collapses. */
    348 static u32 chain_preferred_succ(const Block* bl) {
    349   if (!bl->ninsts) {
    350     return bl->nsucc ? bl->succ[0] : BLOCK_NONE;
    351   }
    352   IROp op = (IROp)bl->insts[bl->ninsts - 1u].op;
    353   switch (op) {
    354     case IR_CMP_BRANCH:
    355     case IR_CONDBR:
    356       return bl->nsucc >= 2u ? bl->succ[1] : BLOCK_NONE;
    357     case IR_BR:
    358       return bl->nsucc >= 1u ? bl->succ[0] : BLOCK_NONE;
    359     default:
    360       /* RET/SWITCH/INDIRECT_BRANCH: no implicit fallthrough preference. */
    361       return BLOCK_NONE;
    362   }
    363 }
    364 
    365 static void cleanup_reorder_for_fallthrough(JumpCleanupCtx* c) {
    366   Func* f = c->f;
    367   if (f->nblocks < 2u || f->emit_order_n < 2u) return;
    368   u8* visited = arena_zarray(f->arena, u8, f->nblocks);
    369   u32* new_order = arena_array(f->arena, u32, f->emit_order_n);
    370   u32 w = 0;
    371 
    372   /* Entry must come first regardless. */
    373   u32 entry = f->entry;
    374   if (entry >= f->nblocks) return;
    375   new_order[w++] = entry;
    376   visited[entry] = 1;
    377   u32 cur = entry;
    378 
    379   /* Extend by preferred successor. When the chain stalls, restart from the
    380    * next unvisited block in original emit_order to preserve some locality. */
    381   u32 scan_cursor = 0;
    382   for (;;) {
    383     u32 next = chain_preferred_succ(&f->blocks[cur]);
    384     if (next >= f->nblocks || visited[next]) {
    385       /* Fall back: any unvisited successor of cur — extends the chain even
    386        * if it's the "taken" arm of a conditional. The subsequent
    387        * cleanup_invert_taken_fallthrough will invert the branch. */
    388       const Block* cb = &f->blocks[cur];
    389       next = BLOCK_NONE;
    390       for (u32 s = 0; s < cb->nsucc; ++s) {
    391         u32 cand = cb->succ[s];
    392         if (cand < f->nblocks && !visited[cand]) {
    393           next = cand;
    394           break;
    395         }
    396       }
    397     }
    398     if (next >= f->nblocks || visited[next]) {
    399       /* Start a new chain from the lowest unvisited block in original order. */
    400       next = BLOCK_NONE;
    401       while (scan_cursor < f->emit_order_n) {
    402         u32 cand = f->emit_order[scan_cursor++];
    403         if (cand < f->nblocks && !visited[cand]) {
    404           next = cand;
    405           break;
    406         }
    407       }
    408       if (next >= f->nblocks) break;
    409     }
    410     new_order[w++] = next;
    411     visited[next] = 1;
    412     cur = next;
    413   }
    414 
    415   if (w != f->emit_order_n) return; /* Conservative: bail if we missed any. */
    416   memcpy(f->emit_order, new_order, sizeof(u32) * w);
    417   /* Refresh the cached emit-index map so subsequent cleanups see the new
    418    * order. */
    419   for (u32 b = 0; b < f->nblocks; ++b) c->emit_index_by_block[b] = BLOCK_NONE;
    420   for (u32 i = 0; i < f->emit_order_n; ++i) {
    421     u32 b = f->emit_order[i];
    422     if (b < f->nblocks) c->emit_index_by_block[b] = i;
    423   }
    424 }
    425 
    426 /* Rotate simple counted loops so the test sits at the bottom: the back-edge
    427  * becomes the per-iteration conditional branch and the body falls through to
    428  * the test, eliminating the unconditional back-jump the head-test shape emits
    429  * every iteration.
    430  *
    431  * Pattern (after the greedy reorder):
    432  *
    433  *   H (emit i):  IR_CMP_BRANCH, succ[0]=E (exit, taken), succ[1]=B (body entry,
    434  *               the fallthrough, placed right after H by the greedy reorder)
    435  *   ... body chain ...
    436  *   L (emit k):  the latch — a predecessor of H at a later emit position, i.e.
    437  *               the source of the loop's back-edge to H
    438  *
    439  * The rewrite moves H from position i to just after L (rotating the emit-order
    440  * run [i..k] left by one) and inverts H's test in place: H's taken edge becomes
    441  * the back-edge to the body entry (the per-iteration conditional) and its
    442  * fallthrough becomes the exit E. No CFG edges move — reordering emit_order is
    443  * always valid because pass_native_emit emits an explicit jump for any
    444  * successor that isn't the textual next block. After the move L falls through
    445  * to H (its back-edge `b H` is stripped), and the preheader keeps its now-
    446  * forward `b H` (executed once, serving as the zero-trip guard); the
    447  * unconditional back-jump every iteration is gone.
    448  *
    449  * Conservative guards: the body entry must be H's fallthrough and immediately
    450  * follow H (the greedy chain), and H must have exactly one back-edge
    451  * predecessor after it in emit order (reducible, single-latch loop). */
    452 static void cleanup_rotate_loops(JumpCleanupCtx* c) {
    453   Func* f = c->f;
    454   if (f->emit_order_n < 2u) return;
    455   for (u32 i = 0; i + 1u < f->emit_order_n; ++i) {
    456     u32 h = f->emit_order[i];
    457     if (h >= f->nblocks) continue;
    458     Block* H = &f->blocks[h];
    459     if (!H->ninsts || H->nsucc != 2) continue;
    460     Inst* hterm = &H->insts[H->ninsts - 1u];
    461     if ((IROp)hterm->op != IR_CMP_BRANCH) continue;
    462     u32 body = H->succ[1]; /* loop body entry (fallthrough) */
    463     if (f->emit_order[i + 1u] != body) continue;
    464     /* Latch: the single predecessor of H later in emit order (back-edge
    465      * source). Zero or multiple => not a simple single-latch loop; skip. */
    466     u32 latch_pos = BLOCK_NONE;
    467     int ok = 1;
    468     for (u32 p = 0; p < H->npreds; ++p) {
    469       u32 pos = emit_order_index(c, H->preds[p]);
    470       if (pos == BLOCK_NONE || pos <= i) continue;
    471       if (latch_pos != BLOCK_NONE) {
    472         ok = 0;
    473         break;
    474       }
    475       latch_pos = pos;
    476     }
    477     if (!ok || latch_pos == BLOCK_NONE) continue;
    478     CmpOp inverted;
    479     if (!invert_cmp((CmpOp)hterm->extra.imm, &inverted)) continue;
    480     u32 exit = H->succ[0];
    481     for (u32 k = i; k < latch_pos; ++k)
    482       f->emit_order[k] = f->emit_order[k + 1u];
    483     f->emit_order[latch_pos] = h;
    484     hterm->extra.imm = inverted;
    485     H->succ[0] = body; /* taken: back-edge to the loop body */
    486     H->succ[1] = exit; /* fallthrough: loop exit */
    487     refresh_emit_index_range(c, i, latch_pos);
    488   }
    489 }
    490 
    491 static void cleanup_layout_fallthrough_branches(const JumpCleanupCtx* c) {
    492   Func* f = c->f;
    493   for (u32 b = 0; b < f->nblocks; ++b) {
    494     Block* bl = &f->blocks[b];
    495     if (!bl->ninsts || bl->nsucc != 1) continue;
    496     Inst* last = &bl->insts[bl->ninsts - 1u];
    497     if ((IROp)last->op != IR_BR) continue;
    498     if (next_emit_block(c, b) != bl->succ[0]) continue;
    499     memset(last, 0, sizeof *last);
    500     --bl->ninsts;
    501   }
    502 }
    503 
    504 static int forward_empty_fallthrough_chain(JumpCleanupCtx* c, u32 target,
    505                                            u32* target_out) {
    506   u32 cur = target;
    507   Func* f = c->f;
    508   for (u32 step = 0; step < f->nblocks; ++step) {
    509     u32 next_block = BLOCK_NONE;
    510     if (!empty_fallthrough_block(c, cur, &next_block)) return 0;
    511     if (next_block >= f->nblocks) return 0;
    512     if (!empty_fallthrough_block(c, next_block, NULL)) {
    513       if (target_out) *target_out = next_block;
    514       return 1;
    515     }
    516     cur = next_block;
    517   }
    518   return 0;
    519 }
    520 
    521 static int full_cond_fallthrough_forwardable(JumpCleanupCtx* c, u32 pred,
    522                                              u32 target, u32* target_out) {
    523   u32 next = next_emit_block(c, pred);
    524   if (next != target) return 0;
    525   return forward_empty_fallthrough_chain(c, target, target_out);
    526 }
    527 
    528 static u32 full_forwardable_successors(const Block* bl, const Inst* term) {
    529   switch ((IROp)term->op) {
    530     case IR_BR:
    531     case IR_SWITCH:
    532       return bl->nsucc;
    533     case IR_CONDBR:
    534     case IR_CMP_BRANCH:
    535       return bl->nsucc < 2u ? bl->nsucc : 2u;
    536     default:
    537       return 0;
    538   }
    539 }
    540 
    541 static void full_rewrite_successor(Func* f, u32 b, u32 old_succ, u32 new_succ) {
    542   opt_replace_succ_ref(f, b, old_succ, new_succ);
    543 }
    544 
    545 static int full_forward_branch_targets(JumpCleanupCtx* c) {
    546   Func* f = c->f;
    547   int changed = 0;
    548   for (u32 b = 0; b < f->nblocks; ++b) {
    549     Block* bl = &f->blocks[b];
    550     if (!bl->ninsts) continue;
    551     Inst* term = &bl->insts[bl->ninsts - 1u];
    552     u32 nsucc = full_forwardable_successors(bl, term);
    553     for (u32 s = 0; s < nsucc; ++s) {
    554       u32 target = bl->succ[s];
    555       u32 forwarded = forward_jump_target_ex(c, target, 1);
    556       if (((IROp)term->op == IR_CONDBR || (IROp)term->op == IR_CMP_BRANCH) &&
    557           s == 1u) {
    558         if (!full_cond_fallthrough_forwardable(c, b, target, &forwarded))
    559           continue;
    560       }
    561       if (forwarded >= f->nblocks || forwarded == target) continue;
    562       full_rewrite_successor(f, b, target, forwarded);
    563       changed = 1;
    564     }
    565   }
    566   return changed;
    567 }
    568 
    569 static int full_collapse_same_target_branches(Func* f) {
    570   int changed = 0;
    571   for (u32 b = 0; b < f->nblocks; ++b) {
    572     Block* bl = &f->blocks[b];
    573     if (!bl->ninsts || bl->nsucc != 2) continue;
    574     Inst* term = &bl->insts[bl->ninsts - 1u];
    575     IROp op = (IROp)term->op;
    576     if (op != IR_CONDBR && op != IR_CMP_BRANCH) continue;
    577     if (bl->succ[0] != bl->succ[1]) continue;
    578     InstId id = term->id;
    579     SrcLoc loc = term->loc;
    580     memset(term, 0, sizeof *term);
    581     term->op = IR_BR;
    582     term->id = id;
    583     term->loc = loc;
    584     bl->nsucc = 1;
    585     changed = 1;
    586   }
    587   return changed;
    588 }
    589 
    590 void opt_jump_cleanup(Func* f, OptJumpCleanupStage stage) {
    591   if (!f) return;
    592   opt_analysis_invalidate(f, OPT_ANALYSIS_CFG | OPT_ANALYSIS_DEF_USE |
    593                                  OPT_ANALYSIS_DOM | OPT_ANALYSIS_LOOP);
    594   JumpCleanupCtx c = jump_cleanup_ctx(f);
    595   if (stage == OPT_JUMP_CLEANUP_CFG) {
    596     cleanup_invert_jump_fallthrough(&c);
    597     cleanup_branch_targets(&c);
    598   } else if (stage == OPT_JUMP_CLEANUP_LAYOUT) {
    599     cleanup_reorder_for_fallthrough(&c);
    600     cleanup_rotate_loops(&c);
    601     cleanup_invert_taken_fallthrough(&c);
    602     cleanup_layout_fallthrough_branches(&c);
    603   }
    604 }
    605 
    606 void opt_jump_opt(Func* f) {
    607   if (!f) return;
    608   int changed = 0;
    609 
    610   opt_analysis_invalidate(f, OPT_ANALYSIS_CFG | OPT_ANALYSIS_DEF_USE |
    611                                  OPT_ANALYSIS_DOM | OPT_ANALYSIS_LOOP);
    612 
    613   for (u32 iter = 0; iter < f->nblocks; ++iter) {
    614     JumpCleanupCtx c = jump_cleanup_ctx(f);
    615     int iter_changed = 0;
    616     iter_changed |= full_forward_branch_targets(&c);
    617     opt_build_cfg(f);
    618     c = jump_cleanup_ctx(f);
    619     cleanup_invert_jump_fallthrough(&c);
    620     cleanup_branch_targets(&c);
    621     iter_changed |= full_collapse_same_target_branches(f);
    622     if (!iter_changed) break;
    623     changed = 1;
    624     opt_build_cfg(f);
    625   }
    626 
    627   if (changed) {
    628     opt_analysis_invalidate(f, OPT_ANALYSIS_CFG | OPT_ANALYSIS_DEF_USE |
    629                                    OPT_ANALYSIS_DOM | OPT_ANALYSIS_LOOP);
    630   }
    631   opt_build_cfg(f);
    632 }