kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

fold.c (21767B)


      1 /* The semantic-layer peephole optimizer. See cg/fold.h for the contract.
      2  *
      3  * This is the isolated `-O0` peephole that was previously interleaved with the
      4  * value-stack discipline in value.c (Track 6.2). Nothing here emits control
      5  * flow or owns the stack; it folds constants, manages the delayed compare/arith
      6  * forms, and tracks const-valued locals. The op families drive it through the
      7  * fold.h entry points. */
      8 
      9 #include "cg/internal.h"
     10 #include "cg/ir_eval.h"
     11 
     12 /* ============================================================
     13  * 1. Integer constant folding
     14  * ============================================================ */
     15 
     16 u32 api_int_like_width(Compiler* c, KitCgTypeId id) {
     17   const CgType* ty = cg_type_get(c, id);
     18   if (!ty) return 0;
     19   if (ty->kind == KIT_CG_TYPE_ALIAS)
     20     return api_int_like_width(c, ty->alias.base);
     21   if (ty->kind == KIT_CG_TYPE_INT || ty->kind == KIT_CG_TYPE_BOOL)
     22     return ty->integer.width;
     23   if (ty->kind == KIT_CG_TYPE_ENUM) return (u32)(ty->size * 8u);
     24   if (ty->kind == KIT_CG_TYPE_PTR) return (u32)(ty->size * 8u);
     25   return 0;
     26 }
     27 
     28 int api_type_is_bool(Compiler* c, KitCgTypeId id) {
     29   const CgType* ty = cg_type_get(c, id);
     30   if (!ty) return 0;
     31   if (ty->kind == KIT_CG_TYPE_ALIAS) return api_type_is_bool(c, ty->alias.base);
     32   return ty->kind == KIT_CG_TYPE_BOOL;
     33 }
     34 
     35 /* The width-arithmetic core now lives in cg/ir_eval.{c,h} (shared with opt). The
     36  * fold.h surface (still consumed by arith.c / control.c) stays as thin
     37  * delegators. */
     38 u64 api_width_mask(u32 width) { return kit_ir_width_mask(width); }
     39 
     40 u64 api_mask_width(u64 v, u32 width) { return kit_ir_mask_width(v, width); }
     41 
     42 i64 api_sign_extend_width(u64 v, u32 width) {
     43   return kit_ir_sign_extend_width(v, width);
     44 }
     45 
     46 int api_foldable_int_like_type(Compiler* c, KitCgTypeId ty, u32* width_out) {
     47   u32 width = api_int_like_width(c, ty);
     48   if (!width || width > 64) return 0;
     49   *width_out = width;
     50   return 1;
     51 }
     52 
     53 int api_foldable_int_type(Compiler* c, KitCgTypeId ty, u32* width_out) {
     54   if (!cg_type_is_int(c, ty)) return 0;
     55   return api_foldable_int_like_type(c, ty, width_out);
     56 }
     57 
     58 i64 api_fold_result(Compiler* c, KitCgTypeId ty, u64 v, u32 width) {
     59   v = api_mask_width(v, width);
     60   if (api_type_is_bool(c, ty)) v = v != 0;
     61   return (i64)v;
     62 }
     63 
     64 int api_try_fold_int_binop(KitCg* g, BinOp op, KitCgTypeId ty, i64 a, i64 b,
     65                            i64* out) {
     66   u32 width;
     67   i64 r;
     68   /* Strict-int / PTR-aware foldability is fold's own type policy; the wrapping
     69    * arithmetic is the shared core. fold_result re-masks + bool-coerces. */
     70   if (!g || !out || !api_foldable_int_type(g->c, ty, &width)) return 0;
     71   if (!kit_ir_eval_binop(op, width, a, b, &r)) return 0;
     72   *out = api_fold_result(g->c, ty, (u64)r, width);
     73   return 1;
     74 }
     75 
     76 int api_try_fold_int_unop(KitCg* g, UnOp op, KitCgTypeId ty, i64 a, i64* out) {
     77   u32 width;
     78   i64 r;
     79   if (!g || !out || !api_foldable_int_type(g->c, ty, &width)) return 0;
     80   if (!kit_ir_eval_unop(op, width, a, &r)) return 0;
     81   *out = api_fold_result(g->c, ty, (u64)r, width);
     82   return 1;
     83 }
     84 
     85 int api_try_fold_int_cmp(KitCg* g, CmpOp op, KitCgTypeId ty, i64 a, i64 b,
     86                          i64* out) {
     87   u32 width;
     88   /* fold's cmp policy is int-LIKE (bool/enum/ptr admitted); the predicate eval
     89    * is shared. */
     90   if (!g || !out || !api_foldable_int_like_type(g->c, ty, &width)) return 0;
     91   return kit_ir_eval_cmp(op, width, a, b, out);
     92 }
     93 
     94 /* ============================================================
     95  * 2a. Delayed compare (SV_CMP) lifecycle
     96  * ============================================================ */
     97 
     98 ApiSValue api_make_cmp(CmpOp op, Operand a, Operand b, KitCgTypeId result_ty,
     99                        int a_owned, int b_owned) {
    100   ApiSValue sv;
    101   memset(&sv, 0, sizeof sv);
    102   sv.kind = SV_CMP;
    103   sv.type = result_ty;
    104   sv.delayed.cmp.op = op;
    105   sv.delayed.cmp.a = a;
    106   sv.delayed.cmp.b = b;
    107   sv.delayed.cmp.a_owned = a_owned ? 1u : 0u;
    108   sv.delayed.cmp.b_owned = b_owned ? 1u : 0u;
    109   sv.res = RES_INHERENT;
    110   sv.source_local = KIT_CG_LOCAL_NONE;
    111   return sv;
    112 }
    113 
    114 CmpOp api_invert_cmp(CmpOp op) {
    115   switch (op) {
    116     case CMP_EQ:
    117       return CMP_NE;
    118     case CMP_NE:
    119       return CMP_EQ;
    120     case CMP_LT_S:
    121       return CMP_GE_S;
    122     case CMP_LE_S:
    123       return CMP_GT_S;
    124     case CMP_GT_S:
    125       return CMP_LE_S;
    126     case CMP_GE_S:
    127       return CMP_LT_S;
    128     case CMP_LT_U:
    129       return CMP_GE_U;
    130     case CMP_LE_U:
    131       return CMP_GT_U;
    132     case CMP_GT_U:
    133       return CMP_LE_U;
    134     case CMP_GE_U:
    135       return CMP_LT_U;
    136     /* FP: the negation of a compare must flip ordered<->unordered (the NaN
    137      * outcome flips too) as well as negate the relation. The correct inverse
    138      * of ordered `a<b` is *unordered* `a>=b`, not ordered `a>=b`. */
    139     case CMP_OEQ_F:
    140       return CMP_UNE_F;
    141     case CMP_ONE_F:
    142       return CMP_UEQ_F;
    143     case CMP_OLT_F:
    144       return CMP_UGE_F;
    145     case CMP_OLE_F:
    146       return CMP_UGT_F;
    147     case CMP_OGT_F:
    148       return CMP_ULE_F;
    149     case CMP_OGE_F:
    150       return CMP_ULT_F;
    151     case CMP_UEQ_F:
    152       return CMP_ONE_F;
    153     case CMP_UNE_F:
    154       return CMP_OEQ_F;
    155     case CMP_ULT_F:
    156       return CMP_OGE_F;
    157     case CMP_ULE_F:
    158       return CMP_OGT_F;
    159     case CMP_UGT_F:
    160       return CMP_OLE_F;
    161     case CMP_UGE_F:
    162       return CMP_OLT_F;
    163   }
    164   return CMP_EQ;
    165 }
    166 
    167 void api_release_cmp(KitCg* g, ApiSValue* sv) {
    168   if (sv->delayed.cmp.a_owned) api_release_operand_local(g, sv->delayed.cmp.a);
    169   if (sv->delayed.cmp.b_owned &&
    170       (sv->delayed.cmp.b.kind != OPK_LOCAL ||
    171        sv->delayed.cmp.a.kind != OPK_LOCAL ||
    172        sv->delayed.cmp.b.v.local != sv->delayed.cmp.a.v.local ||
    173        !sv->delayed.cmp.a_owned)) {
    174     api_release_operand_local(g, sv->delayed.cmp.b);
    175   }
    176   memset(&sv->delayed.cmp.a, 0, sizeof sv->delayed.cmp.a);
    177   memset(&sv->delayed.cmp.b, 0, sizeof sv->delayed.cmp.b);
    178   sv->delayed.cmp.a_owned = 0;
    179   sv->delayed.cmp.b_owned = 0;
    180   sv->kind = SV_OPERAND;
    181 }
    182 
    183 void api_materialize_cmp_to(KitCg* g, ApiSValue* sv, Operand dst) {
    184   g->target->cmp(g->target, sv->delayed.cmp.op, dst, sv->delayed.cmp.a,
    185                  sv->delayed.cmp.b);
    186   if (sv->delayed.cmp.a_owned && sv->delayed.cmp.a.kind == OPK_LOCAL &&
    187       sv->delayed.cmp.a.v.local != dst.v.local) {
    188     api_release_operand_local(g, sv->delayed.cmp.a);
    189   }
    190   if (sv->delayed.cmp.b_owned && sv->delayed.cmp.b.kind == OPK_LOCAL &&
    191       sv->delayed.cmp.b.v.local != dst.v.local) {
    192     api_release_operand_local(g, sv->delayed.cmp.b);
    193   }
    194   memset(&sv->delayed.cmp.a, 0, sizeof sv->delayed.cmp.a);
    195   memset(&sv->delayed.cmp.b, 0, sizeof sv->delayed.cmp.b);
    196   sv->delayed.cmp.a_owned = 0;
    197   sv->delayed.cmp.b_owned = 0;
    198   sv->kind = SV_OPERAND;
    199   sv->op = dst;
    200   sv->type = dst.type;
    201   sv->res = RES_LOCAL;
    202   sv->lvalue = 0;
    203 }
    204 
    205 /* ============================================================
    206  * 2b. Delayed arith (SV_ARITH) lifecycle
    207  *
    208  * Live: api_can_delay_int_arith admits a non-flagged foldable integer op, so an
    209  * unflagged int binop/unop is held un-emitted as an SV_ARITH. A following op
    210  * can then fuse it (fold an imm chain, collapse an identity) and a consumer
    211  * that needs a value materializes it via api_materialize_arith_to. This was
    212  * gated off while the load/store EA rider existed (Track 7 removed it);
    213  * Track 6.3 flipped the gate back on.
    214  * ============================================================ */
    215 
    216 ApiSValue api_make_arith_unop(UnOp op, Operand a, KitCgTypeId ty, int a_owned) {
    217   ApiSValue sv;
    218   memset(&sv, 0, sizeof sv);
    219   sv.kind = SV_ARITH;
    220   sv.delayed.arith.kind = API_DELAYED_UNOP;
    221   sv.type = ty;
    222   sv.delayed.arith.un_op = op;
    223   sv.delayed.arith.a = a;
    224   sv.delayed.arith.a_owned = a_owned ? 1u : 0u;
    225   sv.res = RES_INHERENT;
    226   sv.source_local = KIT_CG_LOCAL_NONE;
    227   return sv;
    228 }
    229 
    230 ApiSValue api_make_arith_binop(BinOp op, Operand a, Operand b, KitCgTypeId ty,
    231                                int a_owned, int b_owned) {
    232   ApiSValue sv;
    233   memset(&sv, 0, sizeof sv);
    234   sv.kind = SV_ARITH;
    235   sv.delayed.arith.kind = API_DELAYED_BINOP;
    236   sv.type = ty;
    237   sv.delayed.arith.bin_op = op;
    238   sv.delayed.arith.a = a;
    239   sv.delayed.arith.b = b;
    240   sv.delayed.arith.a_owned = a_owned ? 1u : 0u;
    241   sv.delayed.arith.b_owned = b_owned ? 1u : 0u;
    242   sv.res = RES_INHERENT;
    243   sv.source_local = KIT_CG_LOCAL_NONE;
    244   return sv;
    245 }
    246 
    247 void api_release_arith(KitCg* g, ApiSValue* sv) {
    248   if (sv->delayed.arith.a_owned)
    249     api_release_operand_local(g, sv->delayed.arith.a);
    250   if (sv->delayed.arith.b_owned &&
    251       (sv->delayed.arith.b.kind != OPK_LOCAL ||
    252        sv->delayed.arith.a.kind != OPK_LOCAL ||
    253        sv->delayed.arith.b.v.local != sv->delayed.arith.a.v.local ||
    254        !sv->delayed.arith.a_owned)) {
    255     api_release_operand_local(g, sv->delayed.arith.b);
    256   }
    257   memset(&sv->delayed.arith.a, 0, sizeof sv->delayed.arith.a);
    258   memset(&sv->delayed.arith.b, 0, sizeof sv->delayed.arith.b);
    259   sv->delayed.arith.a_owned = 0;
    260   sv->delayed.arith.b_owned = 0;
    261   sv->kind = SV_OPERAND;
    262 }
    263 
    264 void api_materialize_arith_to(KitCg* g, ApiSValue* sv, Operand dst) {
    265   if (sv->delayed.arith.kind == API_DELAYED_UNOP) {
    266     g->target->unop(g->target, sv->delayed.arith.un_op, dst,
    267                     sv->delayed.arith.a);
    268   } else {
    269     g->target->binop(g->target, sv->delayed.arith.bin_op, dst,
    270                      sv->delayed.arith.a, sv->delayed.arith.b);
    271   }
    272   if (sv->delayed.arith.a_owned && sv->delayed.arith.a.kind == OPK_LOCAL &&
    273       sv->delayed.arith.a.v.local != dst.v.local) {
    274     api_release_operand_local(g, sv->delayed.arith.a);
    275   }
    276   if (sv->delayed.arith.b_owned && sv->delayed.arith.b.kind == OPK_LOCAL &&
    277       sv->delayed.arith.b.v.local != dst.v.local) {
    278     api_release_operand_local(g, sv->delayed.arith.b);
    279   }
    280   memset(&sv->delayed.arith.a, 0, sizeof sv->delayed.arith.a);
    281   memset(&sv->delayed.arith.b, 0, sizeof sv->delayed.arith.b);
    282   sv->delayed.arith.a_owned = 0;
    283   sv->delayed.arith.b_owned = 0;
    284   sv->kind = SV_OPERAND;
    285   sv->op = dst;
    286   sv->type = dst.type;
    287   sv->res = RES_LOCAL;
    288   sv->lvalue = 0;
    289 }
    290 
    291 int api_arith_rhs_reusable(const ApiSValue* sv) {
    292   if (sv->delayed.arith.kind == API_DELAYED_UNOP) return 0;
    293   switch (sv->delayed.arith.bin_op) {
    294     case BO_IADD:
    295     case BO_IMUL:
    296     case BO_AND:
    297     case BO_OR:
    298     case BO_XOR:
    299       return 1;
    300     default:
    301       return 0;
    302   }
    303 }
    304 
    305 int api_can_delay_int_arith(KitCg* g, KitCgTypeId ty, u32 flags) {
    306   u32 width;
    307   return g && !flags && api_foldable_int_type(g->c, ty, &width);
    308 }
    309 
    310 /* Strength reduction: rewrite a multiply / unsigned-divide / unsigned-remainder
    311  * by a power-of-two immediate into a shift / and. Operates on the freshly
    312  * popped operands (`a` = LHS, `b` = RHS) and the op; on a match it rewrites
    313  * *op, *a and *b in place and returns 1.
    314  *
    315  * Only the cases whose plain wrapping equivalence is exact live here:
    316  *   x * 2^k   -> x << k      (multiply is commutative: the immediate may be on
    317  *                             either side; the result is canonicalized so the
    318  *                             variable is the shift's LHS)
    319  *   x u/ 2^k  -> x u>> k
    320  *   x u% 2^k  -> x & (2^k - 1)
    321  * Signed division/remainder by a power of two needs a sign-bias sequence
    322  * (round toward zero on negatives), so it is left to the optimizer. The caller
    323  * gates this on flags==0, so trap/saturate/exact semantics never reach here. */
    324 static int api_imm_is_pow2(u64 v, u32* log2_out) {
    325   u32 k;
    326   if (v == 0 || (v & (v - 1u)) != 0) return 0;
    327   for (k = 0; k < 64u; ++k) {
    328     if (v == (1ull << k)) {
    329       *log2_out = k;
    330       return 1;
    331     }
    332   }
    333   return 0;
    334 }
    335 
    336 int api_try_strength_reduce(KitCg* g, BinOp* op, KitCgTypeId ty, ApiSValue* a,
    337                             ApiSValue* b) {
    338   u32 width;
    339   u32 k = 0;
    340   u64 v;
    341   int a_imm, b_imm;
    342   if (!g || !op || !a || !b) return 0;
    343   if (!api_foldable_int_type(g->c, ty, &width)) return 0;
    344   a_imm = a->kind == SV_OPERAND && a->op.kind == OPK_IMM;
    345   b_imm = b->kind == SV_OPERAND && b->op.kind == OPK_IMM;
    346   switch (*op) {
    347     case BO_IMUL: {
    348       /* Both-imm is constant-folded before we get here; need exactly one, and
    349        * the variable operand is canonicalized to the shift's LHS. */
    350       ApiSValue* imm_sv;
    351       int imm_on_lhs;
    352       if (b_imm && !a_imm) {
    353         imm_sv = b;
    354         imm_on_lhs = 0;
    355       } else if (a_imm && !b_imm) {
    356         imm_sv = a;
    357         imm_on_lhs = 1;
    358       } else {
    359         return 0;
    360       }
    361       v = api_mask_width((u64)imm_sv->op.v.imm, width);
    362       if (!api_imm_is_pow2(v, &k) || k == 0) return 0;
    363       if (imm_on_lhs) {
    364         ApiSValue tmp = *a;
    365         *a = *b;
    366         *b = tmp;
    367       }
    368       b->op.v.imm = (i64)k;
    369       b->op.type = ty;
    370       *op = BO_SHL;
    371       return 1;
    372     }
    373     case BO_UDIV:
    374     case BO_UREM:
    375       if (!b_imm || a_imm) return 0; /* RHS imm, LHS a real value */
    376       v = api_mask_width((u64)b->op.v.imm, width);
    377       if (!api_imm_is_pow2(v, &k) || k == 0) return 0;
    378       if (*op == BO_UDIV) {
    379         b->op.v.imm = (i64)k;
    380         *op = BO_SHR_U;
    381       } else {
    382         b->op.v.imm = (i64)api_mask_width(v - 1u, width);
    383         *op = BO_AND;
    384       }
    385       b->op.type = ty;
    386       return 1;
    387     default:
    388       return 0;
    389   }
    390 }
    391 
    392 int api_op_is_int_identity(KitCg* g, BinOp op, KitCgTypeId ty, i64 imm) {
    393   u32 width;
    394   u64 v;
    395   if (!api_foldable_int_type(g->c, ty, &width)) return 0;
    396   v = api_mask_width((u64)imm, width);
    397   switch (op) {
    398     case BO_IADD:
    399     case BO_ISUB:
    400     case BO_OR:
    401     case BO_XOR:
    402     case BO_SHL:
    403     case BO_SHR_S:
    404     case BO_SHR_U:
    405       return v == 0;
    406     case BO_IMUL:
    407     case BO_SDIV:
    408     case BO_UDIV:
    409       return v == 1;
    410     case BO_AND:
    411       return v == api_width_mask(width);
    412     default:
    413       return 0;
    414   }
    415 }
    416 
    417 int api_try_collapse_binop_identity(KitCg* g, BinOp op, KitCgTypeId ty,
    418                                     ApiSValue* a, ApiSValue* b,
    419                                     ApiSValue* out) {
    420   u32 width;
    421   u64 av = 0;
    422   u64 bv = 0;
    423   if (!api_foldable_int_type(g->c, ty, &width)) return 0;
    424   if (a->kind == SV_OPERAND && a->op.kind == OPK_IMM)
    425     av = api_mask_width((u64)a->op.v.imm, width);
    426   if (b->kind == SV_OPERAND && b->op.kind == OPK_IMM)
    427     bv = api_mask_width((u64)b->op.v.imm, width);
    428 
    429   if (b->kind == SV_OPERAND && b->op.kind == OPK_IMM && a->kind == SV_OPERAND &&
    430       a->op.kind != OPK_IMM && api_op_is_int_identity(g, op, ty, b->op.v.imm)) {
    431     *out = api_make_sv_with_local_ownership(
    432         a->op, ty, api_sv_owns_operand_local(a, &a->op));
    433     a->res = RES_INHERENT;
    434     return 1;
    435   }
    436   if (b->kind == SV_OPERAND && b->op.kind == OPK_IMM && a->kind == SV_OPERAND &&
    437       a->op.kind != OPK_IMM &&
    438       (op == BO_SREM || op == BO_UREM || op == BO_IMUL || op == BO_AND ||
    439        op == BO_OR)) {
    440     if ((op == BO_SREM || op == BO_UREM) && bv == 1) {
    441       *out = api_make_sv(api_op_imm(0, ty), ty);
    442       return 1;
    443     }
    444     if ((op == BO_IMUL || op == BO_AND) && bv == 0) {
    445       *out = api_make_sv(api_op_imm(0, ty), ty);
    446       return 1;
    447     }
    448     if (op == BO_OR && bv == api_width_mask(width)) {
    449       *out =
    450           api_make_sv(api_op_imm(api_fold_result(g->c, ty, bv, width), ty), ty);
    451       return 1;
    452     }
    453   }
    454   if (a->kind == SV_OPERAND && a->op.kind == OPK_IMM && b->kind == SV_OPERAND &&
    455       b->op.kind != OPK_IMM &&
    456       (op == BO_IADD || op == BO_IMUL || op == BO_OR || op == BO_XOR ||
    457        op == BO_AND) &&
    458       api_op_is_int_identity(g, op, ty, a->op.v.imm)) {
    459     *out = api_make_sv_with_local_ownership(
    460         b->op, ty, api_sv_owns_operand_local(b, &b->op));
    461     b->res = RES_INHERENT;
    462     return 1;
    463   }
    464   if (a->kind == SV_OPERAND && a->op.kind == OPK_IMM && b->kind == SV_OPERAND &&
    465       b->op.kind != OPK_IMM && (op == BO_IMUL || op == BO_AND || op == BO_OR)) {
    466     if ((op == BO_IMUL || op == BO_AND) && av == 0) {
    467       *out = api_make_sv(api_op_imm(0, ty), ty);
    468       return 1;
    469     }
    470     if (op == BO_OR && av == api_width_mask(width)) {
    471       *out =
    472           api_make_sv(api_op_imm(api_fold_result(g->c, ty, av, width), ty), ty);
    473       return 1;
    474     }
    475   }
    476   return 0;
    477 }
    478 
    479 int api_try_fold_arith_chain(KitCg* g, BinOp op, KitCgTypeId ty, ApiSValue* a,
    480                              ApiSValue* b, ApiSValue* out) {
    481   i64 folded;
    482   BinOp result_op;
    483   if (a->kind != SV_ARITH || a->delayed.arith.kind != API_DELAYED_BINOP ||
    484       a->delayed.arith.a.kind != OPK_LOCAL ||
    485       a->delayed.arith.b.kind != OPK_IMM || b->kind != SV_OPERAND ||
    486       b->op.kind != OPK_IMM) {
    487     return 0;
    488   }
    489   result_op = a->delayed.arith.bin_op;
    490   switch (a->delayed.arith.bin_op) {
    491     case BO_IADD:
    492       if (op == BO_IADD) {
    493         if (!api_try_fold_int_binop(g, BO_IADD, ty, a->delayed.arith.b.v.imm,
    494                                     b->op.v.imm, &folded))
    495           return 0;
    496         result_op = BO_IADD;
    497       } else if (op == BO_ISUB) {
    498         if (!api_try_fold_int_binop(g, BO_ISUB, ty, a->delayed.arith.b.v.imm,
    499                                     b->op.v.imm, &folded))
    500           return 0;
    501         result_op = BO_IADD;
    502       } else {
    503         return 0;
    504       }
    505       break;
    506     case BO_ISUB:
    507       if (op == BO_IADD) {
    508         if (!api_try_fold_int_binop(g, BO_ISUB, ty, b->op.v.imm,
    509                                     a->delayed.arith.b.v.imm, &folded))
    510           return 0;
    511         result_op = BO_IADD;
    512       } else if (op == BO_ISUB) {
    513         if (!api_try_fold_int_binop(g, BO_IADD, ty, a->delayed.arith.b.v.imm,
    514                                     b->op.v.imm, &folded))
    515           return 0;
    516         result_op = BO_ISUB;
    517       } else {
    518         return 0;
    519       }
    520       break;
    521     case BO_XOR:
    522       if (op != BO_XOR ||
    523           !api_try_fold_int_binop(g, BO_XOR, ty, a->delayed.arith.b.v.imm,
    524                                   b->op.v.imm, &folded))
    525         return 0;
    526       result_op = BO_XOR;
    527       break;
    528     case BO_AND:
    529       if (op != BO_AND ||
    530           !api_try_fold_int_binop(g, BO_AND, ty, a->delayed.arith.b.v.imm,
    531                                   b->op.v.imm, &folded))
    532         return 0;
    533       result_op = BO_AND;
    534       break;
    535     case BO_OR:
    536       if (op != BO_OR ||
    537           !api_try_fold_int_binop(g, BO_OR, ty, a->delayed.arith.b.v.imm,
    538                                   b->op.v.imm, &folded))
    539         return 0;
    540       result_op = BO_OR;
    541       break;
    542     default:
    543       return 0;
    544   }
    545   if (api_op_is_int_identity(g, result_op, ty, folded)) {
    546     *out = api_make_sv_with_local_ownership(a->delayed.arith.a, ty,
    547                                             a->delayed.arith.a_owned);
    548     a->delayed.arith.a_owned = 0;
    549     memset(&a->delayed.arith.a, 0, sizeof a->delayed.arith.a);
    550     return 1;
    551   }
    552   a->delayed.arith.bin_op = result_op;
    553   a->delayed.arith.b.v.imm = folded;
    554   *out = *a;
    555   a->delayed.arith.a_owned = 0;
    556   a->delayed.arith.b_owned = 0;
    557   memset(&a->delayed.arith.a, 0, sizeof a->delayed.arith.a);
    558   memset(&a->delayed.arith.b, 0, sizeof a->delayed.arith.b);
    559   return 1;
    560 }
    561 
    562 int api_try_fold_unary_chain(ApiSValue* a, UnOp op, KitCgTypeId ty,
    563                              ApiSValue* out) {
    564   if (op != UO_BNOT || a->kind != SV_ARITH ||
    565       a->delayed.arith.kind != API_DELAYED_UNOP ||
    566       a->delayed.arith.un_op != UO_BNOT ||
    567       a->delayed.arith.a.kind != OPK_LOCAL) {
    568     return 0;
    569   }
    570   *out = api_make_sv_with_local_ownership(a->delayed.arith.a, ty,
    571                                           a->delayed.arith.a_owned);
    572   a->delayed.arith.a_owned = 0;
    573   memset(&a->delayed.arith.a, 0, sizeof a->delayed.arith.a);
    574   return 1;
    575 }
    576 
    577 /* ============================================================
    578  * 3. Const-local store-to-load forwarding
    579  * ============================================================ */
    580 
    581 void api_local_const_clear(ApiSourceLocal* rec) {
    582   if (!rec) return;
    583   rec->const_valid = 0;
    584   rec->const_value = 0;
    585 }
    586 
    587 void api_local_const_clear_all(KitCg* g) {
    588   if (!g) return;
    589   for (u32 i = 0; i < g->nlocals; ++i) api_local_const_clear(&g->locals[i]);
    590 }
    591 
    592 void api_local_const_memory_boundary(KitCg* g) { api_local_const_clear_all(g); }
    593 
    594 void api_local_const_control_boundary(KitCg* g) {
    595   api_local_const_clear_all(g);
    596 }
    597 
    598 void api_local_const_address_taken(KitCg* g, KitCgLocal local) {
    599   api_local_const_clear_all(g);
    600   api_local_const_clear(api_local_from_handle(g, local));
    601 }
    602 
    603 int api_local_const_can_track(KitCg* g, const ApiSourceLocal* rec,
    604                               KitCgMemAccess access) {
    605   u32 width;
    606   KitCgTypeId ty;
    607   u64 access_size;
    608   u64 local_size;
    609   if (!g || !rec) return 0;
    610   if (rec->kind != API_SOURCE_LOCAL_AUTO) return 0;
    611   if (access.flags & KIT_CG_MEM_VOLATILE) return 0;
    612   ty = resolve_type(g->c, access.type);
    613   if (!ty) ty = rec->type;
    614   if (ty != rec->type) return 0;
    615   access_size = abi_cg_sizeof(g->c->abi, ty);
    616   local_size = abi_cg_sizeof(g->c->abi, rec->type);
    617   if (access_size != local_size) return 0;
    618   return api_foldable_int_like_type(g->c, ty, &width);
    619 }
    620 
    621 void api_local_const_store(KitCg* g, KitCgLocal local, KitCgMemAccess access,
    622                            i64 value) {
    623   ApiSourceLocal* rec = api_local_from_handle(g, local);
    624   KitCgTypeId ty;
    625   u32 width;
    626   if (!api_local_const_can_track(g, rec, access)) {
    627     api_local_const_clear(rec);
    628     return;
    629   }
    630   ty = resolve_type(g->c, access.type);
    631   if (!ty) ty = rec->type;
    632   if (!api_foldable_int_like_type(g->c, ty, &width)) {
    633     api_local_const_clear(rec);
    634     return;
    635   }
    636   rec->const_value = api_fold_result(g->c, ty, (u64)value, width);
    637   rec->const_valid = 1;
    638 }
    639 
    640 int api_local_const_load(KitCg* g, KitCgLocal local, KitCgMemAccess access,
    641                          Operand* out) {
    642   ApiSourceLocal* rec = api_local_from_handle(g, local);
    643   KitCgTypeId ty;
    644   u32 width;
    645   if (!out || !api_local_const_can_track(g, rec, access)) return 0;
    646   if (!rec->const_valid) return 0;
    647   ty = resolve_type(g->c, access.type);
    648   if (!ty) ty = rec->type;
    649   if (!api_foldable_int_like_type(g->c, ty, &width)) return 0;
    650   *out =
    651       api_op_imm(api_fold_result(g->c, ty, (u64)rec->const_value, width), ty);
    652   return 1;
    653 }