fold.c (21767B)
1 /* The semantic-layer peephole optimizer. See cg/fold.h for the contract. 2 * 3 * This is the isolated `-O0` peephole that was previously interleaved with the 4 * value-stack discipline in value.c (Track 6.2). Nothing here emits control 5 * flow or owns the stack; it folds constants, manages the delayed compare/arith 6 * forms, and tracks const-valued locals. The op families drive it through the 7 * fold.h entry points. */ 8 9 #include "cg/internal.h" 10 #include "cg/ir_eval.h" 11 12 /* ============================================================ 13 * 1. Integer constant folding 14 * ============================================================ */ 15 16 u32 api_int_like_width(Compiler* c, KitCgTypeId id) { 17 const CgType* ty = cg_type_get(c, id); 18 if (!ty) return 0; 19 if (ty->kind == KIT_CG_TYPE_ALIAS) 20 return api_int_like_width(c, ty->alias.base); 21 if (ty->kind == KIT_CG_TYPE_INT || ty->kind == KIT_CG_TYPE_BOOL) 22 return ty->integer.width; 23 if (ty->kind == KIT_CG_TYPE_ENUM) return (u32)(ty->size * 8u); 24 if (ty->kind == KIT_CG_TYPE_PTR) return (u32)(ty->size * 8u); 25 return 0; 26 } 27 28 int api_type_is_bool(Compiler* c, KitCgTypeId id) { 29 const CgType* ty = cg_type_get(c, id); 30 if (!ty) return 0; 31 if (ty->kind == KIT_CG_TYPE_ALIAS) return api_type_is_bool(c, ty->alias.base); 32 return ty->kind == KIT_CG_TYPE_BOOL; 33 } 34 35 /* The width-arithmetic core now lives in cg/ir_eval.{c,h} (shared with opt). The 36 * fold.h surface (still consumed by arith.c / control.c) stays as thin 37 * delegators. */ 38 u64 api_width_mask(u32 width) { return kit_ir_width_mask(width); } 39 40 u64 api_mask_width(u64 v, u32 width) { return kit_ir_mask_width(v, width); } 41 42 i64 api_sign_extend_width(u64 v, u32 width) { 43 return kit_ir_sign_extend_width(v, width); 44 } 45 46 int api_foldable_int_like_type(Compiler* c, KitCgTypeId ty, u32* width_out) { 47 u32 width = api_int_like_width(c, ty); 48 if (!width || width > 64) return 0; 49 *width_out = width; 50 return 1; 51 } 52 53 int api_foldable_int_type(Compiler* c, KitCgTypeId ty, u32* width_out) { 54 if (!cg_type_is_int(c, ty)) return 0; 55 return api_foldable_int_like_type(c, ty, width_out); 56 } 57 58 i64 api_fold_result(Compiler* c, KitCgTypeId ty, u64 v, u32 width) { 59 v = api_mask_width(v, width); 60 if (api_type_is_bool(c, ty)) v = v != 0; 61 return (i64)v; 62 } 63 64 int api_try_fold_int_binop(KitCg* g, BinOp op, KitCgTypeId ty, i64 a, i64 b, 65 i64* out) { 66 u32 width; 67 i64 r; 68 /* Strict-int / PTR-aware foldability is fold's own type policy; the wrapping 69 * arithmetic is the shared core. fold_result re-masks + bool-coerces. */ 70 if (!g || !out || !api_foldable_int_type(g->c, ty, &width)) return 0; 71 if (!kit_ir_eval_binop(op, width, a, b, &r)) return 0; 72 *out = api_fold_result(g->c, ty, (u64)r, width); 73 return 1; 74 } 75 76 int api_try_fold_int_unop(KitCg* g, UnOp op, KitCgTypeId ty, i64 a, i64* out) { 77 u32 width; 78 i64 r; 79 if (!g || !out || !api_foldable_int_type(g->c, ty, &width)) return 0; 80 if (!kit_ir_eval_unop(op, width, a, &r)) return 0; 81 *out = api_fold_result(g->c, ty, (u64)r, width); 82 return 1; 83 } 84 85 int api_try_fold_int_cmp(KitCg* g, CmpOp op, KitCgTypeId ty, i64 a, i64 b, 86 i64* out) { 87 u32 width; 88 /* fold's cmp policy is int-LIKE (bool/enum/ptr admitted); the predicate eval 89 * is shared. */ 90 if (!g || !out || !api_foldable_int_like_type(g->c, ty, &width)) return 0; 91 return kit_ir_eval_cmp(op, width, a, b, out); 92 } 93 94 /* ============================================================ 95 * 2a. Delayed compare (SV_CMP) lifecycle 96 * ============================================================ */ 97 98 ApiSValue api_make_cmp(CmpOp op, Operand a, Operand b, KitCgTypeId result_ty, 99 int a_owned, int b_owned) { 100 ApiSValue sv; 101 memset(&sv, 0, sizeof sv); 102 sv.kind = SV_CMP; 103 sv.type = result_ty; 104 sv.delayed.cmp.op = op; 105 sv.delayed.cmp.a = a; 106 sv.delayed.cmp.b = b; 107 sv.delayed.cmp.a_owned = a_owned ? 1u : 0u; 108 sv.delayed.cmp.b_owned = b_owned ? 1u : 0u; 109 sv.res = RES_INHERENT; 110 sv.source_local = KIT_CG_LOCAL_NONE; 111 return sv; 112 } 113 114 CmpOp api_invert_cmp(CmpOp op) { 115 switch (op) { 116 case CMP_EQ: 117 return CMP_NE; 118 case CMP_NE: 119 return CMP_EQ; 120 case CMP_LT_S: 121 return CMP_GE_S; 122 case CMP_LE_S: 123 return CMP_GT_S; 124 case CMP_GT_S: 125 return CMP_LE_S; 126 case CMP_GE_S: 127 return CMP_LT_S; 128 case CMP_LT_U: 129 return CMP_GE_U; 130 case CMP_LE_U: 131 return CMP_GT_U; 132 case CMP_GT_U: 133 return CMP_LE_U; 134 case CMP_GE_U: 135 return CMP_LT_U; 136 /* FP: the negation of a compare must flip ordered<->unordered (the NaN 137 * outcome flips too) as well as negate the relation. The correct inverse 138 * of ordered `a<b` is *unordered* `a>=b`, not ordered `a>=b`. */ 139 case CMP_OEQ_F: 140 return CMP_UNE_F; 141 case CMP_ONE_F: 142 return CMP_UEQ_F; 143 case CMP_OLT_F: 144 return CMP_UGE_F; 145 case CMP_OLE_F: 146 return CMP_UGT_F; 147 case CMP_OGT_F: 148 return CMP_ULE_F; 149 case CMP_OGE_F: 150 return CMP_ULT_F; 151 case CMP_UEQ_F: 152 return CMP_ONE_F; 153 case CMP_UNE_F: 154 return CMP_OEQ_F; 155 case CMP_ULT_F: 156 return CMP_OGE_F; 157 case CMP_ULE_F: 158 return CMP_OGT_F; 159 case CMP_UGT_F: 160 return CMP_OLE_F; 161 case CMP_UGE_F: 162 return CMP_OLT_F; 163 } 164 return CMP_EQ; 165 } 166 167 void api_release_cmp(KitCg* g, ApiSValue* sv) { 168 if (sv->delayed.cmp.a_owned) api_release_operand_local(g, sv->delayed.cmp.a); 169 if (sv->delayed.cmp.b_owned && 170 (sv->delayed.cmp.b.kind != OPK_LOCAL || 171 sv->delayed.cmp.a.kind != OPK_LOCAL || 172 sv->delayed.cmp.b.v.local != sv->delayed.cmp.a.v.local || 173 !sv->delayed.cmp.a_owned)) { 174 api_release_operand_local(g, sv->delayed.cmp.b); 175 } 176 memset(&sv->delayed.cmp.a, 0, sizeof sv->delayed.cmp.a); 177 memset(&sv->delayed.cmp.b, 0, sizeof sv->delayed.cmp.b); 178 sv->delayed.cmp.a_owned = 0; 179 sv->delayed.cmp.b_owned = 0; 180 sv->kind = SV_OPERAND; 181 } 182 183 void api_materialize_cmp_to(KitCg* g, ApiSValue* sv, Operand dst) { 184 g->target->cmp(g->target, sv->delayed.cmp.op, dst, sv->delayed.cmp.a, 185 sv->delayed.cmp.b); 186 if (sv->delayed.cmp.a_owned && sv->delayed.cmp.a.kind == OPK_LOCAL && 187 sv->delayed.cmp.a.v.local != dst.v.local) { 188 api_release_operand_local(g, sv->delayed.cmp.a); 189 } 190 if (sv->delayed.cmp.b_owned && sv->delayed.cmp.b.kind == OPK_LOCAL && 191 sv->delayed.cmp.b.v.local != dst.v.local) { 192 api_release_operand_local(g, sv->delayed.cmp.b); 193 } 194 memset(&sv->delayed.cmp.a, 0, sizeof sv->delayed.cmp.a); 195 memset(&sv->delayed.cmp.b, 0, sizeof sv->delayed.cmp.b); 196 sv->delayed.cmp.a_owned = 0; 197 sv->delayed.cmp.b_owned = 0; 198 sv->kind = SV_OPERAND; 199 sv->op = dst; 200 sv->type = dst.type; 201 sv->res = RES_LOCAL; 202 sv->lvalue = 0; 203 } 204 205 /* ============================================================ 206 * 2b. Delayed arith (SV_ARITH) lifecycle 207 * 208 * Live: api_can_delay_int_arith admits a non-flagged foldable integer op, so an 209 * unflagged int binop/unop is held un-emitted as an SV_ARITH. A following op 210 * can then fuse it (fold an imm chain, collapse an identity) and a consumer 211 * that needs a value materializes it via api_materialize_arith_to. This was 212 * gated off while the load/store EA rider existed (Track 7 removed it); 213 * Track 6.3 flipped the gate back on. 214 * ============================================================ */ 215 216 ApiSValue api_make_arith_unop(UnOp op, Operand a, KitCgTypeId ty, int a_owned) { 217 ApiSValue sv; 218 memset(&sv, 0, sizeof sv); 219 sv.kind = SV_ARITH; 220 sv.delayed.arith.kind = API_DELAYED_UNOP; 221 sv.type = ty; 222 sv.delayed.arith.un_op = op; 223 sv.delayed.arith.a = a; 224 sv.delayed.arith.a_owned = a_owned ? 1u : 0u; 225 sv.res = RES_INHERENT; 226 sv.source_local = KIT_CG_LOCAL_NONE; 227 return sv; 228 } 229 230 ApiSValue api_make_arith_binop(BinOp op, Operand a, Operand b, KitCgTypeId ty, 231 int a_owned, int b_owned) { 232 ApiSValue sv; 233 memset(&sv, 0, sizeof sv); 234 sv.kind = SV_ARITH; 235 sv.delayed.arith.kind = API_DELAYED_BINOP; 236 sv.type = ty; 237 sv.delayed.arith.bin_op = op; 238 sv.delayed.arith.a = a; 239 sv.delayed.arith.b = b; 240 sv.delayed.arith.a_owned = a_owned ? 1u : 0u; 241 sv.delayed.arith.b_owned = b_owned ? 1u : 0u; 242 sv.res = RES_INHERENT; 243 sv.source_local = KIT_CG_LOCAL_NONE; 244 return sv; 245 } 246 247 void api_release_arith(KitCg* g, ApiSValue* sv) { 248 if (sv->delayed.arith.a_owned) 249 api_release_operand_local(g, sv->delayed.arith.a); 250 if (sv->delayed.arith.b_owned && 251 (sv->delayed.arith.b.kind != OPK_LOCAL || 252 sv->delayed.arith.a.kind != OPK_LOCAL || 253 sv->delayed.arith.b.v.local != sv->delayed.arith.a.v.local || 254 !sv->delayed.arith.a_owned)) { 255 api_release_operand_local(g, sv->delayed.arith.b); 256 } 257 memset(&sv->delayed.arith.a, 0, sizeof sv->delayed.arith.a); 258 memset(&sv->delayed.arith.b, 0, sizeof sv->delayed.arith.b); 259 sv->delayed.arith.a_owned = 0; 260 sv->delayed.arith.b_owned = 0; 261 sv->kind = SV_OPERAND; 262 } 263 264 void api_materialize_arith_to(KitCg* g, ApiSValue* sv, Operand dst) { 265 if (sv->delayed.arith.kind == API_DELAYED_UNOP) { 266 g->target->unop(g->target, sv->delayed.arith.un_op, dst, 267 sv->delayed.arith.a); 268 } else { 269 g->target->binop(g->target, sv->delayed.arith.bin_op, dst, 270 sv->delayed.arith.a, sv->delayed.arith.b); 271 } 272 if (sv->delayed.arith.a_owned && sv->delayed.arith.a.kind == OPK_LOCAL && 273 sv->delayed.arith.a.v.local != dst.v.local) { 274 api_release_operand_local(g, sv->delayed.arith.a); 275 } 276 if (sv->delayed.arith.b_owned && sv->delayed.arith.b.kind == OPK_LOCAL && 277 sv->delayed.arith.b.v.local != dst.v.local) { 278 api_release_operand_local(g, sv->delayed.arith.b); 279 } 280 memset(&sv->delayed.arith.a, 0, sizeof sv->delayed.arith.a); 281 memset(&sv->delayed.arith.b, 0, sizeof sv->delayed.arith.b); 282 sv->delayed.arith.a_owned = 0; 283 sv->delayed.arith.b_owned = 0; 284 sv->kind = SV_OPERAND; 285 sv->op = dst; 286 sv->type = dst.type; 287 sv->res = RES_LOCAL; 288 sv->lvalue = 0; 289 } 290 291 int api_arith_rhs_reusable(const ApiSValue* sv) { 292 if (sv->delayed.arith.kind == API_DELAYED_UNOP) return 0; 293 switch (sv->delayed.arith.bin_op) { 294 case BO_IADD: 295 case BO_IMUL: 296 case BO_AND: 297 case BO_OR: 298 case BO_XOR: 299 return 1; 300 default: 301 return 0; 302 } 303 } 304 305 int api_can_delay_int_arith(KitCg* g, KitCgTypeId ty, u32 flags) { 306 u32 width; 307 return g && !flags && api_foldable_int_type(g->c, ty, &width); 308 } 309 310 /* Strength reduction: rewrite a multiply / unsigned-divide / unsigned-remainder 311 * by a power-of-two immediate into a shift / and. Operates on the freshly 312 * popped operands (`a` = LHS, `b` = RHS) and the op; on a match it rewrites 313 * *op, *a and *b in place and returns 1. 314 * 315 * Only the cases whose plain wrapping equivalence is exact live here: 316 * x * 2^k -> x << k (multiply is commutative: the immediate may be on 317 * either side; the result is canonicalized so the 318 * variable is the shift's LHS) 319 * x u/ 2^k -> x u>> k 320 * x u% 2^k -> x & (2^k - 1) 321 * Signed division/remainder by a power of two needs a sign-bias sequence 322 * (round toward zero on negatives), so it is left to the optimizer. The caller 323 * gates this on flags==0, so trap/saturate/exact semantics never reach here. */ 324 static int api_imm_is_pow2(u64 v, u32* log2_out) { 325 u32 k; 326 if (v == 0 || (v & (v - 1u)) != 0) return 0; 327 for (k = 0; k < 64u; ++k) { 328 if (v == (1ull << k)) { 329 *log2_out = k; 330 return 1; 331 } 332 } 333 return 0; 334 } 335 336 int api_try_strength_reduce(KitCg* g, BinOp* op, KitCgTypeId ty, ApiSValue* a, 337 ApiSValue* b) { 338 u32 width; 339 u32 k = 0; 340 u64 v; 341 int a_imm, b_imm; 342 if (!g || !op || !a || !b) return 0; 343 if (!api_foldable_int_type(g->c, ty, &width)) return 0; 344 a_imm = a->kind == SV_OPERAND && a->op.kind == OPK_IMM; 345 b_imm = b->kind == SV_OPERAND && b->op.kind == OPK_IMM; 346 switch (*op) { 347 case BO_IMUL: { 348 /* Both-imm is constant-folded before we get here; need exactly one, and 349 * the variable operand is canonicalized to the shift's LHS. */ 350 ApiSValue* imm_sv; 351 int imm_on_lhs; 352 if (b_imm && !a_imm) { 353 imm_sv = b; 354 imm_on_lhs = 0; 355 } else if (a_imm && !b_imm) { 356 imm_sv = a; 357 imm_on_lhs = 1; 358 } else { 359 return 0; 360 } 361 v = api_mask_width((u64)imm_sv->op.v.imm, width); 362 if (!api_imm_is_pow2(v, &k) || k == 0) return 0; 363 if (imm_on_lhs) { 364 ApiSValue tmp = *a; 365 *a = *b; 366 *b = tmp; 367 } 368 b->op.v.imm = (i64)k; 369 b->op.type = ty; 370 *op = BO_SHL; 371 return 1; 372 } 373 case BO_UDIV: 374 case BO_UREM: 375 if (!b_imm || a_imm) return 0; /* RHS imm, LHS a real value */ 376 v = api_mask_width((u64)b->op.v.imm, width); 377 if (!api_imm_is_pow2(v, &k) || k == 0) return 0; 378 if (*op == BO_UDIV) { 379 b->op.v.imm = (i64)k; 380 *op = BO_SHR_U; 381 } else { 382 b->op.v.imm = (i64)api_mask_width(v - 1u, width); 383 *op = BO_AND; 384 } 385 b->op.type = ty; 386 return 1; 387 default: 388 return 0; 389 } 390 } 391 392 int api_op_is_int_identity(KitCg* g, BinOp op, KitCgTypeId ty, i64 imm) { 393 u32 width; 394 u64 v; 395 if (!api_foldable_int_type(g->c, ty, &width)) return 0; 396 v = api_mask_width((u64)imm, width); 397 switch (op) { 398 case BO_IADD: 399 case BO_ISUB: 400 case BO_OR: 401 case BO_XOR: 402 case BO_SHL: 403 case BO_SHR_S: 404 case BO_SHR_U: 405 return v == 0; 406 case BO_IMUL: 407 case BO_SDIV: 408 case BO_UDIV: 409 return v == 1; 410 case BO_AND: 411 return v == api_width_mask(width); 412 default: 413 return 0; 414 } 415 } 416 417 int api_try_collapse_binop_identity(KitCg* g, BinOp op, KitCgTypeId ty, 418 ApiSValue* a, ApiSValue* b, 419 ApiSValue* out) { 420 u32 width; 421 u64 av = 0; 422 u64 bv = 0; 423 if (!api_foldable_int_type(g->c, ty, &width)) return 0; 424 if (a->kind == SV_OPERAND && a->op.kind == OPK_IMM) 425 av = api_mask_width((u64)a->op.v.imm, width); 426 if (b->kind == SV_OPERAND && b->op.kind == OPK_IMM) 427 bv = api_mask_width((u64)b->op.v.imm, width); 428 429 if (b->kind == SV_OPERAND && b->op.kind == OPK_IMM && a->kind == SV_OPERAND && 430 a->op.kind != OPK_IMM && api_op_is_int_identity(g, op, ty, b->op.v.imm)) { 431 *out = api_make_sv_with_local_ownership( 432 a->op, ty, api_sv_owns_operand_local(a, &a->op)); 433 a->res = RES_INHERENT; 434 return 1; 435 } 436 if (b->kind == SV_OPERAND && b->op.kind == OPK_IMM && a->kind == SV_OPERAND && 437 a->op.kind != OPK_IMM && 438 (op == BO_SREM || op == BO_UREM || op == BO_IMUL || op == BO_AND || 439 op == BO_OR)) { 440 if ((op == BO_SREM || op == BO_UREM) && bv == 1) { 441 *out = api_make_sv(api_op_imm(0, ty), ty); 442 return 1; 443 } 444 if ((op == BO_IMUL || op == BO_AND) && bv == 0) { 445 *out = api_make_sv(api_op_imm(0, ty), ty); 446 return 1; 447 } 448 if (op == BO_OR && bv == api_width_mask(width)) { 449 *out = 450 api_make_sv(api_op_imm(api_fold_result(g->c, ty, bv, width), ty), ty); 451 return 1; 452 } 453 } 454 if (a->kind == SV_OPERAND && a->op.kind == OPK_IMM && b->kind == SV_OPERAND && 455 b->op.kind != OPK_IMM && 456 (op == BO_IADD || op == BO_IMUL || op == BO_OR || op == BO_XOR || 457 op == BO_AND) && 458 api_op_is_int_identity(g, op, ty, a->op.v.imm)) { 459 *out = api_make_sv_with_local_ownership( 460 b->op, ty, api_sv_owns_operand_local(b, &b->op)); 461 b->res = RES_INHERENT; 462 return 1; 463 } 464 if (a->kind == SV_OPERAND && a->op.kind == OPK_IMM && b->kind == SV_OPERAND && 465 b->op.kind != OPK_IMM && (op == BO_IMUL || op == BO_AND || op == BO_OR)) { 466 if ((op == BO_IMUL || op == BO_AND) && av == 0) { 467 *out = api_make_sv(api_op_imm(0, ty), ty); 468 return 1; 469 } 470 if (op == BO_OR && av == api_width_mask(width)) { 471 *out = 472 api_make_sv(api_op_imm(api_fold_result(g->c, ty, av, width), ty), ty); 473 return 1; 474 } 475 } 476 return 0; 477 } 478 479 int api_try_fold_arith_chain(KitCg* g, BinOp op, KitCgTypeId ty, ApiSValue* a, 480 ApiSValue* b, ApiSValue* out) { 481 i64 folded; 482 BinOp result_op; 483 if (a->kind != SV_ARITH || a->delayed.arith.kind != API_DELAYED_BINOP || 484 a->delayed.arith.a.kind != OPK_LOCAL || 485 a->delayed.arith.b.kind != OPK_IMM || b->kind != SV_OPERAND || 486 b->op.kind != OPK_IMM) { 487 return 0; 488 } 489 result_op = a->delayed.arith.bin_op; 490 switch (a->delayed.arith.bin_op) { 491 case BO_IADD: 492 if (op == BO_IADD) { 493 if (!api_try_fold_int_binop(g, BO_IADD, ty, a->delayed.arith.b.v.imm, 494 b->op.v.imm, &folded)) 495 return 0; 496 result_op = BO_IADD; 497 } else if (op == BO_ISUB) { 498 if (!api_try_fold_int_binop(g, BO_ISUB, ty, a->delayed.arith.b.v.imm, 499 b->op.v.imm, &folded)) 500 return 0; 501 result_op = BO_IADD; 502 } else { 503 return 0; 504 } 505 break; 506 case BO_ISUB: 507 if (op == BO_IADD) { 508 if (!api_try_fold_int_binop(g, BO_ISUB, ty, b->op.v.imm, 509 a->delayed.arith.b.v.imm, &folded)) 510 return 0; 511 result_op = BO_IADD; 512 } else if (op == BO_ISUB) { 513 if (!api_try_fold_int_binop(g, BO_IADD, ty, a->delayed.arith.b.v.imm, 514 b->op.v.imm, &folded)) 515 return 0; 516 result_op = BO_ISUB; 517 } else { 518 return 0; 519 } 520 break; 521 case BO_XOR: 522 if (op != BO_XOR || 523 !api_try_fold_int_binop(g, BO_XOR, ty, a->delayed.arith.b.v.imm, 524 b->op.v.imm, &folded)) 525 return 0; 526 result_op = BO_XOR; 527 break; 528 case BO_AND: 529 if (op != BO_AND || 530 !api_try_fold_int_binop(g, BO_AND, ty, a->delayed.arith.b.v.imm, 531 b->op.v.imm, &folded)) 532 return 0; 533 result_op = BO_AND; 534 break; 535 case BO_OR: 536 if (op != BO_OR || 537 !api_try_fold_int_binop(g, BO_OR, ty, a->delayed.arith.b.v.imm, 538 b->op.v.imm, &folded)) 539 return 0; 540 result_op = BO_OR; 541 break; 542 default: 543 return 0; 544 } 545 if (api_op_is_int_identity(g, result_op, ty, folded)) { 546 *out = api_make_sv_with_local_ownership(a->delayed.arith.a, ty, 547 a->delayed.arith.a_owned); 548 a->delayed.arith.a_owned = 0; 549 memset(&a->delayed.arith.a, 0, sizeof a->delayed.arith.a); 550 return 1; 551 } 552 a->delayed.arith.bin_op = result_op; 553 a->delayed.arith.b.v.imm = folded; 554 *out = *a; 555 a->delayed.arith.a_owned = 0; 556 a->delayed.arith.b_owned = 0; 557 memset(&a->delayed.arith.a, 0, sizeof a->delayed.arith.a); 558 memset(&a->delayed.arith.b, 0, sizeof a->delayed.arith.b); 559 return 1; 560 } 561 562 int api_try_fold_unary_chain(ApiSValue* a, UnOp op, KitCgTypeId ty, 563 ApiSValue* out) { 564 if (op != UO_BNOT || a->kind != SV_ARITH || 565 a->delayed.arith.kind != API_DELAYED_UNOP || 566 a->delayed.arith.un_op != UO_BNOT || 567 a->delayed.arith.a.kind != OPK_LOCAL) { 568 return 0; 569 } 570 *out = api_make_sv_with_local_ownership(a->delayed.arith.a, ty, 571 a->delayed.arith.a_owned); 572 a->delayed.arith.a_owned = 0; 573 memset(&a->delayed.arith.a, 0, sizeof a->delayed.arith.a); 574 return 1; 575 } 576 577 /* ============================================================ 578 * 3. Const-local store-to-load forwarding 579 * ============================================================ */ 580 581 void api_local_const_clear(ApiSourceLocal* rec) { 582 if (!rec) return; 583 rec->const_valid = 0; 584 rec->const_value = 0; 585 } 586 587 void api_local_const_clear_all(KitCg* g) { 588 if (!g) return; 589 for (u32 i = 0; i < g->nlocals; ++i) api_local_const_clear(&g->locals[i]); 590 } 591 592 void api_local_const_memory_boundary(KitCg* g) { api_local_const_clear_all(g); } 593 594 void api_local_const_control_boundary(KitCg* g) { 595 api_local_const_clear_all(g); 596 } 597 598 void api_local_const_address_taken(KitCg* g, KitCgLocal local) { 599 api_local_const_clear_all(g); 600 api_local_const_clear(api_local_from_handle(g, local)); 601 } 602 603 int api_local_const_can_track(KitCg* g, const ApiSourceLocal* rec, 604 KitCgMemAccess access) { 605 u32 width; 606 KitCgTypeId ty; 607 u64 access_size; 608 u64 local_size; 609 if (!g || !rec) return 0; 610 if (rec->kind != API_SOURCE_LOCAL_AUTO) return 0; 611 if (access.flags & KIT_CG_MEM_VOLATILE) return 0; 612 ty = resolve_type(g->c, access.type); 613 if (!ty) ty = rec->type; 614 if (ty != rec->type) return 0; 615 access_size = abi_cg_sizeof(g->c->abi, ty); 616 local_size = abi_cg_sizeof(g->c->abi, rec->type); 617 if (access_size != local_size) return 0; 618 return api_foldable_int_like_type(g->c, ty, &width); 619 } 620 621 void api_local_const_store(KitCg* g, KitCgLocal local, KitCgMemAccess access, 622 i64 value) { 623 ApiSourceLocal* rec = api_local_from_handle(g, local); 624 KitCgTypeId ty; 625 u32 width; 626 if (!api_local_const_can_track(g, rec, access)) { 627 api_local_const_clear(rec); 628 return; 629 } 630 ty = resolve_type(g->c, access.type); 631 if (!ty) ty = rec->type; 632 if (!api_foldable_int_like_type(g->c, ty, &width)) { 633 api_local_const_clear(rec); 634 return; 635 } 636 rec->const_value = api_fold_result(g->c, ty, (u64)value, width); 637 rec->const_valid = 1; 638 } 639 640 int api_local_const_load(KitCg* g, KitCgLocal local, KitCgMemAccess access, 641 Operand* out) { 642 ApiSourceLocal* rec = api_local_from_handle(g, local); 643 KitCgTypeId ty; 644 u32 width; 645 if (!out || !api_local_const_can_track(g, rec, access)) return 0; 646 if (!rec->const_valid) return 0; 647 ty = resolve_type(g->c, access.type); 648 if (!ty) ty = rec->type; 649 if (!api_foldable_int_like_type(g->c, ty, &width)) return 0; 650 *out = 651 api_op_imm(api_fold_result(g->c, ty, (u64)rec->const_value, width), ty); 652 return 1; 653 }