kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit d03eb4c8a8580486122ee065c08a6e391489e6f5
parent 20c4b046d8cbce3704e010a55002c6bc7f37f613
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Mon,  1 Jun 2026 20:03:44 -0700

cg: isolate the -O0 semantic peephole into fold.{c,h} (Track 6.2)

The value stack doubles as an -O0 peephole optimizer (a kept feature). Its
machinery was interleaved with the stack discipline in value.c. Extract it into
a new isolated module with a documented contract:

  - integer constant folding (width/mask helpers + fold_int_binop/unop/cmp),
  - the SV_CMP delayed-compare lifecycle (make/release/materialize/invert),
  - the SV_ARITH delayed-arith lifecycle (currently gated off; kept here so
    Track 6.3 re-enables it with a gate flip, not a code move),
  - const-local store-to-load forwarding with its invalidation boundaries.

fold.h is re-exported at the end of internal.h (after the operand/value types
it names), so op families that include internal.h see the contract unchanged.
Pure relocation: no behavior change. value.c keeps api_lvalue_addr and the
enum-mapping helpers. api_branch_if stays a control-flow op that consumes the
delayed compare via fold.h.

Green: lib, bin, test-cg-api (173+168), test-toy (1344/0/26),
test-opt, test-isa, smoke x64/rv64 — all identical to baseline.

Diffstat:
M doc/CODEGEN.md | 21 ++++++++++++++-------
A src/cg/fold.c | 654 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/cg/fold.h | 100 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/cg/internal.h | 51 ++++++---------------------------------------------
M src/cg/value.c | 627 -------------------------------------------------------------------------------
5 files changed, 774 insertions(+), 679 deletions(-)

diff --git a/doc/CODEGEN.md b/doc/CODEGEN.md @@ -65,8 +65,10 @@ so the C parser's `cg_adapter`, the toy frontend, and the wasm-language frontend do not change when a backend does. See [FRONTENDS.md](FRONTENDS.md). The implementation lives in `src/cg/`, split by op family rather than one -monolith: `value.c` (stack discipline, lvalue/rvalue conversion, delayed -compares/arith), `memory.c` (loads/stores/addressing/aggregates), `arith.c`, +monolith: `value.c` (stack discipline, lvalue/rvalue conversion, operand +materialization), `fold.c` (the `-O0` semantic peephole — constant folding, the +delayed compare/arith forms, and const-local store-to-load forwarding; contract +in `fold.h`), `memory.c` (loads/stores/addressing/aggregates), `arith.c`, `control.c` (labels, branches, scopes, switch, computed goto), `call.c`, `atomic.c`, `asm.c`, `type.c`, `local.c`, `data.c`, `wide.c` (128-bit scalars), with shared state and helpers in `internal.h` and lifecycle in `session.c`. @@ -76,11 +78,16 @@ The value stack's job is purely semantic lowering. Each entry (`ApiSValue`) is one of an operand (immediate / constant / semantic local / lvalue address), a *delayed compare*, or a *delayed arith* — forms held un-emitted so a following branch can fuse a compare instead of materializing a 0/1, or so a small -immediate can flow straight into a `binop`. The stack does **not** own -registers, frame slots, spill policy, or caller-saved preservation; those moved -down into the target realizations. When an operation needs a value emitted, the -stack calls the corresponding `g->target->op(...)` semantic hook with -local-only operands. +immediate can flow straight into a `binop`. These delayed forms and the +constant folding around them are the `-O0` peephole; it lives in `fold.c` +(contract in `fold.h`), kept isolated from the stack discipline. 
The delayed +*compare* is live; the delayed *arith* machinery is present but currently gated +off (`api_can_delay_int_arith` returns 0) pending the place/value rework that +removes the load/store addressing rider which forced it off. The stack does +**not** own registers, frame slots, spill policy, or caller-saved preservation; +those moved down into the target realizations. When an operation needs a value +emitted, the stack calls the corresponding `g->target->op(...)` semantic hook +with local-only operands. Switch is a good example of the semantic/structured division. `CgTarget` carries an optional `switch_` hook and a `supports_label_table` query. Native arches diff --git a/src/cg/fold.c b/src/cg/fold.c @@ -0,0 +1,654 @@ +/* The semantic-layer peephole optimizer. See cg/fold.h for the contract. + * + * This is the isolated `-O0` peephole that was previously interleaved with the + * value-stack discipline in value.c (Track 6.2). Nothing here emits control + * flow or owns the stack; it folds constants, manages the delayed compare/arith + * forms, and tracks const-valued locals. The op families drive it through the + * fold.h entry points. */ + +#include "cg/internal.h" + +/* ============================================================ + * 1. 
Integer constant folding + * ============================================================ */ + +u32 api_int_like_width(Compiler* c, CfreeCgTypeId id) { + const CgType* ty = cg_type_get(c, id); + if (!ty) return 0; + if (ty->kind == CFREE_CG_TYPE_ALIAS) + return api_int_like_width(c, ty->alias.base); + if (ty->kind == CFREE_CG_TYPE_INT || ty->kind == CFREE_CG_TYPE_BOOL) + return ty->integer.width; + if (ty->kind == CFREE_CG_TYPE_ENUM) return (u32)(ty->size * 8u); + if (ty->kind == CFREE_CG_TYPE_PTR) return (u32)(ty->size * 8u); + return 0; +} + +int api_type_is_bool(Compiler* c, CfreeCgTypeId id) { + const CgType* ty = cg_type_get(c, id); + if (!ty) return 0; + if (ty->kind == CFREE_CG_TYPE_ALIAS) + return api_type_is_bool(c, ty->alias.base); + return ty->kind == CFREE_CG_TYPE_BOOL; +} + +u64 api_width_mask(u32 width) { + if (width >= 64) return UINT64_MAX; + return (1ull << width) - 1ull; +} + +u64 api_mask_width(u64 v, u32 width) { return v & api_width_mask(width); } + +i64 api_sign_extend_width(u64 v, u32 width) { + v = api_mask_width(v, width); + if (width >= 64) return (i64)v; + u64 sign = 1ull << (width - 1u); + return (i64)((v ^ sign) - sign); +} + +int api_foldable_int_like_type(Compiler* c, CfreeCgTypeId ty, u32* width_out) { + u32 width = api_int_like_width(c, ty); + if (!width || width > 64) return 0; + *width_out = width; + return 1; +} + +int api_foldable_int_type(Compiler* c, CfreeCgTypeId ty, u32* width_out) { + if (!cg_type_is_int(c, ty)) return 0; + return api_foldable_int_like_type(c, ty, width_out); +} + +i64 api_fold_result(Compiler* c, CfreeCgTypeId ty, u64 v, u32 width) { + v = api_mask_width(v, width); + if (api_type_is_bool(c, ty)) v = v != 0; + return (i64)v; +} + +int api_try_fold_int_binop(CfreeCg* g, BinOp op, CfreeCgTypeId ty, i64 a, i64 b, + i64* out) { + u32 width; + u64 ua, ub, r; + if (!g || !out || !api_foldable_int_type(g->c, ty, &width)) return 0; + ua = api_mask_width((u64)a, width); + ub = api_mask_width((u64)b, width); + r = 0; 
+ switch (op) { + case BO_IADD: + r = ua + ub; + break; + case BO_ISUB: + r = ua - ub; + break; + case BO_IMUL: + r = ua * ub; + break; + case BO_AND: + r = ua & ub; + break; + case BO_OR: + r = ua | ub; + break; + case BO_XOR: + r = ua ^ ub; + break; + case BO_SHL: { + u32 sh = (u32)(ub & (u64)(width - 1u)); + r = ua << sh; + break; + } + case BO_SHR_U: { + u32 sh = (u32)(ub & (u64)(width - 1u)); + r = ua >> sh; + break; + } + case BO_SHR_S: { + u32 sh = (u32)(ub & (u64)(width - 1u)); + if (!sh) { + r = ua; + } else { + u64 sign = 1ull << (width - 1u); + r = ua >> sh; + if (ua & sign) r |= api_width_mask(width) << (width - sh); + } + break; + } + default: + return 0; + } + *out = api_fold_result(g->c, ty, r, width); + return 1; +} + +int api_try_fold_int_unop(CfreeCg* g, UnOp op, CfreeCgTypeId ty, i64 a, + i64* out) { + u32 width; + u64 ua, r; + if (!g || !out || !api_foldable_int_type(g->c, ty, &width)) return 0; + ua = api_mask_width((u64)a, width); + switch (op) { + case UO_NEG: + r = 0u - ua; + break; + case UO_NOT: + r = ua == 0; + break; + case UO_BNOT: + r = ~ua; + break; + default: + return 0; + } + *out = api_fold_result(g->c, ty, r, width); + return 1; +} + +int api_try_fold_int_cmp(CfreeCg* g, CmpOp op, CfreeCgTypeId ty, i64 a, i64 b, + i64* out) { + u32 width; + u64 ua, ub; + i64 sa, sb; + int r; + if (!g || !out || !api_foldable_int_like_type(g->c, ty, &width)) return 0; + ua = api_mask_width((u64)a, width); + ub = api_mask_width((u64)b, width); + sa = api_sign_extend_width(ua, width); + sb = api_sign_extend_width(ub, width); + switch (op) { + case CMP_EQ: + r = ua == ub; + break; + case CMP_NE: + r = ua != ub; + break; + case CMP_LT_S: + r = sa < sb; + break; + case CMP_LE_S: + r = sa <= sb; + break; + case CMP_GT_S: + r = sa > sb; + break; + case CMP_GE_S: + r = sa >= sb; + break; + case CMP_LT_U: + r = ua < ub; + break; + case CMP_LE_U: + r = ua <= ub; + break; + case CMP_GT_U: + r = ua > ub; + break; + case CMP_GE_U: + r = ua >= ub; + break; + 
default: + return 0; + } + *out = r ? 1 : 0; + return 1; +} + +/* ============================================================ + * 2a. Delayed compare (SV_CMP) lifecycle + * ============================================================ */ + +ApiSValue api_make_cmp(CmpOp op, Operand a, Operand b, CfreeCgTypeId result_ty, + int a_owned, int b_owned) { + ApiSValue sv; + memset(&sv, 0, sizeof sv); + sv.kind = SV_CMP; + sv.type = result_ty; + sv.delayed.cmp.op = op; + sv.delayed.cmp.a = a; + sv.delayed.cmp.b = b; + sv.delayed.cmp.a_owned = a_owned ? 1u : 0u; + sv.delayed.cmp.b_owned = b_owned ? 1u : 0u; + sv.res = RES_INHERENT; + sv.source_local = CFREE_CG_LOCAL_NONE; + return sv; +} + +CmpOp api_invert_cmp(CmpOp op) { + switch (op) { + case CMP_EQ: + return CMP_NE; + case CMP_NE: + return CMP_EQ; + case CMP_LT_S: + return CMP_GE_S; + case CMP_LE_S: + return CMP_GT_S; + case CMP_GT_S: + return CMP_LE_S; + case CMP_GE_S: + return CMP_LT_S; + case CMP_LT_U: + return CMP_GE_U; + case CMP_LE_U: + return CMP_GT_U; + case CMP_GT_U: + return CMP_LE_U; + case CMP_GE_U: + return CMP_LT_U; + case CMP_LT_F: + return CMP_GE_F; + case CMP_LE_F: + return CMP_GT_F; + case CMP_GT_F: + return CMP_LE_F; + case CMP_GE_F: + return CMP_LT_F; + } + return CMP_EQ; +} + +void api_release_cmp(CfreeCg* g, ApiSValue* sv) { + if (sv->delayed.cmp.a_owned) api_release_operand_local(g, sv->delayed.cmp.a); + if (sv->delayed.cmp.b_owned && + (sv->delayed.cmp.b.kind != OPK_LOCAL || sv->delayed.cmp.a.kind != OPK_LOCAL || + sv->delayed.cmp.b.v.local != sv->delayed.cmp.a.v.local || + !sv->delayed.cmp.a_owned)) { + api_release_operand_local(g, sv->delayed.cmp.b); + } + memset(&sv->delayed.cmp.a, 0, sizeof sv->delayed.cmp.a); + memset(&sv->delayed.cmp.b, 0, sizeof sv->delayed.cmp.b); + sv->delayed.cmp.a_owned = 0; + sv->delayed.cmp.b_owned = 0; + sv->kind = SV_OPERAND; +} + +void api_materialize_cmp_to(CfreeCg* g, ApiSValue* sv, Operand dst) { + g->target->cmp(g->target, sv->delayed.cmp.op, dst, 
sv->delayed.cmp.a, + sv->delayed.cmp.b); + if (sv->delayed.cmp.a_owned && sv->delayed.cmp.a.kind == OPK_LOCAL && + sv->delayed.cmp.a.v.local != dst.v.local) { + api_release_operand_local(g, sv->delayed.cmp.a); + } + if (sv->delayed.cmp.b_owned && sv->delayed.cmp.b.kind == OPK_LOCAL && + sv->delayed.cmp.b.v.local != dst.v.local) { + api_release_operand_local(g, sv->delayed.cmp.b); + } + memset(&sv->delayed.cmp.a, 0, sizeof sv->delayed.cmp.a); + memset(&sv->delayed.cmp.b, 0, sizeof sv->delayed.cmp.b); + sv->delayed.cmp.a_owned = 0; + sv->delayed.cmp.b_owned = 0; + sv->kind = SV_OPERAND; + sv->op = dst; + sv->type = dst.type; + sv->res = RES_LOCAL; + sv->lvalue = 0; +} + +/* ============================================================ + * 2b. Delayed arith (SV_ARITH) lifecycle + * + * Currently gated off: api_can_delay_int_arith returns 0, so nothing builds an + * SV_ARITH today. The machinery is kept here (rather than deleted) because + * Track 6.3 re-enables it once Track 7 removes the load/store EA rider that + * forced it off. Re-enabling is then a gate flip in api_can_delay_int_arith, + * not a code move. + * ============================================================ */ + +ApiSValue api_make_arith_unop(UnOp op, Operand a, CfreeCgTypeId ty, + int a_owned) { + ApiSValue sv; + memset(&sv, 0, sizeof sv); + sv.kind = SV_ARITH; + sv.delayed.arith.kind = API_DELAYED_UNOP; + sv.type = ty; + sv.delayed.arith.un_op = op; + sv.delayed.arith.a = a; + sv.delayed.arith.a_owned = a_owned ? 1u : 0u; + sv.res = RES_INHERENT; + sv.source_local = CFREE_CG_LOCAL_NONE; + return sv; +} + +ApiSValue api_make_arith_binop(BinOp op, Operand a, Operand b, CfreeCgTypeId ty, + int a_owned, int b_owned) { + ApiSValue sv; + memset(&sv, 0, sizeof sv); + sv.kind = SV_ARITH; + sv.delayed.arith.kind = API_DELAYED_BINOP; + sv.type = ty; + sv.delayed.arith.bin_op = op; + sv.delayed.arith.a = a; + sv.delayed.arith.b = b; + sv.delayed.arith.a_owned = a_owned ? 
1u : 0u; + sv.delayed.arith.b_owned = b_owned ? 1u : 0u; + sv.res = RES_INHERENT; + sv.source_local = CFREE_CG_LOCAL_NONE; + return sv; +} + +void api_release_arith(CfreeCg* g, ApiSValue* sv) { + if (sv->delayed.arith.a_owned) + api_release_operand_local(g, sv->delayed.arith.a); + if (sv->delayed.arith.b_owned && + (sv->delayed.arith.b.kind != OPK_LOCAL || + sv->delayed.arith.a.kind != OPK_LOCAL || + sv->delayed.arith.b.v.local != sv->delayed.arith.a.v.local || + !sv->delayed.arith.a_owned)) { + api_release_operand_local(g, sv->delayed.arith.b); + } + memset(&sv->delayed.arith.a, 0, sizeof sv->delayed.arith.a); + memset(&sv->delayed.arith.b, 0, sizeof sv->delayed.arith.b); + sv->delayed.arith.a_owned = 0; + sv->delayed.arith.b_owned = 0; + sv->kind = SV_OPERAND; +} + +void api_materialize_arith_to(CfreeCg* g, ApiSValue* sv, Operand dst) { + if (sv->delayed.arith.kind == API_DELAYED_UNOP) { + g->target->unop(g->target, sv->delayed.arith.un_op, dst, + sv->delayed.arith.a); + } else { + g->target->binop(g->target, sv->delayed.arith.bin_op, dst, + sv->delayed.arith.a, sv->delayed.arith.b); + } + if (sv->delayed.arith.a_owned && sv->delayed.arith.a.kind == OPK_LOCAL && + sv->delayed.arith.a.v.local != dst.v.local) { + api_release_operand_local(g, sv->delayed.arith.a); + } + if (sv->delayed.arith.b_owned && sv->delayed.arith.b.kind == OPK_LOCAL && + sv->delayed.arith.b.v.local != dst.v.local) { + api_release_operand_local(g, sv->delayed.arith.b); + } + memset(&sv->delayed.arith.a, 0, sizeof sv->delayed.arith.a); + memset(&sv->delayed.arith.b, 0, sizeof sv->delayed.arith.b); + sv->delayed.arith.a_owned = 0; + sv->delayed.arith.b_owned = 0; + sv->kind = SV_OPERAND; + sv->op = dst; + sv->type = dst.type; + sv->res = RES_LOCAL; + sv->lvalue = 0; +} + +int api_arith_rhs_reusable(const ApiSValue* sv) { + if (sv->delayed.arith.kind == API_DELAYED_UNOP) return 0; + switch (sv->delayed.arith.bin_op) { + case BO_IADD: + case BO_IMUL: + case BO_AND: + case BO_OR: + case BO_XOR: + 
return 1; + default: + return 0; + } +} + +int api_can_delay_int_arith(CfreeCg* g, CfreeCgTypeId ty, u32 flags) { + (void)g; + (void)ty; + (void)flags; + return 0; +} + +int api_op_is_int_identity(CfreeCg* g, BinOp op, CfreeCgTypeId ty, i64 imm) { + u32 width; + u64 v; + if (!api_foldable_int_type(g->c, ty, &width)) return 0; + v = api_mask_width((u64)imm, width); + switch (op) { + case BO_IADD: + case BO_ISUB: + case BO_OR: + case BO_XOR: + case BO_SHL: + case BO_SHR_S: + case BO_SHR_U: + return v == 0; + case BO_IMUL: + case BO_SDIV: + case BO_UDIV: + return v == 1; + case BO_AND: + return v == api_width_mask(width); + default: + return 0; + } +} + +int api_try_collapse_binop_identity(CfreeCg* g, BinOp op, CfreeCgTypeId ty, + ApiSValue* a, ApiSValue* b, + ApiSValue* out) { + u32 width; + u64 av = 0; + u64 bv = 0; + if (!api_foldable_int_type(g->c, ty, &width)) return 0; + if (a->kind == SV_OPERAND && a->op.kind == OPK_IMM) + av = api_mask_width((u64)a->op.v.imm, width); + if (b->kind == SV_OPERAND && b->op.kind == OPK_IMM) + bv = api_mask_width((u64)b->op.v.imm, width); + + if (b->kind == SV_OPERAND && b->op.kind == OPK_IMM && a->kind == SV_OPERAND && + a->op.kind != OPK_IMM && api_op_is_int_identity(g, op, ty, b->op.v.imm)) { + *out = api_make_sv_with_local_ownership(a->op, ty, + api_sv_owns_operand_local(a, &a->op)); + a->res = RES_INHERENT; + return 1; + } + if (b->kind == SV_OPERAND && b->op.kind == OPK_IMM && a->kind == SV_OPERAND && + a->op.kind != OPK_IMM && + (op == BO_SREM || op == BO_UREM || op == BO_IMUL || op == BO_AND || + op == BO_OR)) { + if ((op == BO_SREM || op == BO_UREM) && bv == 1) { + *out = api_make_sv(api_op_imm(0, ty), ty); + return 1; + } + if ((op == BO_IMUL || op == BO_AND) && bv == 0) { + *out = api_make_sv(api_op_imm(0, ty), ty); + return 1; + } + if (op == BO_OR && bv == api_width_mask(width)) { + *out = + api_make_sv(api_op_imm(api_fold_result(g->c, ty, bv, width), ty), ty); + return 1; + } + } + if (a->kind == SV_OPERAND && 
a->op.kind == OPK_IMM && b->kind == SV_OPERAND && + b->op.kind != OPK_IMM && + (op == BO_IADD || op == BO_IMUL || op == BO_OR || op == BO_XOR || + op == BO_AND) && + api_op_is_int_identity(g, op, ty, a->op.v.imm)) { + *out = api_make_sv_with_local_ownership(b->op, ty, + api_sv_owns_operand_local(b, &b->op)); + b->res = RES_INHERENT; + return 1; + } + if (a->kind == SV_OPERAND && a->op.kind == OPK_IMM && b->kind == SV_OPERAND && + b->op.kind != OPK_IMM && (op == BO_IMUL || op == BO_AND || op == BO_OR)) { + if ((op == BO_IMUL || op == BO_AND) && av == 0) { + *out = api_make_sv(api_op_imm(0, ty), ty); + return 1; + } + if (op == BO_OR && av == api_width_mask(width)) { + *out = + api_make_sv(api_op_imm(api_fold_result(g->c, ty, av, width), ty), ty); + return 1; + } + } + return 0; +} + +int api_try_fold_arith_chain(CfreeCg* g, BinOp op, CfreeCgTypeId ty, + ApiSValue* a, ApiSValue* b, ApiSValue* out) { + i64 folded; + BinOp result_op; + if (a->kind != SV_ARITH || a->delayed.arith.kind != API_DELAYED_BINOP || + a->delayed.arith.a.kind != OPK_LOCAL || + a->delayed.arith.b.kind != OPK_IMM || b->kind != SV_OPERAND || + b->op.kind != OPK_IMM) { + return 0; + } + result_op = a->delayed.arith.bin_op; + switch (a->delayed.arith.bin_op) { + case BO_IADD: + if (op == BO_IADD) { + if (!api_try_fold_int_binop(g, BO_IADD, ty, a->delayed.arith.b.v.imm, + b->op.v.imm, &folded)) + return 0; + result_op = BO_IADD; + } else if (op == BO_ISUB) { + if (!api_try_fold_int_binop(g, BO_ISUB, ty, a->delayed.arith.b.v.imm, + b->op.v.imm, &folded)) + return 0; + result_op = BO_IADD; + } else { + return 0; + } + break; + case BO_ISUB: + if (op == BO_IADD) { + if (!api_try_fold_int_binop(g, BO_ISUB, ty, b->op.v.imm, + a->delayed.arith.b.v.imm, &folded)) + return 0; + result_op = BO_IADD; + } else if (op == BO_ISUB) { + if (!api_try_fold_int_binop(g, BO_IADD, ty, a->delayed.arith.b.v.imm, + b->op.v.imm, &folded)) + return 0; + result_op = BO_ISUB; + } else { + return 0; + } + break; + case BO_XOR: + 
if (op != BO_XOR || + !api_try_fold_int_binop(g, BO_XOR, ty, a->delayed.arith.b.v.imm, + b->op.v.imm, &folded)) + return 0; + result_op = BO_XOR; + break; + case BO_AND: + if (op != BO_AND || + !api_try_fold_int_binop(g, BO_AND, ty, a->delayed.arith.b.v.imm, + b->op.v.imm, &folded)) + return 0; + result_op = BO_AND; + break; + case BO_OR: + if (op != BO_OR || + !api_try_fold_int_binop(g, BO_OR, ty, a->delayed.arith.b.v.imm, + b->op.v.imm, &folded)) + return 0; + result_op = BO_OR; + break; + default: + return 0; + } + if (api_op_is_int_identity(g, result_op, ty, folded)) { + *out = api_make_sv_with_local_ownership(a->delayed.arith.a, ty, + a->delayed.arith.a_owned); + a->delayed.arith.a_owned = 0; + memset(&a->delayed.arith.a, 0, sizeof a->delayed.arith.a); + return 1; + } + a->delayed.arith.bin_op = result_op; + a->delayed.arith.b.v.imm = folded; + *out = *a; + a->delayed.arith.a_owned = 0; + a->delayed.arith.b_owned = 0; + memset(&a->delayed.arith.a, 0, sizeof a->delayed.arith.a); + memset(&a->delayed.arith.b, 0, sizeof a->delayed.arith.b); + return 1; +} + +int api_try_fold_unary_chain(ApiSValue* a, UnOp op, CfreeCgTypeId ty, + ApiSValue* out) { + if (op != UO_BNOT || a->kind != SV_ARITH || + a->delayed.arith.kind != API_DELAYED_UNOP || + a->delayed.arith.un_op != UO_BNOT || a->delayed.arith.a.kind != OPK_LOCAL) { + return 0; + } + *out = api_make_sv_with_local_ownership(a->delayed.arith.a, ty, + a->delayed.arith.a_owned); + a->delayed.arith.a_owned = 0; + memset(&a->delayed.arith.a, 0, sizeof a->delayed.arith.a); + return 1; +} + +/* ============================================================ + * 3. 
Const-local store-to-load forwarding + * ============================================================ */ + +void api_local_const_clear(ApiSourceLocal* rec) { + if (!rec) return; + rec->const_valid = 0; + rec->const_value = 0; +} + +void api_local_const_clear_all(CfreeCg* g) { + if (!g) return; + for (u32 i = 0; i < g->nlocals; ++i) api_local_const_clear(&g->locals[i]); +} + +void api_local_const_memory_boundary(CfreeCg* g) { + api_local_const_clear_all(g); +} + +void api_local_const_control_boundary(CfreeCg* g) { + api_local_const_clear_all(g); +} + +void api_local_const_address_taken(CfreeCg* g, CfreeCgLocal local) { + api_local_const_clear_all(g); + api_local_const_clear(api_local_from_handle(g, local)); +} + +int api_local_const_can_track(CfreeCg* g, const ApiSourceLocal* rec, + CfreeCgMemAccess access) { + u32 width; + CfreeCgTypeId ty; + u64 access_size; + u64 local_size; + if (!g || !rec) return 0; + if (rec->kind != API_SOURCE_LOCAL_AUTO) return 0; + if (access.flags & CFREE_CG_MEM_VOLATILE) return 0; + ty = resolve_type(g->c, access.type); + if (!ty) ty = rec->type; + if (ty != rec->type) return 0; + access_size = abi_cg_sizeof(g->c->abi, ty); + local_size = abi_cg_sizeof(g->c->abi, rec->type); + if (access_size != local_size) return 0; + return api_foldable_int_like_type(g->c, ty, &width); +} + +void api_local_const_store(CfreeCg* g, CfreeCgLocal local, + CfreeCgMemAccess access, i64 value) { + ApiSourceLocal* rec = api_local_from_handle(g, local); + CfreeCgTypeId ty; + u32 width; + if (!api_local_const_can_track(g, rec, access)) { + api_local_const_clear(rec); + return; + } + ty = resolve_type(g->c, access.type); + if (!ty) ty = rec->type; + if (!api_foldable_int_like_type(g->c, ty, &width)) { + api_local_const_clear(rec); + return; + } + rec->const_value = api_fold_result(g->c, ty, (u64)value, width); + rec->const_valid = 1; +} + +int api_local_const_load(CfreeCg* g, CfreeCgLocal local, + CfreeCgMemAccess access, Operand* out) { + ApiSourceLocal* rec = 
api_local_from_handle(g, local); + CfreeCgTypeId ty; + u32 width; + if (!out || !api_local_const_can_track(g, rec, access)) return 0; + if (!rec->const_valid) return 0; + ty = resolve_type(g->c, access.type); + if (!ty) ty = rec->type; + if (!api_foldable_int_like_type(g->c, ty, &width)) return 0; + *out = + api_op_imm(api_fold_result(g->c, ty, (u64)rec->const_value, width), ty); + return 1; +} diff --git a/src/cg/fold.h b/src/cg/fold.h @@ -0,0 +1,100 @@ +#ifndef CFREE_CG_FOLD_H +#define CFREE_CG_FOLD_H + +/* The semantic-layer peephole optimizer (Track 6). + * + * The value stack is not just a lowering buffer — it is also a small, named + * `-O0` peephole optimizer (a kept feature: free unoptimized-build quality). + * This header is its contract; the implementation is src/cg/fold.c. Op families + * (arith.c, value.c, memory.c, control.c) call *into* these helpers rather than + * reaching into the peephole's internals. + * + * This header is part of cg/internal.h's surface: cg/internal.h includes it + * after the core operand/value types are defined, so anything that includes + * cg/internal.h sees these declarations. Do not include fold.h on its own. + * + * Three responsibilities live here: + * + * 1. Integer constant folding. Pure width-aware integer arithmetic on + * immediates: a binop/unop/cmp of two constants becomes one constant. + * The width/mask/sign helpers and the foldable-type predicates back this. + * + * 2. Delayed-form lifecycle. Two stack entries are held un-emitted so a + * consumer can fuse: + * - SV_CMP — a compare held so a following branch fuses cmp_branch + * instead of materializing a 0/1 then testing it. The + * consumer is api_branch_if (control.c); api_ensure_local + * materializes it to a 0/1 when used as a value. LIVE. + * - SV_ARITH — a small immediate/local arith held so it can flow into a + * following binop or collapse via identities. 
Currently + * gated off (api_can_delay_int_arith returns 0); re-enabled + * by Track 6.3 once Track 7 removes the EA rider. The code + * lives here so 6.3 is a gate flip, not a move. + * Each delayed form has make / release / materialize-to-dst entry points, + * plus the fold-chain and identity-collapse helpers for SV_ARITH. + * + * 3. Const-local store-to-load forwarding. A scalar auto local with a known + * constant value is tracked so a later load reads the immediate directly. + * The invalidation boundaries are explicit: api_local_const_memory_boundary + * (a possibly-aliasing store/call), api_local_const_control_boundary (a + * label/branch/jump — no CFG, so values cannot cross edges), and + * api_local_const_address_taken (the local escaped). + */ + +/* ---- 1. integer constant folding ---- */ + +u32 api_int_like_width(Compiler* c, CfreeCgTypeId id); +int api_type_is_bool(Compiler* c, CfreeCgTypeId id); +u64 api_width_mask(u32 width); +u64 api_mask_width(u64 v, u32 width); +i64 api_sign_extend_width(u64 v, u32 width); +int api_foldable_int_like_type(Compiler* c, CfreeCgTypeId ty, u32* width_out); +int api_foldable_int_type(Compiler* c, CfreeCgTypeId ty, u32* width_out); +i64 api_fold_result(Compiler* c, CfreeCgTypeId ty, u64 v, u32 width); +int api_try_fold_int_binop(CfreeCg* g, BinOp op, CfreeCgTypeId ty, i64 a, i64 b, + i64* out); +int api_try_fold_int_unop(CfreeCg* g, UnOp op, CfreeCgTypeId ty, i64 a, + i64* out); +int api_try_fold_int_cmp(CfreeCg* g, CmpOp op, CfreeCgTypeId ty, i64 a, i64 b, + i64* out); + +/* ---- 2a. delayed compare (SV_CMP) lifecycle ---- */ + +ApiSValue api_make_cmp(CmpOp op, Operand a, Operand b, CfreeCgTypeId result_ty, + int a_owned, int b_owned); +void api_release_cmp(CfreeCg* g, ApiSValue* sv); +void api_materialize_cmp_to(CfreeCg* g, ApiSValue* sv, Operand dst); +CmpOp api_invert_cmp(CmpOp op); + +/* ---- 2b. 
delayed arith (SV_ARITH) lifecycle — gated off pending 6.3 ---- */ + +ApiSValue api_make_arith_unop(UnOp op, Operand a, CfreeCgTypeId ty, int a_owned); +ApiSValue api_make_arith_binop(BinOp op, Operand a, Operand b, CfreeCgTypeId ty, + int a_owned, int b_owned); +void api_release_arith(CfreeCg* g, ApiSValue* sv); +void api_materialize_arith_to(CfreeCg* g, ApiSValue* sv, Operand dst); +int api_arith_rhs_reusable(const ApiSValue* sv); +int api_can_delay_int_arith(CfreeCg* g, CfreeCgTypeId ty, u32 flags); +int api_op_is_int_identity(CfreeCg* g, BinOp op, CfreeCgTypeId ty, i64 imm); +int api_try_collapse_binop_identity(CfreeCg* g, BinOp op, CfreeCgTypeId ty, + ApiSValue* a, ApiSValue* b, ApiSValue* out); +int api_try_fold_arith_chain(CfreeCg* g, BinOp op, CfreeCgTypeId ty, + ApiSValue* a, ApiSValue* b, ApiSValue* out); +int api_try_fold_unary_chain(ApiSValue* a, UnOp op, CfreeCgTypeId ty, + ApiSValue* out); + +/* ---- 3. const-local store-to-load forwarding ---- */ + +void api_local_const_clear(ApiSourceLocal* rec); +void api_local_const_clear_all(CfreeCg* g); +void api_local_const_memory_boundary(CfreeCg* g); +void api_local_const_control_boundary(CfreeCg* g); +void api_local_const_address_taken(CfreeCg* g, CfreeCgLocal local); +int api_local_const_can_track(CfreeCg* g, const ApiSourceLocal* rec, + CfreeCgMemAccess access); +void api_local_const_store(CfreeCg* g, CfreeCgLocal local, + CfreeCgMemAccess access, i64 value); +int api_local_const_load(CfreeCg* g, CfreeCgLocal local, + CfreeCgMemAccess access, Operand* out); + +#endif diff --git a/src/cg/internal.h b/src/cg/internal.h @@ -378,12 +378,6 @@ Operand api_op_indirect_indexed(CGLocal base, CGLocal index, u8 log2_scale, u8 api_residency_for(const Operand* o); ApiSValue api_make_sv(Operand op, CfreeCgTypeId ty); ApiSValue api_make_lv(Operand op, CfreeCgTypeId ty); -ApiSValue api_make_cmp(CmpOp op, Operand a, Operand b, CfreeCgTypeId result_ty, - int a_owned, int b_owned); -ApiSValue api_make_arith_unop(UnOp op, 
Operand a, CfreeCgTypeId ty, - int a_owned); -ApiSValue api_make_arith_binop(BinOp op, Operand a, Operand b, CfreeCgTypeId ty, - int a_owned, int b_owned); ApiSValue api_make_sv_with_local_ownership(Operand op, CfreeCgTypeId ty, int owned); CfreeCgTypeId api_sv_type(const ApiSValue* sv); @@ -413,11 +407,6 @@ void api_validate_memory_value(CfreeCg* g, const char* who, CfreeCgTypeId access_ty, CfreeCgTypeId value_ty); void api_release_operand_local(CfreeCg* g, Operand op); int api_sv_owns_operand_local(const ApiSValue* sv, const Operand* op); -void api_release_cmp(CfreeCg* g, ApiSValue* sv); -void api_release_arith(CfreeCg* g, ApiSValue* sv); -void api_materialize_cmp_to(CfreeCg* g, ApiSValue* sv, Operand dst); -void api_materialize_arith_to(CfreeCg* g, ApiSValue* sv, Operand dst); -int api_arith_rhs_reusable(const ApiSValue* sv); void api_ensure_local(CfreeCg* g, ApiSValue* sv); Operand api_force_local(CfreeCg* g, ApiSValue* v, CfreeCgTypeId ty); Operand api_force_local_unless_imm(CfreeCg* g, ApiSValue* v, CfreeCgTypeId ty); @@ -427,41 +416,7 @@ BinOp api_map_fp_binop(CfreeCgFpBinOp op); UnOp api_map_int_unop(CfreeCgIntUnOp op); CmpOp api_map_int_cmp(CfreeCgIntCmpOp op); CmpOp api_map_fp_cmp(CfreeCgFpCmpOp op); -CmpOp api_invert_cmp(CmpOp op); -u32 api_int_like_width(Compiler* c, CfreeCgTypeId id); -int api_type_is_bool(Compiler* c, CfreeCgTypeId id); -u64 api_width_mask(u32 width); -u64 api_mask_width(u64 v, u32 width); -i64 api_sign_extend_width(u64 v, u32 width); -int api_foldable_int_like_type(Compiler* c, CfreeCgTypeId ty, u32* width_out); -int api_foldable_int_type(Compiler* c, CfreeCgTypeId ty, u32* width_out); -i64 api_fold_result(Compiler* c, CfreeCgTypeId ty, u64 v, u32 width); -int api_try_fold_int_binop(CfreeCg* g, BinOp op, CfreeCgTypeId ty, i64 a, i64 b, - i64* out); -int api_try_fold_int_unop(CfreeCg* g, UnOp op, CfreeCgTypeId ty, i64 a, - i64* out); -int api_try_fold_int_cmp(CfreeCg* g, CmpOp op, CfreeCgTypeId ty, i64 a, i64 b, - i64* out); -void 
api_local_const_clear(ApiSourceLocal* rec); -void api_local_const_clear_all(CfreeCg* g); -void api_local_const_memory_boundary(CfreeCg* g); -void api_local_const_control_boundary(CfreeCg* g); -void api_local_const_address_taken(CfreeCg* g, CfreeCgLocal local); Operand api_lvalue_addr(CfreeCg* g, ApiSValue* v, CfreeCgTypeId pty); -int api_local_const_can_track(CfreeCg* g, const ApiSourceLocal* rec, - CfreeCgMemAccess access); -void api_local_const_store(CfreeCg* g, CfreeCgLocal local, - CfreeCgMemAccess access, i64 value); -int api_local_const_load(CfreeCg* g, CfreeCgLocal local, - CfreeCgMemAccess access, Operand* out); -int api_can_delay_int_arith(CfreeCg* g, CfreeCgTypeId ty, u32 flags); -int api_op_is_int_identity(CfreeCg* g, BinOp op, CfreeCgTypeId ty, i64 imm); -int api_try_collapse_binop_identity(CfreeCg* g, BinOp op, CfreeCgTypeId ty, - ApiSValue* a, ApiSValue* b, ApiSValue* out); -int api_try_fold_arith_chain(CfreeCg* g, BinOp op, CfreeCgTypeId ty, - ApiSValue* a, ApiSValue* b, ApiSValue* out); -int api_try_fold_unary_chain(ApiSValue* a, UnOp op, CfreeCgTypeId ty, - ApiSValue* out); CGLocal api_f128_temp_local(CfreeCg* g, CfreeCgTypeId ty); u64 api_u64_from_target_bytes(CfreeCg* g, const u8* bytes); void api_store_f128_bytes(CfreeCg* g, CGLocal local, CfreeCgTypeId ty, @@ -478,4 +433,10 @@ void api_runtime_call_values(CfreeCg* g, const char* name, CfreeCgTypeId ret, const CfreeCgTypeId* params, u32 nparams, ApiSValue* args); +/* The semantic-layer peephole optimizer: constant folding, the delayed + * compare/arith forms, and const-local store-to-load forwarding. Included here, + * after the operand and value types above, so its declarations can name + * ApiSValue / ApiSourceLocal / Operand. 
*/ +#include "cg/fold.h" + #endif diff --git a/src/cg/value.c b/src/cg/value.c @@ -100,54 +100,6 @@ ApiSValue api_make_lv(Operand op, CfreeCgTypeId ty) { return sv; } -ApiSValue api_make_cmp(CmpOp op, Operand a, Operand b, CfreeCgTypeId result_ty, - int a_owned, int b_owned) { - ApiSValue sv; - memset(&sv, 0, sizeof sv); - sv.kind = SV_CMP; - sv.type = result_ty; - sv.delayed.cmp.op = op; - sv.delayed.cmp.a = a; - sv.delayed.cmp.b = b; - sv.delayed.cmp.a_owned = a_owned ? 1u : 0u; - sv.delayed.cmp.b_owned = b_owned ? 1u : 0u; - sv.res = RES_INHERENT; - sv.source_local = CFREE_CG_LOCAL_NONE; - return sv; -} - -ApiSValue api_make_arith_unop(UnOp op, Operand a, CfreeCgTypeId ty, - int a_owned) { - ApiSValue sv; - memset(&sv, 0, sizeof sv); - sv.kind = SV_ARITH; - sv.delayed.arith.kind = API_DELAYED_UNOP; - sv.type = ty; - sv.delayed.arith.un_op = op; - sv.delayed.arith.a = a; - sv.delayed.arith.a_owned = a_owned ? 1u : 0u; - sv.res = RES_INHERENT; - sv.source_local = CFREE_CG_LOCAL_NONE; - return sv; -} - -ApiSValue api_make_arith_binop(BinOp op, Operand a, Operand b, CfreeCgTypeId ty, - int a_owned, int b_owned) { - ApiSValue sv; - memset(&sv, 0, sizeof sv); - sv.kind = SV_ARITH; - sv.delayed.arith.kind = API_DELAYED_BINOP; - sv.type = ty; - sv.delayed.arith.bin_op = op; - sv.delayed.arith.a = a; - sv.delayed.arith.b = b; - sv.delayed.arith.a_owned = a_owned ? 1u : 0u; - sv.delayed.arith.b_owned = b_owned ? 
1u : 0u; - sv.res = RES_INHERENT; - sv.source_local = CFREE_CG_LOCAL_NONE; - return sv; -} - ApiSValue api_make_sv_with_local_ownership(Operand op, CfreeCgTypeId ty, int owned) { ApiSValue sv = api_make_sv(op, ty); @@ -378,101 +330,6 @@ int api_sv_owns_operand_local(const ApiSValue* sv, const Operand* op) { sv->op.v.local == op->v.local; } -void api_release_cmp(CfreeCg* g, ApiSValue* sv) { - if (sv->delayed.cmp.a_owned) api_release_operand_local(g, sv->delayed.cmp.a); - if (sv->delayed.cmp.b_owned && - (sv->delayed.cmp.b.kind != OPK_LOCAL || sv->delayed.cmp.a.kind != OPK_LOCAL || - sv->delayed.cmp.b.v.local != sv->delayed.cmp.a.v.local || - !sv->delayed.cmp.a_owned)) { - api_release_operand_local(g, sv->delayed.cmp.b); - } - memset(&sv->delayed.cmp.a, 0, sizeof sv->delayed.cmp.a); - memset(&sv->delayed.cmp.b, 0, sizeof sv->delayed.cmp.b); - sv->delayed.cmp.a_owned = 0; - sv->delayed.cmp.b_owned = 0; - sv->kind = SV_OPERAND; -} - -void api_release_arith(CfreeCg* g, ApiSValue* sv) { - if (sv->delayed.arith.a_owned) - api_release_operand_local(g, sv->delayed.arith.a); - if (sv->delayed.arith.b_owned && - (sv->delayed.arith.b.kind != OPK_LOCAL || - sv->delayed.arith.a.kind != OPK_LOCAL || - sv->delayed.arith.b.v.local != sv->delayed.arith.a.v.local || - !sv->delayed.arith.a_owned)) { - api_release_operand_local(g, sv->delayed.arith.b); - } - memset(&sv->delayed.arith.a, 0, sizeof sv->delayed.arith.a); - memset(&sv->delayed.arith.b, 0, sizeof sv->delayed.arith.b); - sv->delayed.arith.a_owned = 0; - sv->delayed.arith.b_owned = 0; - sv->kind = SV_OPERAND; -} - -void api_materialize_cmp_to(CfreeCg* g, ApiSValue* sv, Operand dst) { - g->target->cmp(g->target, sv->delayed.cmp.op, dst, sv->delayed.cmp.a, - sv->delayed.cmp.b); - if (sv->delayed.cmp.a_owned && sv->delayed.cmp.a.kind == OPK_LOCAL && - sv->delayed.cmp.a.v.local != dst.v.local) { - api_release_operand_local(g, sv->delayed.cmp.a); - } - if (sv->delayed.cmp.b_owned && sv->delayed.cmp.b.kind == OPK_LOCAL && - 
sv->delayed.cmp.b.v.local != dst.v.local) { - api_release_operand_local(g, sv->delayed.cmp.b); - } - memset(&sv->delayed.cmp.a, 0, sizeof sv->delayed.cmp.a); - memset(&sv->delayed.cmp.b, 0, sizeof sv->delayed.cmp.b); - sv->delayed.cmp.a_owned = 0; - sv->delayed.cmp.b_owned = 0; - sv->kind = SV_OPERAND; - sv->op = dst; - sv->type = dst.type; - sv->res = RES_LOCAL; - sv->lvalue = 0; -} - -void api_materialize_arith_to(CfreeCg* g, ApiSValue* sv, Operand dst) { - if (sv->delayed.arith.kind == API_DELAYED_UNOP) { - g->target->unop(g->target, sv->delayed.arith.un_op, dst, - sv->delayed.arith.a); - } else { - g->target->binop(g->target, sv->delayed.arith.bin_op, dst, - sv->delayed.arith.a, sv->delayed.arith.b); - } - if (sv->delayed.arith.a_owned && sv->delayed.arith.a.kind == OPK_LOCAL && - sv->delayed.arith.a.v.local != dst.v.local) { - api_release_operand_local(g, sv->delayed.arith.a); - } - if (sv->delayed.arith.b_owned && sv->delayed.arith.b.kind == OPK_LOCAL && - sv->delayed.arith.b.v.local != dst.v.local) { - api_release_operand_local(g, sv->delayed.arith.b); - } - memset(&sv->delayed.arith.a, 0, sizeof sv->delayed.arith.a); - memset(&sv->delayed.arith.b, 0, sizeof sv->delayed.arith.b); - sv->delayed.arith.a_owned = 0; - sv->delayed.arith.b_owned = 0; - sv->kind = SV_OPERAND; - sv->op = dst; - sv->type = dst.type; - sv->res = RES_LOCAL; - sv->lvalue = 0; -} - -int api_arith_rhs_reusable(const ApiSValue* sv) { - if (sv->delayed.arith.kind == API_DELAYED_UNOP) return 0; - switch (sv->delayed.arith.bin_op) { - case BO_IADD: - case BO_IMUL: - case BO_AND: - case BO_OR: - case BO_XOR: - return 1; - default: - return 0; - } -} - void api_ensure_local(CfreeCg* g, ApiSValue* sv) { if (sv->kind == SV_CMP) { CfreeCgTypeId ty = api_sv_type(sv); @@ -667,248 +524,6 @@ CmpOp api_map_fp_cmp(CfreeCgFpCmpOp op) { return CMP_EQ; } -CmpOp api_invert_cmp(CmpOp op) { - switch (op) { - case CMP_EQ: - return CMP_NE; - case CMP_NE: - return CMP_EQ; - case CMP_LT_S: - return CMP_GE_S; - 
case CMP_LE_S: - return CMP_GT_S; - case CMP_GT_S: - return CMP_LE_S; - case CMP_GE_S: - return CMP_LT_S; - case CMP_LT_U: - return CMP_GE_U; - case CMP_LE_U: - return CMP_GT_U; - case CMP_GT_U: - return CMP_LE_U; - case CMP_GE_U: - return CMP_LT_U; - case CMP_LT_F: - return CMP_GE_F; - case CMP_LE_F: - return CMP_GT_F; - case CMP_GT_F: - return CMP_LE_F; - case CMP_GE_F: - return CMP_LT_F; - } - return CMP_EQ; -} - - - -/* ---- immediate integer folding ---- */ - -u32 api_int_like_width(Compiler* c, CfreeCgTypeId id) { - const CgType* ty = cg_type_get(c, id); - if (!ty) return 0; - if (ty->kind == CFREE_CG_TYPE_ALIAS) - return api_int_like_width(c, ty->alias.base); - if (ty->kind == CFREE_CG_TYPE_INT || ty->kind == CFREE_CG_TYPE_BOOL) - return ty->integer.width; - if (ty->kind == CFREE_CG_TYPE_ENUM) return (u32)(ty->size * 8u); - if (ty->kind == CFREE_CG_TYPE_PTR) return (u32)(ty->size * 8u); - return 0; -} - -int api_type_is_bool(Compiler* c, CfreeCgTypeId id) { - const CgType* ty = cg_type_get(c, id); - if (!ty) return 0; - if (ty->kind == CFREE_CG_TYPE_ALIAS) - return api_type_is_bool(c, ty->alias.base); - return ty->kind == CFREE_CG_TYPE_BOOL; -} - -u64 api_width_mask(u32 width) { - if (width >= 64) return UINT64_MAX; - return (1ull << width) - 1ull; -} - -u64 api_mask_width(u64 v, u32 width) { return v & api_width_mask(width); } - -i64 api_sign_extend_width(u64 v, u32 width) { - v = api_mask_width(v, width); - if (width >= 64) return (i64)v; - u64 sign = 1ull << (width - 1u); - return (i64)((v ^ sign) - sign); -} - -int api_foldable_int_like_type(Compiler* c, CfreeCgTypeId ty, u32* width_out) { - u32 width = api_int_like_width(c, ty); - if (!width || width > 64) return 0; - *width_out = width; - return 1; -} - -int api_foldable_int_type(Compiler* c, CfreeCgTypeId ty, u32* width_out) { - if (!cg_type_is_int(c, ty)) return 0; - return api_foldable_int_like_type(c, ty, width_out); -} - -i64 api_fold_result(Compiler* c, CfreeCgTypeId ty, u64 v, u32 width) { - v = 
api_mask_width(v, width); - if (api_type_is_bool(c, ty)) v = v != 0; - return (i64)v; -} - -int api_try_fold_int_binop(CfreeCg* g, BinOp op, CfreeCgTypeId ty, i64 a, i64 b, - i64* out) { - u32 width; - u64 ua, ub, r; - if (!g || !out || !api_foldable_int_type(g->c, ty, &width)) return 0; - ua = api_mask_width((u64)a, width); - ub = api_mask_width((u64)b, width); - r = 0; - switch (op) { - case BO_IADD: - r = ua + ub; - break; - case BO_ISUB: - r = ua - ub; - break; - case BO_IMUL: - r = ua * ub; - break; - case BO_AND: - r = ua & ub; - break; - case BO_OR: - r = ua | ub; - break; - case BO_XOR: - r = ua ^ ub; - break; - case BO_SHL: { - u32 sh = (u32)(ub & (u64)(width - 1u)); - r = ua << sh; - break; - } - case BO_SHR_U: { - u32 sh = (u32)(ub & (u64)(width - 1u)); - r = ua >> sh; - break; - } - case BO_SHR_S: { - u32 sh = (u32)(ub & (u64)(width - 1u)); - if (!sh) { - r = ua; - } else { - u64 sign = 1ull << (width - 1u); - r = ua >> sh; - if (ua & sign) r |= api_width_mask(width) << (width - sh); - } - break; - } - default: - return 0; - } - *out = api_fold_result(g->c, ty, r, width); - return 1; -} - -int api_try_fold_int_unop(CfreeCg* g, UnOp op, CfreeCgTypeId ty, i64 a, - i64* out) { - u32 width; - u64 ua, r; - if (!g || !out || !api_foldable_int_type(g->c, ty, &width)) return 0; - ua = api_mask_width((u64)a, width); - switch (op) { - case UO_NEG: - r = 0u - ua; - break; - case UO_NOT: - r = ua == 0; - break; - case UO_BNOT: - r = ~ua; - break; - default: - return 0; - } - *out = api_fold_result(g->c, ty, r, width); - return 1; -} - -int api_try_fold_int_cmp(CfreeCg* g, CmpOp op, CfreeCgTypeId ty, i64 a, i64 b, - i64* out) { - u32 width; - u64 ua, ub; - i64 sa, sb; - int r; - if (!g || !out || !api_foldable_int_like_type(g->c, ty, &width)) return 0; - ua = api_mask_width((u64)a, width); - ub = api_mask_width((u64)b, width); - sa = api_sign_extend_width(ua, width); - sb = api_sign_extend_width(ub, width); - switch (op) { - case CMP_EQ: - r = ua == ub; - break; - 
case CMP_NE: - r = ua != ub; - break; - case CMP_LT_S: - r = sa < sb; - break; - case CMP_LE_S: - r = sa <= sb; - break; - case CMP_GT_S: - r = sa > sb; - break; - case CMP_GE_S: - r = sa >= sb; - break; - case CMP_LT_U: - r = ua < ub; - break; - case CMP_LE_U: - r = ua <= ub; - break; - case CMP_GT_U: - r = ua > ub; - break; - case CMP_GE_U: - r = ua >= ub; - break; - default: - return 0; - } - *out = r ? 1 : 0; - return 1; -} - -ApiSourceLocal* api_local_from_handle(CfreeCg* g, CfreeCgLocal local); - -void api_local_const_clear(ApiSourceLocal* rec) { - if (!rec) return; - rec->const_valid = 0; - rec->const_value = 0; -} - -void api_local_const_clear_all(CfreeCg* g) { - if (!g) return; - for (u32 i = 0; i < g->nlocals; ++i) api_local_const_clear(&g->locals[i]); -} - -void api_local_const_memory_boundary(CfreeCg* g) { - api_local_const_clear_all(g); -} - -void api_local_const_control_boundary(CfreeCg* g) { - api_local_const_clear_all(g); -} - -void api_local_const_address_taken(CfreeCg* g, CfreeCgLocal local) { - api_local_const_clear_all(g); - api_local_const_clear(api_local_from_handle(g, local)); -} Operand api_lvalue_addr(CfreeCg* g, ApiSValue* v, CfreeCgTypeId pty) { CgTarget* T; @@ -933,247 +548,5 @@ Operand api_lvalue_addr(CfreeCg* g, ApiSValue* v, CfreeCgTypeId pty) { return dst; } -int api_local_const_can_track(CfreeCg* g, const ApiSourceLocal* rec, - CfreeCgMemAccess access) { - u32 width; - CfreeCgTypeId ty; - u64 access_size; - u64 local_size; - if (!g || !rec) return 0; - if (rec->kind != API_SOURCE_LOCAL_AUTO) return 0; - if (access.flags & CFREE_CG_MEM_VOLATILE) return 0; - ty = resolve_type(g->c, access.type); - if (!ty) ty = rec->type; - if (ty != rec->type) return 0; - access_size = abi_cg_sizeof(g->c->abi, ty); - local_size = abi_cg_sizeof(g->c->abi, rec->type); - if (access_size != local_size) return 0; - return api_foldable_int_like_type(g->c, ty, &width); -} - -void api_local_const_store(CfreeCg* g, CfreeCgLocal local, - CfreeCgMemAccess 
access, i64 value) { - ApiSourceLocal* rec = api_local_from_handle(g, local); - CfreeCgTypeId ty; - u32 width; - if (!api_local_const_can_track(g, rec, access)) { - api_local_const_clear(rec); - return; - } - ty = resolve_type(g->c, access.type); - if (!ty) ty = rec->type; - if (!api_foldable_int_like_type(g->c, ty, &width)) { - api_local_const_clear(rec); - return; - } - rec->const_value = api_fold_result(g->c, ty, (u64)value, width); - rec->const_valid = 1; -} - -int api_local_const_load(CfreeCg* g, CfreeCgLocal local, - CfreeCgMemAccess access, Operand* out) { - ApiSourceLocal* rec = api_local_from_handle(g, local); - CfreeCgTypeId ty; - u32 width; - if (!out || !api_local_const_can_track(g, rec, access)) return 0; - if (!rec->const_valid) return 0; - ty = resolve_type(g->c, access.type); - if (!ty) ty = rec->type; - if (!api_foldable_int_like_type(g->c, ty, &width)) return 0; - *out = - api_op_imm(api_fold_result(g->c, ty, (u64)rec->const_value, width), ty); - return 1; -} - -int api_can_delay_int_arith(CfreeCg* g, CfreeCgTypeId ty, u32 flags) { - (void)g; - (void)ty; - (void)flags; - return 0; -} - -int api_op_is_int_identity(CfreeCg* g, BinOp op, CfreeCgTypeId ty, i64 imm) { - u32 width; - u64 v; - if (!api_foldable_int_type(g->c, ty, &width)) return 0; - v = api_mask_width((u64)imm, width); - switch (op) { - case BO_IADD: - case BO_ISUB: - case BO_OR: - case BO_XOR: - case BO_SHL: - case BO_SHR_S: - case BO_SHR_U: - return v == 0; - case BO_IMUL: - case BO_SDIV: - case BO_UDIV: - return v == 1; - case BO_AND: - return v == api_width_mask(width); - default: - return 0; - } -} - -int api_try_collapse_binop_identity(CfreeCg* g, BinOp op, CfreeCgTypeId ty, - ApiSValue* a, ApiSValue* b, - ApiSValue* out) { - u32 width; - u64 av = 0; - u64 bv = 0; - if (!api_foldable_int_type(g->c, ty, &width)) return 0; - if (a->kind == SV_OPERAND && a->op.kind == OPK_IMM) - av = api_mask_width((u64)a->op.v.imm, width); - if (b->kind == SV_OPERAND && b->op.kind == OPK_IMM) - bv = 
api_mask_width((u64)b->op.v.imm, width); - - if (b->kind == SV_OPERAND && b->op.kind == OPK_IMM && a->kind == SV_OPERAND && - a->op.kind != OPK_IMM && api_op_is_int_identity(g, op, ty, b->op.v.imm)) { - *out = api_make_sv_with_local_ownership(a->op, ty, - api_sv_owns_operand_local(a, &a->op)); - a->res = RES_INHERENT; - return 1; - } - if (b->kind == SV_OPERAND && b->op.kind == OPK_IMM && a->kind == SV_OPERAND && - a->op.kind != OPK_IMM && - (op == BO_SREM || op == BO_UREM || op == BO_IMUL || op == BO_AND || - op == BO_OR)) { - if ((op == BO_SREM || op == BO_UREM) && bv == 1) { - *out = api_make_sv(api_op_imm(0, ty), ty); - return 1; - } - if ((op == BO_IMUL || op == BO_AND) && bv == 0) { - *out = api_make_sv(api_op_imm(0, ty), ty); - return 1; - } - if (op == BO_OR && bv == api_width_mask(width)) { - *out = - api_make_sv(api_op_imm(api_fold_result(g->c, ty, bv, width), ty), ty); - return 1; - } - } - if (a->kind == SV_OPERAND && a->op.kind == OPK_IMM && b->kind == SV_OPERAND && - b->op.kind != OPK_IMM && - (op == BO_IADD || op == BO_IMUL || op == BO_OR || op == BO_XOR || - op == BO_AND) && - api_op_is_int_identity(g, op, ty, a->op.v.imm)) { - *out = api_make_sv_with_local_ownership(b->op, ty, - api_sv_owns_operand_local(b, &b->op)); - b->res = RES_INHERENT; - return 1; - } - if (a->kind == SV_OPERAND && a->op.kind == OPK_IMM && b->kind == SV_OPERAND && - b->op.kind != OPK_IMM && (op == BO_IMUL || op == BO_AND || op == BO_OR)) { - if ((op == BO_IMUL || op == BO_AND) && av == 0) { - *out = api_make_sv(api_op_imm(0, ty), ty); - return 1; - } - if (op == BO_OR && av == api_width_mask(width)) { - *out = - api_make_sv(api_op_imm(api_fold_result(g->c, ty, av, width), ty), ty); - return 1; - } - } - return 0; -} - -int api_try_fold_arith_chain(CfreeCg* g, BinOp op, CfreeCgTypeId ty, - ApiSValue* a, ApiSValue* b, ApiSValue* out) { - i64 folded; - BinOp result_op; - if (a->kind != SV_ARITH || a->delayed.arith.kind != API_DELAYED_BINOP || - a->delayed.arith.a.kind != 
OPK_LOCAL || - a->delayed.arith.b.kind != OPK_IMM || b->kind != SV_OPERAND || - b->op.kind != OPK_IMM) { - return 0; - } - result_op = a->delayed.arith.bin_op; - switch (a->delayed.arith.bin_op) { - case BO_IADD: - if (op == BO_IADD) { - if (!api_try_fold_int_binop(g, BO_IADD, ty, a->delayed.arith.b.v.imm, - b->op.v.imm, &folded)) - return 0; - result_op = BO_IADD; - } else if (op == BO_ISUB) { - if (!api_try_fold_int_binop(g, BO_ISUB, ty, a->delayed.arith.b.v.imm, - b->op.v.imm, &folded)) - return 0; - result_op = BO_IADD; - } else { - return 0; - } - break; - case BO_ISUB: - if (op == BO_IADD) { - if (!api_try_fold_int_binop(g, BO_ISUB, ty, b->op.v.imm, - a->delayed.arith.b.v.imm, &folded)) - return 0; - result_op = BO_IADD; - } else if (op == BO_ISUB) { - if (!api_try_fold_int_binop(g, BO_IADD, ty, a->delayed.arith.b.v.imm, - b->op.v.imm, &folded)) - return 0; - result_op = BO_ISUB; - } else { - return 0; - } - break; - case BO_XOR: - if (op != BO_XOR || - !api_try_fold_int_binop(g, BO_XOR, ty, a->delayed.arith.b.v.imm, - b->op.v.imm, &folded)) - return 0; - result_op = BO_XOR; - break; - case BO_AND: - if (op != BO_AND || - !api_try_fold_int_binop(g, BO_AND, ty, a->delayed.arith.b.v.imm, - b->op.v.imm, &folded)) - return 0; - result_op = BO_AND; - break; - case BO_OR: - if (op != BO_OR || - !api_try_fold_int_binop(g, BO_OR, ty, a->delayed.arith.b.v.imm, - b->op.v.imm, &folded)) - return 0; - result_op = BO_OR; - break; - default: - return 0; - } - if (api_op_is_int_identity(g, result_op, ty, folded)) { - *out = api_make_sv_with_local_ownership(a->delayed.arith.a, ty, - a->delayed.arith.a_owned); - a->delayed.arith.a_owned = 0; - memset(&a->delayed.arith.a, 0, sizeof a->delayed.arith.a); - return 1; - } - a->delayed.arith.bin_op = result_op; - a->delayed.arith.b.v.imm = folded; - *out = *a; - a->delayed.arith.a_owned = 0; - a->delayed.arith.b_owned = 0; - memset(&a->delayed.arith.a, 0, sizeof a->delayed.arith.a); - memset(&a->delayed.arith.b, 0, sizeof 
a->delayed.arith.b); - return 1; -} - -int api_try_fold_unary_chain(ApiSValue* a, UnOp op, CfreeCgTypeId ty, - ApiSValue* out) { - if (op != UO_BNOT || a->kind != SV_ARITH || - a->delayed.arith.kind != API_DELAYED_UNOP || - a->delayed.arith.un_op != UO_BNOT || a->delayed.arith.a.kind != OPK_LOCAL) { - return 0; - } - *out = api_make_sv_with_local_ownership(a->delayed.arith.a, ty, - a->delayed.arith.a_owned); - a->delayed.arith.a_owned = 0; - memset(&a->delayed.arith.a, 0, sizeof a->delayed.arith.a); - return 1; -} /* ---- C-symbol mangling ---- */