kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit edb863ae6bdc7fba48ca788ced88e943c92ee280
parent 91a660f11818a3d87a1e8510360b843f670efa01
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Mon, 11 May 2026 12:29:17 -0700

cg: pre-SSA peephole — constant fold + aarch64 imm-form encodings

Adds a pure fold helper (Tier 1+2) and lets the value-stack pass
OPK_IMM operands through to backend binop/unop/cmp instead of always
force_reg'ing them.  aarch64 picks the imm-form encoding when the
literal fits: add/sub imm12 (with sh), cmp imm12, AND/ORR/EOR bitmask,
and LSL/LSR/ASR via UBFM/SBFM; misses fall back to the existing
shifted-register path through force_reg_int.

Why: avoids burning a value-stack register on x+const sites and emits
one fewer instruction per matched pattern.  The REG|IMM contract on
binop/unop/cmp is now documented in arch.h so opt's eventual machinize
+ opt_emit reuses the same backend path without new wiring.

Diffstat:
Msrc/arch/aa64_isa.h | 168+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc/arch/aarch64.c | 125++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------
Msrc/arch/arch.h | 19++++++++++++++-----
Msrc/cg/cg.c | 80++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
Asrc/cg/fold.c | 154+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/cg/fold.h | 47+++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 569 insertions(+), 24 deletions(-)

diff --git a/src/arch/aa64_isa.h b/src/arch/aa64_isa.h @@ -177,6 +177,147 @@ static inline u32 aa64_mvn(u32 sf, u32 Rd, u32 Rm) { } /* ==================================================================== + * Logical, immediate (AND / ORR / EOR / ANDS, bitmask-imm form) + * sf opc(2) 100100 N(1) immr(6) imms(6) Rn(5) Rd(5) + * 31 30..29 28..23 22 21..16 15..10 9..5 4..0 + * + * N:immr:imms encodes a repeated-pattern bitmask. The encoder + * aa64_logimm_encode below computes those fields from a literal value; + * this pack just lays the bits out. For 32-bit ops (sf=0), N must be 0; + * for 64-bit ops N can be 0 or 1 and selects whether the pattern + * element is 64 bits (N=1) or 2..32 bits (N=0). + * ==================================================================== */ + +#define AA64_LOGIMM_FAMILY_MATCH 0x12000000u +#define AA64_LOGIMM_FAMILY_MASK 0x1F800000u /* bits 28:23 */ + +typedef struct AA64LogImm { + u32 sf, opc, N, immr, imms, Rn, Rd; +} AA64LogImm; + +static inline u32 aa64_logimm_pack(AA64LogImm f) { + return ((f.sf & 1u) << 31) | ((f.opc & 3u) << 29) | AA64_LOGIMM_FAMILY_MATCH | + ((f.N & 1u) << 22) | ((f.immr & 0x3fu) << 16) | + ((f.imms & 0x3fu) << 10) | ((f.Rn & 0x1fu) << 5) | (f.Rd & 0x1fu); +} + +static inline u32 aa64_and_imm(u32 sf, u32 Rd, u32 Rn, u32 N, u32 immr, + u32 imms) { + return aa64_logimm_pack((AA64LogImm){.sf = sf, + .opc = AA64_LOG_AND_OPC, + .N = N, + .immr = immr, + .imms = imms, + .Rn = Rn, + .Rd = Rd}); +} +static inline u32 aa64_orr_imm(u32 sf, u32 Rd, u32 Rn, u32 N, u32 immr, + u32 imms) { + return aa64_logimm_pack((AA64LogImm){.sf = sf, + .opc = AA64_LOG_ORR_OPC, + .N = N, + .immr = immr, + .imms = imms, + .Rn = Rn, + .Rd = Rd}); +} +static inline u32 aa64_eor_imm(u32 sf, u32 Rd, u32 Rn, u32 N, u32 immr, + u32 imms) { + return aa64_logimm_pack((AA64LogImm){.sf = sf, + .opc = AA64_LOG_EOR_OPC, + .N = N, + .immr = immr, + .imms = imms, + .Rn = Rn, + .Rd = Rd}); +} + +/* Bitmask-immediate predicate + encoder. Returns 1 and writes N/immr/imms + * if `imm` is encodable as an AArch64 logical immediate of width + * (sf ? 64 : 32); returns 0 otherwise (caller materializes into a + * scratch and uses the shifted-register form). + * + * Algorithm (inverse of ARM ARM "DecodeBitMasks"): an encodable value + * is a non-zero, non-all-ones bitmask made of a repeated `size`-bit + * element (size ∈ {2,4,8,16,32,64}); within one element the pattern is + * a rotation of (0…0 1…1). Find size by detecting the smallest + * repeating period; find the rotation that places the 1-run at the + * LSB; encode size and ones-count into imms per the standard scheme + * (top bits of imms inverted-encode size, low bits are ones-count-1). */ +static inline int aa64_logimm_encode(u64 imm, u32 sf, u32 *N_out, + u32 *immr_out, u32 *imms_out) { + if (!sf) { + u64 lo = imm & 0xFFFFFFFFu; + u64 hi = imm >> 32; + if (hi != 0 && hi != lo) return 0; + imm = lo | (lo << 32); + } + if (imm == 0 || imm == ~(u64)0) return 0; + + u32 size = 64; + for (u32 s = 32; s >= 2; s >>= 1) { + u64 mask = ((u64)1 << s) - 1u; + if ((imm & mask) != ((imm >> s) & mask)) break; + size = s; + } + u64 elt_mask = (size == 64) ? ~(u64)0 : (((u64)1 << size) - 1u); + u64 elt = imm & elt_mask; + if (elt == 0 || elt == elt_mask) return 0; + + u32 ones = 0; + for (u64 x = elt; x; x >>= 1) ones += (u32)(x & 1u); + if (ones == 0 || ones >= size) return 0; + + u64 aligned = ((u64)1 << ones) - 1u; + u32 rotation = 0xFFFFFFFFu; + for (u32 r = 0; r < size; r++) { + u64 rotated = r == 0 ? elt : (((elt >> r) | (elt << (size - r))) & elt_mask); + if (rotated == aligned) { rotation = r; break; } + } + if (rotation == 0xFFFFFFFFu) return 0; + + if (size == 64) { + *N_out = 1u; + *imms_out = (ones - 1u) & 0x3Fu; + } else { + *N_out = 0u; + u32 neg_size_shl1 = ((u32)(-(i32)size) << 1) & 0x3Fu; + *imms_out = neg_size_shl1 | ((ones - 1u) & 0x3Fu); + } + *immr_out = rotation; + return 1; +} + +/* Shift-by-immediate field generators for LSL/LSR/ASR (encoded via + * UBFM/SBFM). Predicate: shift < width. The aa64_ubfm / aa64_sbfm + * encoders live in aarch64.c; callers pair these (immr, imms) with the + * matching pack. */ +static inline int aa64_lsl_imm_fields(u32 shift, u32 sf, u32 *immr_out, + u32 *imms_out) { + u32 width = sf ? 64u : 32u; + if (shift >= width) return 0; + *immr_out = (width - shift) & (width - 1u); + *imms_out = width - 1u - shift; + return 1; +} +static inline int aa64_lsr_imm_fields(u32 shift, u32 sf, u32 *immr_out, + u32 *imms_out) { + u32 width = sf ? 64u : 32u; + if (shift >= width) return 0; + *immr_out = shift; + *imms_out = width - 1u; + return 1; +} +static inline int aa64_asr_imm_fields(u32 shift, u32 sf, u32 *immr_out, + u32 *imms_out) { + u32 width = sf ? 64u : 32u; + if (shift >= width) return 0; + *immr_out = shift; + *imms_out = width - 1u; + return 1; +} + +/* ==================================================================== * Add/Sub, shifted register (ADD / SUB / ADDS / SUBS) * sf op(1) S(1) 01011 shift(2) 0 Rm(5) imm6(6) Rn(5) Rd(5) * 31 30 29 28..24 23..22 21 20..16 15..10 9..5 4..0 @@ -460,6 +601,33 @@ static inline u32 aa64_sub_imm(u32 sf, u32 Rd, u32 Rn, u32 imm12, u32 sh) { return aa64_addsubimm_pack((AA64AddSubImm){ .sf = sf, .op = 1, .sh = sh, .imm12 = imm12, .Rn = Rn, .Rd = Rd}); } +/* SUBS imm — sets flags. Used for CMP imm (Rd=ZR) and for branchless + * compares that feed CSET. The 12-bit-shifted form covers 0..0xFFFFF000 + * stepped by 0x1000; cg_fold collapses literal-only compares upstream, + * so this encoder is reached for `x cmp const` and `if (x)` patterns. */ +static inline u32 aa64_subs_imm12(u32 sf, u32 Rd, u32 Rn, u32 imm12, u32 sh) { + return aa64_addsubimm_pack((AA64AddSubImm){ + .sf = sf, .op = 1, .S = 1, .sh = sh, .imm12 = imm12, .Rn = Rn, .Rd = Rd}); +} + +/* Predicate: does `imm` fit ADD/SUB/CMP's 12-bit immediate (optionally + * left-shifted by 12)? On success writes the encoded imm12 and sh and + * returns 1; on failure returns 0 and leaves outputs untouched. + * + * The encoding admits 0..4095 directly (sh=0) and multiples of 4096 up + * to 0xFFF000 (sh=1). Negative literals are rejected here — the caller + * (e.g. opt's machinize, or a smarter cg) is free to swap ADD ↔ SUB and + * retry with the negated literal; the bare predicate keeps the contract + * narrow. */ +static inline int aa64_addsub_imm_fits(i64 imm, u32 *imm12_out, u32 *sh_out) { + if (imm < 0) return 0; + u64 u = (u64)imm; + if (u <= 0xFFFu) { *imm12_out = (u32)u; *sh_out = 0; return 1; } + if ((u & 0xFFFu) == 0 && (u >> 12) <= 0xFFFu) { + *imm12_out = (u32)(u >> 12); *sh_out = 1; return 1; + } + return 0; +} /* ==================================================================== * Load/store, unsigned 12-bit immediate offset (LDR / STR, scaled) diff --git a/src/arch/aarch64.c b/src/arch/aarch64.c @@ -1068,16 +1068,22 @@ static u32 cmp_to_cond(CmpOp op) { } } -/* Emit CMP a, b (= SUBS ZR, a, b). Materializes IMM operands through - * scratch x9/x10. Width comes from `a`; signedness lives in the cond. */ +/* Emit CMP a, b (= SUBS ZR, a, b). Uses the 12-bit-imm form when `b` is + * an OPK_IMM that fits; otherwise materializes through scratch x9/x10 + * and uses the shifted-register form. CMP is not commutative across the + * condition codes, so an IMM-on-LHS still materializes (the caller has + * to swap the cond if it wants to swap the operands). Width comes from + * `a`; signedness lives in the cond. */ static void emit_cmp_ab(CGTarget* t, Operand a_op, Operand b_op) { MCEmitter* mc = t->mc; u32 sf = type_is_64(a_op.type) ? 1u : 0u; - /* Special-case CMP Rn, #0 so a literal zero compare doesn't need - * a scratch register. */ - if (b_op.kind == OPK_IMM && b_op.v.imm == 0 && a_op.kind == OPK_REG) { - emit32(mc, aa64_subs_imm(sf, /*Rd=ZR*/ 31u, reg_num(a_op), 0)); - return; + if (b_op.kind == OPK_IMM && a_op.kind != OPK_IMM) { + u32 imm12, sh; + if (aa64_addsub_imm_fits(b_op.v.imm, &imm12, &sh)) { + u32 rn = force_reg_int(t, a_op, sf, 9); + emit32(mc, aa64_subs_imm12(sf, /*Rd=ZR*/ 31u, rn, imm12, sh)); + return; + } } u32 rn = force_reg_int(t, a_op, sf, 9); u32 rm = force_reg_int(t, b_op, sf, (rn == 9) ? 10u : 9u); @@ -1838,9 +1844,100 @@ static void aa_binop(CGTarget* t, BinOp op, Operand dst, Operand a_op, u32 sf = type_is_64(dst.type) ? 1u : 0u; u32 rd = reg_num(dst); + + /* Imm-form fast paths. For commutative ops (ADD/AND/OR/XOR), if the + * LHS is the IMM swap to canonicalize (REG, IMM) and try to encode. + * For SUB we don't swap — `SUB imm, reg` has no encoding without + * materializing. Shifts take the imm as the count and require RHS-IMM + * by definition. Anything that doesn't fit the encoding falls through + * to force_reg_int + the shifted-register form, preserving the old + * behavior. */ + u32 word; + switch (op) { + case BO_IADD: + case BO_AND: + case BO_OR: + case BO_XOR: { + if (a_op.kind == OPK_IMM && b_op.kind != OPK_IMM) { + Operand t_op = a_op; a_op = b_op; b_op = t_op; + } + break; + } + default: break; + } + + /* Try the imm-form before materializing. Each case sets `word` and + * jumps to emit; misses fall through to the reg path below. */ + if (b_op.kind == OPK_IMM && a_op.kind != OPK_IMM) { + u32 rn_reg = reg_num(a_op); + i64 imm = b_op.v.imm; + u32 imm12, sh, N, immr, imms; + switch (op) { + case BO_IADD: + if (aa64_addsub_imm_fits(imm, &imm12, &sh)) { + emit32(mc, aa64_add_imm(sf, rd, rn_reg, imm12, sh)); + return; + } + break; + case BO_ISUB: + if (aa64_addsub_imm_fits(imm, &imm12, &sh)) { + emit32(mc, aa64_sub_imm(sf, rd, rn_reg, imm12, sh)); + return; + } + break; + case BO_AND: + if (aa64_logimm_encode((u64)imm, sf, &N, &immr, &imms)) { + emit32(mc, aa64_and_imm(sf, rd, rn_reg, N, immr, imms)); + return; + } + break; + case BO_OR: + if (aa64_logimm_encode((u64)imm, sf, &N, &immr, &imms)) { + emit32(mc, aa64_orr_imm(sf, rd, rn_reg, N, immr, imms)); + return; + } + break; + case BO_XOR: + if (aa64_logimm_encode((u64)imm, sf, &N, &immr, &imms)) { + emit32(mc, aa64_eor_imm(sf, rd, rn_reg, N, immr, imms)); + return; + } + break; + case BO_SHL: { + /* C shifts by ≥ width are UB but we don't exploit it; mask the + * count to width-1 to match the variable-shift behavior. */ + u32 width = sf ? 64u : 32u; + u32 sh_amt = (u32)((u64)imm & (width - 1u)); + if (aa64_lsl_imm_fields(sh_amt, sf, &immr, &imms)) { + emit32(mc, aa64_ubfm(sf, rd, rn_reg, immr, imms)); + return; + } + break; + } + case BO_SHR_U: { + u32 width = sf ? 64u : 32u; + u32 sh_amt = (u32)((u64)imm & (width - 1u)); + if (aa64_lsr_imm_fields(sh_amt, sf, &immr, &imms)) { + emit32(mc, aa64_ubfm(sf, rd, rn_reg, immr, imms)); + return; + } + break; + } + case BO_SHR_S: { + u32 width = sf ? 64u : 32u; + u32 sh_amt = (u32)((u64)imm & (width - 1u)); + if (aa64_asr_imm_fields(sh_amt, sf, &immr, &imms)) { + emit32(mc, aa64_sbfm(sf, rd, rn_reg, immr, imms)); + return; + } + break; + } + default: break; + } + } + u32 rn = force_reg_int(t, a_op, sf, 9); u32 rm = force_reg_int(t, b_op, sf, (rn == 9) ? 10 : 9); - u32 word; switch (op) { case BO_IADD: @@ -1900,14 +1997,14 @@ static void aa_unop(CGTarget* t, UnOp op, Operand dst, Operand a_op) { MCEmitter* mc = t->mc; u32 sf = type_is_64(dst.type) ? 1u : 0u; u32 rd = reg_num(dst); - u32 rn = reg_num(a_op); + /* OPK_IMM is legal per the CGTarget contract (arch.h); force_reg_int + * materializes into x9 when the operand isn't already a register. + * cg folds literal unops upstream (cg_fold_unop), so the IMM path + * here is only reached from opt's emit when the IR carries an + * unfolded literal — still a contract case we must honor. */ + u32 rn = force_reg_int(t, a_op, sf, 9); u32 word; - if (a_op.kind != OPK_REG) { - compiler_panic(t->c, impl_of(t)->loc, - "aarch64 unop: non-REG operand not yet supported"); - } - switch (op) { case UO_NEG: word = aa64_neg(sf, rd, rn); diff --git a/src/arch/arch.h b/src/arch/arch.h @@ -530,11 +530,20 @@ struct CGTarget { void (*bitfield_store)(CGTarget*, Operand record_addr, Operand src /*REG|IMM*/, BitFieldAccess); - /* ---- arithmetic, compare, convert ---- */ - void (*binop)(CGTarget*, BinOp, Operand dst, Operand a, Operand b); - void (*unop)(CGTarget*, UnOp, Operand dst, Operand a); - void (*cmp)(CGTarget*, CmpOp, Operand dst, Operand a, - Operand b); /* materialize 0/1 */ + /* ---- arithmetic, compare, convert ---- + * binop/unop/cmp accept OPK_REG or OPK_IMM in source operand positions + * (`a`, `b`); `dst` is always OPK_REG. The backend chooses between an + * imm-form encoding and materializing the literal into a scratch + * register based on whether the value fits the instruction's imm + * field. FP ops require REG sources — FP literals reach the value + * stack through load_const into OPK_REG. cg and opt's machinize/emit + * both rely on this contract to pass small constants through without + * burning a value-stack register on materialization. */ + void (*binop)(CGTarget*, BinOp, Operand dst /*REG*/, + Operand a /*REG|IMM*/, Operand b /*REG|IMM*/); + void (*unop)(CGTarget*, UnOp, Operand dst /*REG*/, Operand a /*REG|IMM*/); + void (*cmp)(CGTarget*, CmpOp, Operand dst /*REG*/, + Operand a /*REG|IMM*/, Operand b /*REG|IMM*/); /* materialize 0/1 */ void (*convert)(CGTarget*, ConvKind, Operand dst, Operand src); /* ---- calls / return ---- diff --git a/src/cg/cg.c b/src/cg/cg.c @@ -41,6 +41,7 @@ #include "abi/abi.h" #include "arch/arch.h" +#include "cg/fold.h" #include "core/arena.h" #include "core/core.h" #include "core/heap.h" @@ -891,6 +892,12 @@ void cg_bitfield_store(CG* g, BitFieldAccess b) { * Arithmetic / compare / convert * ============================================================ */ +/* Like force_reg, but leaves an OPK_IMM SValue alone — the CGTarget + * contract for binop/unop/cmp accepts IMM sources, so we avoid burning + * a value-stack register on `x + 3` style sites. The backend decides + * imm-form vs. materialize per the literal's width. */ +static Operand force_reg_unless_imm(CG* g, SValue* v, const Type* ty); + /* Force an SValue (already popped, by reference) into a register operand * of the given type. Mutates `*v` so that v->op is OPK_REG and v->res is * RES_REG; on lvalue inputs this means the original lvalue's base reg is @@ -920,6 +927,11 @@ static Operand force_reg(CG* g, SValue* v, const Type* ty) { return dst; } +static Operand force_reg_unless_imm(CG* g, SValue* v, const Type* ty) { + if (v->op.kind == OPK_IMM) return v->op; + return force_reg(g, v, ty); +} + void cg_binop(CG* g, BinOp op) { /* stack: [a, b] → [a OP b] */ SValue b = pop(g); @@ -927,8 +939,37 @@ void cg_binop(CG* g, BinOp op) { CGTarget* T = g->target; /* Result type is `a`'s type at this slice (parser already coerced). */ const Type* ty = a.type ? a.type : b.type; - Operand ra = force_reg(g, &a, ty); - Operand rb = force_reg(g, &b, ty); + + /* Tier 1+2: constant-fold or apply algebraic identities via the + * pure fold helper. KEEP_A/KEEP_B re-push the non-constant operand + * unchanged after releasing the IMM side (IMM carries no reg/slot + * obligation, but the helper is symmetric and a no-op release is + * cheap). */ + { + Operand folded; + switch (cg_fold_binop(op, a.op, b.op, ty, g->abi, &folded)) { + case CG_FOLD_IMM: + release(g, &a); + release(g, &b); + push(g, make_sv(folded, ty)); + return; + case CG_FOLD_KEEP_A: + release(g, &b); + push(g, a); + return; + case CG_FOLD_KEEP_B: + release(g, &a); + push(g, b); + return; + case CG_FOLD_NONE: break; + } + } + + /* IMM sources are legal per the binop contract (arch.h) — the backend + * picks imm-form vs. materialize. cg_fold_binop has already collapsed + * IMM+IMM, so at most one operand here is IMM. */ + Operand ra = force_reg_unless_imm(g, &a, ty); + Operand rb = force_reg_unless_imm(g, &b, ty); Reg rr = alloc_reg_or_spill(g, type_class(ty), ty); Operand dst = op_reg(rr, ty); T->binop(T, op, dst, ra, rb); @@ -941,7 +982,17 @@ void cg_unop(CG* g, UnOp op) { SValue a = pop(g); CGTarget* T = g->target; const Type* ty = a.type ? a.type : a.op.type; - Operand ra = force_reg(g, &a, ty); + + { + Operand folded; + if (cg_fold_unop(op, a.op, ty, g->abi, &folded) == CG_FOLD_IMM) { + release(g, &a); + push(g, make_sv(folded, ty)); + return; + } + } + + Operand ra = force_reg_unless_imm(g, &a, ty); Reg rr = alloc_reg_or_spill(g, type_class(ty), ty); Operand dst = op_reg(rr, ty); T->unop(T, op, dst, ra); @@ -956,8 +1007,19 @@ void cg_cmp(CG* g, CmpOp op) { CGTarget* T = g->target; const Type* opty = a.type ? a.type : b.type; const Type* i32 = type_prim(g->pool, TY_INT); - Operand ra = force_reg(g, &a, opty); - Operand rb = force_reg(g, &b, opty); + + { + Operand folded; + if (cg_fold_cmp(op, a.op, b.op, i32, g->abi, &folded) == CG_FOLD_IMM) { + release(g, &a); + release(g, &b); + push(g, make_sv(folded, i32)); + return; + } + } + + Operand ra = force_reg_unless_imm(g, &a, opty); + Operand rb = force_reg_unless_imm(g, &b, opty); Reg rr = alloc_reg_or_spill(g, RC_INT, i32); Operand dst = op_reg(rr, i32); T->cmp(T, op, dst, ra, rb); @@ -1458,6 +1520,14 @@ void cg_branch_true(CG* g, CGLabel l) { SValue v = pop(g); CGTarget* T = g->target; const Type* ty = v.type ? v.type : type_prim(g->pool, TY_INT); + /* Mirror cg_branch_false: a literal condition resolves at compile time. */ + if (v.op.kind == OPK_IMM) { + if (v.op.v.imm != 0) { + T->jump(T, (Label)l); + } + release(g, &v); + return; + } Operand a = force_reg(g, &v, ty); Operand zero = op_imm(0, ty); T->cmp_branch(T, CMP_NE, a, zero, (Label)l); diff --git a/src/cg/fold.c b/src/cg/fold.c @@ -0,0 +1,154 @@ +#include "cg/fold.h" + +/* Truncate (and re-sign-extend, for signed types) a folded i64 down to + * the width of `ty`, so that subsequent folds and compares see the same + * value the backend would have produced after narrowing to the + * destination register. No-op for >= 8-byte types and for NULL ty. */ +static i64 narrow(TargetABI* abi, const Type* ty, i64 v) { + if (!ty || !abi) return v; + u32 sz = abi_sizeof(abi, ty); + if (sz >= 8) return v; + u64 mask = ((u64)1 << (sz * 8)) - 1; + u64 u = (u64)v & mask; + if (abi_type_info(abi, ty).signed_) { + u64 sign_bit = (u64)1 << (sz * 8 - 1); + if (u & sign_bit) u |= ~mask; + } + return (i64)u; +} + +static Operand make_imm(i64 v, const Type* ty) { + Operand o; + o.kind = OPK_IMM; + o.cls = RC_INT; + o.pad = 0; + o.type = ty; + o.v.imm = v; + return o; +} + +/* Literal-literal integer binop. Returns 1 with *out set, or 0 if `op` + * isn't a foldable kind. Excludes SDIV/UDIV/SREM/UREM (must trap on + * divisor 0 and INT_MIN/-1), SHL/SHR_* (count >= width is type-width- + * dependent and not in this tier), and float ops (rounding/NaN belong + * to the backend). */ +static int literal_binop(BinOp op, i64 a, i64 b, i64* out) { + switch (op) { + case BO_IADD: *out = (i64)((u64)a + (u64)b); return 1; + case BO_ISUB: *out = (i64)((u64)a - (u64)b); return 1; + case BO_IMUL: *out = (i64)((u64)a * (u64)b); return 1; + case BO_AND: *out = a & b; return 1; + case BO_OR: *out = a | b; return 1; + case BO_XOR: *out = a ^ b; return 1; + default: return 0; + } +} + +/* Algebraic-identity dispatch for an integer binop with one literal + * operand. `k` is the constant; `k_on_right` distinguishes lhs vs rhs + * for non-commutative ops (ISUB, SHL, SHR_*, SDIV, UDIV). + * FID_NONE — no identity, caller emits normally. + * FID_KEEP — drop the IMM, the non-constant operand is the result. + * FID_ZERO — result is constant 0; drop both operands. */ +typedef enum FoldIdent { FID_NONE, FID_KEEP, FID_ZERO } FoldIdent; + +static FoldIdent identity_for(BinOp op, i64 k, int k_on_right) { + switch (op) { + case BO_IADD: case BO_OR: case BO_XOR: + /* x + 0, 0 + x, x | 0, 0 | x, x ^ 0, 0 ^ x */ + return (k == 0) ? FID_KEEP : FID_NONE; + case BO_ISUB: + /* x - 0 only; 0 - x needs a UO_NEG and isn't an identity. */ + return (k == 0 && k_on_right) ? FID_KEEP : FID_NONE; + case BO_IMUL: + if (k == 1) return FID_KEEP; + if (k == 0) return FID_ZERO; + return FID_NONE; + case BO_AND: + if (k == 0) return FID_ZERO; + /* All-ones mask of any width sign-extends to -1 as i64. */ + if (k == -1) return FID_KEEP; + return FID_NONE; + case BO_SDIV: case BO_UDIV: + /* x / 1 only; 1 / x isn't an identity and divisor-on-lhs gives + * no useful fold. */ + return (k == 1 && k_on_right) ? FID_KEEP : FID_NONE; + case BO_SHL: case BO_SHR_S: case BO_SHR_U: + /* x << 0, x >> 0 only; a zero shift-count on the lhs is the + * value being shifted — folding that would need to release the + * rhs operand, deferred until a use exists. */ + return (k == 0 && k_on_right) ? FID_KEEP : FID_NONE; + default: + return FID_NONE; + } +} + +CGFoldKind cg_fold_binop(BinOp op, Operand a, Operand b, const Type* ty, + TargetABI* abi, Operand* out) { + /* Tier 1: both literal — fold to a single IMM. */ + if (a.kind == OPK_IMM && b.kind == OPK_IMM) { + i64 r; + if (literal_binop(op, a.v.imm, b.v.imm, &r)) { + *out = make_imm(narrow(abi, ty, r), ty); + return CG_FOLD_IMM; + } + } + /* Tier 2: algebraic identities. Side-effect-free: the non-constant + * operand has already been materialized onto the value stack, so any + * computation that produced it has already executed. Dropping the + * IMM side is the caller's responsibility (release reg/slot if any). */ + if (b.kind == OPK_IMM) { + switch (identity_for(op, b.v.imm, /*k_on_right=*/1)) { + case FID_KEEP: return CG_FOLD_KEEP_A; + case FID_ZERO: *out = make_imm(0, ty); return CG_FOLD_IMM; + case FID_NONE: break; + } + } + if (a.kind == OPK_IMM) { + switch (identity_for(op, a.v.imm, /*k_on_right=*/0)) { + case FID_KEEP: return CG_FOLD_KEEP_B; + case FID_ZERO: *out = make_imm(0, ty); return CG_FOLD_IMM; + case FID_NONE: break; + } + } + return CG_FOLD_NONE; +} + +CGFoldKind cg_fold_unop(UnOp op, Operand a, const Type* ty, + TargetABI* abi, Operand* out) { + if (a.kind != OPK_IMM) return CG_FOLD_NONE; + i64 v = a.v.imm; + i64 r; + switch (op) { + case UO_NEG: r = (i64)(-(u64)v); break; + case UO_BNOT: r = ~v; break; + case UO_NOT: r = v ? 0 : 1; break; + default: return CG_FOLD_NONE; + } + *out = make_imm(narrow(abi, ty, r), ty); + return CG_FOLD_IMM; +} + +CGFoldKind cg_fold_cmp(CmpOp op, Operand a, Operand b, const Type* int_ty, + TargetABI* abi, Operand* out) { + if (a.kind != OPK_IMM || b.kind != OPK_IMM) return CG_FOLD_NONE; + (void)abi; /* compare result is `int` 0/1 — no narrowing needed */ + i64 x = a.v.imm; + i64 y = b.v.imm; + i64 r; + switch (op) { + case CMP_EQ: r = (x == y); break; + case CMP_NE: r = (x != y); break; + case CMP_LT_S: r = (x < y); break; + case CMP_LE_S: r = (x <= y); break; + case CMP_GT_S: r = (x > y); break; + case CMP_GE_S: r = (x >= y); break; + case CMP_LT_U: r = ((u64)x < (u64)y); break; + case CMP_LE_U: r = ((u64)x <= (u64)y); break; + case CMP_GT_U: r = ((u64)x > (u64)y); break; + case CMP_GE_U: r = ((u64)x >= (u64)y); break; + default: return CG_FOLD_NONE; + } + *out = make_imm(r, int_ty); + return CG_FOLD_IMM; +} diff --git a/src/cg/fold.h b/src/cg/fold.h @@ -0,0 +1,47 @@ +#ifndef CFREE_CG_FOLD_H +#define CFREE_CG_FOLD_H + +/* Pure constant-folding and algebraic-identity helpers for binop/unop/cmp + * on Operand inputs. No CG or IR state: callers (cg.c today, opt's + * pass_gvn / pass_combine eventually) inspect the result and apply it + * to whichever value representation they hold. All folds are restricted + * to integer domain — float ops never reach the OPK_IMM path (FP + * literals materialize via load_const to OPK_REG before they enter the + * value stack). Division, remainder, shifts, and FP arithmetic are + * deliberately excluded from the literal-fold paths to preserve trap + * semantics and rounding behavior; algebraic identities on those ops + * are limited to cases that don't depend on UB-exploiting transforms + * (see doc/OPT.md §5.5). */ + +#include "abi/abi.h" +#include "arch/arch.h" +#include "type/type.h" + +typedef enum CGFoldKind { + CG_FOLD_NONE, /* no fold; caller emits normally */ + CG_FOLD_IMM, /* result is the OPK_IMM Operand in *out; drop both inputs */ + CG_FOLD_KEEP_A, /* result is `a` unchanged; drop `b` */ + CG_FOLD_KEEP_B, /* result is `b` unchanged; drop `a` */ +} CGFoldKind; + +/* Binop fold + identity. Examines `a` and `b` against `op`: + * - both OPK_IMM → CG_FOLD_IMM, *out = literal narrowed to `ty` + * - one OPK_IMM identity → CG_FOLD_KEEP_A or _B, or CG_FOLD_IMM (zero) + * - otherwise → CG_FOLD_NONE + * `ty` is the result type (used for width-narrowing on the fold path). + * `abi` supplies size/signedness; required when ty is non-NULL. */ +CGFoldKind cg_fold_binop(BinOp op, Operand a, Operand b, const Type* ty, + TargetABI* abi, Operand* out); + +/* Unop fold. Returns CG_FOLD_IMM with *out set on success, CG_FOLD_NONE + * otherwise. Only integer-domain unops are folded. */ +CGFoldKind cg_fold_unop(UnOp op, Operand a, const Type* ty, + TargetABI* abi, Operand* out); + +/* Integer-compare fold. Returns CG_FOLD_IMM with *out set to 0 or 1 of + * type `int_ty` on success. FP compares (CMP_*_F) return CG_FOLD_NONE — + * NaN/ordering belongs to the backend. */ +CGFoldKind cg_fold_cmp(CmpOp op, Operand a, Operand b, const Type* int_ty, + TargetABI* abi, Operand* out); + +#endif