commit edb863ae6bdc7fba48ca788ced88e943c92ee280
parent 91a660f11818a3d87a1e8510360b843f670efa01
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Mon, 11 May 2026 12:29:17 -0700
cg: pre-SSA peephole — constant fold + aarch64 imm-form encodings
Adds a pure fold helper (Tier 1+2) and lets the value-stack pass
OPK_IMM operands through to backend binop/unop/cmp instead of always
force_reg'ing them. aarch64 picks the imm-form encoding when the
literal fits: add/sub imm12 (with sh), cmp imm12, AND/ORR/EOR bitmask,
and LSL/LSR/ASR via UBFM/SBFM; misses fall back to the existing
shifted-register path through force_reg_int.
Why: avoids burning a value-stack register on x+const sites and emits
one fewer instruction per matched pattern. The REG|IMM contract on
binop/unop/cmp is now documented in arch.h so opt's eventual machinize
+ opt_emit reuses the same backend path without new wiring.
Diffstat:
| M | src/arch/aa64_isa.h | | | 168 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| M | src/arch/aarch64.c | | | 125 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------- |
| M | src/arch/arch.h | | | 19 | ++++++++++++++----- |
| M | src/cg/cg.c | | | 80 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----- |
| A | src/cg/fold.c | | | 154 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | src/cg/fold.h | | | 47 | +++++++++++++++++++++++++++++++++++++++++++++++ |
6 files changed, 569 insertions(+), 24 deletions(-)
diff --git a/src/arch/aa64_isa.h b/src/arch/aa64_isa.h
@@ -177,6 +177,147 @@ static inline u32 aa64_mvn(u32 sf, u32 Rd, u32 Rm) {
}
/* ====================================================================
+ * Logical, immediate (AND / ORR / EOR / ANDS, bitmask-imm form)
+ * sf opc(2) 100100 N(1) immr(6) imms(6) Rn(5) Rd(5)
+ * 31 30..29 28..23 22 21..16 15..10 9..5 4..0
+ *
+ * N:immr:imms encodes a repeated-pattern bitmask. The encoder
+ * aa64_logimm_encode below computes those fields from a literal value;
+ * this pack just lays the bits out. For 32-bit ops (sf=0), N must be 0;
+ * for 64-bit ops N can be 0 or 1 and selects whether the pattern
+ * element is 64 bits (N=1) or 2..32 bits (N=0).
+ * ==================================================================== */
+
+#define AA64_LOGIMM_FAMILY_MATCH 0x12000000u
+#define AA64_LOGIMM_FAMILY_MASK 0x1F800000u /* bits 28:23 */
+
+typedef struct AA64LogImm {
+ u32 sf, opc, N, immr, imms, Rn, Rd;
+} AA64LogImm;
+
+static inline u32 aa64_logimm_pack(AA64LogImm f) {
+ return ((f.sf & 1u) << 31) | ((f.opc & 3u) << 29) | AA64_LOGIMM_FAMILY_MATCH |
+ ((f.N & 1u) << 22) | ((f.immr & 0x3fu) << 16) |
+ ((f.imms & 0x3fu) << 10) | ((f.Rn & 0x1fu) << 5) | (f.Rd & 0x1fu);
+}
+
+static inline u32 aa64_and_imm(u32 sf, u32 Rd, u32 Rn, u32 N, u32 immr,
+ u32 imms) {
+ return aa64_logimm_pack((AA64LogImm){.sf = sf,
+ .opc = AA64_LOG_AND_OPC,
+ .N = N,
+ .immr = immr,
+ .imms = imms,
+ .Rn = Rn,
+ .Rd = Rd});
+}
+static inline u32 aa64_orr_imm(u32 sf, u32 Rd, u32 Rn, u32 N, u32 immr,
+ u32 imms) {
+ return aa64_logimm_pack((AA64LogImm){.sf = sf,
+ .opc = AA64_LOG_ORR_OPC,
+ .N = N,
+ .immr = immr,
+ .imms = imms,
+ .Rn = Rn,
+ .Rd = Rd});
+}
+static inline u32 aa64_eor_imm(u32 sf, u32 Rd, u32 Rn, u32 N, u32 immr,
+ u32 imms) {
+ return aa64_logimm_pack((AA64LogImm){.sf = sf,
+ .opc = AA64_LOG_EOR_OPC,
+ .N = N,
+ .immr = immr,
+ .imms = imms,
+ .Rn = Rn,
+ .Rd = Rd});
+}
+
+/* Bitmask-immediate predicate + encoder. Returns 1 and writes N/immr/imms
+ * if `imm` is encodable as an AArch64 logical immediate of width
+ * (sf ? 64 : 32); returns 0 otherwise (caller materializes into a
+ * scratch and uses the shifted-register form).
+ *
+ * Algorithm (inverse of ARM ARM "DecodeBitMasks"): an encodable value
+ * is a non-zero, non-all-ones bitmask made of a repeated `size`-bit
+ * element (size ∈ {2,4,8,16,32,64}); within one element the pattern is
+ * a rotation of (0…0 1…1). Find size by detecting the smallest
+ * repeating period; find the rotation that places the 1-run at the
+ * LSB; encode size and ones-count into imms per the standard scheme
+ * (top bits of imms inverted-encode size, low bits are ones-count-1). */
+static inline int aa64_logimm_encode(u64 imm, u32 sf, u32 *N_out,
+ u32 *immr_out, u32 *imms_out) {
+ if (!sf) {
+ u64 lo = imm & 0xFFFFFFFFu;
+ u64 hi = imm >> 32;
+ if (hi != 0 && hi != lo) return 0;
+ imm = lo | (lo << 32);
+ }
+ if (imm == 0 || imm == ~(u64)0) return 0;
+
+ u32 size = 64;
+ for (u32 s = 32; s >= 2; s >>= 1) {
+ u64 mask = ((u64)1 << s) - 1u;
+ if ((imm & mask) != ((imm >> s) & mask)) break;
+ size = s;
+ }
+ u64 elt_mask = (size == 64) ? ~(u64)0 : (((u64)1 << size) - 1u);
+ u64 elt = imm & elt_mask;
+ if (elt == 0 || elt == elt_mask) return 0;
+
+ u32 ones = 0;
+ for (u64 x = elt; x; x >>= 1) ones += (u32)(x & 1u);
+ if (ones == 0 || ones >= size) return 0;
+
+ u64 aligned = ((u64)1 << ones) - 1u;
+ u32 rotation = 0xFFFFFFFFu;
+ for (u32 r = 0; r < size; r++) {
+ u64 rotated = r == 0 ? elt : (((elt >> r) | (elt << (size - r))) & elt_mask);
+ if (rotated == aligned) { rotation = r; break; }
+ }
+ if (rotation == 0xFFFFFFFFu) return 0;
+
+ if (size == 64) {
+ *N_out = 1u;
+ *imms_out = (ones - 1u) & 0x3Fu;
+ } else {
+ *N_out = 0u;
+ u32 neg_size_shl1 = ((u32)(-(i32)size) << 1) & 0x3Fu;
+ *imms_out = neg_size_shl1 | ((ones - 1u) & 0x3Fu);
+ }
+ *immr_out = rotation;
+ return 1;
+}
+
+/* Shift-by-immediate field generators for LSL/LSR/ASR (encoded via
+ * UBFM/SBFM). Predicate: shift < width. The aa64_ubfm / aa64_sbfm
+ * encoders live in aarch64.c; callers pair these (immr, imms) with the
+ * matching pack. */
+static inline int aa64_lsl_imm_fields(u32 shift, u32 sf, u32 *immr_out,
+ u32 *imms_out) {
+ u32 width = sf ? 64u : 32u;
+ if (shift >= width) return 0;
+ *immr_out = (width - shift) & (width - 1u);
+ *imms_out = width - 1u - shift;
+ return 1;
+}
+static inline int aa64_lsr_imm_fields(u32 shift, u32 sf, u32 *immr_out,
+ u32 *imms_out) {
+ u32 width = sf ? 64u : 32u;
+ if (shift >= width) return 0;
+ *immr_out = shift;
+ *imms_out = width - 1u;
+ return 1;
+}
+static inline int aa64_asr_imm_fields(u32 shift, u32 sf, u32 *immr_out,
+ u32 *imms_out) {
+ u32 width = sf ? 64u : 32u;
+ if (shift >= width) return 0;
+ *immr_out = shift;
+ *imms_out = width - 1u;
+ return 1;
+}
+
+/* ====================================================================
* Add/Sub, shifted register (ADD / SUB / ADDS / SUBS)
* sf op(1) S(1) 01011 shift(2) 0 Rm(5) imm6(6) Rn(5) Rd(5)
* 31 30 29 28..24 23..22 21 20..16 15..10 9..5 4..0
@@ -460,6 +601,33 @@ static inline u32 aa64_sub_imm(u32 sf, u32 Rd, u32 Rn, u32 imm12, u32 sh) {
return aa64_addsubimm_pack((AA64AddSubImm){
.sf = sf, .op = 1, .sh = sh, .imm12 = imm12, .Rn = Rn, .Rd = Rd});
}
+/* SUBS imm — sets flags. Used for CMP imm (Rd=ZR) and for branchless
+ * compares that feed CSET. The 12-bit-shifted form covers 0..0xFFFFF000
+ * stepped by 0x1000; cg_fold collapses literal-only compares upstream,
+ * so this encoder is reached for `x cmp const` and `if (x)` patterns. */
+static inline u32 aa64_subs_imm12(u32 sf, u32 Rd, u32 Rn, u32 imm12, u32 sh) {
+ return aa64_addsubimm_pack((AA64AddSubImm){
+ .sf = sf, .op = 1, .S = 1, .sh = sh, .imm12 = imm12, .Rn = Rn, .Rd = Rd});
+}
+
+/* Predicate: does `imm` fit ADD/SUB/CMP's 12-bit immediate (optionally
+ * left-shifted by 12)? On success writes the encoded imm12 and sh and
+ * returns 1; on failure returns 0 and leaves outputs untouched.
+ *
+ * The encoding admits 0..4095 directly (sh=0) and multiples of 4096 up
+ * to 0xFFF000 (sh=1). Negative literals are rejected here — the caller
+ * (e.g. opt's machinize, or a smarter cg) is free to swap ADD ↔ SUB and
+ * retry with the negated literal; the bare predicate keeps the contract
+ * narrow. */
+static inline int aa64_addsub_imm_fits(i64 imm, u32 *imm12_out, u32 *sh_out) {
+ if (imm < 0) return 0;
+ u64 u = (u64)imm;
+ if (u <= 0xFFFu) { *imm12_out = (u32)u; *sh_out = 0; return 1; }
+ if ((u & 0xFFFu) == 0 && (u >> 12) <= 0xFFFu) {
+ *imm12_out = (u32)(u >> 12); *sh_out = 1; return 1;
+ }
+ return 0;
+}
/* ====================================================================
* Load/store, unsigned 12-bit immediate offset (LDR / STR, scaled)
diff --git a/src/arch/aarch64.c b/src/arch/aarch64.c
@@ -1068,16 +1068,22 @@ static u32 cmp_to_cond(CmpOp op) {
}
}
-/* Emit CMP a, b (= SUBS ZR, a, b). Materializes IMM operands through
- * scratch x9/x10. Width comes from `a`; signedness lives in the cond. */
+/* Emit CMP a, b (= SUBS ZR, a, b). Uses the 12-bit-imm form when `b` is
+ * an OPK_IMM that fits; otherwise materializes through scratch x9/x10
+ * and uses the shifted-register form. CMP is not commutative across the
+ * condition codes, so an IMM-on-LHS still materializes (the caller has
+ * to swap the cond if it wants to swap the operands). Width comes from
+ * `a`; signedness lives in the cond. */
static void emit_cmp_ab(CGTarget* t, Operand a_op, Operand b_op) {
MCEmitter* mc = t->mc;
u32 sf = type_is_64(a_op.type) ? 1u : 0u;
- /* Special-case CMP Rn, #0 so a literal zero compare doesn't need
- * a scratch register. */
- if (b_op.kind == OPK_IMM && b_op.v.imm == 0 && a_op.kind == OPK_REG) {
- emit32(mc, aa64_subs_imm(sf, /*Rd=ZR*/ 31u, reg_num(a_op), 0));
- return;
+ if (b_op.kind == OPK_IMM && a_op.kind != OPK_IMM) {
+ u32 imm12, sh;
+ if (aa64_addsub_imm_fits(b_op.v.imm, &imm12, &sh)) {
+ u32 rn = force_reg_int(t, a_op, sf, 9);
+ emit32(mc, aa64_subs_imm12(sf, /*Rd=ZR*/ 31u, rn, imm12, sh));
+ return;
+ }
}
u32 rn = force_reg_int(t, a_op, sf, 9);
u32 rm = force_reg_int(t, b_op, sf, (rn == 9) ? 10u : 9u);
@@ -1838,9 +1844,100 @@ static void aa_binop(CGTarget* t, BinOp op, Operand dst, Operand a_op,
u32 sf = type_is_64(dst.type) ? 1u : 0u;
u32 rd = reg_num(dst);
+
+ /* Imm-form fast paths. For commutative ops (ADD/AND/OR/XOR), if the
+ * LHS is the IMM swap to canonicalize (REG, IMM) and try to encode.
+ * For SUB we don't swap — `SUB imm, reg` has no encoding without
+ * materializing. Shifts take the imm as the count and require RHS-IMM
+ * by definition. Anything that doesn't fit the encoding falls through
+ * to force_reg_int + the shifted-register form, preserving the old
+ * behavior. */
+ u32 word;
+ switch (op) {
+ case BO_IADD:
+ case BO_AND:
+ case BO_OR:
+ case BO_XOR: {
+ if (a_op.kind == OPK_IMM && b_op.kind != OPK_IMM) {
+ Operand t_op = a_op; a_op = b_op; b_op = t_op;
+ }
+ break;
+ }
+ default: break;
+ }
+
+ /* Try the imm-form before materializing. Each case sets `word` and
+ * jumps to emit; misses fall through to the reg path below. */
+ if (b_op.kind == OPK_IMM && a_op.kind != OPK_IMM) {
+ u32 rn_reg = reg_num(a_op);
+ i64 imm = b_op.v.imm;
+ u32 imm12, sh, N, immr, imms;
+ switch (op) {
+ case BO_IADD:
+ if (aa64_addsub_imm_fits(imm, &imm12, &sh)) {
+ emit32(mc, aa64_add_imm(sf, rd, rn_reg, imm12, sh));
+ return;
+ }
+ break;
+ case BO_ISUB:
+ if (aa64_addsub_imm_fits(imm, &imm12, &sh)) {
+ emit32(mc, aa64_sub_imm(sf, rd, rn_reg, imm12, sh));
+ return;
+ }
+ break;
+ case BO_AND:
+ if (aa64_logimm_encode((u64)imm, sf, &N, &immr, &imms)) {
+ emit32(mc, aa64_and_imm(sf, rd, rn_reg, N, immr, imms));
+ return;
+ }
+ break;
+ case BO_OR:
+ if (aa64_logimm_encode((u64)imm, sf, &N, &immr, &imms)) {
+ emit32(mc, aa64_orr_imm(sf, rd, rn_reg, N, immr, imms));
+ return;
+ }
+ break;
+ case BO_XOR:
+ if (aa64_logimm_encode((u64)imm, sf, &N, &immr, &imms)) {
+ emit32(mc, aa64_eor_imm(sf, rd, rn_reg, N, immr, imms));
+ return;
+ }
+ break;
+ case BO_SHL: {
+ /* C shifts by ≥ width are UB but we don't exploit it; mask the
+ * count to width-1 to match the variable-shift behavior. */
+ u32 width = sf ? 64u : 32u;
+ u32 sh_amt = (u32)((u64)imm & (width - 1u));
+ if (aa64_lsl_imm_fields(sh_amt, sf, &immr, &imms)) {
+ emit32(mc, aa64_ubfm(sf, rd, rn_reg, immr, imms));
+ return;
+ }
+ break;
+ }
+ case BO_SHR_U: {
+ u32 width = sf ? 64u : 32u;
+ u32 sh_amt = (u32)((u64)imm & (width - 1u));
+ if (aa64_lsr_imm_fields(sh_amt, sf, &immr, &imms)) {
+ emit32(mc, aa64_ubfm(sf, rd, rn_reg, immr, imms));
+ return;
+ }
+ break;
+ }
+ case BO_SHR_S: {
+ u32 width = sf ? 64u : 32u;
+ u32 sh_amt = (u32)((u64)imm & (width - 1u));
+ if (aa64_asr_imm_fields(sh_amt, sf, &immr, &imms)) {
+ emit32(mc, aa64_sbfm(sf, rd, rn_reg, immr, imms));
+ return;
+ }
+ break;
+ }
+ default: break;
+ }
+ }
+
u32 rn = force_reg_int(t, a_op, sf, 9);
u32 rm = force_reg_int(t, b_op, sf, (rn == 9) ? 10 : 9);
- u32 word;
switch (op) {
case BO_IADD:
@@ -1900,14 +1997,14 @@ static void aa_unop(CGTarget* t, UnOp op, Operand dst, Operand a_op) {
MCEmitter* mc = t->mc;
u32 sf = type_is_64(dst.type) ? 1u : 0u;
u32 rd = reg_num(dst);
- u32 rn = reg_num(a_op);
+ /* OPK_IMM is legal per the CGTarget contract (arch.h); force_reg_int
+ * materializes into x9 when the operand isn't already a register.
+ * cg folds literal unops upstream (cg_fold_unop), so the IMM path
+ * here is only reached from opt's emit when the IR carries an
+ * unfolded literal — still a contract case we must honor. */
+ u32 rn = force_reg_int(t, a_op, sf, 9);
u32 word;
- if (a_op.kind != OPK_REG) {
- compiler_panic(t->c, impl_of(t)->loc,
- "aarch64 unop: non-REG operand not yet supported");
- }
-
switch (op) {
case UO_NEG:
word = aa64_neg(sf, rd, rn);
diff --git a/src/arch/arch.h b/src/arch/arch.h
@@ -530,11 +530,20 @@ struct CGTarget {
void (*bitfield_store)(CGTarget*, Operand record_addr,
Operand src /*REG|IMM*/, BitFieldAccess);
- /* ---- arithmetic, compare, convert ---- */
- void (*binop)(CGTarget*, BinOp, Operand dst, Operand a, Operand b);
- void (*unop)(CGTarget*, UnOp, Operand dst, Operand a);
- void (*cmp)(CGTarget*, CmpOp, Operand dst, Operand a,
- Operand b); /* materialize 0/1 */
+ /* ---- arithmetic, compare, convert ----
+ * binop/unop/cmp accept OPK_REG or OPK_IMM in source operand positions
+ * (`a`, `b`); `dst` is always OPK_REG. The backend chooses between an
+ * imm-form encoding and materializing the literal into a scratch
+ * register based on whether the value fits the instruction's imm
+ * field. FP ops require REG sources — FP literals reach the value
+ * stack through load_const into OPK_REG. cg and opt's machinize/emit
+ * both rely on this contract to pass small constants through without
+ * burning a value-stack register on materialization. */
+ void (*binop)(CGTarget*, BinOp, Operand dst /*REG*/,
+ Operand a /*REG|IMM*/, Operand b /*REG|IMM*/);
+ void (*unop)(CGTarget*, UnOp, Operand dst /*REG*/, Operand a /*REG|IMM*/);
+ void (*cmp)(CGTarget*, CmpOp, Operand dst /*REG*/,
+ Operand a /*REG|IMM*/, Operand b /*REG|IMM*/); /* materialize 0/1 */
void (*convert)(CGTarget*, ConvKind, Operand dst, Operand src);
/* ---- calls / return ----
diff --git a/src/cg/cg.c b/src/cg/cg.c
@@ -41,6 +41,7 @@
#include "abi/abi.h"
#include "arch/arch.h"
+#include "cg/fold.h"
#include "core/arena.h"
#include "core/core.h"
#include "core/heap.h"
@@ -891,6 +892,12 @@ void cg_bitfield_store(CG* g, BitFieldAccess b) {
* Arithmetic / compare / convert
* ============================================================ */
+/* Like force_reg, but leaves an OPK_IMM SValue alone — the CGTarget
+ * contract for binop/unop/cmp accepts IMM sources, so we avoid burning
+ * a value-stack register on `x + 3` style sites. The backend decides
+ * imm-form vs. materialize per the literal's width. */
+static Operand force_reg_unless_imm(CG* g, SValue* v, const Type* ty);
+
/* Force an SValue (already popped, by reference) into a register operand
* of the given type. Mutates `*v` so that v->op is OPK_REG and v->res is
* RES_REG; on lvalue inputs this means the original lvalue's base reg is
@@ -920,6 +927,11 @@ static Operand force_reg(CG* g, SValue* v, const Type* ty) {
return dst;
}
+static Operand force_reg_unless_imm(CG* g, SValue* v, const Type* ty) {
+ if (v->op.kind == OPK_IMM) return v->op;
+ return force_reg(g, v, ty);
+}
+
void cg_binop(CG* g, BinOp op) {
/* stack: [a, b] → [a OP b] */
SValue b = pop(g);
@@ -927,8 +939,37 @@ void cg_binop(CG* g, BinOp op) {
CGTarget* T = g->target;
/* Result type is `a`'s type at this slice (parser already coerced). */
const Type* ty = a.type ? a.type : b.type;
- Operand ra = force_reg(g, &a, ty);
- Operand rb = force_reg(g, &b, ty);
+
+ /* Tier 1+2: constant-fold or apply algebraic identities via the
+ * pure fold helper. KEEP_A/KEEP_B re-push the non-constant operand
+ * unchanged after releasing the IMM side (IMM carries no reg/slot
+ * obligation, but the helper is symmetric and a no-op release is
+ * cheap). */
+ {
+ Operand folded;
+ switch (cg_fold_binop(op, a.op, b.op, ty, g->abi, &folded)) {
+ case CG_FOLD_IMM:
+ release(g, &a);
+ release(g, &b);
+ push(g, make_sv(folded, ty));
+ return;
+ case CG_FOLD_KEEP_A:
+ release(g, &b);
+ push(g, a);
+ return;
+ case CG_FOLD_KEEP_B:
+ release(g, &a);
+ push(g, b);
+ return;
+ case CG_FOLD_NONE: break;
+ }
+ }
+
+ /* IMM sources are legal per the binop contract (arch.h) — the backend
+ * picks imm-form vs. materialize. cg_fold_binop has already collapsed
+ * IMM+IMM, so at most one operand here is IMM. */
+ Operand ra = force_reg_unless_imm(g, &a, ty);
+ Operand rb = force_reg_unless_imm(g, &b, ty);
Reg rr = alloc_reg_or_spill(g, type_class(ty), ty);
Operand dst = op_reg(rr, ty);
T->binop(T, op, dst, ra, rb);
@@ -941,7 +982,17 @@ void cg_unop(CG* g, UnOp op) {
SValue a = pop(g);
CGTarget* T = g->target;
const Type* ty = a.type ? a.type : a.op.type;
- Operand ra = force_reg(g, &a, ty);
+
+ {
+ Operand folded;
+ if (cg_fold_unop(op, a.op, ty, g->abi, &folded) == CG_FOLD_IMM) {
+ release(g, &a);
+ push(g, make_sv(folded, ty));
+ return;
+ }
+ }
+
+ Operand ra = force_reg_unless_imm(g, &a, ty);
Reg rr = alloc_reg_or_spill(g, type_class(ty), ty);
Operand dst = op_reg(rr, ty);
T->unop(T, op, dst, ra);
@@ -956,8 +1007,19 @@ void cg_cmp(CG* g, CmpOp op) {
CGTarget* T = g->target;
const Type* opty = a.type ? a.type : b.type;
const Type* i32 = type_prim(g->pool, TY_INT);
- Operand ra = force_reg(g, &a, opty);
- Operand rb = force_reg(g, &b, opty);
+
+ {
+ Operand folded;
+ if (cg_fold_cmp(op, a.op, b.op, i32, g->abi, &folded) == CG_FOLD_IMM) {
+ release(g, &a);
+ release(g, &b);
+ push(g, make_sv(folded, i32));
+ return;
+ }
+ }
+
+ Operand ra = force_reg_unless_imm(g, &a, opty);
+ Operand rb = force_reg_unless_imm(g, &b, opty);
Reg rr = alloc_reg_or_spill(g, RC_INT, i32);
Operand dst = op_reg(rr, i32);
T->cmp(T, op, dst, ra, rb);
@@ -1458,6 +1520,14 @@ void cg_branch_true(CG* g, CGLabel l) {
SValue v = pop(g);
CGTarget* T = g->target;
const Type* ty = v.type ? v.type : type_prim(g->pool, TY_INT);
+ /* Mirror cg_branch_false: a literal condition resolves at compile time. */
+ if (v.op.kind == OPK_IMM) {
+ if (v.op.v.imm != 0) {
+ T->jump(T, (Label)l);
+ }
+ release(g, &v);
+ return;
+ }
Operand a = force_reg(g, &v, ty);
Operand zero = op_imm(0, ty);
T->cmp_branch(T, CMP_NE, a, zero, (Label)l);
diff --git a/src/cg/fold.c b/src/cg/fold.c
@@ -0,0 +1,154 @@
+#include "cg/fold.h"
+
+/* Truncate (and re-sign-extend, for signed types) a folded i64 down to
+ * the width of `ty`, so that subsequent folds and compares see the same
+ * value the backend would have produced after narrowing to the
+ * destination register. No-op for >= 8-byte types and for NULL ty. */
+static i64 narrow(TargetABI* abi, const Type* ty, i64 v) {
+ if (!ty || !abi) return v;
+ u32 sz = abi_sizeof(abi, ty);
+ if (sz >= 8) return v;
+ u64 mask = ((u64)1 << (sz * 8)) - 1;
+ u64 u = (u64)v & mask;
+ if (abi_type_info(abi, ty).signed_) {
+ u64 sign_bit = (u64)1 << (sz * 8 - 1);
+ if (u & sign_bit) u |= ~mask;
+ }
+ return (i64)u;
+}
+
+static Operand make_imm(i64 v, const Type* ty) {
+ Operand o;
+ o.kind = OPK_IMM;
+ o.cls = RC_INT;
+ o.pad = 0;
+ o.type = ty;
+ o.v.imm = v;
+ return o;
+}
+
+/* Literal-literal integer binop. Returns 1 with *out set, or 0 if `op`
+ * isn't a foldable kind. Excludes SDIV/UDIV/SREM/UREM (must trap on
+ * divisor 0 and INT_MIN/-1), SHL/SHR_* (count >= width is type-width-
+ * dependent and not in this tier), and float ops (rounding/NaN belong
+ * to the backend). */
+static int literal_binop(BinOp op, i64 a, i64 b, i64* out) {
+ switch (op) {
+ case BO_IADD: *out = (i64)((u64)a + (u64)b); return 1;
+ case BO_ISUB: *out = (i64)((u64)a - (u64)b); return 1;
+ case BO_IMUL: *out = (i64)((u64)a * (u64)b); return 1;
+ case BO_AND: *out = a & b; return 1;
+ case BO_OR: *out = a | b; return 1;
+ case BO_XOR: *out = a ^ b; return 1;
+ default: return 0;
+ }
+}
+
+/* Algebraic-identity dispatch for an integer binop with one literal
+ * operand. `k` is the constant; `k_on_right` distinguishes lhs vs rhs
+ * for non-commutative ops (ISUB, SHL, SHR_*, SDIV, UDIV).
+ * FID_NONE — no identity, caller emits normally.
+ * FID_KEEP — drop the IMM, the non-constant operand is the result.
+ * FID_ZERO — result is constant 0; drop both operands. */
+typedef enum FoldIdent { FID_NONE, FID_KEEP, FID_ZERO } FoldIdent;
+
+static FoldIdent identity_for(BinOp op, i64 k, int k_on_right) {
+ switch (op) {
+ case BO_IADD: case BO_OR: case BO_XOR:
+ /* x + 0, 0 + x, x | 0, 0 | x, x ^ 0, 0 ^ x */
+ return (k == 0) ? FID_KEEP : FID_NONE;
+ case BO_ISUB:
+ /* x - 0 only; 0 - x needs a UO_NEG and isn't an identity. */
+ return (k == 0 && k_on_right) ? FID_KEEP : FID_NONE;
+ case BO_IMUL:
+ if (k == 1) return FID_KEEP;
+ if (k == 0) return FID_ZERO;
+ return FID_NONE;
+ case BO_AND:
+ if (k == 0) return FID_ZERO;
+ /* All-ones mask of any width sign-extends to -1 as i64. */
+ if (k == -1) return FID_KEEP;
+ return FID_NONE;
+ case BO_SDIV: case BO_UDIV:
+ /* x / 1 only; 1 / x isn't an identity and divisor-on-lhs gives
+ * no useful fold. */
+ return (k == 1 && k_on_right) ? FID_KEEP : FID_NONE;
+ case BO_SHL: case BO_SHR_S: case BO_SHR_U:
+ /* x << 0, x >> 0 only; a zero shift-count on the lhs is the
+ * value being shifted — folding that would need to release the
+ * rhs operand, deferred until a use exists. */
+ return (k == 0 && k_on_right) ? FID_KEEP : FID_NONE;
+ default:
+ return FID_NONE;
+ }
+}
+
+CGFoldKind cg_fold_binop(BinOp op, Operand a, Operand b, const Type* ty,
+ TargetABI* abi, Operand* out) {
+ /* Tier 1: both literal — fold to a single IMM. */
+ if (a.kind == OPK_IMM && b.kind == OPK_IMM) {
+ i64 r;
+ if (literal_binop(op, a.v.imm, b.v.imm, &r)) {
+ *out = make_imm(narrow(abi, ty, r), ty);
+ return CG_FOLD_IMM;
+ }
+ }
+ /* Tier 2: algebraic identities. Side-effect-free: the non-constant
+ * operand has already been materialized onto the value stack, so any
+ * computation that produced it has already executed. Dropping the
+ * IMM side is the caller's responsibility (release reg/slot if any). */
+ if (b.kind == OPK_IMM) {
+ switch (identity_for(op, b.v.imm, /*k_on_right=*/1)) {
+ case FID_KEEP: return CG_FOLD_KEEP_A;
+ case FID_ZERO: *out = make_imm(0, ty); return CG_FOLD_IMM;
+ case FID_NONE: break;
+ }
+ }
+ if (a.kind == OPK_IMM) {
+ switch (identity_for(op, a.v.imm, /*k_on_right=*/0)) {
+ case FID_KEEP: return CG_FOLD_KEEP_B;
+ case FID_ZERO: *out = make_imm(0, ty); return CG_FOLD_IMM;
+ case FID_NONE: break;
+ }
+ }
+ return CG_FOLD_NONE;
+}
+
+CGFoldKind cg_fold_unop(UnOp op, Operand a, const Type* ty,
+ TargetABI* abi, Operand* out) {
+ if (a.kind != OPK_IMM) return CG_FOLD_NONE;
+ i64 v = a.v.imm;
+ i64 r;
+ switch (op) {
+ case UO_NEG: r = (i64)(-(u64)v); break;
+ case UO_BNOT: r = ~v; break;
+ case UO_NOT: r = v ? 0 : 1; break;
+ default: return CG_FOLD_NONE;
+ }
+ *out = make_imm(narrow(abi, ty, r), ty);
+ return CG_FOLD_IMM;
+}
+
+CGFoldKind cg_fold_cmp(CmpOp op, Operand a, Operand b, const Type* int_ty,
+ TargetABI* abi, Operand* out) {
+ if (a.kind != OPK_IMM || b.kind != OPK_IMM) return CG_FOLD_NONE;
+ (void)abi; /* compare result is `int` 0/1 — no narrowing needed */
+ i64 x = a.v.imm;
+ i64 y = b.v.imm;
+ i64 r;
+ switch (op) {
+ case CMP_EQ: r = (x == y); break;
+ case CMP_NE: r = (x != y); break;
+ case CMP_LT_S: r = (x < y); break;
+ case CMP_LE_S: r = (x <= y); break;
+ case CMP_GT_S: r = (x > y); break;
+ case CMP_GE_S: r = (x >= y); break;
+ case CMP_LT_U: r = ((u64)x < (u64)y); break;
+ case CMP_LE_U: r = ((u64)x <= (u64)y); break;
+ case CMP_GT_U: r = ((u64)x > (u64)y); break;
+ case CMP_GE_U: r = ((u64)x >= (u64)y); break;
+ default: return CG_FOLD_NONE;
+ }
+ *out = make_imm(r, int_ty);
+ return CG_FOLD_IMM;
+}
diff --git a/src/cg/fold.h b/src/cg/fold.h
@@ -0,0 +1,47 @@
+#ifndef CFREE_CG_FOLD_H
+#define CFREE_CG_FOLD_H
+
+/* Pure constant-folding and algebraic-identity helpers for binop/unop/cmp
+ * on Operand inputs. No CG or IR state: callers (cg.c today, opt's
+ * pass_gvn / pass_combine eventually) inspect the result and apply it
+ * to whichever value representation they hold. All folds are restricted
+ * to integer domain — float ops never reach the OPK_IMM path (FP
+ * literals materialize via load_const to OPK_REG before they enter the
+ * value stack). Division, remainder, shifts, and FP arithmetic are
+ * deliberately excluded from the literal-fold paths to preserve trap
+ * semantics and rounding behavior; algebraic identities on those ops
+ * are limited to cases that don't depend on UB-exploiting transforms
+ * (see doc/OPT.md §5.5). */
+
+#include "abi/abi.h"
+#include "arch/arch.h"
+#include "type/type.h"
+
+typedef enum CGFoldKind {
+ CG_FOLD_NONE, /* no fold; caller emits normally */
+ CG_FOLD_IMM, /* result is the OPK_IMM Operand in *out; drop both inputs */
+ CG_FOLD_KEEP_A, /* result is `a` unchanged; drop `b` */
+ CG_FOLD_KEEP_B, /* result is `b` unchanged; drop `a` */
+} CGFoldKind;
+
+/* Binop fold + identity. Examines `a` and `b` against `op`:
+ * - both OPK_IMM → CG_FOLD_IMM, *out = literal narrowed to `ty`
+ * - one OPK_IMM identity → CG_FOLD_KEEP_A or _B, or CG_FOLD_IMM (zero)
+ * - otherwise → CG_FOLD_NONE
+ * `ty` is the result type (used for width-narrowing on the fold path).
+ * `abi` supplies size/signedness; required when ty is non-NULL. */
+CGFoldKind cg_fold_binop(BinOp op, Operand a, Operand b, const Type* ty,
+ TargetABI* abi, Operand* out);
+
+/* Unop fold. Returns CG_FOLD_IMM with *out set on success, CG_FOLD_NONE
+ * otherwise. Only integer-domain unops are folded. */
+CGFoldKind cg_fold_unop(UnOp op, Operand a, const Type* ty,
+ TargetABI* abi, Operand* out);
+
+/* Integer-compare fold. Returns CG_FOLD_IMM with *out set to 0 or 1 of
+ * type `int_ty` on success. FP compares (CMP_*_F) return CG_FOLD_NONE —
+ * NaN/ordering belongs to the backend. */
+CGFoldKind cg_fold_cmp(CmpOp op, Operand a, Operand b, const Type* int_ty,
+ TargetABI* abi, Operand* out);
+
+#endif