commit 4934c27ff80eca5aedb33bb589ddc42bdd9973ee
parent edb863ae6bdc7fba48ca788ced88e943c92ee280
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Mon, 11 May 2026 12:42:44 -0700
cg: imm-form encodings for x64 and rv64 binop/unop
Mirrors the aarch64 work in edb863a — both backends now honor the
REG|IMM contract on binop/unop and pick the imm-form encoding when the
literal fits.
x64: emit_alu_imm8/imm32, emit_imul_imm8/imm32, emit_cmp imm32 fast
path beyond the existing imm8 case. x_binop swaps commutative ops to
put IMM on the RHS, then encodes ADD/SUB/AND/OR/XOR via 0x83/0x81 /sub
and IMUL via 0x6B/0x69 /r when the literal fits i8 or i32. Shifts
use the existing C1 /sub ib helper for OPK_IMM counts and skip the
mov-to-cl step. x_unop routes through force_reg_int.
rv64: rv_binop encodes IADD/AND/OR/XOR via *_addi/andi/ori/xori for
12-bit signed imm. ISUB has no SUBI in RV-I; encoded as ADDI with the
negated literal when -imm fits the same range. Shifts use the
slli(w)/srli(w)/srai(w) helpers for OPK_IMM counts. rv_unop routes
through force_reg_int.
Diffstat:
| M | src/arch/rv64.c | | | 80 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---- |
| M | src/arch/x64.c | | | 164 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------ |
2 files changed, 228 insertions(+), 16 deletions(-)
diff --git a/src/arch/rv64.c b/src/arch/rv64.c
@@ -1459,6 +1459,78 @@ static void rv_binop(CGTarget* t, BinOp op, Operand dst, Operand a_op,
}
u32 sf = type_is_64(dst.type) ? 1u : 0u;
u32 rd = reg_num(dst);
+
+ /* Canonicalize IMM to the RHS for commutative ops so the imm-form
+ * check below handles `3 + a` the same as `a + 3`. ISUB is not
+ * commutative — IMM-on-LHS still materializes. */
+ switch (op) {
+ case BO_IADD:
+ case BO_AND:
+ case BO_OR:
+ case BO_XOR: {
+ if (a_op.kind == OPK_IMM && b_op.kind != OPK_IMM) {
+ Operand t_op = a_op; a_op = b_op; b_op = t_op;
+ }
+ break;
+ }
+ default: break;
+ }
+
+ /* IMM-form fast paths. RV-I admits a 12-bit signed immediate for
+ * ADDI/ANDI/ORI/XORI/SLTI/SLTIU (range [-2048, 2047]). ISUB has no
+ * SUBI — we encode it as ADDI with the negated literal when -imm
+ * fits the same range (i.e., imm ∈ [-2047, 2048]; -2048 is
+ * intentionally excluded since its negation, 2048, does not fit).
+ * Shifts admit a shamt: 6 bits (0..63) on the 64-bit forms, 5 bits
+ * (0..31) on the W-variants. */
+ if (b_op.kind == OPK_IMM && a_op.kind != OPK_IMM) {
+ u32 ra = reg_num(a_op);
+ i64 imm = b_op.v.imm;
+ int fits12 = imm >= -2048 && imm <= 2047;
+ switch (op) {
+ case BO_IADD:
+ if (fits12) {
+ emit32(mc, sf ? rv_addi(rd, ra, (i32)imm) : rv_addiw(rd, ra, (i32)imm));
+ return;
+ }
+ break;
+ case BO_ISUB:
+ if (imm >= -2047 && imm <= 2048) {
+ emit32(mc, sf ? rv_addi(rd, ra, (i32)-imm) : rv_addiw(rd, ra, (i32)-imm));
+ return;
+ }
+ break;
+ case BO_AND:
+ if (fits12) { emit32(mc, rv_andi(rd, ra, (i32)imm)); return; }
+ break;
+ case BO_OR:
+ if (fits12) { emit32(mc, rv_ori(rd, ra, (i32)imm)); return; }
+ break;
+ case BO_XOR:
+ if (fits12) { emit32(mc, rv_xori(rd, ra, (i32)imm)); return; }
+ break;
+ case BO_SHL: {
+ u32 width = sf ? 64u : 32u;
+ u32 sh = (u32)((u64)imm & (width - 1u));
+ emit32(mc, sf ? rv_slli(rd, ra, sh) : rv_slliw(rd, ra, sh));
+ return;
+ }
+ case BO_SHR_U: {
+ u32 width = sf ? 64u : 32u;
+ u32 sh = (u32)((u64)imm & (width - 1u));
+ emit32(mc, sf ? rv_srli(rd, ra, sh) : rv_srliw(rd, ra, sh));
+ return;
+ }
+ case BO_SHR_S: {
+ u32 width = sf ? 64u : 32u;
+ u32 sh = (u32)((u64)imm & (width - 1u));
+ emit32(mc, sf ? rv_srai(rd, ra, sh) : rv_sraiw(rd, ra, sh));
+ return;
+ }
+ default: break;
+ }
+ }
+
u32 ra = force_reg_int(t, a_op, RV_T0);
u32 rb = force_reg_int(t, b_op, (ra == RV_T0) ? RV_T1 : RV_T0);
@@ -1485,10 +1557,10 @@ static void rv_unop(CGTarget* t, UnOp op, Operand dst, Operand a_op) {
MCEmitter* mc = t->mc;
u32 sf = type_is_64(dst.type) ? 1u : 0u;
u32 rd = reg_num(dst);
- if (a_op.kind != OPK_REG) {
- compiler_panic(t->c, impl_of(t)->loc, "rv64 unop: non-REG operand NYI");
- }
- u32 rn = reg_num(a_op);
+ /* IMM operand is legal per the CGTarget contract (arch.h); materialize
+ * into t0 when not already a register. cg folds literal unops upstream
+ * via cg_fold_unop. */
+ u32 rn = force_reg_int(t, a_op, RV_T0);
switch (op) {
case UO_NEG:
emit32(mc, sf ? rv_sub(rd, RV_ZERO, rn) : rv_subw(rd, RV_ZERO, rn));
diff --git a/src/arch/x64.c b/src/arch/x64.c
@@ -496,6 +496,73 @@ static void emit_cmp_imm8(MCEmitter* mc, int w, u32 reg, i8 imm) {
if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc);
}
+/* Emit `OP reg, imm8` using opcode 0x83 /sub ib (the byte is
+ * sign-extended). /sub picks the operation: ADD=0, OR=1, ADC=2, SBB=3,
+ * AND=4, SUB=5, XOR=6, CMP=7. `w` is forwarded to emit_rex(). */
+static void emit_alu_imm8(MCEmitter* mc, int w, u32 sub, u32 reg, i8 imm) {
+  const u32 start = obj_pos(mc->obj, mc->section_id);
+  emit_rex(mc, w, 0, 0, reg);
+  u8 enc[3] = {0x83, modrm(3u, sub, reg), (u8)imm};
+  mc->emit_bytes(mc, enc, 3);
+  if (mc->debug) {
+    debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
+  }
+}
+
+/* ALU r/m, imm32: opcode 0x81 /sub id (sign-extended for w=1). The four
+ * immediate bytes are written little-endian from the unsigned pattern. */
+static void emit_alu_imm32(MCEmitter* mc, int w, u32 sub, u32 reg, i32 imm) {
+  u32 ofs = obj_pos(mc->obj, mc->section_id);
+  u32 bits = (u32)imm; /* avoid right-shifting a negative signed value */
+  emit_rex(mc, w, 0, 0, reg);
+  u8 buf[6] = {0x81, modrm(3u, sub, reg)};
+  for (u32 i = 0; i < 4u; i++) {
+    buf[2u + i] = (u8)(bits >> (8u * i));
+  }
+  mc->emit_bytes(mc, buf, 6);
+  if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc);
+}
+
+
+/* IMUL r, r/m, imm: 0x6B /r ib (imm8 sext) or 0x69 /r id (imm32 sext).
+ * Both forms are three-operand: `dst` receives `src * imm`, so the
+ * caller needs no explicit `mov dst, src` beforehand — unlike the ALU
+ * imm forms, which read-modify-write a single operand. */
+static void emit_imul_imm8(MCEmitter* mc, int w, u32 dst, u32 src, i8 imm) {
+  const u32 start = obj_pos(mc->obj, mc->section_id);
+  emit_rex(mc, w, dst, 0, src);
+  /* Opcode, modrm() byte, then the immediate byte. */
+  u8 enc[3] = {0x6B, modrm(3u, dst, src), (u8)imm};
+  mc->emit_bytes(mc, enc, 3);
+  if (mc->debug) {
+    debug_emit_row(mc->debug, mc->section_id, start, mc->loc);
+  }
+}
+/* IMUL dst, src, imm32: opcode 0x69 /r id. The four immediate bytes are
+ * written little-endian from the unsigned bit pattern of `imm`. */
+static void emit_imul_imm32(MCEmitter* mc, int w, u32 dst, u32 src, i32 imm) {
+  u32 ofs = obj_pos(mc->obj, mc->section_id);
+  u32 bits = (u32)imm; /* avoid right-shifting a negative signed value */
+  emit_rex(mc, w, dst, 0, src);
+  u8 buf[6] = {0x69, modrm(3u, dst, src)};
+  for (u32 i = 0; i < 4u; i++) {
+    buf[2u + i] = (u8)(bits >> (8u * i));
+  }
+  mc->emit_bytes(mc, buf, 6);
+  if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc);
+}
+
+/* Width predicate: nonzero iff `imm` lies in [-128, 127], i.e. can be
+ * encoded by the 0x83/0x6B imm8-sign-extended forms. */
+static int imm_fits_i8(i64 imm) { return !(imm < -128 || imm > 127); }
+/* Width predicate: is `imm` within [INT32_MIN, INT32_MAX], i.e. usable
+ * by the imm32 forms (0x81 / 0x69), which sign-extend to 64 bits when
+ * w=1? Returns 0 otherwise; such literals must go through a register. */
+static int imm_fits_i32(i64 imm) {
+  const i64 lo = -2147483648LL, hi = 2147483647LL;
+  return lo <= imm && imm <= hi;
+}
+
static void emit_test_self(MCEmitter* mc, int w, u32 reg) {
emit_alu_rr(mc, w, 0x85, reg, reg);
}
@@ -1012,10 +1079,18 @@ static u32 force_reg_int(CGTarget* t, Operand op, int w, u32 scratch) {
static void emit_cmp_ab(CGTarget* t, Operand a_op, Operand b_op) {
int w = type_is_64(a_op.type) ? 1 : 0;
- if (a_op.kind == OPK_REG && b_op.kind == OPK_IMM && b_op.v.imm >= -128 &&
- b_op.v.imm <= 127) {
- emit_cmp_imm8(t->mc, w, a_op.v.reg & 0xFu, (i8)b_op.v.imm);
- return;
+ /* IMM RHS imm8 / imm32 fast paths. CMP is not commutative across the
+ * cond codes, so IMM-on-LHS still has to materialize. */
+ if (b_op.kind == OPK_IMM && a_op.kind == OPK_REG) {
+ if (imm_fits_i8(b_op.v.imm)) {
+ emit_cmp_imm8(t->mc, w, a_op.v.reg & 0xFu, (i8)b_op.v.imm);
+ return;
+ }
+ if (imm_fits_i32(b_op.v.imm)) {
+ emit_alu_imm32(t->mc, w, /*sub=CMP*/ 7u, a_op.v.reg & 0xFu,
+ (i32)b_op.v.imm);
+ return;
+ }
}
u32 ra = force_reg_int(t, a_op, w, X64_RAX);
u32 rb = force_reg_int(t, b_op, w, (ra == X64_R11) ? X64_RAX : X64_R11);
@@ -1578,24 +1653,88 @@ static void x_binop(CGTarget* t, BinOp op, Operand dst, Operand a_op,
return;
}
- /* Shifts: shift count must be in cl. */
+ /* Shifts: shift count must be in cl OR encoded as imm8 directly (C1
+ * /sub ib). Use the imm form when b is OPK_IMM and skip materializing
+ * into cl. */
if (op == BO_SHL || op == BO_SHR_U || op == BO_SHR_S) {
u32 ra = force_reg_int(t, a_op, w, X64_RAX);
if (rd != ra) emit_mov_rr(mc, w, rd, ra);
+ u32 sub = (op == BO_SHL) ? 4u : (op == BO_SHR_U ? 5u : 7u);
+ if (b_op.kind == OPK_IMM) {
+ u32 width = w ? 64u : 32u;
+ emit_shift_imm(mc, w, sub, rd, (u8)((u64)b_op.v.imm & (width - 1u)));
+ return;
+ }
if (b_op.kind == OPK_REG) {
u32 rb = b_op.v.reg & 0xFu;
if (rb != X64_RCX) emit_mov_rr(mc, 0, X64_RCX, rb);
- } else if (b_op.kind == OPK_IMM) {
- emit_load_imm(mc, 0, X64_RCX, b_op.v.imm & 0x3f);
} else {
compiler_panic(t->c, impl_of(t)->loc,
"x64 shift: count kind %d unsupported", (int)b_op.kind);
}
- u32 sub = (op == BO_SHL) ? 4u : (op == BO_SHR_U ? 5u : 7u);
emit_shift_cl(mc, w, sub, rd);
return;
}
+ /* For commutative ops, canonicalize IMM to the RHS so the imm-form
+ * check below fires uniformly. ISUB is non-commutative — IMM-on-LHS
+ * still materializes. */
+ switch (op) {
+ case BO_IADD:
+ case BO_AND:
+ case BO_OR:
+ case BO_XOR:
+ case BO_IMUL: {
+ if (a_op.kind == OPK_IMM && b_op.kind != OPK_IMM) {
+ Operand t_op = a_op; a_op = b_op; b_op = t_op;
+ }
+ break;
+ }
+ default: break;
+ }
+
+ /* IMM-form fast paths. For ADD/SUB/AND/OR/XOR the ALU imm encoding
+ * reads-and-writes a single reg — copy ra → dst first, then `dst OP=
+ * imm`. For IMUL the imm form is three-operand (`dst = src * imm`)
+ * and reads from `ra` directly without the prep copy. */
+ if (b_op.kind == OPK_IMM && a_op.kind == OPK_REG &&
+ (op == BO_IADD || op == BO_ISUB || op == BO_AND || op == BO_OR ||
+ op == BO_XOR || op == BO_IMUL)) {
+ i64 imm = b_op.v.imm;
+ u32 ra = a_op.v.reg & 0xFu;
+ if (op == BO_IMUL) {
+ if (imm_fits_i8(imm)) {
+ emit_imul_imm8(mc, w, rd, ra, (i8)imm);
+ return;
+ }
+ if (imm_fits_i32(imm)) {
+ emit_imul_imm32(mc, w, rd, ra, (i32)imm);
+ return;
+ }
+ } else {
+ u32 sub;
+ switch (op) {
+ case BO_IADD: sub = 0u; break;
+ case BO_OR: sub = 1u; break;
+ case BO_AND: sub = 4u; break;
+ case BO_ISUB: sub = 5u; break;
+ case BO_XOR: sub = 6u; break;
+ default: sub = 0u; break; /* unreachable */
+ }
+ if (imm_fits_i8(imm)) {
+ if (rd != ra) emit_mov_rr(mc, w, rd, ra);
+ emit_alu_imm8(mc, w, sub, rd, (i8)imm);
+ return;
+ }
+ if (imm_fits_i32(imm)) {
+ if (rd != ra) emit_mov_rr(mc, w, rd, ra);
+ emit_alu_imm32(mc, w, sub, rd, (i32)imm);
+ return;
+ }
+ }
+ /* Fall through to materialize for >32-bit literals. */
+ }
+
/* Generic 2-operand ALU: copy ra → dst, then dst op= rb. */
u32 ra = force_reg_int(t, a_op, w, X64_RAX);
if (rd != ra) emit_mov_rr(mc, w, rd, ra);
@@ -1617,10 +1756,11 @@ static void x_unop(CGTarget* t, UnOp op, Operand dst, Operand a_op) {
MCEmitter* mc = t->mc;
int w = type_is_64(dst.type) ? 1 : 0;
u32 rd = dst.v.reg & 0xFu;
- u32 ra = a_op.v.reg & 0xFu;
- if (a_op.kind != OPK_REG)
- compiler_panic(t->c, impl_of(t)->loc,
- "x64 unop: non-REG operand not supported");
+ /* IMM operand is legal per the CGTarget contract (arch.h); materialize
+ * into a scratch register when not already a register. cg folds
+ * literal unops upstream (cg_fold_unop), so this path is reached only
+ * when opt's emit hands us an unfolded literal. */
+ u32 ra = force_reg_int(t, a_op, w, X64_R11);
switch (op) {
case UO_NEG:
if (rd != ra) emit_mov_rr(mc, w, rd, ra);