kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 4934c27ff80eca5aedb33bb589ddc42bdd9973ee
parent edb863ae6bdc7fba48ca788ced88e943c92ee280
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Mon, 11 May 2026 12:42:44 -0700

cg: imm-form encodings for x64 and rv64 binop/unop

Mirrors the aarch64 work in edb863a — both backends now honor the
REG|IMM contract on binop/unop and pick the imm-form encoding when the
literal fits.

x64: emit_alu_imm8/imm32, emit_imul_imm8/imm32, emit_cmp imm32 fast
path beyond the existing imm8 case.  x_binop swaps commutative ops to
put IMM on the RHS, then encodes ADD/SUB/AND/OR/XOR via 0x83/0x81 /sub
and IMUL via 0x6B/0x69 /r when the literal fits i8 or i32.  Shifts
use the existing C1 /sub ib helper for OPK_IMM counts and skip the
mov-to-cl step.  x_unop routes through force_reg_int.

rv64: rv_binop encodes IADD/AND/OR/XOR via rv_addi(w)/rv_andi/rv_ori/rv_xori for
12-bit signed imm.  ISUB has no SUBI in RV-I; encoded as ADDI with the
negated literal when -imm fits the same range.  Shifts use the
slli(w)/srli(w)/srai(w) helpers for OPK_IMM counts.  rv_unop routes
through force_reg_int.

Diffstat:
M src/arch/rv64.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
M src/arch/x64.c | 164 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
2 files changed, 228 insertions(+), 16 deletions(-)

diff --git a/src/arch/rv64.c b/src/arch/rv64.c @@ -1459,6 +1459,78 @@ static void rv_binop(CGTarget* t, BinOp op, Operand dst, Operand a_op, } u32 sf = type_is_64(dst.type) ? 1u : 0u; u32 rd = reg_num(dst); + + /* Canonicalize IMM to the RHS for commutative ops so the imm-form + * check below handles `3 + a` the same as `a + 3`. ISUB is not + * commutative — IMM-on-LHS still materializes. */ + switch (op) { + case BO_IADD: + case BO_AND: + case BO_OR: + case BO_XOR: { + if (a_op.kind == OPK_IMM && b_op.kind != OPK_IMM) { + Operand t_op = a_op; a_op = b_op; b_op = t_op; + } + break; + } + default: break; + } + + /* IMM-form fast paths. RV-I admits a 12-bit signed immediate for + * ADDI/ANDI/ORI/XORI/SLTI/SLTIU (range [-2048, 2047]). ISUB has no + * SUBI — we encode it as ADDI with the negated literal when -imm + * fits the same range (i.e., imm ∈ [-2047, 2048]; INT_MIN is + * intentionally excluded since -INT_MIN overflows). Shifts admit a + * shamt: 6 bits (0..63) on the 64-bit forms, 5 bits (0..31) on the + * W-variants. */ + if (b_op.kind == OPK_IMM && a_op.kind != OPK_IMM) { + u32 ra = reg_num(a_op); + i64 imm = b_op.v.imm; + int fits12 = imm >= -2048 && imm <= 2047; + switch (op) { + case BO_IADD: + if (fits12) { + emit32(mc, sf ? rv_addi(rd, ra, (i32)imm) : rv_addiw(rd, ra, (i32)imm)); + return; + } + break; + case BO_ISUB: + if (imm >= -2047 && imm <= 2048) { + emit32(mc, sf ? rv_addi(rd, ra, (i32)-imm) : rv_addiw(rd, ra, (i32)-imm)); + return; + } + break; + case BO_AND: + if (fits12) { emit32(mc, rv_andi(rd, ra, (i32)imm)); return; } + break; + case BO_OR: + if (fits12) { emit32(mc, rv_ori(rd, ra, (i32)imm)); return; } + break; + case BO_XOR: + if (fits12) { emit32(mc, rv_xori(rd, ra, (i32)imm)); return; } + break; + case BO_SHL: { + u32 width = sf ? 64u : 32u; + u32 sh = (u32)((u64)imm & (width - 1u)); + emit32(mc, sf ? rv_slli(rd, ra, sh) : rv_slliw(rd, ra, sh)); + return; + } + case BO_SHR_U: { + u32 width = sf ? 
64u : 32u; + u32 sh = (u32)((u64)imm & (width - 1u)); + emit32(mc, sf ? rv_srli(rd, ra, sh) : rv_srliw(rd, ra, sh)); + return; + } + case BO_SHR_S: { + u32 width = sf ? 64u : 32u; + u32 sh = (u32)((u64)imm & (width - 1u)); + emit32(mc, sf ? rv_srai(rd, ra, sh) : rv_sraiw(rd, ra, sh)); + return; + } + default: break; + } + } + u32 ra = force_reg_int(t, a_op, RV_T0); u32 rb = force_reg_int(t, b_op, (ra == RV_T0) ? RV_T1 : RV_T0); @@ -1485,10 +1557,10 @@ static void rv_unop(CGTarget* t, UnOp op, Operand dst, Operand a_op) { MCEmitter* mc = t->mc; u32 sf = type_is_64(dst.type) ? 1u : 0u; u32 rd = reg_num(dst); - if (a_op.kind != OPK_REG) { - compiler_panic(t->c, impl_of(t)->loc, "rv64 unop: non-REG operand NYI"); - } - u32 rn = reg_num(a_op); + /* IMM operand is legal per the CGTarget contract (arch.h); materialize + * into t0 when not already a register. cg folds literal unops upstream + * via cg_fold_unop. */ + u32 rn = force_reg_int(t, a_op, RV_T0); switch (op) { case UO_NEG: emit32(mc, sf ? rv_sub(rd, RV_ZERO, rn) : rv_subw(rd, RV_ZERO, rn)); diff --git a/src/arch/x64.c b/src/arch/x64.c @@ -496,6 +496,73 @@ static void emit_cmp_imm8(MCEmitter* mc, int w, u32 reg, i8 imm) { if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); } +/* ALU r/m, imm8: opcode 0x83 /sub ib (sign-extended). sub: ADD=0, + * OR=1, ADC=2, SBB=3, AND=4, SUB=5, XOR=6, CMP=7. */ +static void emit_alu_imm8(MCEmitter* mc, int w, u32 sub, u32 reg, i8 imm) { + u32 ofs = obj_pos(mc->obj, mc->section_id); + emit_rex(mc, w, 0, 0, reg); + u8 buf[3]; + buf[0] = 0x83; + buf[1] = modrm(3u, sub, reg); + buf[2] = (u8)imm; + mc->emit_bytes(mc, buf, 3); + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} + +/* ALU r/m, imm32: opcode 0x81 /sub id (sign-extended for w=1). 
*/ +static void emit_alu_imm32(MCEmitter* mc, int w, u32 sub, u32 reg, i32 imm) { + u32 ofs = obj_pos(mc->obj, mc->section_id); + emit_rex(mc, w, 0, 0, reg); + u8 buf[6]; + buf[0] = 0x81; + buf[1] = modrm(3u, sub, reg); + buf[2] = (u8)(imm & 0xFF); + buf[3] = (u8)((imm >> 8) & 0xFF); + buf[4] = (u8)((imm >> 16) & 0xFF); + buf[5] = (u8)((imm >> 24) & 0xFF); + mc->emit_bytes(mc, buf, 6); + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} + +/* IMUL r, r/m, imm: 0x6B /r ib (imm8 sext) or 0x69 /r id (imm32 sext). + * Both forms write the result back to the same `dst` register so the + * caller doesn't need an explicit copy beforehand — unlike the ALU + * forms which read-modify-write a single operand. */ +static void emit_imul_imm8(MCEmitter* mc, int w, u32 dst, u32 src, i8 imm) { + u32 ofs = obj_pos(mc->obj, mc->section_id); + emit_rex(mc, w, dst, 0, src); + u8 buf[3]; + buf[0] = 0x6B; + buf[1] = modrm(3u, dst, src); + buf[2] = (u8)imm; + mc->emit_bytes(mc, buf, 3); + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} +static void emit_imul_imm32(MCEmitter* mc, int w, u32 dst, u32 src, i32 imm) { + u32 ofs = obj_pos(mc->obj, mc->section_id); + emit_rex(mc, w, dst, 0, src); + u8 buf[6]; + buf[0] = 0x69; + buf[1] = modrm(3u, dst, src); + buf[2] = (u8)(imm & 0xFF); + buf[3] = (u8)((imm >> 8) & 0xFF); + buf[4] = (u8)((imm >> 16) & 0xFF); + buf[5] = (u8)((imm >> 24) & 0xFF); + mc->emit_bytes(mc, buf, 6); + if (mc->debug) debug_emit_row(mc->debug, mc->section_id, ofs, mc->loc); +} + +/* Width predicate: does `imm` fit in an i8 (used by the 0x83/0x6B + * imm8-sign-extended forms)? */ +static int imm_fits_i8(i64 imm) { return imm >= -128 && imm <= 127; } +/* Width predicate: does `imm` fit in a signed 32-bit value (the 0x81/ + * 0x69 imm32-sign-extended forms; for w=1 the imm is sign-extended to + * 64). Returns 0 for values outside [INT32_MIN, INT32_MAX] — those + * require a full materialization through emit_load_imm. 
*/ +static int imm_fits_i32(i64 imm) { + return imm >= -2147483648LL && imm <= 2147483647LL; +} + static void emit_test_self(MCEmitter* mc, int w, u32 reg) { emit_alu_rr(mc, w, 0x85, reg, reg); } @@ -1012,10 +1079,18 @@ static u32 force_reg_int(CGTarget* t, Operand op, int w, u32 scratch) { static void emit_cmp_ab(CGTarget* t, Operand a_op, Operand b_op) { int w = type_is_64(a_op.type) ? 1 : 0; - if (a_op.kind == OPK_REG && b_op.kind == OPK_IMM && b_op.v.imm >= -128 && - b_op.v.imm <= 127) { - emit_cmp_imm8(t->mc, w, a_op.v.reg & 0xFu, (i8)b_op.v.imm); - return; + /* IMM RHS imm8 / imm32 fast paths. CMP is not commutative across the + * cond codes, so IMM-on-LHS still has to materialize. */ + if (b_op.kind == OPK_IMM && a_op.kind == OPK_REG) { + if (imm_fits_i8(b_op.v.imm)) { + emit_cmp_imm8(t->mc, w, a_op.v.reg & 0xFu, (i8)b_op.v.imm); + return; + } + if (imm_fits_i32(b_op.v.imm)) { + emit_alu_imm32(t->mc, w, /*sub=CMP*/ 7u, a_op.v.reg & 0xFu, + (i32)b_op.v.imm); + return; + } } u32 ra = force_reg_int(t, a_op, w, X64_RAX); u32 rb = force_reg_int(t, b_op, w, (ra == X64_R11) ? X64_RAX : X64_R11); @@ -1578,24 +1653,88 @@ static void x_binop(CGTarget* t, BinOp op, Operand dst, Operand a_op, return; } - /* Shifts: shift count must be in cl. */ + /* Shifts: shift count must be in cl OR encoded as imm8 directly (C1 + * /sub ib). Use the imm form when b is OPK_IMM and skip materializing + * into cl. */ if (op == BO_SHL || op == BO_SHR_U || op == BO_SHR_S) { u32 ra = force_reg_int(t, a_op, w, X64_RAX); if (rd != ra) emit_mov_rr(mc, w, rd, ra); + u32 sub = (op == BO_SHL) ? 4u : (op == BO_SHR_U ? 5u : 7u); + if (b_op.kind == OPK_IMM) { + u32 width = w ? 
64u : 32u; + emit_shift_imm(mc, w, sub, rd, (u8)((u64)b_op.v.imm & (width - 1u))); + return; + } if (b_op.kind == OPK_REG) { u32 rb = b_op.v.reg & 0xFu; if (rb != X64_RCX) emit_mov_rr(mc, 0, X64_RCX, rb); - } else if (b_op.kind == OPK_IMM) { - emit_load_imm(mc, 0, X64_RCX, b_op.v.imm & 0x3f); } else { compiler_panic(t->c, impl_of(t)->loc, "x64 shift: count kind %d unsupported", (int)b_op.kind); } - u32 sub = (op == BO_SHL) ? 4u : (op == BO_SHR_U ? 5u : 7u); emit_shift_cl(mc, w, sub, rd); return; } + /* For commutative ops, canonicalize IMM to the RHS so the imm-form + * check below fires uniformly. ISUB is non-commutative — IMM-on-LHS + * still materializes. */ + switch (op) { + case BO_IADD: + case BO_AND: + case BO_OR: + case BO_XOR: + case BO_IMUL: { + if (a_op.kind == OPK_IMM && b_op.kind != OPK_IMM) { + Operand t_op = a_op; a_op = b_op; b_op = t_op; + } + break; + } + default: break; + } + + /* IMM-form fast paths. For ADD/SUB/AND/OR/XOR the ALU imm encoding + * reads-and-writes a single reg — copy ra → dst first, then `dst OP= + * imm`. For IMUL the imm form is three-operand (`dst = src * imm`) + * and reads from `ra` directly without the prep copy. 
*/ + if (b_op.kind == OPK_IMM && a_op.kind == OPK_REG && + (op == BO_IADD || op == BO_ISUB || op == BO_AND || op == BO_OR || + op == BO_XOR || op == BO_IMUL)) { + i64 imm = b_op.v.imm; + u32 ra = a_op.v.reg & 0xFu; + if (op == BO_IMUL) { + if (imm_fits_i8(imm)) { + emit_imul_imm8(mc, w, rd, ra, (i8)imm); + return; + } + if (imm_fits_i32(imm)) { + emit_imul_imm32(mc, w, rd, ra, (i32)imm); + return; + } + } else { + u32 sub; + switch (op) { + case BO_IADD: sub = 0u; break; + case BO_OR: sub = 1u; break; + case BO_AND: sub = 4u; break; + case BO_ISUB: sub = 5u; break; + case BO_XOR: sub = 6u; break; + default: sub = 0u; break; /* unreachable */ + } + if (imm_fits_i8(imm)) { + if (rd != ra) emit_mov_rr(mc, w, rd, ra); + emit_alu_imm8(mc, w, sub, rd, (i8)imm); + return; + } + if (imm_fits_i32(imm)) { + if (rd != ra) emit_mov_rr(mc, w, rd, ra); + emit_alu_imm32(mc, w, sub, rd, (i32)imm); + return; + } + } + /* Fall through to materialize for >32-bit literals. */ + } + /* Generic 2-operand ALU: copy ra → dst, then dst op= rb. */ u32 ra = force_reg_int(t, a_op, w, X64_RAX); if (rd != ra) emit_mov_rr(mc, w, rd, ra); @@ -1617,10 +1756,11 @@ static void x_unop(CGTarget* t, UnOp op, Operand dst, Operand a_op) { MCEmitter* mc = t->mc; int w = type_is_64(dst.type) ? 1 : 0; u32 rd = dst.v.reg & 0xFu; - u32 ra = a_op.v.reg & 0xFu; - if (a_op.kind != OPK_REG) - compiler_panic(t->c, impl_of(t)->loc, - "x64 unop: non-REG operand not supported"); + /* IMM operand is legal per the CGTarget contract (arch.h); materialize + * into a scratch register when not already a register. cg folds + * literal unops upstream (cg_fold_unop), so this path is reached only + * when opt's emit hands us an unfolded literal. */ + u32 ra = force_reg_int(t, a_op, w, X64_R11); switch (op) { case UO_NEG: if (rd != ra) emit_mov_rr(mc, w, rd, ra);