kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 8198e7ccb0d435aa17347041a18771fd3c9f7359
parent caa879d161dc0a3bd199b5f4efdf630702c33f4f
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Fri, 29 May 2026 15:15:33 -0700

rv64 asm: encode call/tail/la/lla pseudos and multi-word li

call/tail expand to auipc+jalr with a single R_RV_CALL reloc; la/lla expand to
the PC-relative auipc+addi pair (R_RV_PCREL_HI20 + R_RV_PCREL_LO12_I via a
.LpcrelHi local anchor), mirroring native.c's rv_emit_global_addr. cfree's
static Local-Exec model has no GOT, so la == lla. Multi-word li implements the
RISCVMatInt lui/addi/slli chain; the add that follows the lui is ADDI, or ADDIW
only when the hi20 is negative in 32-bit form. Corpus:
rv64_{call_tail,la_lla,li_multi}, byte-verified vs llvm-mc.

Diffstat:
M src/arch/rv64/asm.c | 164 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
M src/arch/rv64/isa.c | 21 ++++++++++++++++++++-
M src/arch/rv64/isa.h | 9 +++++++++
A test/asm/encode/rv64_call_tail.expected.hex | 1 +
A test/asm/encode/rv64_call_tail.s | 7 +++++++
A test/asm/encode/rv64_call_tail.targets | 1 +
A test/asm/encode/rv64_la_lla.expected.hex | 1 +
A test/asm/encode/rv64_la_lla.s | 5 +++++
A test/asm/encode/rv64_la_lla.targets | 1 +
A test/asm/encode/rv64_li_multi.expected.hex | 1 +
A test/asm/encode/rv64_li_multi.s | 6 ++++++
A test/asm/encode/rv64_li_multi.targets | 1 +
12 files changed, 210 insertions(+), 8 deletions(-)

diff --git a/src/arch/rv64/asm.c b/src/arch/rv64/asm.c @@ -341,14 +341,9 @@ static u32 assemble_one(AsmDriver* d, const Rv64InsnDesc* desc) { } case RV64_FMT_I: - /* Aliases first. */ + /* Aliases first. `li` is handled earlier by rv64_emit_pseudo (it may + * need a multi-word expansion), so it never reaches here. */ if (desc->flags & RV64_ASMFL_ALIAS) { - if (slice_eq_cstr(desc->mnemonic, "li")) { - rd = parse_xreg(d); - expect_comma(d); - imm = (i32)asm_driver_parse_const(d); - return enc_i(m, rd, 0u, imm); - } if (slice_eq_cstr(desc->mnemonic, "mv")) { rd = parse_xreg(d); expect_comma(d); @@ -673,6 +668,160 @@ static u32 assemble_one(AsmDriver* d, const Rv64InsnDesc* desc) { } } +/* ============================================================ + * Multi-word pseudo-instruction expansion. + * + * call/tail/la/lla expand to a PC-relative AUIPC + (JALR | ADDI) pair; + * `li` with a constant that does not fit a 12-bit signed immediate + * expands to an LUI/ADDI(W)/SLLI chain (no relocations). Each 32-bit + * word goes out through rv64_emit32 — the same path assemble_one's + * single-word result uses — and relocations are attached via + * mc->emit_reloc_at at the appropriate word offset. */ + +/* 12-bit signed immediate range check for li short-circuit. */ +static bool rv_fits_i12(i64 v) { return v >= -2048 && v <= 2047; } + +/* Sign-extend the low 12 bits of v. */ +static i64 rv_sext12(i64 v) { + return (i64)((((u64)v & 0xfffu) ^ 0x800u)) - 0x800; +} + +/* Emit an AUIPC rd,0 + a R_RV_PCREL_HI20(sym) reloc, then create a local + * `.LpcrelHi` anchor at the AUIPC offset and return that anchor symbol so + * the paired low-half reloc can reference it. Mirrors native.c's + * rv_emit_global_addr (the non-GOT branch). 
*/ +static ObjSymId rv_emit_pcrel_hi(AsmDriver* d, u32 rd, ObjSymId sym, + i64 addend) { + MCEmitter* mc = asm_driver_mc(d); + ObjBuilder* obj = asm_driver_ob(d); + Compiler* c = asm_driver_compiler(d); + u32 sec = mc->section_id; + u32 ap = mc->pos(mc); + rv64_emit32(mc, rv_auipc(rd, 0)); + mc->emit_reloc_at(mc, sec, ap, R_RV_PCREL_HI20, sym, addend, 0, 0); + Sym an = pool_intern_slice(c->global, SLICE_LIT(".LpcrelHi")); + return obj_symbol(obj, an, SB_LOCAL, SK_OBJ, sec, (u64)ap, 0); +} + +/* call/tail: AUIPC <link>,0 + JALR <rd>,<link>,0 with one R_RV_CALL reloc + * at the AUIPC. `link` is the register the AUIPC materializes into and the + * JALR's base; `rd` is the JALR link-register (ra for call, zero for + * tail). The linker patches both words from the single R_RV_CALL reloc. */ +static void rv_emit_call_pseudo(AsmDriver* d, u32 link, u32 rd) { + MCEmitter* mc = asm_driver_mc(d); + ObjSymId sym = OBJ_SYM_NONE; + i64 off = 0; + asm_driver_parse_sym_expr(d, &sym, &off); + if (sym == OBJ_SYM_NONE) + asm_driver_panic(d, "rv64 asm: call/tail target must be a symbol"); + u32 sec = mc->section_id; + u32 ap = mc->pos(mc); + rv64_emit32(mc, rv_auipc(link, 0)); + rv64_emit32(mc, rv_jalr(rd, link, 0)); + mc->emit_reloc_at(mc, sec, ap, R_RV_CALL, sym, off, 0, 0); +} + +/* la/lla rd, sym: AUIPC rd,%pcrel_hi(sym) + ADDI rd,rd,%pcrel_lo(anchor). + * cfree's static Local-Exec model has no GOT, so `la` == `lla`. 
*/ +static void rv_emit_la_pseudo(AsmDriver* d) { + MCEmitter* mc = asm_driver_mc(d); + u32 rd = parse_xreg(d); + expect_comma(d); + ObjSymId sym = OBJ_SYM_NONE; + i64 off = 0; + asm_driver_parse_sym_expr(d, &sym, &off); + if (sym == OBJ_SYM_NONE) + asm_driver_panic(d, "rv64 asm: la/lla target must be a symbol"); + ObjSymId anchor = rv_emit_pcrel_hi(d, rd, sym, off); + u32 sec = mc->section_id; + u32 lp = mc->pos(mc); + rv64_emit32(mc, rv_addi(rd, rd, 0)); + mc->emit_reloc_at(mc, sec, lp, R_RV_PCREL_LO12_I, anchor, 0, 0, 0); +} + +/* LUI immediate that sign-extends to a negative 32-bit value: bit 19 of + * the 20-bit field is set, i.e. Hi20 >= 0x80000. */ +#define RV_LUI_HI20_SIGN 0x80000LL + +/* Materialize a 64-bit constant into `rd` via the LLVM RISCVMatInt + * sequence: for values fitting a signed 32-bit range, LUI + ADDI/ADDIW; + * otherwise a recursive top-down hi20/lo12 split with SLLI shifts that + * absorb trailing zeros. No relocations. + * + * After an LUI, the low-half add uses ADDIW only when the LUI value is + * negative in 32-bit form (Hi20 >= RV_LUI_HI20_SIGN): there the add must + * wrap in 32-bit arithmetic and re-sign-extend to land in range. When the + * LUI value is non-negative in its low 32 bits, plain ADDI keeps the + * 64-bit result correct (matching LLVM's generateInstSeqImpl). */ +static void rv_emit_li_value(MCEmitter* mc, u32 rd, i64 val) { + if (val >= -2147483648LL && val <= 2147483647LL) { + i64 hi20 = ((val + 0x800) >> 12) & 0xfffffLL; + i64 lo12 = rv_sext12(val); + if (hi20) rv64_emit32(mc, rv_lui(rd, (u32)hi20)); + if (lo12 || hi20 == 0) { + u32 src = hi20 ? rd : (u32)RV_ZERO; + if (hi20 >= RV_LUI_HI20_SIGN) + rv64_emit32(mc, rv_addiw(rd, src, (i32)lo12)); + else + rv64_emit32(mc, rv_addi(rd, src, (i32)lo12)); + } + return; + } + /* >32-bit: split off the low 12 bits, recurse on the (shifted) high + * part, then SLLI back and ADD the low bits. 
The subtraction is done in + * unsigned space so it cannot signed-overflow at the int64 extremes + * (e.g. val=INT64_MAX, lo12=-1); the result has its low 12 bits clear, + * and the arithmetic right shift recovers the sign-extended high part. */ + i64 lo12 = rv_sext12(val); + i64 hi = (i64)((u64)val - (u64)lo12) >> 12; + u32 shift = 12; + /* Absorb trailing zeros of the high part into the shift amount. */ + while ((hi & 1) == 0) { + hi >>= 1; + ++shift; + } + rv_emit_li_value(mc, rd, hi); + rv64_emit32(mc, rv_slli(rd, rd, shift)); + if (lo12) rv64_emit32(mc, rv_addi(rd, rd, (i32)lo12)); +} + +/* Dispatch a multi-word pseudo. Returns true if it consumed the operands + * and emitted its expansion; false to fall through to the single-word + * path. `li` is handled here only when its immediate exceeds the 12-bit + * signed range the alias row encodes directly. */ +static bool rv64_emit_pseudo(AsmDriver* d, const Rv64InsnDesc* desc) { + MCEmitter* mc = asm_driver_mc(d); + if (desc->fmt == RV64_FMT_PSEUDO) { + if (slice_eq_cstr(desc->mnemonic, "call")) { + rv_emit_call_pseudo(d, RV_RA, RV_RA); + return true; + } + if (slice_eq_cstr(desc->mnemonic, "tail")) { + rv_emit_call_pseudo(d, RV_T1, RV_ZERO); + return true; + } + /* la / lla — identical PC-relative expansion in cfree. */ + rv_emit_la_pseudo(d); + return true; + } + if ((desc->flags & RV64_ASMFL_ALIAS) && slice_eq_cstr(desc->mnemonic, "li")) { + /* Peek the immediate without consuming the destination register: the + * single-word alias path re-parses both. We commit to the multi-word + * path only for out-of-range constants, leaving the existing 12-bit + * fast path (and its golden behavior) untouched. 
*/ + u32 rd = parse_xreg(d); + expect_comma(d); + i64 imm = asm_driver_parse_const(d); + if (rv_fits_i12(imm)) { + rv64_emit32(mc, rv_addi(rd, RV_ZERO, (i32)imm)); + } else { + rv_emit_li_value(mc, rd, imm); + } + return true; + } + return false; +} + static void rv64_arch_asm_insn(ArchAsm* base, AsmDriver* d, Sym mnemonic) { MCEmitter* mc = asm_driver_mc(d); const Rv64InsnDesc* desc; @@ -680,6 +829,7 @@ static void rv64_arch_asm_insn(ArchAsm* base, AsmDriver* d, Sym mnemonic) { (void)asm_driver_cur_section(d); desc = rv64_asm_find(pool_slice(asm_driver_pool(d), mnemonic)); if (!desc) asm_driver_panic(d, "rv64 asm: unsupported instruction"); + if (rv64_emit_pseudo(d, desc)) return; if (desc->flags & RV64_ASMFL_C16) rv64_emit16(mc, assemble_one(d, desc)); else diff --git a/src/arch/rv64/isa.c b/src/arch/rv64/isa.c @@ -279,6 +279,20 @@ const Rv64InsnDesc rv64_insn_table[] = { {MN("jal"), MATCH_J(RV_JAL), MASK_J, RV64_FMT_J, 0, {0, 0}}, {MN("jalr"), MATCH_I(0x0, RV_JALR), MASK_I, RV64_FMT_JALR, 0, {0, 0}}, + /* ---- Multi-word pseudo-instructions ---- + * `call sym` = AUIPC ra, %pcrel_hi(sym); JALR ra, %pcrel_lo(ra) — one + * R_RV_CALL reloc at the AUIPC; the linker patches both. + * `tail sym` = AUIPC t1, ...; JALR zero, t1 — same R_RV_CALL reloc. + * `la rd,sym` / `lla rd,sym` = AUIPC rd, %pcrel_hi(sym); ADDI rd, rd, + * %pcrel_lo. cfree's static Local-Exec model treats `la` + * and `lla` identically (no GOT indirection). The match + * column is unused: RV64_FMT_PSEUDO dispatches on the + * mnemonic and emits the expansion directly. 
*/ + {MN("call"), 0u, 0u, RV64_FMT_PSEUDO, RV64_ASMFL_PSEUDO, {0, 0}}, + {MN("tail"), 0u, 0u, RV64_FMT_PSEUDO, RV64_ASMFL_PSEUDO, {0, 0}}, + {MN("la"), 0u, 0u, RV64_FMT_PSEUDO, RV64_ASMFL_PSEUDO, {0, 0}}, + {MN("lla"), 0u, 0u, RV64_FMT_PSEUDO, RV64_ASMFL_PSEUDO, {0, 0}}, + /* ---- FENCE ---- */ {MN("fence"), MATCH_I(0x0, RV_FENCE), MASK_I, RV64_FMT_FENCE, 0, {0, 0}}, {MN("fence.i"), @@ -1043,7 +1057,8 @@ const u32 rv64_insn_table_n = const Rv64InsnDesc* rv64_disasm_find(u32 word) { for (u32 i = 0; i < rv64_insn_table_n; ++i) { const Rv64InsnDesc* d = &rv64_insn_table[i]; - if ((d->flags & RV64_ASMFL_C16)) continue; /* 32-bit decode path */ + if ((d->flags & RV64_ASMFL_C16)) continue; /* 32-bit decode path */ + if ((d->flags & RV64_ASMFL_PSEUDO)) continue; /* assembler-only expansion */ if ((word & d->mask) == d->match) return d; } return NULL; @@ -1980,5 +1995,9 @@ void rv64_print_operands(StrBuf* sb, const Rv64InsnDesc* desc, u32 word, break; case RV64_FMT_C_NONE: break; + case RV64_FMT_PSEUDO: + /* Assembler-only multi-word pseudo; rv64_disasm_find never returns + * these rows, so the printer is never reached for this format. */ + break; } } diff --git a/src/arch/rv64/isa.h b/src/arch/rv64/isa.h @@ -604,6 +604,10 @@ typedef enum Rv64Format { RV64_FMT_CB, /* branch: funct3 imm rs1' imm op (C.BEQZ, C.BNEZ) */ RV64_FMT_CJ, /* jump: funct3 imm op (C.J, C.JAL_unused on RV64) */ RV64_FMT_C_NONE, /* known opcode with no operands (C.NOP, C.EBREAK) */ + /* Assembler-only multi-word pseudo-instruction (call/tail/la/lla). The + * descriptor's `match` is unused; the assembler dispatches on mnemonic + * and emits the AUIPC+JALR / AUIPC+ADDI expansion directly. 
*/ + RV64_FMT_PSEUDO, } Rv64Format; typedef enum Rv64DecodedOpcode { @@ -623,6 +627,11 @@ typedef enum Rv64DecodedOpcode { #define RV64_ASMFL_FP 0x02u /* operands take f-register prefix */ #define RV64_ASMFL_NORM 0x04u /* FP_RM row prints without rm suffix */ #define RV64_ASMFL_C16 0x08u /* 16-bit compressed instruction */ +/* Assembler-only multi-word pseudo (call/tail/la/lla). These expand to + * several 32-bit words and never participate in disassembly — the decoder + * sees the individual auipc/jalr/addi words instead. rv64_disasm_find + * skips rows carrying this flag. */ +#define RV64_ASMFL_PSEUDO 0x10u /* =================================================================== * Per-format field structs + pack/unpack pure functions. diff --git a/test/asm/encode/rv64_call_tail.expected.hex b/test/asm/encode/rv64_call_tail.expected.hex @@ -0,0 +1 @@ +97000000e780000017030000670003006780000067800000 diff --git a/test/asm/encode/rv64_call_tail.s b/test/asm/encode/rv64_call_tail.s @@ -0,0 +1,7 @@ +.text + call foo + tail bar +foo: + ret +bar: + ret diff --git a/test/asm/encode/rv64_call_tail.targets b/test/asm/encode/rv64_call_tail.targets @@ -0,0 +1 @@ +rv64 diff --git a/test/asm/encode/rv64_la_lla.expected.hex b/test/asm/encode/rv64_la_lla.expected.hex @@ -0,0 +1 @@ +1705000013050500970500009385050067800000 diff --git a/test/asm/encode/rv64_la_lla.s b/test/asm/encode/rv64_la_lla.s @@ -0,0 +1,5 @@ +.text + la a0, foo + lla a1, foo +foo: + ret diff --git a/test/asm/encode/rv64_la_lla.targets b/test/asm/encode/rv64_la_lla.targets @@ -0,0 +1 @@ +rv64 diff --git a/test/asm/encode/rv64_li_multi.expected.hex b/test/asm/encode/rv64_li_multi.expected.hex @@ -0,0 +1 @@ +3755341213058567b7b5de009385f5db9395c5009385d5ef9395c5009385e5af1306f0ff9306f07f67800000 diff --git a/test/asm/encode/rv64_li_multi.s b/test/asm/encode/rv64_li_multi.s @@ -0,0 +1,6 @@ +.text + li a0, 0x12345678 + li a1, 0xdeadbeefcafe + li a2, -1 + li a3, 0x7ff + ret diff --git 
a/test/asm/encode/rv64_li_multi.targets b/test/asm/encode/rv64_li_multi.targets @@ -0,0 +1 @@ +rv64