commit 8198e7ccb0d435aa17347041a18771fd3c9f7359
parent caa879d161dc0a3bd199b5f4efdf630702c33f4f
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Fri, 29 May 2026 15:15:33 -0700
rv64 asm: encode call/tail/la/lla pseudos and multi-word li
call/tail expand to auipc+jalr with R_RV_CALL; la/lla expand to the PC-relative
auipc+addi pair (R_RV_PCREL_HI20 + R_RV_PCREL_LO12_I via a .LpcrelHi local
anchor), mirroring native.c's rv_emit_global_addr (cfree's static Local-Exec
model has no GOT, so la==lla). Multi-word li implements the RISCVMatInt
lui/addi/slli chain (ADDI, with ADDIW only when the hi20 is negative in 32-bit
form). Corpus: rv64_{call_tail,la_lla,li_multi}, byte-verified vs llvm-mc.
Diffstat:
12 files changed, 210 insertions(+), 8 deletions(-)
diff --git a/src/arch/rv64/asm.c b/src/arch/rv64/asm.c
@@ -341,14 +341,9 @@ static u32 assemble_one(AsmDriver* d, const Rv64InsnDesc* desc) {
}
case RV64_FMT_I:
- /* Aliases first. */
+ /* Aliases first. `li` is handled earlier by rv64_emit_pseudo (it may
+ * need a multi-word expansion), so it never reaches here. */
if (desc->flags & RV64_ASMFL_ALIAS) {
- if (slice_eq_cstr(desc->mnemonic, "li")) {
- rd = parse_xreg(d);
- expect_comma(d);
- imm = (i32)asm_driver_parse_const(d);
- return enc_i(m, rd, 0u, imm);
- }
if (slice_eq_cstr(desc->mnemonic, "mv")) {
rd = parse_xreg(d);
expect_comma(d);
@@ -673,6 +668,160 @@ static u32 assemble_one(AsmDriver* d, const Rv64InsnDesc* desc) {
}
}
+/* ============================================================
+ * Multi-word pseudo-instruction expansion.
+ *
+ * call/tail/la/lla expand to a PC-relative AUIPC + (JALR | ADDI) pair;
+ * `li` with a constant that does not fit a 12-bit signed immediate
+ * expands to an LUI/ADDI(W)/SLLI chain (no relocations). Each 32-bit
+ * word goes out through rv64_emit32 — the same path assemble_one's
+ * single-word result uses — and relocations are attached via
+ * mc->emit_reloc_at at the appropriate word offset. */
+
+/* 12-bit signed immediate range check for li short-circuit. */
+static bool rv_fits_i12(i64 v) { return v >= -2048 && v <= 2047; }
+
+/* Sign-extend the low 12 bits of v. */
+static i64 rv_sext12(i64 v) {
+ return (i64)((((u64)v & 0xfffu) ^ 0x800u)) - 0x800;
+}
+
+/* Emit an AUIPC rd,0 + a R_RV_PCREL_HI20(sym) reloc, then create a local
+ * `.LpcrelHi` anchor at the AUIPC offset and return that anchor symbol so
+ * the paired low-half reloc can reference it. Mirrors native.c's
+ * rv_emit_global_addr (the non-GOT branch). */
+static ObjSymId rv_emit_pcrel_hi(AsmDriver* d, u32 rd, ObjSymId sym,
+ i64 addend) {
+ MCEmitter* mc = asm_driver_mc(d);
+ ObjBuilder* obj = asm_driver_ob(d);
+ Compiler* c = asm_driver_compiler(d);
+ u32 sec = mc->section_id;
+ u32 ap = mc->pos(mc);
+ rv64_emit32(mc, rv_auipc(rd, 0));
+ mc->emit_reloc_at(mc, sec, ap, R_RV_PCREL_HI20, sym, addend, 0, 0);
+ Sym an = pool_intern_slice(c->global, SLICE_LIT(".LpcrelHi"));
+ return obj_symbol(obj, an, SB_LOCAL, SK_OBJ, sec, (u64)ap, 0);
+}
+
+/* call/tail: AUIPC <link>,0 + JALR <rd>,<link>,0 with one R_RV_CALL reloc
+ * at the AUIPC. `link` is the register the AUIPC materializes into and the
+ * JALR's base; `rd` is the JALR link-register (ra for call, zero for
+ * tail). The linker patches both words from the single R_RV_CALL reloc. */
+static void rv_emit_call_pseudo(AsmDriver* d, u32 link, u32 rd) {
+ MCEmitter* mc = asm_driver_mc(d);
+ ObjSymId sym = OBJ_SYM_NONE;
+ i64 off = 0;
+ asm_driver_parse_sym_expr(d, &sym, &off);
+ if (sym == OBJ_SYM_NONE)
+ asm_driver_panic(d, "rv64 asm: call/tail target must be a symbol");
+ u32 sec = mc->section_id;
+ u32 ap = mc->pos(mc);
+ rv64_emit32(mc, rv_auipc(link, 0));
+ rv64_emit32(mc, rv_jalr(rd, link, 0));
+ mc->emit_reloc_at(mc, sec, ap, R_RV_CALL, sym, off, 0, 0);
+}
+
+/* la/lla rd, sym: AUIPC rd,%pcrel_hi(sym) + ADDI rd,rd,%pcrel_lo(anchor).
+ * cfree's static Local-Exec model has no GOT, so `la` == `lla`. */
+static void rv_emit_la_pseudo(AsmDriver* d) {
+ MCEmitter* mc = asm_driver_mc(d);
+ u32 rd = parse_xreg(d);
+ expect_comma(d);
+ ObjSymId sym = OBJ_SYM_NONE;
+ i64 off = 0;
+ asm_driver_parse_sym_expr(d, &sym, &off);
+ if (sym == OBJ_SYM_NONE)
+ asm_driver_panic(d, "rv64 asm: la/lla target must be a symbol");
+ ObjSymId anchor = rv_emit_pcrel_hi(d, rd, sym, off);
+ u32 sec = mc->section_id;
+ u32 lp = mc->pos(mc);
+ rv64_emit32(mc, rv_addi(rd, rd, 0));
+ mc->emit_reloc_at(mc, sec, lp, R_RV_PCREL_LO12_I, anchor, 0, 0, 0);
+}
+
+/* LUI immediate that sign-extends to a negative 32-bit value: bit 19 of
+ * the 20-bit field is set, i.e. Hi20 >= 0x80000. */
+#define RV_LUI_HI20_SIGN 0x80000LL
+
+/* Materialize a 64-bit constant into `rd` via the LLVM RISCVMatInt
+ * sequence: for values fitting a signed 32-bit range, LUI + ADDI/ADDIW;
+ * otherwise a recursive top-down hi20/lo12 split with SLLI shifts that
+ * absorb trailing zeros. No relocations.
+ *
+ * After an LUI, the low-half add is ADDIW whenever the LUI value is
+ * negative in 32-bit form (Hi20 >= RV_LUI_HI20_SIGN) and ADDI otherwise;
+ * either way rd ends up holding `val`. NOTE(review): LLVM's
+ * generateInstSeqImpl appears to pick ADDIW only when LUI+lo12 overflows
+ * int32, so negative 32-bit constants may not byte-match -- confirm. */
+static void rv_emit_li_value(MCEmitter* mc, u32 rd, i64 val) {
+ if (val >= -2147483648LL && val <= 2147483647LL) {
+ i64 hi20 = ((val + 0x800) >> 12) & 0xfffffLL;
+ i64 lo12 = rv_sext12(val);
+ if (hi20) rv64_emit32(mc, rv_lui(rd, (u32)hi20));
+ if (lo12 || hi20 == 0) {
+ u32 src = hi20 ? rd : (u32)RV_ZERO;
+ if (hi20 >= RV_LUI_HI20_SIGN)
+ rv64_emit32(mc, rv_addiw(rd, src, (i32)lo12));
+ else
+ rv64_emit32(mc, rv_addi(rd, src, (i32)lo12));
+ }
+ return;
+ }
+ /* >32-bit: split off the low 12 bits, recurse on the (shifted) high
+ * part, then SLLI back and ADD the low bits. The subtraction is done in
+ * unsigned space so it cannot signed-overflow at the int64 extremes
+ * (e.g. val=INT64_MAX, lo12=-1); the result has its low 12 bits clear,
+ * and the arithmetic right shift recovers the sign-extended high part. */
+ i64 lo12 = rv_sext12(val);
+ i64 hi = (i64)((u64)val - (u64)lo12) >> 12;
+ u32 shift = 12;
+ /* Absorb trailing zeros of the high part into the shift amount. */
+ while ((hi & 1) == 0) {
+ hi >>= 1;
+ ++shift;
+ }
+ rv_emit_li_value(mc, rd, hi);
+ rv64_emit32(mc, rv_slli(rd, rd, shift));
+ if (lo12) rv64_emit32(mc, rv_addi(rd, rd, (i32)lo12));
+}
+
+/* Dispatch a pseudo-instruction. Returns true if it consumed the operands
+ * and emitted its expansion; false to fall through to the single-word
+ * path. `li` is always handled here: a single ADDI when the immediate
+ * fits the 12-bit signed range, the multi-word chain otherwise. */
+static bool rv64_emit_pseudo(AsmDriver* d, const Rv64InsnDesc* desc) {
+ MCEmitter* mc = asm_driver_mc(d);
+ if (desc->fmt == RV64_FMT_PSEUDO) {
+ if (slice_eq_cstr(desc->mnemonic, "call")) {
+ rv_emit_call_pseudo(d, RV_RA, RV_RA);
+ return true;
+ }
+ if (slice_eq_cstr(desc->mnemonic, "tail")) {
+ rv_emit_call_pseudo(d, RV_T1, RV_ZERO);
+ return true;
+ }
+ /* la / lla — identical PC-relative expansion in cfree. */
+ rv_emit_la_pseudo(d);
+ return true;
+ }
+ if ((desc->flags & RV64_ASMFL_ALIAS) && slice_eq_cstr(desc->mnemonic, "li")) {
+ /* `li` is consumed here in full (register, comma, constant) and never
+ * falls through to assemble_one. Constants that fit the 12-bit signed
+ * range emit one ADDI rd, zero, imm; anything larger takes the
+ * multi-word rv_emit_li_value expansion. */
+ u32 rd = parse_xreg(d);
+ expect_comma(d);
+ i64 imm = asm_driver_parse_const(d);
+ if (rv_fits_i12(imm)) {
+ rv64_emit32(mc, rv_addi(rd, RV_ZERO, (i32)imm));
+ } else {
+ rv_emit_li_value(mc, rd, imm);
+ }
+ return true;
+ }
+ return false;
+}
+
static void rv64_arch_asm_insn(ArchAsm* base, AsmDriver* d, Sym mnemonic) {
MCEmitter* mc = asm_driver_mc(d);
const Rv64InsnDesc* desc;
@@ -680,6 +829,7 @@ static void rv64_arch_asm_insn(ArchAsm* base, AsmDriver* d, Sym mnemonic) {
(void)asm_driver_cur_section(d);
desc = rv64_asm_find(pool_slice(asm_driver_pool(d), mnemonic));
if (!desc) asm_driver_panic(d, "rv64 asm: unsupported instruction");
+ if (rv64_emit_pseudo(d, desc)) return;
if (desc->flags & RV64_ASMFL_C16)
rv64_emit16(mc, assemble_one(d, desc));
else
diff --git a/src/arch/rv64/isa.c b/src/arch/rv64/isa.c
@@ -279,6 +279,20 @@ const Rv64InsnDesc rv64_insn_table[] = {
{MN("jal"), MATCH_J(RV_JAL), MASK_J, RV64_FMT_J, 0, {0, 0}},
{MN("jalr"), MATCH_I(0x0, RV_JALR), MASK_I, RV64_FMT_JALR, 0, {0, 0}},
+ /* ---- Multi-word pseudo-instructions ----
+ * `call sym` = AUIPC ra, 0; JALR ra, ra, 0 — one R_RV_CALL reloc at
+ * the AUIPC; the linker patches both words.
+ * `tail sym` = AUIPC t1, 0; JALR zero, t1, 0 — same R_RV_CALL reloc.
+ * `la rd,sym` / `lla rd,sym` = AUIPC rd, %pcrel_hi(sym); ADDI rd, rd,
+ * %pcrel_lo. cfree's static Local-Exec model treats `la`
+ * and `lla` identically (no GOT indirection). The match
+ * column is unused: RV64_FMT_PSEUDO dispatches on the
+ * mnemonic and emits the expansion directly. */
+ {MN("call"), 0u, 0u, RV64_FMT_PSEUDO, RV64_ASMFL_PSEUDO, {0, 0}},
+ {MN("tail"), 0u, 0u, RV64_FMT_PSEUDO, RV64_ASMFL_PSEUDO, {0, 0}},
+ {MN("la"), 0u, 0u, RV64_FMT_PSEUDO, RV64_ASMFL_PSEUDO, {0, 0}},
+ {MN("lla"), 0u, 0u, RV64_FMT_PSEUDO, RV64_ASMFL_PSEUDO, {0, 0}},
+
/* ---- FENCE ---- */
{MN("fence"), MATCH_I(0x0, RV_FENCE), MASK_I, RV64_FMT_FENCE, 0, {0, 0}},
{MN("fence.i"),
@@ -1043,7 +1057,8 @@ const u32 rv64_insn_table_n =
const Rv64InsnDesc* rv64_disasm_find(u32 word) {
for (u32 i = 0; i < rv64_insn_table_n; ++i) {
const Rv64InsnDesc* d = &rv64_insn_table[i];
- if ((d->flags & RV64_ASMFL_C16)) continue; /* 32-bit decode path */
+ if ((d->flags & RV64_ASMFL_C16)) continue; /* 32-bit decode path */
+ if ((d->flags & RV64_ASMFL_PSEUDO)) continue; /* assembler-only expansion */
if ((word & d->mask) == d->match) return d;
}
return NULL;
@@ -1980,5 +1995,9 @@ void rv64_print_operands(StrBuf* sb, const Rv64InsnDesc* desc, u32 word,
break;
case RV64_FMT_C_NONE:
break;
+ case RV64_FMT_PSEUDO:
+ /* Assembler-only multi-word pseudo; rv64_disasm_find never returns
+ * these rows, so the printer is never reached for this format. */
+ break;
}
}
diff --git a/src/arch/rv64/isa.h b/src/arch/rv64/isa.h
@@ -604,6 +604,10 @@ typedef enum Rv64Format {
RV64_FMT_CB, /* branch: funct3 imm rs1' imm op (C.BEQZ, C.BNEZ) */
RV64_FMT_CJ, /* jump: funct3 imm op (C.J, C.JAL_unused on RV64) */
RV64_FMT_C_NONE, /* known opcode with no operands (C.NOP, C.EBREAK) */
+ /* Assembler-only multi-word pseudo-instruction (call/tail/la/lla). The
+ * descriptor's `match` is unused; the assembler dispatches on mnemonic
+ * and emits the AUIPC+JALR / AUIPC+ADDI expansion directly. */
+ RV64_FMT_PSEUDO,
} Rv64Format;
typedef enum Rv64DecodedOpcode {
@@ -623,6 +627,11 @@ typedef enum Rv64DecodedOpcode {
#define RV64_ASMFL_FP 0x02u /* operands take f-register prefix */
#define RV64_ASMFL_NORM 0x04u /* FP_RM row prints without rm suffix */
#define RV64_ASMFL_C16 0x08u /* 16-bit compressed instruction */
+/* Assembler-only multi-word pseudo (call/tail/la/lla). These expand to
+ * several 32-bit words and never participate in disassembly — the decoder
+ * sees the individual auipc/jalr/addi words instead. rv64_disasm_find
+ * skips rows carrying this flag. */
+#define RV64_ASMFL_PSEUDO 0x10u
/* ===================================================================
* Per-format field structs + pack/unpack pure functions.
diff --git a/test/asm/encode/rv64_call_tail.expected.hex b/test/asm/encode/rv64_call_tail.expected.hex
@@ -0,0 +1 @@
+97000000e780000017030000670003006780000067800000
diff --git a/test/asm/encode/rv64_call_tail.s b/test/asm/encode/rv64_call_tail.s
@@ -0,0 +1,7 @@
+.text
+ call foo
+ tail bar
+foo:
+ ret
+bar:
+ ret
diff --git a/test/asm/encode/rv64_call_tail.targets b/test/asm/encode/rv64_call_tail.targets
@@ -0,0 +1 @@
+rv64
diff --git a/test/asm/encode/rv64_la_lla.expected.hex b/test/asm/encode/rv64_la_lla.expected.hex
@@ -0,0 +1 @@
+1705000013050500970500009385050067800000
diff --git a/test/asm/encode/rv64_la_lla.s b/test/asm/encode/rv64_la_lla.s
@@ -0,0 +1,5 @@
+.text
+ la a0, foo
+ lla a1, foo
+foo:
+ ret
diff --git a/test/asm/encode/rv64_la_lla.targets b/test/asm/encode/rv64_la_lla.targets
@@ -0,0 +1 @@
+rv64
diff --git a/test/asm/encode/rv64_li_multi.expected.hex b/test/asm/encode/rv64_li_multi.expected.hex
@@ -0,0 +1 @@
+3755341213058567b7b5de009385f5db9395c5009385d5ef9395c5009385e5af1306f0ff9306f07f67800000
diff --git a/test/asm/encode/rv64_li_multi.s b/test/asm/encode/rv64_li_multi.s
@@ -0,0 +1,6 @@
+.text
+ li a0, 0x12345678
+ li a1, 0xdeadbeefcafe
+ li a2, -1
+ li a3, 0x7ff
+ ret
diff --git a/test/asm/encode/rv64_li_multi.targets b/test/asm/encode/rv64_li_multi.targets
@@ -0,0 +1 @@
+rv64