commit 75f73a664a610f563956626b66d30aa3c5766190
parent 81b4f3f49ae6eddfb4e460f85a323ea47715141f
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Fri, 29 May 2026 13:54:47 -0700
rv64 asm: assemble symbolic branches and jumps via relocations
assemble_one routed every B/J immediate through asm_driver_parse_const, which
panics on any symbol, so the standalone assembler (and inline-asm template
walker) could not assemble 'beq a0,a1,label', 'j label', or 'jal ra,func' —
ordinary symbolic control flow. The reference aa64/x64 assemblers already
parse a symbolic target and emit a branch relocation.
Add rv_reloc_target: parse the operand as a sym-expr; on a symbolic target
emit R_RV_BRANCH (B-format) or R_RV_JAL (J-format) at the instruction offset
(the position the caller is about to write) and encode imm=0; on a bare
constant keep the existing numeric-displacement behavior. These are exactly
the reloc kinds the rv64 codegen already emits via emit_label_ref, so the
apply path is already exercised.
Verified: symbolic beq/bne/beqz/j/jal now emit the right R_RV_BRANCH/R_RV_JAL
relocs against the right symbols, numeric offsets still assemble, and an
rv64_branch_sym encode corpus case is added (runs under the rv64 asm lane).
Not yet covered (larger, separate): %hi/%lo/%pcrel reloc-operator operand
syntax and the multi-word call/tail/la/lla pseudo-instructions.
Diffstat:
4 files changed, 38 insertions(+), 8 deletions(-)
diff --git a/src/arch/rv64/asm.c b/src/arch/rv64/asm.c
@@ -287,6 +287,23 @@ static u32 enc_c_cj(u32 match, i32 imm) {
(((u >> 1) & 7u) << 3) | (((u >> 5) & 1u) << 2);
}
+/* Parse a branch/jump target operand. With a symbolic target (a label), emit
+ * a `kind` relocation (symbol plus parsed offset) at the current position —
+ * exactly where the caller is about to write this instruction word — and
+ * return 0 as the placeholder immediate. With a bare constant, return it as
+ * the PC-relative byte displacement (the existing numeric-offset behavior). */
+static i32 rv_reloc_target(AsmDriver* d, RelocKind kind) {
+ ObjSymId sym = OBJ_SYM_NONE; /* if still NONE after parsing, treated as a bare constant */
+ i64 off = 0;
+ asm_driver_parse_sym_expr(d, &sym, &off);
+ if (sym != OBJ_SYM_NONE) {
+ MCEmitter* mc = asm_driver_mc(d);
+ mc->emit_reloc_at(mc, mc->section_id, mc->pos(mc), kind, sym, off, 0, 0);
+ return 0; /* placeholder immediate; the relocation carries the target */
+ }
+ return (i32)off; /* NOTE(review): i64 -> i32 narrowing is not range-checked here */
+}
+
/* Per-format parser — reads the operand list off the driver and returns
* the encoded 32-bit word, given the matched descriptor. */
static u32 assemble_one(AsmDriver* d, const Rv64InsnDesc* desc) {
@@ -383,30 +400,30 @@ static u32 assemble_one(AsmDriver* d, const Rv64InsnDesc* desc) {
return enc_u(m, rd, (u32)imm);
case RV64_FMT_J:
+ /* `j label` / `jal rd, label` accept a symbolic target (R_RV_JAL) or a
+ * bare numeric displacement. */
if ((desc->flags & RV64_ASMFL_ALIAS) &&
slice_eq_cstr(desc->mnemonic, "j")) {
- imm = (i32)asm_driver_parse_const(d);
- return enc_j(m, 0u, imm);
+ return enc_j(m, 0u, rv_reloc_target(d, R_RV_JAL));
}
rd = parse_xreg(d);
expect_comma(d);
- imm = (i32)asm_driver_parse_const(d);
- return enc_j(m, rd, imm);
+ return enc_j(m, rd, rv_reloc_target(d, R_RV_JAL));
case RV64_FMT_B:
+ /* `beq rs1, rs2, label` (and beqz/bnez aliases) accept a symbolic target
+ * (R_RV_BRANCH) or a bare numeric displacement. */
if (desc->flags & RV64_ASMFL_ALIAS) {
/* beqz / bnez: rs, off. */
rs1 = parse_xreg(d);
expect_comma(d);
- imm = (i32)asm_driver_parse_const(d);
- return enc_b(m, rs1, 0u, imm);
+ return enc_b(m, rs1, 0u, rv_reloc_target(d, R_RV_BRANCH));
}
rs1 = parse_xreg(d);
expect_comma(d);
rs2 = parse_xreg(d);
expect_comma(d);
- imm = (i32)asm_driver_parse_const(d);
- return enc_b(m, rs1, rs2, imm);
+ return enc_b(m, rs1, rs2, rv_reloc_target(d, R_RV_BRANCH));
case RV64_FMT_LOAD:
rd = (desc->flags & RV64_ASMFL_FP) ? parse_freg(d) : parse_xreg(d);
diff --git a/test/asm/encode/rv64_branch_sym.expected.hex b/test/asm/encode/rv64_branch_sym.expected.hex
@@ -0,0 +1 @@
+6300b5006310d600630007006f000000ef0000006780000067800000
diff --git a/test/asm/encode/rv64_branch_sym.s b/test/asm/encode/rv64_branch_sym.s
@@ -0,0 +1,10 @@
+.text
+ beq a0, a1, foo
+ bne a2, a3, bar
+ beqz a4, foo
+ j foo
+ jal ra, bar
+foo:
+ ret
+bar:
+ ret
diff --git a/test/asm/encode/rv64_branch_sym.targets b/test/asm/encode/rv64_branch_sym.targets
@@ -0,0 +1 @@
+rv64
+\ No newline at end of file