kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 75f73a664a610f563956626b66d30aa3c5766190
parent 81b4f3f49ae6eddfb4e460f85a323ea47715141f
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Fri, 29 May 2026 13:54:47 -0700

rv64 asm: assemble symbolic branches and jumps via relocations

assemble_one routed every B/J immediate through asm_driver_parse_const, which
panics on any symbol, so the standalone assembler (and inline-asm template
walker) could not assemble 'beq a0,a1,label', 'j label', or 'jal ra,func' —
ordinary symbolic control flow. The reference aa64/x64 assemblers already
parse a symbolic target and emit a branch relocation.

Add rv_reloc_target: parse the operand as a sym-expr; on a symbolic target
emit R_RV_BRANCH (B-format) or R_RV_JAL (J-format) at the instruction offset
(the position the caller is about to write) and encode imm=0; on a bare
constant keep the existing numeric-displacement behavior. These are exactly
the reloc kinds the rv64 codegen already emits via emit_label_ref, so the
apply path is already exercised.

Verified: symbolic beq/bne/beqz/j/jal now emit the right RV_BRANCH/RV_JAL
relocs against the right symbols, numeric offsets still assemble, and an
rv64_branch_sym encode corpus case is added (runs under the rv64 asm lane).

Not yet covered (larger, separate): %hi/%lo/%pcrel reloc-operator operand
syntax and the multi-word call/tail/la/lla pseudo-instructions.

Diffstat:
M src/arch/rv64/asm.c                          | 33 +++++++++++++++++++++++++--------
A test/asm/encode/rv64_branch_sym.expected.hex |  1 +
A test/asm/encode/rv64_branch_sym.s            | 10 ++++++++++
A test/asm/encode/rv64_branch_sym.targets      |  2 ++
4 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/src/arch/rv64/asm.c b/src/arch/rv64/asm.c
@@ -287,6 +287,23 @@ static u32 enc_c_cj(u32 match, i32 imm) {
          (((u >> 1) & 7u) << 3) | (((u >> 5) & 1u) << 2);
 }
 
+/* Parse a branch/jump target operand. With a symbolic target (a label), emit
+ * the relocation at the current position — which is exactly where the caller
+ * is about to write this instruction word — and return 0 as the placeholder
+ * immediate. With a bare constant, return it as the PC-relative byte
+ * displacement (preserving the existing numeric-offset corpus behavior). */
+static i32 rv_reloc_target(AsmDriver* d, RelocKind kind) {
+  ObjSymId sym = OBJ_SYM_NONE;
+  i64 off = 0;
+  asm_driver_parse_sym_expr(d, &sym, &off);
+  if (sym != OBJ_SYM_NONE) {
+    MCEmitter* mc = asm_driver_mc(d);
+    mc->emit_reloc_at(mc, mc->section_id, mc->pos(mc), kind, sym, off, 0, 0);
+    return 0;
+  }
+  return (i32)off;
+}
+
 /* Per-format parser — reads the operand list off the driver and returns
  * the encoded 32-bit word, given the matched descriptor. */
 static u32 assemble_one(AsmDriver* d, const Rv64InsnDesc* desc) {
@@ -383,30 +400,30 @@ static u32 assemble_one(AsmDriver* d, const Rv64InsnDesc* desc) {
       return enc_u(m, rd, (u32)imm);
 
     case RV64_FMT_J:
+      /* `j label` / `jal rd, label` accept a symbolic target (R_RV_JAL) or a
+       * bare numeric displacement. */
       if ((desc->flags & RV64_ASMFL_ALIAS) &&
           slice_eq_cstr(desc->mnemonic, "j")) {
-        imm = (i32)asm_driver_parse_const(d);
-        return enc_j(m, 0u, imm);
+        return enc_j(m, 0u, rv_reloc_target(d, R_RV_JAL));
       }
       rd = parse_xreg(d);
       expect_comma(d);
-      imm = (i32)asm_driver_parse_const(d);
-      return enc_j(m, rd, imm);
+      return enc_j(m, rd, rv_reloc_target(d, R_RV_JAL));
 
     case RV64_FMT_B:
+      /* `beq rs1, rs2, label` (and beqz/bnez aliases) accept a symbolic target
+       * (R_RV_BRANCH) or a bare numeric displacement. */
       if (desc->flags & RV64_ASMFL_ALIAS) {
         /* beqz / bnez: rs, off. */
         rs1 = parse_xreg(d);
         expect_comma(d);
-        imm = (i32)asm_driver_parse_const(d);
-        return enc_b(m, rs1, 0u, imm);
+        return enc_b(m, rs1, 0u, rv_reloc_target(d, R_RV_BRANCH));
       }
       rs1 = parse_xreg(d);
       expect_comma(d);
       rs2 = parse_xreg(d);
       expect_comma(d);
-      imm = (i32)asm_driver_parse_const(d);
-      return enc_b(m, rs1, rs2, imm);
+      return enc_b(m, rs1, rs2, rv_reloc_target(d, R_RV_BRANCH));
 
     case RV64_FMT_LOAD:
       rd = (desc->flags & RV64_ASMFL_FP) ? parse_freg(d) : parse_xreg(d);
diff --git a/test/asm/encode/rv64_branch_sym.expected.hex b/test/asm/encode/rv64_branch_sym.expected.hex
@@ -0,0 +1 @@
+6300b5006310d600630007006f000000ef0000006780000067800000
diff --git a/test/asm/encode/rv64_branch_sym.s b/test/asm/encode/rv64_branch_sym.s
@@ -0,0 +1,10 @@
+.text
+  beq a0, a1, foo
+  bne a2, a3, bar
+  beqz a4, foo
+  j foo
+  jal ra, bar
+foo:
+  ret
+bar:
+  ret
diff --git a/test/asm/encode/rv64_branch_sym.targets b/test/asm/encode/rv64_branch_sym.targets
@@ -0,0 +1 @@
+rv64
+\ No newline at end of file