commit cfb1975a7bd0c912109521795062f394e28aba18
parent dfb02970752ef0b82a41854c3f10e23ec96c3a4f
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Fri, 29 May 2026 15:30:11 -0700
rv64 asm: parse %hi/%lo/%pcrel_hi/%pcrel_lo relocation-operator operands
Adds GNU-as relocation-modifier syntax to the standalone assembler's rv64
immediate/memory-offset operands: lui/auipc accept %hi/%pcrel_hi/%got_pcrel_hi
(R_RV_HI20/PCREL_HI20/GOT_HI20); addi and loads accept %lo/%pcrel_lo
(R_RV_LO12_I/PCREL_LO12_I); stores get the S-type variants. A leading '%' at an
operand position is unambiguous (modulo is infix). Emits the relocation at the
instruction offset with a zero placeholder, matching codegen. Corpus
rv64_reloc_modifiers is byte- and reloc-identical to llvm-mc (verified via
llvm-objdump -r on the cfree-emitted object).
Diffstat:
4 files changed, 125 insertions(+), 1 deletion(-)
diff --git a/src/arch/rv64/asm.c b/src/arch/rv64/asm.c
@@ -45,9 +45,20 @@ struct Rv64Asm {
typedef struct Rv64Asm Rv64Asm;
+/* Relocation modifier on a 12-bit immediate offset (`%lo`/`%pcrel_lo`).
+ * RV_MEMMOD_NONE means a plain numeric displacement in `disp`. */
+typedef enum RvMemMod {
+ RV_MEMMOD_NONE = 0, /* no modifier; the value parse_mem starts from */
+ RV_MEMMOD_LO, /* `%lo(sym)`: emitted as R_RV_LO12_I or R_RV_LO12_S */
+ RV_MEMMOD_PCREL_LO, /* `%pcrel_lo(label)`: R_RV_PCREL_LO12_I or R_RV_PCREL_LO12_S */
+} RvMemMod;
+
typedef struct Rv64Mem { /* parsed memory operand: offset followed by `(base)` */
i32 disp; /* numeric displacement; left at 0 when `mod` is set */
u32 base; /* base register, as returned by parse_xreg */
+ RvMemMod mod; /* reloc modifier on the offset, or RV_MEMMOD_NONE */
+ ObjSymId sym; /* symbol when mod != NONE */
+ i64 off; /* addend when mod != NONE */
} Rv64Mem;
static int sym_to_cstr(AsmDriver* d, Sym s, char* out, size_t cap) {
@@ -100,15 +111,105 @@ static void expect_comma(AsmDriver* d) {
if (!asm_driver_eat_comma(d)) asm_driver_panic(d, "rv64 asm: expected ','");
}
+/* Position of a `%mod(sym)` relocation operand: the 20-bit upper field of
+ * lui/auipc, or a 12-bit I-type (addi/load) or S-type (store) immediate. */
+typedef enum RvModPos {
+ RV_MODPOS_HI20, /* accepts hi, pcrel_hi, got_pcrel_hi */
+ RV_MODPOS_LO_I, /* accepts lo, pcrel_lo; yields the LO12_I kinds */
+ RV_MODPOS_LO_S, /* accepts lo, pcrel_lo; yields the LO12_S kinds. NOTE(review): no caller visible in this change passes it */
+} RvModPos;
+
+/* Map a relocation-modifier name (`hi`, `lo`, `pcrel_hi`, `pcrel_lo`,
+ * `got_pcrel_hi`) to the RelocKind for `pos`; an unknown or misplaced name
+ * panics. NOTE(review): assumes asm_driver_panic never returns - confirm. */
+static RelocKind rv_mod_to_reloc(AsmDriver* d, Slice name, RvModPos pos) {
+ if (pos == RV_MODPOS_HI20) { /* the position itself does not distinguish lui from auipc */
+ if (slice_eq_cstr(name, "hi")) return R_RV_HI20;
+ if (slice_eq_cstr(name, "pcrel_hi")) return R_RV_PCREL_HI20;
+ if (slice_eq_cstr(name, "got_pcrel_hi")) return R_RV_GOT_HI20;
+ } else {
+ int store = (pos == RV_MODPOS_LO_S); /* S-type field rather than I-type field */
+ if (slice_eq_cstr(name, "lo")) return store ? R_RV_LO12_S : R_RV_LO12_I;
+ if (slice_eq_cstr(name, "pcrel_lo"))
+ return store ? R_RV_PCREL_LO12_S : R_RV_PCREL_LO12_I;
+ }
+ asm_driver_panic(d, "rv64 asm: relocation modifier not valid here"); /* reached for every name not matched above */
+}
+
+/* If the next token is `%`, parse a `%mod(sym{+off})` relocation operand and
+ * return 1, storing the symbol, the addend and the RelocKind valid at `pos`
+ * through the non-NULL out-pointers. Nothing is emitted here: the caller
+ * emits the relocation and encodes a zero placeholder in the immediate field.
+ * Returns 0 without consuming any token when the next token is not `%`. A
+ * leading `%` is unambiguous here: modulo is infix and never starts an operand. */
+static int rv_parse_mod_reloc(AsmDriver* d, RvModPos pos, ObjSymId* sym_out,
+ i64* off_out, RelocKind* kind_out) {
+ if (!asm_driver_tok_is_punct(asm_driver_peek(d), '%')) return 0;
+ (void)asm_driver_next(d); /* eat '%' */
+ AsmTok name = asm_driver_next(d);
+ if (name.kind != ASM_TOK_IDENT)
+ asm_driver_panic(d, "rv64 asm: expected relocation modifier name");
+ Slice nm = pool_slice(asm_driver_pool(d), name.v.ident);
+ asm_driver_expect_punct(d, '(', "'(' after relocation modifier");
+ ObjSymId sym = OBJ_SYM_NONE;
+ i64 off = 0;
+ asm_driver_parse_sym_expr(d, &sym, &off);
+ asm_driver_expect_punct(d, ')', "')' after %mod(sym)");
+ RelocKind k = rv_mod_to_reloc(d, nm, pos); /* name is validated last, after the whole operand is consumed */
+ if (sym_out) *sym_out = sym; /* each out-pointer is optional */
+ if (off_out) *off_out = off;
+ if (kind_out) *kind_out = k;
+ return 1;
+}
+
+/* Emit a relocation for a U-type / I-type immediate `%mod(sym)` operand at
+ * the current emit position; returns 1 if one was present, otherwise 0. */
+static int rv_emit_imm_mod_reloc(AsmDriver* d, RvModPos pos) {
+ ObjSymId sym;
+ i64 off;
+ RelocKind k;
+ if (!rv_parse_mod_reloc(d, pos, &sym, &off, &k)) return 0; /* no `%`: nothing consumed, caller parses a constant */
+ MCEmitter* mc = asm_driver_mc(d);
+ mc->emit_reloc_at(mc, mc->section_id, mc->pos(mc), k, sym, off, 0, 0); /* NOTE(review): meaning of the trailing 0, 0 is not visible here */
+ return 1;
+}
+
static Rv64Mem parse_mem(AsmDriver* d) { /* parses `disp(base)` or `%mod(sym)(base)` */
Rv64Mem m;
- m.disp = (i32)asm_driver_parse_const(d);
+ m.disp = 0; /* stays 0 as the placeholder when a modifier supplies the offset */
+ m.mod = RV_MEMMOD_NONE;
+ m.sym = OBJ_SYM_NONE;
+ m.off = 0;
+ if (asm_driver_tok_is_punct(asm_driver_peek(d), '%')) {
+ /* `%lo(sym)(base)` / `%pcrel_lo(label)(base)` — record the modifier; the
+ * load/store caller emits the I- or S-type relocation. */
+ ObjSymId sym;
+ i64 off;
+ RelocKind k;
+ (void)rv_parse_mod_reloc(d, RV_MODPOS_LO_I, &sym, &off, &k); /* cannot return 0 here: `%` was just peeked, so sym, off and k are set */
+ m.mod = (k == R_RV_PCREL_LO12_I) ? RV_MEMMOD_PCREL_LO : RV_MEMMOD_LO; /* otherwise k is R_RV_LO12_I, the only other LO_I result */
+ m.sym = sym;
+ m.off = off;
+ } else {
+ m.disp = (i32)asm_driver_parse_const(d);
+ }
asm_driver_expect_punct(d, '(', "'(' in rv64 memory operand");
m.base = parse_xreg(d);
asm_driver_expect_punct(d, ')', "')' in rv64 memory operand");
return m;
}
+/* Emit the I/S-type relocation recorded by parse_mem for a `%lo`/`%pcrel_lo`
+ * memory offset, picking the S-type variant for stores. */
+static void rv_emit_mem_mod_reloc(AsmDriver* d, const Rv64Mem* m, int is_store) {
+ if (m->mod == RV_MEMMOD_NONE) return; /* plain numeric displacement: no relocation */
+ RelocKind k = (m->mod == RV_MEMMOD_PCREL_LO)
+ ? (is_store ? R_RV_PCREL_LO12_S : R_RV_PCREL_LO12_I)
+ : (is_store ? R_RV_LO12_S : R_RV_LO12_I); /* any other non-NONE mod is treated as `%lo` */
+ MCEmitter* mc = asm_driver_mc(d);
+ mc->emit_reloc_at(mc, mc->section_id, mc->pos(mc), k, m->sym, m->off, 0, 0); /* recorded at the current emit position, like rv_emit_imm_mod_reloc */
+}
+
/* Fence pred/succ parser — accepts a string like "rw" / "iorw" / "0" /
* a numeric literal. Returns the 4-bit mask: bit3=i, bit2=o, bit1=r,
* bit0=w. */
@@ -369,6 +470,8 @@ static u32 assemble_one(AsmDriver* d, const Rv64InsnDesc* desc) {
expect_comma(d);
rs1 = parse_xreg(d);
expect_comma(d);
+ /* `addi rd, rs1, %lo(sym)` / `%pcrel_lo(label)` → R_RV_LO12_I. */
+ if (rv_emit_imm_mod_reloc(d, RV_MODPOS_LO_I)) return enc_i(m, rd, rs1, 0);
imm = (i32)asm_driver_parse_const(d);
return enc_i(m, rd, rs1, imm);
@@ -389,6 +492,9 @@ static u32 assemble_one(AsmDriver* d, const Rv64InsnDesc* desc) {
case RV64_FMT_U:
rd = parse_xreg(d);
expect_comma(d);
+ /* `lui rd, %hi(sym)` → R_RV_HI20; `auipc rd, %pcrel_hi(sym)` →
+ * R_RV_PCREL_HI20 (or %got_pcrel_hi → R_RV_GOT_HI20). */
+ if (rv_emit_imm_mod_reloc(d, RV_MODPOS_HI20)) return enc_u(m, rd, 0);
imm = (i32)asm_driver_parse_const(d);
/* LUI/AUIPC immediate is the upper-20 value: the input is interpreted
* as the literal 20-bit value (already shifted-out form). */
@@ -424,24 +530,28 @@ static u32 assemble_one(AsmDriver* d, const Rv64InsnDesc* desc) {
rd = (desc->flags & RV64_ASMFL_FP) ? parse_freg(d) : parse_xreg(d);
expect_comma(d);
mem = parse_mem(d);
+ rv_emit_mem_mod_reloc(d, &mem, /*is_store=*/0);
return enc_i(m, rd, mem.base, mem.disp);
case RV64_FMT_FP_LOAD:
rd = parse_freg(d);
expect_comma(d);
mem = parse_mem(d);
+ rv_emit_mem_mod_reloc(d, &mem, /*is_store=*/0);
return enc_i(m, rd, mem.base, mem.disp);
case RV64_FMT_STORE:
rs2 = (desc->flags & RV64_ASMFL_FP) ? parse_freg(d) : parse_xreg(d);
expect_comma(d);
mem = parse_mem(d);
+ rv_emit_mem_mod_reloc(d, &mem, /*is_store=*/1);
return enc_s(m, rs2, mem.base, mem.disp);
case RV64_FMT_FP_STORE:
rs2 = parse_freg(d);
expect_comma(d);
mem = parse_mem(d);
+ rv_emit_mem_mod_reloc(d, &mem, /*is_store=*/1);
return enc_s(m, rs2, mem.base, mem.disp);
case RV64_FMT_JALR:
diff --git a/test/asm/encode/rv64_reloc_modifiers.expected.hex b/test/asm/encode/rv64_reloc_modifiers.expected.hex
@@ -0,0 +1 @@
+3705000013050500832505002320b50017060000130606008336060067800000
diff --git a/test/asm/encode/rv64_reloc_modifiers.s b/test/asm/encode/rv64_reloc_modifiers.s
@@ -0,0 +1,11 @@
+.text
+hilo:
+ lui a0, %hi(sym)
+ addi a0, a0, %lo(sym)
+ lw a1, %lo(sym)(a0)
+ sw a1, %lo(sym)(a0)
+pc0:
+ auipc a2, %pcrel_hi(sym)
+ addi a2, a2, %pcrel_lo(pc0)
+ ld a3, %pcrel_lo(pc0)(a2)
+ ret
diff --git a/test/asm/encode/rv64_reloc_modifiers.targets b/test/asm/encode/rv64_reloc_modifiers.targets
@@ -0,0 +1 @@
+rv64
+\ No newline at end of file