kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit cfb1975a7bd0c912109521795062f394e28aba18
parent dfb02970752ef0b82a41854c3f10e23ec96c3a4f
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Fri, 29 May 2026 15:30:11 -0700

rv64 asm: parse %hi/%lo/%pcrel_hi/%pcrel_lo relocation-operator operands

Adds GNU-as relocation-modifier syntax to the standalone assembler's rv64
immediate/memory-offset operands: lui/auipc accept %hi/%pcrel_hi/%got_pcrel_hi
(R_RV_HI20/PCREL_HI20/GOT_HI20); addi and loads accept %lo/%pcrel_lo
(R_RV_LO12_I/PCREL_LO12_I); stores get the S-type variants. A leading '%' at an
operand position is unambiguous: modulo is an infix operator, so it can never
begin an operand. The relocation is emitted at the instruction's offset and the
immediate field is encoded as a zero placeholder, matching codegen. Corpus
rv64_reloc_modifiers is byte- and reloc-identical to llvm-mc (verified via
llvm-objdump -r on the cfree-emitted object).

Diffstat:
M src/arch/rv64/asm.c | 112 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
A test/asm/encode/rv64_reloc_modifiers.expected.hex | 1 +
A test/asm/encode/rv64_reloc_modifiers.s | 11 +++++++++++
A test/asm/encode/rv64_reloc_modifiers.targets | 2 ++
4 files changed, 125 insertions(+), 1 deletion(-)

diff --git a/src/arch/rv64/asm.c b/src/arch/rv64/asm.c @@ -45,9 +45,20 @@ struct Rv64Asm { typedef struct Rv64Asm Rv64Asm; +/* Relocation modifier on a 12-bit immediate offset (`%lo`/`%pcrel_lo`). + * RV_MEMMOD_NONE means a plain numeric displacement in `disp`. */ +typedef enum RvMemMod { + RV_MEMMOD_NONE = 0, + RV_MEMMOD_LO, + RV_MEMMOD_PCREL_LO, +} RvMemMod; + typedef struct Rv64Mem { i32 disp; u32 base; + RvMemMod mod; /* reloc modifier on the offset, or RV_MEMMOD_NONE */ + ObjSymId sym; /* symbol when mod != NONE */ + i64 off; /* addend when mod != NONE */ } Rv64Mem; static int sym_to_cstr(AsmDriver* d, Sym s, char* out, size_t cap) { @@ -100,15 +111,105 @@ static void expect_comma(AsmDriver* d) { if (!asm_driver_eat_comma(d)) asm_driver_panic(d, "rv64 asm: expected ','"); } +/* Position of a `%mod(sym)` relocation operand: the 20-bit upper field of + * lui/auipc, or a 12-bit I-type (addi/load) or S-type (store) immediate. */ +typedef enum RvModPos { + RV_MODPOS_HI20, + RV_MODPOS_LO_I, + RV_MODPOS_LO_S, +} RvModPos; + +/* Map a relocation-modifier name (`hi`, `lo`, `pcrel_hi`, `pcrel_lo`, + * `got_pcrel_hi`) to the RelocKind appropriate for `pos`. Panics on a name + * that is not valid at this operand position. */ +static RelocKind rv_mod_to_reloc(AsmDriver* d, Slice name, RvModPos pos) { + if (pos == RV_MODPOS_HI20) { + if (slice_eq_cstr(name, "hi")) return R_RV_HI20; + if (slice_eq_cstr(name, "pcrel_hi")) return R_RV_PCREL_HI20; + if (slice_eq_cstr(name, "got_pcrel_hi")) return R_RV_GOT_HI20; + } else { + int store = (pos == RV_MODPOS_LO_S); + if (slice_eq_cstr(name, "lo")) return store ? R_RV_LO12_S : R_RV_LO12_I; + if (slice_eq_cstr(name, "pcrel_lo")) + return store ? 
R_RV_PCREL_LO12_S : R_RV_PCREL_LO12_I; + } + asm_driver_panic(d, "rv64 asm: relocation modifier not valid here"); +} + +/* If the next token is `%`, parse a `%mod(sym{+off})` relocation operand, + * emit the relocation at the current emit position (where the about-to-be- + * returned instruction word will land), and return 1. The caller encodes a + * zero placeholder in the immediate field. Returns 0 if there is no modifier + * (leaving the stream untouched for normal constant parsing). A leading `%` + * is unambiguous here: modulo is infix and never starts an operand. */ +static int rv_parse_mod_reloc(AsmDriver* d, RvModPos pos, ObjSymId* sym_out, + i64* off_out, RelocKind* kind_out) { + if (!asm_driver_tok_is_punct(asm_driver_peek(d), '%')) return 0; + (void)asm_driver_next(d); /* eat '%' */ + AsmTok name = asm_driver_next(d); + if (name.kind != ASM_TOK_IDENT) + asm_driver_panic(d, "rv64 asm: expected relocation modifier name"); + Slice nm = pool_slice(asm_driver_pool(d), name.v.ident); + asm_driver_expect_punct(d, '(', "'(' after relocation modifier"); + ObjSymId sym = OBJ_SYM_NONE; + i64 off = 0; + asm_driver_parse_sym_expr(d, &sym, &off); + asm_driver_expect_punct(d, ')', "')' after %mod(sym)"); + RelocKind k = rv_mod_to_reloc(d, nm, pos); + if (sym_out) *sym_out = sym; + if (off_out) *off_out = off; + if (kind_out) *kind_out = k; + return 1; +} + +/* Emit a relocation for a U-type / I-type immediate `%mod(sym)` operand at + * the current instruction position; returns 1 if one was present. 
*/ +static int rv_emit_imm_mod_reloc(AsmDriver* d, RvModPos pos) { + ObjSymId sym; + i64 off; + RelocKind k; + if (!rv_parse_mod_reloc(d, pos, &sym, &off, &k)) return 0; + MCEmitter* mc = asm_driver_mc(d); + mc->emit_reloc_at(mc, mc->section_id, mc->pos(mc), k, sym, off, 0, 0); + return 1; +} + static Rv64Mem parse_mem(AsmDriver* d) { Rv64Mem m; - m.disp = (i32)asm_driver_parse_const(d); + m.disp = 0; + m.mod = RV_MEMMOD_NONE; + m.sym = OBJ_SYM_NONE; + m.off = 0; + if (asm_driver_tok_is_punct(asm_driver_peek(d), '%')) { + /* `%lo(sym)(base)` / `%pcrel_lo(label)(base)` — record the modifier; the + * load/store caller emits the I- or S-type relocation. */ + ObjSymId sym; + i64 off; + RelocKind k; + (void)rv_parse_mod_reloc(d, RV_MODPOS_LO_I, &sym, &off, &k); + m.mod = (k == R_RV_PCREL_LO12_I) ? RV_MEMMOD_PCREL_LO : RV_MEMMOD_LO; + m.sym = sym; + m.off = off; + } else { + m.disp = (i32)asm_driver_parse_const(d); + } asm_driver_expect_punct(d, '(', "'(' in rv64 memory operand"); m.base = parse_xreg(d); asm_driver_expect_punct(d, ')', "')' in rv64 memory operand"); return m; } +/* Emit the I/S-type relocation recorded by parse_mem for a `%lo`/`%pcrel_lo` + * memory offset, picking the S-type variant for stores. */ +static void rv_emit_mem_mod_reloc(AsmDriver* d, const Rv64Mem* m, int is_store) { + if (m->mod == RV_MEMMOD_NONE) return; + RelocKind k = (m->mod == RV_MEMMOD_PCREL_LO) + ? (is_store ? R_RV_PCREL_LO12_S : R_RV_PCREL_LO12_I) + : (is_store ? R_RV_LO12_S : R_RV_LO12_I); + MCEmitter* mc = asm_driver_mc(d); + mc->emit_reloc_at(mc, mc->section_id, mc->pos(mc), k, m->sym, m->off, 0, 0); +} + /* Fence pred/succ parser — accepts a string like "rw" / "iorw" / "0" / * a numeric literal. Returns the 4-bit mask: bit3=i, bit2=o, bit1=r, * bit0=w. */ @@ -369,6 +470,8 @@ static u32 assemble_one(AsmDriver* d, const Rv64InsnDesc* desc) { expect_comma(d); rs1 = parse_xreg(d); expect_comma(d); + /* `addi rd, rs1, %lo(sym)` / `%pcrel_lo(label)` → R_RV_LO12_I. 
*/ + if (rv_emit_imm_mod_reloc(d, RV_MODPOS_LO_I)) return enc_i(m, rd, rs1, 0); imm = (i32)asm_driver_parse_const(d); return enc_i(m, rd, rs1, imm); @@ -389,6 +492,9 @@ static u32 assemble_one(AsmDriver* d, const Rv64InsnDesc* desc) { case RV64_FMT_U: rd = parse_xreg(d); expect_comma(d); + /* `lui rd, %hi(sym)` → R_RV_HI20; `auipc rd, %pcrel_hi(sym)` → + * R_RV_PCREL_HI20 (or %got_pcrel_hi → R_RV_GOT_HI20). */ + if (rv_emit_imm_mod_reloc(d, RV_MODPOS_HI20)) return enc_u(m, rd, 0); imm = (i32)asm_driver_parse_const(d); /* LUI/AUIPC immediate is the upper-20 value: the input is interpreted * as the literal 20-bit value (already shifted-out form). */ @@ -424,24 +530,28 @@ static u32 assemble_one(AsmDriver* d, const Rv64InsnDesc* desc) { rd = (desc->flags & RV64_ASMFL_FP) ? parse_freg(d) : parse_xreg(d); expect_comma(d); mem = parse_mem(d); + rv_emit_mem_mod_reloc(d, &mem, /*is_store=*/0); return enc_i(m, rd, mem.base, mem.disp); case RV64_FMT_FP_LOAD: rd = parse_freg(d); expect_comma(d); mem = parse_mem(d); + rv_emit_mem_mod_reloc(d, &mem, /*is_store=*/0); return enc_i(m, rd, mem.base, mem.disp); case RV64_FMT_STORE: rs2 = (desc->flags & RV64_ASMFL_FP) ? 
parse_freg(d) : parse_xreg(d); expect_comma(d); mem = parse_mem(d); + rv_emit_mem_mod_reloc(d, &mem, /*is_store=*/1); return enc_s(m, rs2, mem.base, mem.disp); case RV64_FMT_FP_STORE: rs2 = parse_freg(d); expect_comma(d); mem = parse_mem(d); + rv_emit_mem_mod_reloc(d, &mem, /*is_store=*/1); return enc_s(m, rs2, mem.base, mem.disp); case RV64_FMT_JALR: diff --git a/test/asm/encode/rv64_reloc_modifiers.expected.hex b/test/asm/encode/rv64_reloc_modifiers.expected.hex @@ -0,0 +1 @@ +3705000013050500832505002320b50017060000130606008336060067800000 diff --git a/test/asm/encode/rv64_reloc_modifiers.s b/test/asm/encode/rv64_reloc_modifiers.s @@ -0,0 +1,11 @@ +.text +hilo: + lui a0, %hi(sym) + addi a0, a0, %lo(sym) + lw a1, %lo(sym)(a0) + sw a1, %lo(sym)(a0) +pc0: + auipc a2, %pcrel_hi(sym) + addi a2, a2, %pcrel_lo(pc0) + ld a3, %pcrel_lo(pc0)(a2) + ret diff --git a/test/asm/encode/rv64_reloc_modifiers.targets b/test/asm/encode/rv64_reloc_modifiers.targets @@ -0,0 +1 @@ +rv64 +\ No newline at end of file