kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 894150d6695638fa176f671a4b288675bedbd29b
parent 74ca227e2b3673d0e7955f10349183737e72b5cf
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sat, 30 May 2026 21:30:12 -0700

rv64: emit canonical `addi rd,rs,%pcrel_lo` (not 3-operand `mv`) — clang-standard

The only remaining clang-lane rejection across all three arches was rv64: cc -S
spelled a %pcrel_lo low-half (an ADDI with a relocated zero immediate, which the
disassembler aliases to `mv rd, rs`) as a non-standard 3-operand
`mv rd, rs, %pcrel_lo(L)`. cfree-as accepted it, but clang rejects it ("invalid
operand for instruction"); this affected 54/312 cases.

Fix (disasm side, src/api/asm_emit.c): when an instruction carrying a low-half
(ARCH_RELOC_SURG_RV_LO12) reloc disassembles to the `mv` alias, de-alias it to
the canonical `addi`, so the low-half surgery yields `addi rd, rs, %pcrel_lo(L)`
— which clang accepts and cfree-as already parses (the addi I-type %lo path).
Cleanup (asm side, src/arch/rv64/asm.c): drop the now-unused non-standard
3-operand `mv ...,%pcrel_lo` parsing; standard 2-operand `mv` is unchanged.

clang now assembles the whole toy corpus on all three ELF targets:
x86_64 312/312, riscv64 312/312, aarch64 312/312 (`clang -c` of cc -S).
No regression: test-asm-rv64 43/0 (decode goldens unaffected — the de-alias
only triggers on a relocated instruction), rv64 byte-faithfulness unchanged
(282/312; addi and the mv alias encode identically). cfree-as round-trip and
cross-exec are unaffected (the emitted bytes are identical).

Diffstat:
Msrc/api/asm_emit.c | 21+++++++++++++++++++--
Msrc/arch/rv64/asm.c | 10++++------
2 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/src/api/asm_emit.c b/src/api/asm_emit.c @@ -1147,8 +1147,25 @@ static CfreeStatus emit_disasm_range(Writer* w, const EmitCtx* x, st = w_str(w, "\t"); if (st != CFREE_OK) return st; - st = cfree_writer_write(w, insn.mnemonic.s, insn.mnemonic.len); - if (st != CFREE_OK) return st; + { + /* De-alias a relocated `mv rd, rs` — an ADDI whose %pcrel_lo/%lo + * immediate the disassembler aliased to `mv` because the encoded imm is + * 0 — to the canonical `addi rd, rs, %lo(...)`. The RV_LO12 surgery in + * emit_operands appends the `%lo(...)` as the third operand, and a + * 3-operand `mv` is non-standard (clang rejects it). */ + CfreeSlice mn = insn.mnemonic; + if (mn.len == 2 && mn.s[0] == 'm' && mn.s[1] == 'v') { + const SecReloc* lr = reloc_in_range(x->relocs, x->nrelocs, off, n); + ArchRelocOperand ro = {0}; + if (lr && arch_reloc_operand(x->c, lr->kind, &ro) && + ro.surg == ARCH_RELOC_SURG_RV_LO12) { + mn.s = "addi"; + mn.len = 4; + } + } + st = cfree_writer_write(w, mn.s, mn.len); + if (st != CFREE_OK) return st; + } if (insn.operands.len) { st = w_str(w, "\t"); if (st != CFREE_OK) return st; diff --git a/src/arch/rv64/asm.c b/src/arch/rv64/asm.c @@ -446,15 +446,13 @@ static u32 assemble_one(AsmDriver* d, const Rv64InsnDesc* desc) { * need a multi-word expansion), so it never reaches here. */ if (desc->flags & RV64_ASMFL_ALIAS) { if (slice_eq_cstr(desc->mnemonic, "mv")) { + /* Standard two-operand `mv rd, rs` = `addi rd, rs, 0`. (A %pcrel_lo + * low-half is emitted as the canonical `addi rd, rs, %pcrel_lo(L)`, + * not a non-standard 3-operand `mv`, so it lands in the ADDI path + * below — matching clang.) */ rd = parse_xreg(d); expect_comma(d); rs1 = parse_xreg(d); - /* `cc -S` spells an ADDI rd,rs,%pcrel_lo(L) low-half (imm 0) as the - * `mv` alias plus a trailing relocation operand: `mv rd, rs, - * %pcrel_lo(L)`. Fold it back into ADDI with the I-type reloc. 
*/ - if (asm_driver_eat_comma(d) && - !rv_emit_imm_mod_reloc(d, RV_MODPOS_LO_I)) - asm_driver_panic(d, "rv64 asm: mv: expected %lo/%pcrel_lo operand"); return enc_i(m, rd, rs1, 0); } if (slice_eq_cstr(desc->mnemonic, "sext.w")) {