commit 894150d6695638fa176f671a4b288675bedbd29b
parent 74ca227e2b3673d0e7955f10349183737e72b5cf
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sat, 30 May 2026 21:30:12 -0700
rv64: emit canonical `addi rd,rs,%pcrel_lo` (not 3-operand `mv`) — clang-standard
The only remaining clang-lane rejection across all three arches was rv64: cc -S
spelled a %pcrel_lo low-half (an ADDI with a relocated zero immediate the
disassembler aliases to `mv rd, rs`) as a non-standard 3-operand
`mv rd, rs, %pcrel_lo(L)`. cfree-as accepted it; clang rejects it ("invalid
operand for instruction") in 54 of the 312 cases.
Fix (disasm side, src/api/asm_emit.c): when an instruction carrying a low-half
(ARCH_RELOC_SURG_RV_LO12) reloc disassembles to the `mv` alias, de-alias it to
the canonical `addi`, so the low-half surgery yields `addi rd, rs, %pcrel_lo(L)`
— which clang accepts and cfree-as already parses (the addi I-type %lo path).
Cleanup (asm side, src/arch/rv64/asm.c): drop the now-unused non-standard
3-operand `mv ...,%pcrel_lo` parsing; standard 2-operand `mv` is unchanged.
clang now assembles the whole toy corpus on all three ELF targets:
x86_64 312/312, riscv64 312/312, aarch64 312/312 (`clang -c` of cc -S).
No regression: test-asm-rv64 43/0 (decode goldens unaffected — the de-alias
only triggers on a relocated instruction), rv64 byte-faithfulness unchanged
(282/312; addi and the mv alias encode identically). cfree-as round-trip and
cross-exec are unaffected (the emitted bytes are identical).
Diffstat:
2 files changed, 23 insertions(+), 8 deletions(-)
diff --git a/src/api/asm_emit.c b/src/api/asm_emit.c
@@ -1147,8 +1147,25 @@ static CfreeStatus emit_disasm_range(Writer* w, const EmitCtx* x,
st = w_str(w, "\t");
if (st != CFREE_OK) return st;
- st = cfree_writer_write(w, insn.mnemonic.s, insn.mnemonic.len);
- if (st != CFREE_OK) return st;
+ {
+ /* De-alias a relocated `mv rd, rs` — an ADDI whose %pcrel_lo/%lo
+ * immediate the disassembler aliased to `mv` because the encoded imm is
+ * 0 — to the canonical `addi rd, rs, %lo(...)`. The RV_LO12 surgery in
+ * emit_operands appends the `%lo(...)` as the third operand, and a
+ * 3-operand `mv` is non-standard (clang rejects it). */
+ CfreeSlice mn = insn.mnemonic;
+ if (mn.len == 2 && mn.s[0] == 'm' && mn.s[1] == 'v') {
+ const SecReloc* lr = reloc_in_range(x->relocs, x->nrelocs, off, n);
+ ArchRelocOperand ro = {0};
+ if (lr && arch_reloc_operand(x->c, lr->kind, &ro) &&
+ ro.surg == ARCH_RELOC_SURG_RV_LO12) {
+ mn.s = "addi";
+ mn.len = 4;
+ }
+ }
+ st = cfree_writer_write(w, mn.s, mn.len);
+ if (st != CFREE_OK) return st;
+ }
if (insn.operands.len) {
st = w_str(w, "\t");
if (st != CFREE_OK) return st;
diff --git a/src/arch/rv64/asm.c b/src/arch/rv64/asm.c
@@ -446,15 +446,13 @@ static u32 assemble_one(AsmDriver* d, const Rv64InsnDesc* desc) {
* need a multi-word expansion), so it never reaches here. */
if (desc->flags & RV64_ASMFL_ALIAS) {
if (slice_eq_cstr(desc->mnemonic, "mv")) {
+ /* Standard two-operand `mv rd, rs` = `addi rd, rs, 0`. (A %pcrel_lo
+ * low-half is emitted as the canonical `addi rd, rs, %pcrel_lo(L)`,
+ * not a non-standard 3-operand `mv`, so it lands in the ADDI path
+ * below — matching clang.) */
rd = parse_xreg(d);
expect_comma(d);
rs1 = parse_xreg(d);
- /* `cc -S` spells an ADDI rd,rs,%pcrel_lo(L) low-half (imm 0) as the
- * `mv` alias plus a trailing relocation operand: `mv rd, rs,
- * %pcrel_lo(L)`. Fold it back into ADDI with the I-type reloc. */
- if (asm_driver_eat_comma(d) &&
- !rv_emit_imm_mod_reloc(d, RV_MODPOS_LO_I))
- asm_driver_panic(d, "rv64 asm: mv: expected %lo/%pcrel_lo operand");
return enc_i(m, rd, rs1, 0);
}
if (slice_eq_cstr(desc->mnemonic, "sext.w")) {