commit 892d1268979e7a1df6d56de09516d0de77dc140b
parent 94d7d4bd6cfe1d24cebed41aec3450f1f059cc09
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Thu, 21 May 2026 07:06:27 -0700
Add rv64 asm/disasm parity scaffolding
Diffstat:
17 files changed, 741 insertions(+), 40 deletions(-)
diff --git a/doc/RV64_PARITY_CHECKLIST.md b/doc/RV64_PARITY_CHECKLIST.md
@@ -0,0 +1,171 @@
+# rv64 parity checklist
+
+Goal: bring `riscv64` / `rv64` to the same practical coverage as `aarch64`
+across standalone asm, disasm, C/toy compilation, object/link output, runtime,
+debug tooling, and executable test paths.
+
+This checklist tracks parity with the aa64 lane, not architectural feature
+completeness for all RISC-V extensions. The baseline target is RV64GC Linux
+ELF with the psABI double-float (LP64D) ABI unless a task says otherwise.
+
+## Asm / disasm
+
+- [x] Wire rv64 into `arch_disasm_new` through `src/arch/rv64/disasm.{h,c}`.
+- [x] Add rv64 `test/asm` smoke coverage for text decode, object listing, hex
+ encode, and podman-backed ELF execution.
+- [x] Add arch-scoped asm fixture applicability (`*.targets`) so aa64/x64/rv64
+ cases do not fail on unrelated targets.
+- [ ] Replace the current hand-written rv64 disassembler with an ISA descriptor
+ layer equivalent in role to `src/arch/aa64/isa.{h,c}` so encoding,
+ decoding, and printing share one description.
+- [ ] Expand standalone rv64 asm parsing beyond the current small subset:
+ branches, calls, arithmetic, shifts, compares, loads/stores, AUIPC/LUI,
+ relocation-bearing operands, atomics, fences, CSR/system forms, scalar
+ FP, and backend-emitted forms.
+- [ ] Expand rv64 disasm to decode every instruction emitted by rv64 codegen and
+ accepted by standalone asm, including unknown/truncated handling that
+ matches the public iterator contract.
+- [ ] Add relocation/symbol annotation coverage for rv64 object disassembly.
+- [ ] Update `test/asm/regen.sh` or add an rv64 variant for clang/objdump golden
+ regeneration.
+- [ ] Make asm round-trip (`S`) meaningful for rv64 codegen output and gate the
+ rv64-emitted corpus on it.
+
+## Register API / target surface
+
+- [x] Add rv64 public register-name/index support for psABI names plus `xN` and
+ `fN` aliases.
+- [ ] Audit all register naming users (`dbg`, asm constraints, disasm printers)
+ for consistent DWARF numbering: `x0..x31` as 0..31 and `f0..f31` as
+ 32..63.
+- [ ] Verify predefined macros, driver triple parsing, target defaults, and
+ `cfree_test_target` setup against clang's `riscv64-linux-gnu` behavior.
+- [ ] Decide policy for optional extensions (`C`, `A`, `F`, `D`, `Zicsr`,
+ `Zifencei`, future vector) and reflect it in target feature queries.
+
+## Inline asm
+
+- [ ] Implement rv64 inline-asm template rendering parallel to aa64:
+ placeholders, symbolic operands, memory operands, width/addr modifiers,
+ escaped percent, and statement splitting.
+- [ ] Add rv64 constraint support for integer, FP, immediate, memory, matching,
+ early-clobber, and read-write operands.
+- [ ] Verify clobbers, `"memory"`, callee-saved preservation, named registers,
+ and fixed-register conflicts on rv64.
+- [ ] Add an rv64 inline-asm unit test parallel to
+ `test/arch/aa64_inline_test.c`.
+- [ ] Add C and toy inline-asm execution cases that run through podman/qemu rv64.
+
+## C / toy codegen
+
+- [x] Prove a targeted rv64 C parse path can compile, link, and execute through
+ podman path E.
+- [ ] Run and triage the full C parse corpus for rv64 at `-O0`, `-O1`, and
+ `-O2`; track failures by missing backend feature rather than broad skips.
+- [ ] Run and triage toy cross-arch path `X` for rv64 alongside aa64 cases.
+- [ ] Match aa64 coverage for scalar integer, pointer, aggregate, varargs,
+ atomics, intrinsics, labels, computed goto, switch lowering, tail calls,
+ alloca, and dynamic stack adjustment.
+- [ ] Close remaining explicit rv64 backend panics in `src/arch/rv64/ops.c`,
+ `alloc.c`, and `emit.c`.
+- [ ] Verify optimized rv64 lowering after recent opt pipeline work: liveness,
+ register allocation, hard-register constraints, call plans, and spill
+ reloads.
+- [ ] Add targeted rv64 cases for large frames, far branches, far label-address
+ materialization, large immediates, and pcrel/GOT materialization.
+- [ ] Add targeted rv64 FP conversion, comparison, NaN, and rounding cases.
+- [ ] Add targeted rv64 atomic cases for all supported widths and memory orders.
+
+## ABI / platform
+
+- [ ] Finish psABI edge-case coverage: aggregate classification, indirect args,
+ mixed int/FP aggregates, homogeneous FP shapes where applicable, sret,
+ byval, empty/zero-sized fields, and mixed returns.
+- [ ] Verify variadic functions: register save area layout, `va_list` shape,
+ stack argument traversal, and mixed int/FP varargs.
+- [ ] Verify stack alignment, frame pointer conventions, callee-saved integer
+ registers `s0..s11`, and callee-saved FP registers `fs0..fs11`.
+- [ ] Decide `long double` policy for rv64 (`quad` vs compatibility mode) and
+ align C frontend, ABI lowering, libc harnesses, and runtime helpers.
+- [ ] Audit TLS models for rv64: local-exec, GOT/TLS relocations, static link,
+ dynamic link, and emulator/JIT behavior.
+
+## Object / link / driver
+
+- [x] Keep rv64 ELF roundtrip link corpus green for path R.
+- [x] Fix `cfree objdump -d` to choose the disassembler target from the object
+ file rather than the host target.
+- [ ] Run rv64 link path E broadly under podman and triage execution failures.
+- [ ] Ensure ELF rv64 relocations cover all codegen, asm, TLS, PLT/GOT, ifunc,
+ linker-script, archive, and GC cases currently passing for aa64.
+- [ ] Implement or explicitly reject any unsupported rv64 relocation kinds with
+ diagnostics that name the relocation and input object.
+- [ ] Exercise `cfree as`, `cc`, `ld`, `ar`, `objdump`, `strip`, and `objcopy`
+ paths with rv64-specific command tests where the tool claims rv64 support.
+- [ ] Verify dynamic-linker defaults for musl and glibc rv64 Linux.
+- [ ] Add rv64 `objdump` golden tests for sections, symbols, relocs, and
+ disassembly annotations.
+
+## Runtime / libc
+
+- [ ] Build `libcfree_rt.a` for `riscv64-linux` through cfree, not only host
+ clang probes.
+- [ ] Bring rv64 coroutine/runtime support through the cfree assembler/compiler
+ path.
+- [ ] Run `test-rt-runtime` with rv64 enabled and triage every runtime helper
+ failure.
+- [ ] Retarget musl and glibc libc harnesses to rv64 sysroots and run the same
+ cases currently exercised for aa64.
+- [ ] Add rv64 smoke cases that use cfree-emitted bytes for startup/runtime
+ paths, not only clang-produced harness binaries.
+- [ ] Verify compiler-rt-style integer, FP, memory, atomic, and coroutine
+ helpers for rv64 ABI correctness.
+
+## Debug / DWARF / JIT
+
+- [ ] Add rv64 debugger breakpoint support (`ebreak`) and displaced-step logic.
+- [ ] Add rv64 ucontext/register marshalling for supported host OSes.
+- [ ] Emit and validate rv64 DWARF CFI/line-info details, including CFA rules,
+ frame-pointer conventions, return-address register `ra`, and FP register
+ numbering.
+- [ ] Extend DWARF tests with rv64 producer roundtrips where instruction size
+ and register numbering differ from aa64.
+- [ ] Fill rv64 JIT support gaps: executable memory, relocations, symbol calls,
+ TLS/TLV behavior, and native-host execution tests where available.
+- [ ] Decide debugger scope for non-native rv64 execution; either support it
+ through emulation or mark it explicitly out of parity.
+
+## Emulator
+
+- [ ] Audit rv64 ELF loader behavior against aa64: program headers, auxv,
+ stack setup, argv/envp, TLS, brk/mmap, and dynamic loader handoff.
+- [ ] Expand rv64 decode/lift coverage to match all instructions produced by
+ cfree rv64 codegen and clang-built harnesses.
+- [ ] Add rv64 syscall coverage for libc and smoke workloads.
+- [ ] Add emulator regression tests for rv64 branches, calls, atomics, FP, TLS,
+ and signals/traps.
+
+## Execution infrastructure
+
+- [x] Use podman `--platform linux/riscv64` for rv64 execution when no native or
+ qemu-user runner is available.
+- [x] Prove `test-smoke-rv64` direct and batched execution paths.
+- [x] Prove `test/asm` rv64 path E through podman.
+- [x] Prove a targeted `test/parse` rv64 path E through podman.
+- [ ] Run larger rv64 E matrices under podman with batching and record stable
+ filters for CI-equivalent local runs.
+- [ ] Add clear diagnostics for missing podman image/platform support, binfmt,
+ qemu-user, or clang rv64 cross support.
+- [ ] Decide default images for `RUN_RV64_IMAGE` across musl/glibc tests.
+
+## Test policy
+
+- [ ] Add rv64-targeted filters/goldens for each new feature as it lands.
+- [ ] Keep skips explicit and arch-scoped through `*.targets`, not hidden in
+ harness defaults.
+- [ ] Prefer red/green targeted runs: one failing feature family at a time,
+ one arch at a time.
+- [ ] Promote stable rv64 lanes into default or CI-equivalent coverage once the
+ runner assumptions are reliable.
+- [ ] Keep aa64 lanes green while changing shared asm/disasm/link/test harness
+ code.
diff --git a/src/arch/rv64/arch.c b/src/arch/rv64/arch.c
@@ -2,6 +2,8 @@
#include "abi/abi_internal.h"
#include "arch/rv64/asm.h"
+#include "arch/rv64/disasm.h"
+#include "arch/rv64/regs.h"
#include "arch/rv64/rv64.h"
#include "core/bytes.h"
#include "link/link_arch.h"
@@ -23,6 +25,11 @@ static const ArchElfOps rv64_elf_ops = {
.reloc_from = elf_riscv64_reloc_from,
};
+/* Adapter used for the ArchImpl `register_at` slot: fills `out` with the
+ * DWARF index and name of table entry `idx`.
+ * Returns 1 when `out` is NULL; otherwise returns whatever
+ * rv64_register_iter_get reports for `idx`. */
+static int rv64_register_at_public(uint32_t idx, CfreeArchReg* out) {
+  int rc = 1;
+  if (out) {
+    rc = rv64_register_iter_get(idx, &out->dwarf_idx, &out->name);
+  }
+  return rc;
+}
+
static int rv64_apply_label_fixup(Compiler* c, const ArchLabelFixup* fx) {
const Section* s;
u8 cur[4];
@@ -102,7 +109,7 @@ const ArchImpl arch_impl_rv64 = {
.abi_vtable = rv64_abi_vtable,
.cgtarget_new = rv64_cgtarget_new,
.asm_new = rv64_arch_asm_new,
- .disasm_new = NULL,
+ .disasm_new = rv64_disasm_new,
.apply_label_fixup = rv64_apply_label_fixup,
.link = &link_arch_rv64,
.elf = &rv64_elf_ops,
@@ -110,8 +117,8 @@ const ArchImpl arch_impl_rv64 = {
.predefined_macros = rv64_predefined_macros,
.npredefined_macros =
(u32)(sizeof rv64_predefined_macros / sizeof rv64_predefined_macros[0]),
- .register_name = NULL,
- .register_index = NULL,
- .register_count = NULL,
- .register_at = NULL,
+ .register_name = rv64_register_name,
+ .register_index = rv64_register_index,
+ .register_count = rv64_register_iter_size,
+ .register_at = rv64_register_at_public,
};
diff --git a/src/arch/rv64/asm.c b/src/arch/rv64/asm.c
@@ -3,6 +3,7 @@
#include <string.h>
#include "arch/rv64/internal.h"
+#include "arch/rv64/regs.h"
#include "asm/asm_helpers.h"
#include "core/arena.h"
@@ -25,41 +26,14 @@ static int sym_eq(AsmDriver* d, Sym s, const char* lit) {
+/* Resolve the register name that pool_str returns for `s`.
+ * On success returns 1, stores the 5-bit register number in *reg_out and a
+ * flag in *fp_out that is non-zero for floating-point registers (both
+ * outputs are optional). Returns 0 when the name is missing, empty, too long
+ * for the local buffer, or not known to rv64_register_index. */
static int rv_reg_from_name(AsmDriver* d, Sym s, u32* reg_out, int* fp_out) {
size_t n = 0;
const char* p = pool_str(asm_driver_pool(d), s, &n);
- u32 r;
- int fp = 0;
- if (!p || !n) return 0;
- if (n == 2 && p[0] == 'r' && p[1] == 'a')
- r = RV_RA;
- else if (n == 2 && p[0] == 's' && p[1] == 'p')
- r = RV_SP;
- else if (n == 2 && p[0] == 't' && p[1] == '0')
- r = RV_T0;
- else if (n == 2 && p[0] == 'a' && p[1] >= '0' && p[1] <= '7')
- r = RV_A0 + (u32)(p[1] - '0');
- else if (n == 2 && p[0] == 's' && p[1] >= '0' && p[1] <= '1')
- r = RV_S0 + (u32)(p[1] - '0');
- else if (n == 3 && p[0] == 's' && p[1] == '1' && p[2] >= '0' &&
- p[2] <= '1')
- r = 26u + (u32)(p[2] - '0');
- else if (n == 2 && p[0] == 's' && p[1] >= '2' && p[1] <= '9')
- r = RV_S2 + (u32)(p[1] - '2');
- else if (n == 3 && p[0] == 'f' && p[1] == 's' && p[2] >= '0' &&
- p[2] <= '1') {
- r = RV_S0 + (u32)(p[2] - '0');
- fp = 1;
- } else if (n == 4 && p[0] == 'f' && p[1] == 's' && p[2] == '1' &&
- p[3] >= '0' && p[3] <= '1') {
- r = 26u + (u32)(p[3] - '0');
- fp = 1;
- } else if (n == 3 && p[0] == 'f' && p[1] == 's' && p[2] >= '2' &&
- p[2] <= '9') {
- r = RV_S2 + (u32)(p[2] - '2');
- fp = 1;
- } else {
- return 0;
- }
- if (reg_out) *reg_out = r;
- if (fp_out) *fp_out = fp;
+ /* rv64_register_index takes a NUL-terminated string, so the name is copied
+ * into a bounded local buffer; names of 16 bytes or more are rejected. */
+ char name[16];
+ u32 dwarf = 0;
+ if (!p || !n || n >= sizeof name) return 0;
+ memcpy(name, p, n);
+ name[n] = '\0';
+ if (rv64_register_index(name, &dwarf) != 0) return 0;
+ /* Indices 0..31 are x-registers and 32..63 are f-registers: the low five
+ * bits give the register number, and index >= 32 marks an FP register. */
+ if (reg_out) *reg_out = dwarf & 31u;
+ if (fp_out) *fp_out = dwarf >= 32u;
return 1;
}
diff --git a/src/arch/rv64/disasm.c b/src/arch/rv64/disasm.c
@@ -0,0 +1,407 @@
+#include "arch/rv64/disasm.h"
+
+#include <string.h>
+
+#include "arch/rv64/isa.h"
+#include "core/heap.h"
+#include "core/strbuf.h"
+
+/* Sizes, in bytes, of the fixed text buffers embedded in Rv64Disasm. */
+#define RV64_DASM_MNEM_CAP 16u
+#define RV64_DASM_OPS_CAP 96u
+#define RV64_DASM_ANN_CAP 64u
+
+/* RV64 disassembler instance.
+ *
+ * `base` is the first member so an ArchDisasm* handed to the decode/destroy
+ * callbacks can be cast back to Rv64Disasm*. The three StrBufs are
+ * initialised over the matching *_buf arrays, so all decoded text lives
+ * inside this object. */
+typedef struct Rv64Disasm {
+ ArchDisasm base;
+ /* Compiler passed to rv64_disasm_new. */
+ Compiler* c;
+ /* Heap this object was allocated from; used again to free it. */
+ Heap* heap;
+ /* Backing storage for the mnemonic, operand and annotation text. */
+ char mnem_buf[RV64_DASM_MNEM_CAP];
+ char ops_buf[RV64_DASM_OPS_CAP];
+ char ann_buf[RV64_DASM_ANN_CAP];
+ /* String builders over the buffers above. */
+ StrBuf mnem;
+ StrBuf ops;
+ StrBuf ann;
+} Rv64Disasm;
+
+/* Printed names of the 32 integer registers, indexed by the 5-bit register
+ * field of an instruction (x0 -> "zero", x1 -> "ra", ...). */
+static const char* const rv_xnames[32] = {
+ "zero", "ra", "sp", "gp", "tp", "t0", "t1", "t2",
+ "s0", "s1", "a0", "a1", "a2", "a3", "a4", "a5",
+ "a6", "a7", "s2", "s3", "s4", "s5", "s6", "s7",
+ "s8", "s9", "s10", "s11", "t3", "t4", "t5", "t6",
+};
+
+/* Printed names of the 32 floating-point registers, indexed by the 5-bit
+ * register field of an instruction (f0 -> "ft0", f8 -> "fs0", ...). */
+static const char* const rv_fnames[32] = {
+ "ft0", "ft1", "ft2", "ft3", "ft4", "ft5", "ft6", "ft7",
+ "fs0", "fs1", "fa0", "fa1", "fa2", "fa3", "fa4", "fa5",
+ "fa6", "fa7", "fs2", "fs3", "fs4", "fs5", "fs6", "fs7",
+ "fs8", "fs9", "fs10", "fs11", "ft8", "ft9", "ft10", "ft11",
+};
+
+/* Assemble a 32-bit value from the four little-endian bytes at `b`. */
+static u32 rv_read_u32_le(const u8* b) {
+  u32 word = 0;
+  u32 i;
+  /* Fold from the most significant byte (b[3]) down to b[0]. */
+  for (i = 4u; i > 0u; --i) {
+    word = (word << 8) | (u32)b[i - 1u];
+  }
+  return word;
+}
+
+/* Sign-extend `v`, interpreted as a `bits`-wide two's-complement field, to
+ * 64 bits. The callers in this file pass values that have no bits set above
+ * bit `bits - 1`. */
+static i64 rv_sext(u64 v, u32 bits) {
+  const u64 sign = 1ull << (bits - 1u);
+  /* Flipping the sign bit and then subtracting it propagates it upward. */
+  const u64 flipped = v ^ sign;
+  return (i64)(flipped - sign);
+}
+
+/* I-type immediate: instruction bits 31:20, sign-extended from 12 bits. */
+static i32 rv_i_imm(u32 w) {
+  const u32 field = w >> 20;
+  return (i32)rv_sext(field, 12);
+}
+
+/* S-type immediate: imm[4:0] from bits 11:7 and imm[11:5] from bits 31:25,
+ * sign-extended from 12 bits. */
+static i32 rv_s_imm(u32 w) {
+  const u32 low5 = (w >> 7) & 0x1fu;
+  const u32 high7 = (w >> 25) & 0x7fu;
+  return (i32)rv_sext(low5 | (high7 << 5), 12);
+}
+
+/* B-type branch offset: imm[12] = bit 31, imm[11] = bit 7,
+ * imm[10:5] = bits 30:25, imm[4:1] = bits 11:8; bit 0 is always zero.
+ * The result is sign-extended from 13 bits. */
+static i32 rv_b_imm(u32 w) {
+  const u32 bit12 = (w >> 31) & 0x1u;
+  const u32 bit11 = (w >> 7) & 0x1u;
+  const u32 bits10_5 = (w >> 25) & 0x3fu;
+  const u32 bits4_1 = (w >> 8) & 0xfu;
+  const u32 packed =
+      (bit12 << 12) | (bit11 << 11) | (bits10_5 << 5) | (bits4_1 << 1);
+  return (i32)rv_sext(packed, 13);
+}
+
+/* J-type jump offset: imm[20] = bit 31, imm[19:12] = bits 19:12,
+ * imm[11] = bit 20, imm[10:1] = bits 30:21; bit 0 is always zero.
+ * The result is sign-extended from 21 bits. */
+static i32 rv_j_imm(u32 w) {
+  const u32 bit20 = (w >> 31) & 0x1u;
+  const u32 bits19_12 = (w >> 12) & 0xffu;
+  const u32 bit11 = (w >> 20) & 0x1u;
+  const u32 bits10_1 = (w >> 21) & 0x3ffu;
+  const u32 packed =
+      (bit20 << 20) | (bits19_12 << 12) | (bit11 << 11) | (bits10_1 << 1);
+  return (i32)rv_sext(packed, 21);
+}
+
+/* Start a new instruction: replace the mnemonic text with `mnemonic` and
+ * clear the operand text so the caller can append operands. */
+static void rv_set(Rv64Disasm* d, const char* mnemonic) {
+  StrBuf* mnem = &d->mnem;
+  StrBuf* ops = &d->ops;
+  strbuf_reset(mnem);
+  strbuf_puts(mnem, mnemonic);
+  strbuf_reset(ops);
+}
+
+/* Append the name of integer register `r` (only the low 5 bits are used). */
+static void rv_reg(StrBuf* sb, u32 r) {
+  const char* name = rv_xnames[r & 31u];
+  strbuf_puts(sb, name);
+}
+
+/* Append the name of floating-point register `r` (only the low 5 bits are
+ * used). */
+static void rv_freg(StrBuf* sb, u32 r) {
+  const char* name = rv_fnames[r & 31u];
+  strbuf_puts(sb, name);
+}
+
+/* Append the operand separator. */
+static void rv_sep(StrBuf* sb) {
+  static const char separator[] = ", ";
+  strbuf_puts(sb, separator);
+}
+
+/* Append a memory operand in `offset(base)` form, where `base` is an
+ * integer register number. */
+static void rv_addr(StrBuf* sb, i64 off, u32 base) {
+  const char* base_name = rv_xnames[base & 31u];
+  strbuf_put_i64(sb, off);
+  strbuf_putc(sb, '(');
+  strbuf_puts(sb, base_name);
+  strbuf_putc(sb, ')');
+}
+
+/* Append a pc-relative target. With a non-zero `vaddr` the absolute target
+ * `vaddr + off` is printed in hex; with `vaddr == 0` the raw offset is
+ * printed as `#off`. */
+static void rv_rel(StrBuf* sb, u64 vaddr, i64 off) {
+  if (vaddr == 0) {
+    strbuf_putc(sb, '#');
+    strbuf_put_i64(sb, off);
+    return;
+  }
+  strbuf_put_hex_u64(sb, vaddr + (u64)off);
+}
+
+/* Fallback rendering for an undecoded instruction: `.word` with the raw
+ * 32-bit value in hex as the only operand. */
+static void rv_word(Rv64Disasm* d, u32 word) {
+  const u64 raw = (u64)word;
+  rv_set(d, ".word");
+  strbuf_put_hex_u64(&d->ops, raw);
+}
+
+/* Mnemonic for an OP (register-register) instruction selected by
+ * funct7/funct3, or NULL when the combination is not handled here. */
+static const char* rv_op_name(u32 funct7, u32 funct3) {
+  static const char* const base_names[8] = {
+      "add", "sll", "slt", "sltu", "xor", "srl", "or", "and",
+  };
+  static const char* const muldiv_names[8] = {
+      "mul", "mulh", "mulhsu", "mulhu", "div", "divu", "rem", "remu",
+  };
+  const char* result = NULL;
+  switch (funct7) {
+    case 0x00u:
+      result = base_names[funct3 & 7u];
+      break;
+    case 0x20u:
+      /* Only sub and sra use the alternate funct7. */
+      if (funct3 == 0) {
+        result = "sub";
+      } else if (funct3 == 5) {
+        result = "sra";
+      }
+      break;
+    case 0x01u:
+      result = muldiv_names[funct3 & 7u];
+      break;
+    default:
+      break;
+  }
+  return result;
+}
+
+/* Mnemonic for an OP-32 (32-bit register-register) instruction selected by
+ * funct7/funct3, or NULL when the combination is not handled here. */
+static const char* rv_op32_name(u32 funct7, u32 funct3) {
+  static const char* const muldiv_names[8] = {
+      "mulw", NULL, NULL, NULL, "divw", "divuw", "remw", "remuw",
+  };
+  const char* result = NULL;
+  switch (funct7) {
+    case 0x00u:
+      if (funct3 == 0) {
+        result = "addw";
+      } else if (funct3 == 1) {
+        result = "sllw";
+      } else if (funct3 == 5) {
+        result = "srlw";
+      }
+      break;
+    case 0x20u:
+      if (funct3 == 0) {
+        result = "subw";
+      } else if (funct3 == 5) {
+        result = "sraw";
+      }
+      break;
+    case 0x01u:
+      result = muldiv_names[funct3 & 7u];
+      break;
+    default:
+      break;
+  }
+  return result;
+}
+
+/* Append the `rd, rs1, rs2` operand list of R-type word `w`, using integer
+ * register names. */
+static void rv_r_operands(Rv64Disasm* d, u32 w) {
+  StrBuf* ops = &d->ops;
+  rv_reg(ops, (w >> 7) & 31u); /* rd */
+  rv_sep(ops);
+  rv_reg(ops, (w >> 15) & 31u); /* rs1 */
+  rv_sep(ops);
+  rv_reg(ops, (w >> 20) & 31u); /* rs2 */
+}
+
+/* Decode one 32-bit RV64 instruction word into `out`.
+ *
+ * base:  the Rv64Disasm whose text buffers back the strings in `out`.
+ * bytes: little-endian instruction bytes.
+ * len:   number of bytes available at `bytes`.
+ * vaddr: address of the instruction; when non-zero, branch and jump targets
+ *        are printed as absolute addresses, otherwise as `#offset`.
+ * out:   receives vaddr, the byte pointer, nbytes = 4 and the mnemonic,
+ *        operand and (empty) annotation strings. The strings point into
+ *        this disassembler and are overwritten by the next decode.
+ *
+ * Returns 4 on success, or 0 when fewer than 4 bytes are available. Words
+ * that match no handled form are rendered as `.word <hex>`.
+ *
+ * NOTE(review): every parcel is consumed as a 4-byte instruction; 16-bit
+ * compressed encodings are not recognised here. */
+static u32 rv_decode(ArchDisasm* base, const u8* bytes, size_t len, u64 vaddr,
+                     CfreeInsn* out) {
+  Rv64Disasm* d = (Rv64Disasm*)base;
+  u32 w;
+  u32 op;
+  u32 rd;
+  u32 rs1;
+  u32 rs2;
+  u32 funct3;
+  u32 funct7;
+  const char* name;
+
+  if (len < 4u) return 0;
+  w = rv_read_u32_le(bytes);
+  op = w & 0x7fu;
+  rd = (w >> 7) & 31u;
+  funct3 = (w >> 12) & 7u;
+  rs1 = (w >> 15) & 31u;
+  rs2 = (w >> 20) & 31u;
+  funct7 = (w >> 25) & 0x7fu;
+
+  /* Exact-word pseudo-instructions are matched before the opcode switch. */
+  if (w == rv_nop()) {
+    rv_set(d, "nop");
+  } else if (w == rv_ret_()) {
+    rv_set(d, "ret");
+  } else {
+    switch (op) {
+      case RV_LUI:
+        rv_set(d, "lui");
+        rv_reg(&d->ops, rd);
+        rv_sep(&d->ops);
+        /* Printed as the already-shifted value (low 12 bits cleared). */
+        strbuf_put_hex_u64(&d->ops, (u64)(w & 0xfffff000u));
+        break;
+      case RV_AUIPC:
+        rv_set(d, "auipc");
+        rv_reg(&d->ops, rd);
+        rv_sep(&d->ops);
+        strbuf_put_hex_u64(&d->ops, (u64)(w & 0xfffff000u));
+        break;
+      case RV_JAL:
+        /* `jal zero, off` is printed as the `j off` pseudo-instruction. */
+        rv_set(d, rd == RV_ZERO ? "j" : "jal");
+        if (rd != RV_ZERO) {
+          rv_reg(&d->ops, rd);
+          rv_sep(&d->ops);
+        }
+        rv_rel(&d->ops, vaddr, rv_j_imm(w));
+        break;
+      case RV_JALR:
+        if (funct3 != 0) {
+          /* Only funct3 == 0 encodes jalr; other values are reserved, so
+           * they must not be printed as a valid jump. */
+          rv_word(d, w);
+        } else if (rd == RV_ZERO && rv_i_imm(w) == 0) {
+          rv_set(d, "jr");
+          rv_reg(&d->ops, rs1);
+        } else {
+          rv_set(d, "jalr");
+          rv_reg(&d->ops, rd);
+          rv_sep(&d->ops);
+          rv_addr(&d->ops, rv_i_imm(w), rs1);
+        }
+        break;
+      case RV_BRANCH: {
+        static const char* const names[8] = {
+            "beq", "bne", NULL, NULL, "blt", "bge", "bltu", "bgeu",
+        };
+        name = names[funct3];
+        if (!name) {
+          rv_word(d, w);
+          break;
+        }
+        rv_set(d, name);
+        rv_reg(&d->ops, rs1);
+        rv_sep(&d->ops);
+        rv_reg(&d->ops, rs2);
+        rv_sep(&d->ops);
+        rv_rel(&d->ops, vaddr, rv_b_imm(w));
+        break;
+      }
+      case RV_LOAD: {
+        static const char* const names[8] = {
+            "lb", "lh", "lw", "ld", "lbu", "lhu", "lwu", NULL,
+        };
+        name = names[funct3];
+        if (!name) {
+          rv_word(d, w);
+          break;
+        }
+        rv_set(d, name);
+        rv_reg(&d->ops, rd);
+        rv_sep(&d->ops);
+        rv_addr(&d->ops, rv_i_imm(w), rs1);
+        break;
+      }
+      case RV_STORE: {
+        static const char* const names[8] = {
+            "sb", "sh", "sw", "sd", NULL, NULL, NULL, NULL,
+        };
+        name = names[funct3];
+        if (!name) {
+          rv_word(d, w);
+          break;
+        }
+        rv_set(d, name);
+        rv_reg(&d->ops, rs2);
+        rv_sep(&d->ops);
+        rv_addr(&d->ops, rv_s_imm(w), rs1);
+        break;
+      }
+      case RV_LOAD_FP:
+        if (funct3 == 2 || funct3 == 3) {
+          rv_set(d, funct3 == 2 ? "flw" : "fld");
+          rv_freg(&d->ops, rd);
+          rv_sep(&d->ops);
+          rv_addr(&d->ops, rv_i_imm(w), rs1);
+        } else {
+          rv_word(d, w);
+        }
+        break;
+      case RV_STORE_FP:
+        if (funct3 == 2 || funct3 == 3) {
+          rv_set(d, funct3 == 2 ? "fsw" : "fsd");
+          rv_freg(&d->ops, rs2);
+          rv_sep(&d->ops);
+          rv_addr(&d->ops, rv_s_imm(w), rs1);
+        } else {
+          rv_word(d, w);
+        }
+        break;
+      case RV_OP_IMM:
+        if (funct3 == 0 && rs1 == RV_ZERO) {
+          /* addi rd, zero, imm is printed as `li rd, imm`. */
+          rv_set(d, "li");
+          rv_reg(&d->ops, rd);
+          rv_sep(&d->ops);
+          strbuf_put_i64(&d->ops, rv_i_imm(w));
+        } else if (funct3 == 0 && rv_i_imm(w) == 0) {
+          /* addi rd, rs1, 0 is printed as `mv rd, rs1`. */
+          rv_set(d, "mv");
+          rv_reg(&d->ops, rd);
+          rv_sep(&d->ops);
+          rv_reg(&d->ops, rs1);
+        } else {
+          static const char* const names[8] = {
+              "addi", NULL, "slti", "sltiu", "xori", NULL, "ori", "andi",
+          };
+          if (funct3 == 1 && (w >> 26) == 0x00u) {
+            /* RV64 slli has a 6-bit shamt and requires funct6 == 0; other
+             * funct6 values are different instructions and fall through to
+             * `.word` because names[1] is NULL. */
+            rv_set(d, "slli");
+            rv_reg(&d->ops, rd);
+            rv_sep(&d->ops);
+            rv_reg(&d->ops, rs1);
+            rv_sep(&d->ops);
+            strbuf_put_u64(&d->ops, (w >> 20) & 0x3fu);
+          } else if (funct3 == 5 &&
+                     ((w >> 26) == 0x00u || (w >> 26) == 0x10u)) {
+            /* funct6 selects logical (0x00) or arithmetic (0x10) shift. */
+            rv_set(d, (w >> 26) == 0x10u ? "srai" : "srli");
+            rv_reg(&d->ops, rd);
+            rv_sep(&d->ops);
+            rv_reg(&d->ops, rs1);
+            rv_sep(&d->ops);
+            strbuf_put_u64(&d->ops, (w >> 20) & 0x3fu);
+          } else if (names[funct3]) {
+            rv_set(d, names[funct3]);
+            rv_reg(&d->ops, rd);
+            rv_sep(&d->ops);
+            rv_reg(&d->ops, rs1);
+            rv_sep(&d->ops);
+            strbuf_put_i64(&d->ops, rv_i_imm(w));
+          } else {
+            rv_word(d, w);
+          }
+        }
+        break;
+      case RV_OP_IMM_32:
+        if (funct3 == 0) {
+          rv_set(d, "addiw");
+          rv_reg(&d->ops, rd);
+          rv_sep(&d->ops);
+          rv_reg(&d->ops, rs1);
+          rv_sep(&d->ops);
+          strbuf_put_i64(&d->ops, rv_i_imm(w));
+        } else if (funct3 == 1 && funct7 == 0) {
+          /* 32-bit shifts carry a 5-bit shamt in the rs2 field. */
+          rv_set(d, "slliw");
+          rv_reg(&d->ops, rd);
+          rv_sep(&d->ops);
+          rv_reg(&d->ops, rs1);
+          rv_sep(&d->ops);
+          strbuf_put_u64(&d->ops, rs2);
+        } else if (funct3 == 5 && (funct7 == 0 || funct7 == 0x20u)) {
+          rv_set(d, funct7 == 0x20u ? "sraiw" : "srliw");
+          rv_reg(&d->ops, rd);
+          rv_sep(&d->ops);
+          rv_reg(&d->ops, rs1);
+          rv_sep(&d->ops);
+          strbuf_put_u64(&d->ops, rs2);
+        } else {
+          rv_word(d, w);
+        }
+        break;
+      case RV_OP:
+        name = rv_op_name(funct7, funct3);
+        if (name) {
+          rv_set(d, name);
+          rv_r_operands(d, w);
+        } else {
+          rv_word(d, w);
+        }
+        break;
+      case RV_OP_32:
+        name = rv_op32_name(funct7, funct3);
+        if (name) {
+          rv_set(d, name);
+          rv_r_operands(d, w);
+        } else {
+          rv_word(d, w);
+        }
+        break;
+      case RV_SYSTEM:
+        if (w == rv_ecall()) {
+          rv_set(d, "ecall");
+        } else if (w == rv_ebreak()) {
+          rv_set(d, "ebreak");
+        } else {
+          rv_word(d, w);
+        }
+        break;
+      default:
+        rv_word(d, w);
+        break;
+    }
+  }
+
+  /* No annotation is produced; hand out an empty string. */
+  strbuf_reset(&d->ann);
+  out->vaddr = vaddr;
+  out->bytes = bytes;
+  out->nbytes = 4;
+  out->mnemonic = strbuf_cstr(&d->mnem);
+  out->operands = strbuf_cstr(&d->ops);
+  out->annotation = strbuf_cstr(&d->ann);
+  return 4;
+}
+
+/* ArchDisasm destroy callback: return the object to the heap it was
+ * allocated from. */
+static void rv64_destroy(ArchDisasm* base) {
+  Rv64Disasm* self = (Rv64Disasm*)base;
+  Heap* owner = self->heap;
+  owner->free(owner, self, sizeof(*self));
+}
+
+/* Create an RV64 disassembler allocated from the heap at `c->ctx->heap`.
+ * Returns the embedded ArchDisasm on success, or NULL when the allocation
+ * fails. The object is released through its `destroy` callback. */
+ArchDisasm* rv64_disasm_new(Compiler* c) {
+  Heap* heap = (Heap*)c->ctx->heap;
+  Rv64Disasm* self =
+      (Rv64Disasm*)heap->alloc(heap, sizeof(*self), _Alignof(Rv64Disasm));
+  if (!self) return NULL;
+
+  memset(self, 0, sizeof(*self));
+  self->c = c;
+  self->heap = heap;
+
+  /* Callbacks exposed through the generic ArchDisasm interface. */
+  self->base.decode = rv_decode;
+  self->base.destroy = rv64_destroy;
+
+  /* Bind each string builder to its embedded fixed-size buffer. */
+  strbuf_init(&self->mnem, self->mnem_buf, sizeof self->mnem_buf);
+  strbuf_init(&self->ops, self->ops_buf, sizeof self->ops_buf);
+  strbuf_init(&self->ann, self->ann_buf, sizeof self->ann_buf);
+  return &self->base;
+}
diff --git a/src/arch/rv64/disasm.h b/src/arch/rv64/disasm.h
@@ -0,0 +1,14 @@
+#ifndef CFREE_ARCH_RV64_DISASM_H
+#define CFREE_ARCH_RV64_DISASM_H
+
+/* RV64 disassembler -- ArchDisasm implementation.
+ *
+ * This is intentionally table-light for now: it decodes the RV64I/M/FD
+ * forms that the assembler and backend already emit in smoke and frontend
+ * paths, and falls back to `.word` for unknown 32-bit instructions. */
+
+#include "arch/arch.h"
+
+/* Create an RV64 disassembler for the given Compiler. Returns NULL when the
+ * allocation fails; the returned object is released through its `destroy`
+ * callback. */
+ArchDisasm* rv64_disasm_new(Compiler*);
+
+#endif
diff --git a/src/arch/rv64/regs.c b/src/arch/rv64/regs.c
@@ -0,0 +1,97 @@
+/* RV64 register name table -- DWARF index <-> psABI assembler name.
+ *
+ * RISC-V DWARF numbering uses 0..31 for x-registers and 32..63 for
+ * f-registers. Canonical names use psABI spellings; xN/fN aliases are
+ * accepted by lookup. */
+
+#include "arch/rv64/regs.h"
+
+#include <stdint.h>
+#include <string.h>
+
+#include "core/core.h"
+
+/* One register table entry: a DWARF register index and its canonical
+ * psABI name. */
+typedef struct Rv64Reg {
+ uint32_t dwarf_idx;
+ const char* name;
+} Rv64Reg;
+
+/* Register table in DWARF order: entries 0..31 are the integer registers and
+ * entries 32..63 the floating-point registers, so entry i carries
+ * dwarf_idx == i. */
+static const Rv64Reg RV64_REGS[] = {
+ {0, "zero"}, {1, "ra"}, {2, "sp"}, {3, "gp"}, {4, "tp"},
+ {5, "t0"}, {6, "t1"}, {7, "t2"}, {8, "s0"}, {9, "s1"},
+ {10, "a0"}, {11, "a1"}, {12, "a2"}, {13, "a3"}, {14, "a4"},
+ {15, "a5"}, {16, "a6"}, {17, "a7"}, {18, "s2"}, {19, "s3"},
+ {20, "s4"}, {21, "s5"}, {22, "s6"}, {23, "s7"}, {24, "s8"},
+ {25, "s9"}, {26, "s10"}, {27, "s11"}, {28, "t3"}, {29, "t4"},
+ {30, "t5"}, {31, "t6"},
+
+ {32, "ft0"}, {33, "ft1"}, {34, "ft2"}, {35, "ft3"}, {36, "ft4"},
+ {37, "ft5"}, {38, "ft6"}, {39, "ft7"}, {40, "fs0"}, {41, "fs1"},
+ {42, "fa0"}, {43, "fa1"}, {44, "fa2"}, {45, "fa3"}, {46, "fa4"},
+ {47, "fa5"}, {48, "fa6"}, {49, "fa7"}, {50, "fs2"}, {51, "fs3"},
+ {52, "fs4"}, {53, "fs5"}, {54, "fs6"}, {55, "fs7"}, {56, "fs8"},
+ {57, "fs9"}, {58, "fs10"}, {59, "fs11"}, {60, "ft8"}, {61, "ft9"},
+ {62, "ft10"}, {63, "ft11"},
+};
+
+/* Number of entries in RV64_REGS. */
+static const uint32_t RV64_REGS_N = (uint32_t)(sizeof RV64_REGS /
+ sizeof RV64_REGS[0]);
+
+/* Parse a numeric register alias of the form <prefix><decimal>, e.g. "x17".
+ *
+ * name:   NUL-terminated candidate; NULL is rejected.
+ * prefix: required first character ('x' or 'f' for the callers here).
+ * max:    largest accepted number, inclusive.
+ * out:    optional; receives the parsed number on success.
+ *
+ * Returns 0 on success and 1 on any mismatch: wrong prefix, no digits, a
+ * non-digit character, a value above `max`, or a zero-padded number such as
+ * "x01" (register aliases are written without leading zeros). */
+static int parse_num_suffix(const char* name, char prefix, uint32_t max,
+                            uint32_t* out) {
+  uint32_t v = 0;
+  const char* p;
+  if (!name || name[0] != prefix || name[1] == '\0') return 1;
+  /* "x0" is valid, but "x00" or "x07" is not a register name. */
+  if (name[1] == '0' && name[2] != '\0') return 1;
+  for (p = name + 1; *p; ++p) {
+    uint32_t digit;
+    if (*p < '0' || *p > '9') return 1;
+    digit = (uint32_t)(*p - '0');
+    /* Check the bound before accumulating so v * 10 + digit can neither
+     * exceed `max` nor wrap, whatever `max` is. */
+    if (digit > max || v > (max - digit) / 10u) return 1;
+    v = v * 10u + digit;
+  }
+  if (out) *out = v;
+  return 0;
+}
+
+/* Return the canonical psABI name for DWARF register `dwarf_idx`, or NULL
+ * when the table has no entry with that index. */
+const char* rv64_register_name(uint32_t dwarf_idx) {
+  const Rv64Reg* entry = RV64_REGS;
+  const Rv64Reg* const end = RV64_REGS + RV64_REGS_N;
+  while (entry != end) {
+    if (entry->dwarf_idx == dwarf_idx) {
+      return entry->name;
+    }
+    ++entry;
+  }
+  return NULL;
+}
+
+/* Translate a register name into its DWARF index.
+ *
+ * Lookup order: canonical psABI names from the table, then `xN` (0..31),
+ * then `fN` (mapped to 32 + N), then the `fp` alias for register 8.
+ * On success returns 0 and, when `idx_out` is non-NULL, stores the index.
+ * Returns 1 when `name` is NULL or not recognised. */
+int rv64_register_index(const char* name, uint32_t* idx_out) {
+  uint32_t resolved = 0;
+  uint32_t number = 0;
+  uint32_t k = 0;
+  int hit = 0;
+
+  if (!name) return 1;
+
+  while (!hit && k < RV64_REGS_N) {
+    if (strcmp(RV64_REGS[k].name, name) == 0) {
+      resolved = RV64_REGS[k].dwarf_idx;
+      hit = 1;
+    }
+    ++k;
+  }
+  if (!hit && parse_num_suffix(name, 'x', 31, &number) == 0) {
+    resolved = number;
+    hit = 1;
+  }
+  if (!hit && parse_num_suffix(name, 'f', 31, &number) == 0) {
+    resolved = 32u + number;
+    hit = 1;
+  }
+  if (!hit && strcmp(name, "fp") == 0) {
+    resolved = 8u;
+    hit = 1;
+  }
+
+  if (!hit) return 1;
+  if (idx_out) *idx_out = resolved;
+  return 0;
+}
+
+/* Number of entries that rv64_register_iter_get can enumerate. */
+uint32_t rv64_register_iter_size(void) {
+  const uint32_t count = RV64_REGS_N;
+  return count;
+}
+
+/* Fetch table entry `i`. Either output pointer may be NULL to skip that
+ * field. Returns 0 on success and 1 when `i` is past the end of the table
+ * (in which case the outputs are not written). */
+int rv64_register_iter_get(uint32_t i, uint32_t* dwarf_out,
+                           const char** name_out) {
+  const Rv64Reg* entry;
+  if (i >= RV64_REGS_N) {
+    return 1;
+  }
+  entry = &RV64_REGS[i];
+  if (dwarf_out) {
+    *dwarf_out = entry->dwarf_idx;
+  }
+  if (name_out) {
+    *name_out = entry->name;
+  }
+  return 0;
+}
diff --git a/src/arch/rv64/regs.h b/src/arch/rv64/regs.h
@@ -0,0 +1,12 @@
+#ifndef CFREE_ARCH_RV64_REGS_H
+#define CFREE_ARCH_RV64_REGS_H
+
+#include <stdint.h>
+
+/* Canonical psABI name for a DWARF register index (0..31 integer, 32..63
+ * floating point), or NULL when the index is not in the table. */
+const char* rv64_register_name(uint32_t dwarf_idx);
+/* Look up a register by psABI name, `xN`/`fN` alias, or `fp`. Returns 0 and
+ * stores the DWARF index in *idx_out (if non-NULL) on success, 1 otherwise. */
+int rv64_register_index(const char* name, uint32_t* idx_out);
+/* Number of entries available to rv64_register_iter_get. */
+uint32_t rv64_register_iter_size(void);
+/* Read table entry `i`; either output may be NULL. Returns 0 on success and
+ * 1 when `i` is out of range. */
+int rv64_register_iter_get(uint32_t i, uint32_t* dwarf_out,
+ const char** name_out);
+
+#endif
diff --git a/test/asm/decode/rv64_nop_ret.expected.txt b/test/asm/decode/rv64_nop_ret.expected.txt
@@ -0,0 +1,2 @@
+0: nop
+4: ret
diff --git a/test/asm/decode/rv64_nop_ret.hex b/test/asm/decode/rv64_nop_ret.hex
@@ -0,0 +1 @@
+1300000067800000
diff --git a/test/asm/decode/rv64_nop_ret.targets b/test/asm/decode/rv64_nop_ret.targets
@@ -0,0 +1 @@
+rv64
diff --git a/test/asm/encode/rv64_exit_zero.expected b/test/asm/encode/rv64_exit_zero.expected
@@ -0,0 +1 @@
+0
diff --git a/test/asm/encode/rv64_exit_zero.expected.hex b/test/asm/encode/rv64_exit_zero.expected.hex
@@ -0,0 +1 @@
+1305000067800000
diff --git a/test/asm/encode/rv64_exit_zero.s b/test/asm/encode/rv64_exit_zero.s
@@ -0,0 +1,6 @@
+// RV64 smoke case: defines test_main returning 0.
+.text
+.globl test_main
+test_main:
+ li a0, 0
+ ret
diff --git a/test/asm/encode/rv64_exit_zero.targets b/test/asm/encode/rv64_exit_zero.targets
@@ -0,0 +1 @@
+rv64
diff --git a/test/asm/listing/rv64_nop_ret.expected.lst b/test/asm/listing/rv64_nop_ret.expected.lst
@@ -0,0 +1,5 @@
+Disassembly of section .text:
+
+0000000000000000 <_start>:
+ 0: 00000013 nop
+ 4: 00008067 ret
diff --git a/test/asm/listing/rv64_nop_ret.in.bin b/test/asm/listing/rv64_nop_ret.in.bin
Binary files differ.
diff --git a/test/asm/listing/rv64_nop_ret.targets b/test/asm/listing/rv64_nop_ret.targets
@@ -0,0 +1 @@
+rv64