kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 892d1268979e7a1df6d56de09516d0de77dc140b
parent 94d7d4bd6cfe1d24cebed41aec3450f1f059cc09
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Thu, 21 May 2026 07:06:27 -0700

Add rv64 asm/disasm parity scaffolding

Diffstat:
A doc/RV64_PARITY_CHECKLIST.md | 171 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/arch/rv64/arch.c | 17 ++++++++++++-----
M src/arch/rv64/asm.c | 44 +++++++++-----------------------------------
A src/arch/rv64/disasm.c | 407 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/arch/rv64/disasm.h | 14 ++++++++++++++
A src/arch/rv64/regs.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/arch/rv64/regs.h | 12 ++++++++++++
A test/asm/decode/rv64_nop_ret.expected.txt | 2 ++
A test/asm/decode/rv64_nop_ret.hex | 1 +
A test/asm/decode/rv64_nop_ret.targets | 1 +
A test/asm/encode/rv64_exit_zero.expected | 1 +
A test/asm/encode/rv64_exit_zero.expected.hex | 1 +
A test/asm/encode/rv64_exit_zero.s | 6 ++++++
A test/asm/encode/rv64_exit_zero.targets | 1 +
A test/asm/listing/rv64_nop_ret.expected.lst | 5 +++++
A test/asm/listing/rv64_nop_ret.in.bin | 0
A test/asm/listing/rv64_nop_ret.targets | 1 +
17 files changed, 741 insertions(+), 40 deletions(-)

diff --git a/doc/RV64_PARITY_CHECKLIST.md b/doc/RV64_PARITY_CHECKLIST.md @@ -0,0 +1,171 @@ +# rv64 parity checklist + +Goal: bring `riscv64` / `rv64` to the same practical coverage as `aarch64` +across standalone asm, disasm, C/toy compilation, object/link output, runtime, +debug tooling, and executable test paths. + +This checklist tracks parity with the aa64 lane, not architectural feature +completeness for all RISC-V extensions. The baseline target is RV64GC Linux +ELF with the psABI double-float ABI unless a task says otherwise. + +## Asm / disasm + +- [x] Wire rv64 into `arch_disasm_new` through `src/arch/rv64/disasm.{h,c}`. +- [x] Add rv64 `test/asm` smoke coverage for text decode, object listing, hex + encode, and podman-backed ELF execution. +- [x] Add arch-scoped asm fixture applicability (`*.targets`) so aa64/x64/rv64 + cases do not fail on unrelated targets. +- [ ] Replace the current hand-written rv64 disassembler with an ISA descriptor + layer equivalent in role to `src/arch/aa64/isa.{h,c}` so encoding, + decoding, and printing share one description. +- [ ] Expand standalone rv64 asm parsing beyond the current small subset: + branches, calls, arithmetic, shifts, compares, loads/stores, AUIPC/LUI, + relocation-bearing operands, atomics, fences, CSR/system forms, scalar + FP, and backend-emitted forms. +- [ ] Expand rv64 disasm to decode every instruction emitted by rv64 codegen and + accepted by standalone asm, including unknown/truncated handling that + matches the public iterator contract. +- [ ] Add relocation/symbol annotation coverage for rv64 object disassembly. +- [ ] Update `test/asm/regen.sh` or add an rv64 variant for clang/objdump golden + regeneration. +- [ ] Make asm round-trip (`S`) meaningful for rv64 codegen output and gate the + rv64-emitted corpus on it. + +## Register API / target surface + +- [x] Add rv64 public register-name/index support for psABI names plus `xN` and + `fN` aliases. 
+- [ ] Audit all register naming users (`dbg`, asm constraints, disasm printers) + for consistent DWARF numbering: `x0..x31` as 0..31 and `f0..f31` as + 32..63. +- [ ] Verify predefined macros, driver triple parsing, target defaults, and + `cfree_test_target` setup against clang's `riscv64-linux-gnu` behavior. +- [ ] Decide policy for optional extensions (`C`, `A`, `F`, `D`, `Zicsr`, + `Zifencei`, future vector) and reflect it in target feature queries. + +## Inline asm + +- [ ] Implement rv64 inline-asm template rendering parallel to aa64: + placeholders, symbolic operands, memory operands, width/addr modifiers, + escaped percent, and statement splitting. +- [ ] Add rv64 constraint support for integer, FP, immediate, memory, matching, + early-clobber, and read-write operands. +- [ ] Verify clobbers, `"memory"`, callee-saved preservation, named registers, + and fixed-register conflicts on rv64. +- [ ] Add an rv64 inline-asm unit test parallel to + `test/arch/aa64_inline_test.c`. +- [ ] Add C and toy inline-asm execution cases that run through podman/qemu rv64. + +## C / toy codegen + +- [x] Prove a targeted rv64 C parse path can compile, link, and execute through + podman path E. +- [ ] Run and triage the full C parse corpus for rv64 at `-O0`, `-O1`, and + `-O2`; track failures by missing backend feature rather than broad skips. +- [ ] Run and triage toy cross-arch path `X` for rv64 alongside aa64 cases. +- [ ] Match aa64 coverage for scalar integer, pointer, aggregate, varargs, + atomics, intrinsics, labels, computed goto, switch lowering, tail calls, + alloca, and dynamic stack adjustment. +- [ ] Close remaining explicit rv64 backend panics in `src/arch/rv64/ops.c`, + `alloc.c`, and `emit.c`. +- [ ] Verify optimized rv64 lowering after recent opt pipeline work: liveness, + register allocation, hard-register constraints, call plans, and spill + reloads. 
+- [ ] Add targeted rv64 cases for large frames, far branches, far label-address + materialization, large immediates, and pcrel/GOT materialization. +- [ ] Add targeted rv64 FP conversion, comparison, NaN, and rounding cases. +- [ ] Add targeted rv64 atomic cases for all supported widths and memory orders. + +## ABI / platform + +- [ ] Finish psABI edge-case coverage: aggregate classification, indirect args, + mixed int/FP aggregates, homogeneous FP shapes where applicable, sret, + byval, empty/zero-sized fields, and mixed returns. +- [ ] Verify variadic functions: register save area layout, `va_list` shape, + stack argument traversal, and mixed int/FP varargs. +- [ ] Verify stack alignment, frame pointer conventions, callee-saved integer + registers `s0..s11`, and callee-saved FP registers `fs0..fs11`. +- [ ] Decide `long double` policy for rv64 (`quad` vs compatibility mode) and + align C frontend, ABI lowering, libc harnesses, and runtime helpers. +- [ ] Audit TLS models for rv64: local-exec, GOT/TLS relocations, static link, + dynamic link, and emulator/JIT behavior. + +## Object / link / driver + +- [x] Keep rv64 ELF roundtrip link corpus green for path R. +- [x] Fix `cfree objdump -d` to choose the disassembler target from the object + file rather than the host target. +- [ ] Run rv64 link path E broadly under podman and triage execution failures. +- [ ] Ensure ELF rv64 relocations cover all codegen, asm, TLS, PLT/GOT, ifunc, + linker-script, archive, and GC cases currently passing for aa64. +- [ ] Implement or explicitly reject any unsupported rv64 relocation kinds with + diagnostics that name the relocation and input object. +- [ ] Exercise `cfree as`, `cc`, `ld`, `ar`, `objdump`, `strip`, and `objcopy` + paths with rv64-specific command tests where the tool claims rv64 support. +- [ ] Verify dynamic-linker defaults for musl and glibc rv64 Linux. +- [ ] Add rv64 `objdump` golden tests for sections, symbols, relocs, and + disassembly annotations. 
+ +## Runtime / libc + +- [ ] Build `libcfree_rt.a` for `riscv64-linux` through cfree, not only host + clang probes. +- [ ] Bring rv64 coroutine/runtime support through the cfree assembler/compiler + path. +- [ ] Run `test-rt-runtime` with rv64 enabled and triage every runtime helper + failure. +- [ ] Retarget musl and glibc libc harnesses to rv64 sysroots and run the same + cases currently exercised for aa64. +- [ ] Add rv64 smoke cases that use cfree-emitted bytes for startup/runtime + paths, not only clang-produced harness binaries. +- [ ] Verify compiler-rt-style integer, FP, memory, atomic, and coroutine + helpers for rv64 ABI correctness. + +## Debug / DWARF / JIT + +- [ ] Add rv64 debugger breakpoint support (`ebreak`) and displaced-step logic. +- [ ] Add rv64 ucontext/register marshalling for supported host OSes. +- [ ] Emit and validate rv64 DWARF CFI/line-info details, including CFA rules, + frame-pointer conventions, return-address register `ra`, and FP register + numbering. +- [ ] Extend DWARF tests with rv64 producer roundtrips where instruction size + and register numbering differ from aa64. +- [ ] Fill rv64 JIT support gaps: executable memory, relocations, symbol calls, + TLS/TLV behavior, and native-host execution tests where available. +- [ ] Decide debugger scope for non-native rv64 execution; either support it + through emulation or mark it explicitly out of parity. + +## Emulator + +- [ ] Audit rv64 ELF loader behavior against aa64: program headers, auxv, + stack setup, argv/envp, TLS, brk/mmap, and dynamic loader handoff. +- [ ] Expand rv64 decode/lift coverage to match all instructions produced by + cfree rv64 codegen and clang-built harnesses. +- [ ] Add rv64 syscall coverage for libc and smoke workloads. +- [ ] Add emulator regression tests for rv64 branches, calls, atomics, FP, TLS, + and signals/traps. 
+ +## Execution infrastructure + +- [x] Use podman `--platform linux/riscv64` for rv64 execution when no native or + qemu-user runner is available. +- [x] Prove `test-smoke-rv64` direct and batched execution paths. +- [x] Prove `test/asm` rv64 path E through podman. +- [x] Prove a targeted `test/parse` rv64 path E through podman. +- [ ] Run larger rv64 E matrices under podman with batching and record stable + filters for CI-equivalent local runs. +- [ ] Add clear diagnostics for missing podman image/platform support, binfmt, + qemu-user, or clang rv64 cross support. +- [ ] Decide default images for `RUN_RV64_IMAGE` across musl/glibc tests. + +## Test policy + +- [ ] Add rv64-targeted filters/goldens for each new feature as it lands. +- [ ] Keep skips explicit and arch-scoped through `*.targets`, not hidden in + harness defaults. +- [ ] Prefer red/green targeted runs: one failing feature family at a time, + one arch at a time. +- [ ] Promote stable rv64 lanes into default or CI-equivalent coverage once the + runner assumptions are reliable. +- [ ] Keep aa64 lanes green while changing shared asm/disasm/link/test harness + code. 
diff --git a/src/arch/rv64/arch.c b/src/arch/rv64/arch.c @@ -2,6 +2,8 @@ #include "abi/abi_internal.h" #include "arch/rv64/asm.h" +#include "arch/rv64/disasm.h" +#include "arch/rv64/regs.h" #include "arch/rv64/rv64.h" #include "core/bytes.h" #include "link/link_arch.h" @@ -23,6 +25,11 @@ static const ArchElfOps rv64_elf_ops = { .reloc_from = elf_riscv64_reloc_from, }; +static int rv64_register_at_public(uint32_t idx, CfreeArchReg* out) { + if (!out) return 1; + return rv64_register_iter_get(idx, &out->dwarf_idx, &out->name); +} + static int rv64_apply_label_fixup(Compiler* c, const ArchLabelFixup* fx) { const Section* s; u8 cur[4]; @@ -102,7 +109,7 @@ const ArchImpl arch_impl_rv64 = { .abi_vtable = rv64_abi_vtable, .cgtarget_new = rv64_cgtarget_new, .asm_new = rv64_arch_asm_new, - .disasm_new = NULL, + .disasm_new = rv64_disasm_new, .apply_label_fixup = rv64_apply_label_fixup, .link = &link_arch_rv64, .elf = &rv64_elf_ops, @@ -110,8 +117,8 @@ const ArchImpl arch_impl_rv64 = { .predefined_macros = rv64_predefined_macros, .npredefined_macros = (u32)(sizeof rv64_predefined_macros / sizeof rv64_predefined_macros[0]), - .register_name = NULL, - .register_index = NULL, - .register_count = NULL, - .register_at = NULL, + .register_name = rv64_register_name, + .register_index = rv64_register_index, + .register_count = rv64_register_iter_size, + .register_at = rv64_register_at_public, }; diff --git a/src/arch/rv64/asm.c b/src/arch/rv64/asm.c @@ -3,6 +3,7 @@ #include <string.h> #include "arch/rv64/internal.h" +#include "arch/rv64/regs.h" #include "asm/asm_helpers.h" #include "core/arena.h" @@ -25,41 +26,14 @@ static int sym_eq(AsmDriver* d, Sym s, const char* lit) { static int rv_reg_from_name(AsmDriver* d, Sym s, u32* reg_out, int* fp_out) { size_t n = 0; const char* p = pool_str(asm_driver_pool(d), s, &n); - u32 r; - int fp = 0; - if (!p || !n) return 0; - if (n == 2 && p[0] == 'r' && p[1] == 'a') - r = RV_RA; - else if (n == 2 && p[0] == 's' && p[1] == 'p') - r = RV_SP; - 
else if (n == 2 && p[0] == 't' && p[1] == '0') - r = RV_T0; - else if (n == 2 && p[0] == 'a' && p[1] >= '0' && p[1] <= '7') - r = RV_A0 + (u32)(p[1] - '0'); - else if (n == 2 && p[0] == 's' && p[1] >= '0' && p[1] <= '1') - r = RV_S0 + (u32)(p[1] - '0'); - else if (n == 3 && p[0] == 's' && p[1] == '1' && p[2] >= '0' && - p[2] <= '1') - r = 26u + (u32)(p[2] - '0'); - else if (n == 2 && p[0] == 's' && p[1] >= '2' && p[1] <= '9') - r = RV_S2 + (u32)(p[1] - '2'); - else if (n == 3 && p[0] == 'f' && p[1] == 's' && p[2] >= '0' && - p[2] <= '1') { - r = RV_S0 + (u32)(p[2] - '0'); - fp = 1; - } else if (n == 4 && p[0] == 'f' && p[1] == 's' && p[2] == '1' && - p[3] >= '0' && p[3] <= '1') { - r = 26u + (u32)(p[3] - '0'); - fp = 1; - } else if (n == 3 && p[0] == 'f' && p[1] == 's' && p[2] >= '2' && - p[2] <= '9') { - r = RV_S2 + (u32)(p[2] - '2'); - fp = 1; - } else { - return 0; - } - if (reg_out) *reg_out = r; - if (fp_out) *fp_out = fp; + char name[16]; + u32 dwarf = 0; + if (!p || !n || n >= sizeof name) return 0; + memcpy(name, p, n); + name[n] = '\0'; + if (rv64_register_index(name, &dwarf) != 0) return 0; + if (reg_out) *reg_out = dwarf & 31u; + if (fp_out) *fp_out = dwarf >= 32u; return 1; } diff --git a/src/arch/rv64/disasm.c b/src/arch/rv64/disasm.c @@ -0,0 +1,407 @@ +#include "arch/rv64/disasm.h" + +#include <string.h> + +#include "arch/rv64/isa.h" +#include "core/heap.h" +#include "core/strbuf.h" + +#define RV64_DASM_MNEM_CAP 16u +#define RV64_DASM_OPS_CAP 96u +#define RV64_DASM_ANN_CAP 64u + +typedef struct Rv64Disasm { + ArchDisasm base; + Compiler* c; + Heap* heap; + char mnem_buf[RV64_DASM_MNEM_CAP]; + char ops_buf[RV64_DASM_OPS_CAP]; + char ann_buf[RV64_DASM_ANN_CAP]; + StrBuf mnem; + StrBuf ops; + StrBuf ann; +} Rv64Disasm; + +static const char* const rv_xnames[32] = { + "zero", "ra", "sp", "gp", "tp", "t0", "t1", "t2", + "s0", "s1", "a0", "a1", "a2", "a3", "a4", "a5", + "a6", "a7", "s2", "s3", "s4", "s5", "s6", "s7", + "s8", "s9", "s10", "s11", "t3", "t4", 
"t5", "t6", +}; + +static const char* const rv_fnames[32] = { + "ft0", "ft1", "ft2", "ft3", "ft4", "ft5", "ft6", "ft7", + "fs0", "fs1", "fa0", "fa1", "fa2", "fa3", "fa4", "fa5", + "fa6", "fa7", "fs2", "fs3", "fs4", "fs5", "fs6", "fs7", + "fs8", "fs9", "fs10", "fs11", "ft8", "ft9", "ft10", "ft11", +}; + +static u32 rv_read_u32_le(const u8* b) { + return (u32)b[0] | ((u32)b[1] << 8) | ((u32)b[2] << 16) | + ((u32)b[3] << 24); +} + +static i64 rv_sext(u64 v, u32 bits) { + u64 m = 1ull << (bits - 1u); + return (i64)((v ^ m) - m); +} + +static i32 rv_i_imm(u32 w) { return (i32)rv_sext(w >> 20, 12); } + +static i32 rv_s_imm(u32 w) { + u32 imm = ((w >> 7) & 0x1fu) | (((w >> 25) & 0x7fu) << 5); + return (i32)rv_sext(imm, 12); +} + +static i32 rv_b_imm(u32 w) { + u32 imm = (((w >> 31) & 0x1u) << 12) | (((w >> 7) & 0x1u) << 11) | + (((w >> 25) & 0x3fu) << 5) | (((w >> 8) & 0xfu) << 1); + return (i32)rv_sext(imm, 13); +} + +static i32 rv_j_imm(u32 w) { + u32 imm = (((w >> 31) & 0x1u) << 20) | (((w >> 12) & 0xffu) << 12) | + (((w >> 20) & 0x1u) << 11) | (((w >> 21) & 0x3ffu) << 1); + return (i32)rv_sext(imm, 21); +} + +static void rv_set(Rv64Disasm* d, const char* mnemonic) { + strbuf_reset(&d->mnem); + strbuf_puts(&d->mnem, mnemonic); + strbuf_reset(&d->ops); +} + +static void rv_reg(StrBuf* sb, u32 r) { strbuf_puts(sb, rv_xnames[r & 31u]); } + +static void rv_freg(StrBuf* sb, u32 r) { + strbuf_puts(sb, rv_fnames[r & 31u]); +} + +static void rv_sep(StrBuf* sb) { strbuf_puts(sb, ", "); } + +static void rv_addr(StrBuf* sb, i64 off, u32 base) { + strbuf_put_i64(sb, off); + strbuf_putc(sb, '('); + rv_reg(sb, base); + strbuf_putc(sb, ')'); +} + +static void rv_rel(StrBuf* sb, u64 vaddr, i64 off) { + if (vaddr) { + strbuf_put_hex_u64(sb, vaddr + (u64)off); + } else { + strbuf_putc(sb, '#'); + strbuf_put_i64(sb, off); + } +} + +static void rv_word(Rv64Disasm* d, u32 word) { + rv_set(d, ".word"); + strbuf_put_hex_u64(&d->ops, (u64)word); +} + +static const char* rv_op_name(u32 funct7, 
u32 funct3) { + if (funct7 == 0x00u) { + static const char* const names[8] = { + "add", "sll", "slt", "sltu", "xor", "srl", "or", "and", + }; + return names[funct3 & 7u]; + } + if (funct7 == 0x20u) { + if (funct3 == 0) return "sub"; + if (funct3 == 5) return "sra"; + } + if (funct7 == 0x01u) { + static const char* const names[8] = { + "mul", "mulh", "mulhsu", "mulhu", "div", "divu", "rem", "remu", + }; + return names[funct3 & 7u]; + } + return NULL; +} + +static const char* rv_op32_name(u32 funct7, u32 funct3) { + if (funct7 == 0x00u) { + if (funct3 == 0) return "addw"; + if (funct3 == 1) return "sllw"; + if (funct3 == 5) return "srlw"; + } + if (funct7 == 0x20u) { + if (funct3 == 0) return "subw"; + if (funct3 == 5) return "sraw"; + } + if (funct7 == 0x01u) { + static const char* const names[8] = { + "mulw", NULL, NULL, NULL, "divw", "divuw", "remw", "remuw", + }; + return names[funct3 & 7u]; + } + return NULL; +} + +static void rv_r_operands(Rv64Disasm* d, u32 w) { + u32 rd = (w >> 7) & 31u; + u32 rs1 = (w >> 15) & 31u; + u32 rs2 = (w >> 20) & 31u; + rv_reg(&d->ops, rd); + rv_sep(&d->ops); + rv_reg(&d->ops, rs1); + rv_sep(&d->ops); + rv_reg(&d->ops, rs2); +} + +static u32 rv_decode(ArchDisasm* base, const u8* bytes, size_t len, u64 vaddr, + CfreeInsn* out) { + Rv64Disasm* d = (Rv64Disasm*)base; + u32 w; + u32 op; + u32 rd; + u32 rs1; + u32 rs2; + u32 funct3; + u32 funct7; + const char* name; + + if (len < 4u) return 0; + w = rv_read_u32_le(bytes); + op = w & 0x7fu; + rd = (w >> 7) & 31u; + funct3 = (w >> 12) & 7u; + rs1 = (w >> 15) & 31u; + rs2 = (w >> 20) & 31u; + funct7 = (w >> 25) & 0x7fu; + + if (w == rv_nop()) { + rv_set(d, "nop"); + } else if (w == rv_ret_()) { + rv_set(d, "ret"); + } else { + switch (op) { + case RV_LUI: + rv_set(d, "lui"); + rv_reg(&d->ops, rd); + rv_sep(&d->ops); + strbuf_put_hex_u64(&d->ops, (u64)(w & 0xfffff000u)); + break; + case RV_AUIPC: + rv_set(d, "auipc"); + rv_reg(&d->ops, rd); + rv_sep(&d->ops); + strbuf_put_hex_u64(&d->ops, 
(u64)(w & 0xfffff000u)); + break; + case RV_JAL: + rv_set(d, rd == RV_ZERO ? "j" : "jal"); + if (rd != RV_ZERO) { + rv_reg(&d->ops, rd); + rv_sep(&d->ops); + } + rv_rel(&d->ops, vaddr, rv_j_imm(w)); + break; + case RV_JALR: + if (rd == RV_ZERO && rv_i_imm(w) == 0) { + rv_set(d, "jr"); + rv_reg(&d->ops, rs1); + } else { + rv_set(d, "jalr"); + rv_reg(&d->ops, rd); + rv_sep(&d->ops); + rv_addr(&d->ops, rv_i_imm(w), rs1); + } + break; + case RV_BRANCH: { + static const char* const names[8] = { + "beq", "bne", NULL, NULL, "blt", "bge", "bltu", "bgeu", + }; + name = names[funct3]; + if (!name) { + rv_word(d, w); + break; + } + rv_set(d, name); + rv_reg(&d->ops, rs1); + rv_sep(&d->ops); + rv_reg(&d->ops, rs2); + rv_sep(&d->ops); + rv_rel(&d->ops, vaddr, rv_b_imm(w)); + break; + } + case RV_LOAD: { + static const char* const names[8] = { + "lb", "lh", "lw", "ld", "lbu", "lhu", "lwu", NULL, + }; + name = names[funct3]; + if (!name) { + rv_word(d, w); + break; + } + rv_set(d, name); + rv_reg(&d->ops, rd); + rv_sep(&d->ops); + rv_addr(&d->ops, rv_i_imm(w), rs1); + break; + } + case RV_STORE: { + static const char* const names[8] = { + "sb", "sh", "sw", "sd", NULL, NULL, NULL, NULL, + }; + name = names[funct3]; + if (!name) { + rv_word(d, w); + break; + } + rv_set(d, name); + rv_reg(&d->ops, rs2); + rv_sep(&d->ops); + rv_addr(&d->ops, rv_s_imm(w), rs1); + break; + } + case RV_LOAD_FP: + if (funct3 == 2 || funct3 == 3) { + rv_set(d, funct3 == 2 ? "flw" : "fld"); + rv_freg(&d->ops, rd); + rv_sep(&d->ops); + rv_addr(&d->ops, rv_i_imm(w), rs1); + } else { + rv_word(d, w); + } + break; + case RV_STORE_FP: + if (funct3 == 2 || funct3 == 3) { + rv_set(d, funct3 == 2 ? 
"fsw" : "fsd"); + rv_freg(&d->ops, rs2); + rv_sep(&d->ops); + rv_addr(&d->ops, rv_s_imm(w), rs1); + } else { + rv_word(d, w); + } + break; + case RV_OP_IMM: + if (funct3 == 0 && rs1 == RV_ZERO) { + rv_set(d, "li"); + rv_reg(&d->ops, rd); + rv_sep(&d->ops); + strbuf_put_i64(&d->ops, rv_i_imm(w)); + } else if (funct3 == 0 && rv_i_imm(w) == 0) { + rv_set(d, "mv"); + rv_reg(&d->ops, rd); + rv_sep(&d->ops); + rv_reg(&d->ops, rs1); + } else { + static const char* const names[8] = { + "addi", NULL, "slti", "sltiu", "xori", NULL, "ori", "andi", + }; + if (funct3 == 1) { + rv_set(d, "slli"); + rv_reg(&d->ops, rd); + rv_sep(&d->ops); + rv_reg(&d->ops, rs1); + rv_sep(&d->ops); + strbuf_put_u64(&d->ops, (w >> 20) & 0x3fu); + } else if (funct3 == 5 && ((w >> 26) == 0x00u || + (w >> 26) == 0x10u)) { + rv_set(d, (w >> 26) == 0x10u ? "srai" : "srli"); + rv_reg(&d->ops, rd); + rv_sep(&d->ops); + rv_reg(&d->ops, rs1); + rv_sep(&d->ops); + strbuf_put_u64(&d->ops, (w >> 20) & 0x3fu); + } else if (names[funct3]) { + rv_set(d, names[funct3]); + rv_reg(&d->ops, rd); + rv_sep(&d->ops); + rv_reg(&d->ops, rs1); + rv_sep(&d->ops); + strbuf_put_i64(&d->ops, rv_i_imm(w)); + } else { + rv_word(d, w); + } + } + break; + case RV_OP_IMM_32: + if (funct3 == 0) { + rv_set(d, "addiw"); + rv_reg(&d->ops, rd); + rv_sep(&d->ops); + rv_reg(&d->ops, rs1); + rv_sep(&d->ops); + strbuf_put_i64(&d->ops, rv_i_imm(w)); + } else if (funct3 == 1 && funct7 == 0) { + rv_set(d, "slliw"); + rv_reg(&d->ops, rd); + rv_sep(&d->ops); + rv_reg(&d->ops, rs1); + rv_sep(&d->ops); + strbuf_put_u64(&d->ops, rs2); + } else if (funct3 == 5 && (funct7 == 0 || funct7 == 0x20u)) { + rv_set(d, funct7 == 0x20u ? 
"sraiw" : "srliw"); + rv_reg(&d->ops, rd); + rv_sep(&d->ops); + rv_reg(&d->ops, rs1); + rv_sep(&d->ops); + strbuf_put_u64(&d->ops, rs2); + } else { + rv_word(d, w); + } + break; + case RV_OP: + name = rv_op_name(funct7, funct3); + if (name) { + rv_set(d, name); + rv_r_operands(d, w); + } else { + rv_word(d, w); + } + break; + case RV_OP_32: + name = rv_op32_name(funct7, funct3); + if (name) { + rv_set(d, name); + rv_r_operands(d, w); + } else { + rv_word(d, w); + } + break; + case RV_SYSTEM: + if (w == rv_ecall()) { + rv_set(d, "ecall"); + } else if (w == rv_ebreak()) { + rv_set(d, "ebreak"); + } else { + rv_word(d, w); + } + break; + default: + rv_word(d, w); + break; + } + } + + strbuf_reset(&d->ann); + out->vaddr = vaddr; + out->bytes = bytes; + out->nbytes = 4; + out->mnemonic = strbuf_cstr(&d->mnem); + out->operands = strbuf_cstr(&d->ops); + out->annotation = strbuf_cstr(&d->ann); + return 4; +} + +static void rv64_destroy(ArchDisasm* base) { + Rv64Disasm* d = (Rv64Disasm*)base; + d->heap->free(d->heap, d, sizeof(*d)); +} + +ArchDisasm* rv64_disasm_new(Compiler* c) { + Heap* h = (Heap*)c->ctx->heap; + Rv64Disasm* d = (Rv64Disasm*)h->alloc(h, sizeof(*d), _Alignof(Rv64Disasm)); + if (!d) return NULL; + memset(d, 0, sizeof(*d)); + d->c = c; + d->heap = h; + d->base.decode = rv_decode; + d->base.destroy = rv64_destroy; + strbuf_init(&d->mnem, d->mnem_buf, sizeof d->mnem_buf); + strbuf_init(&d->ops, d->ops_buf, sizeof d->ops_buf); + strbuf_init(&d->ann, d->ann_buf, sizeof d->ann_buf); + return &d->base; +} diff --git a/src/arch/rv64/disasm.h b/src/arch/rv64/disasm.h @@ -0,0 +1,14 @@ +#ifndef CFREE_ARCH_RV64_DISASM_H +#define CFREE_ARCH_RV64_DISASM_H + +/* RV64 disassembler -- ArchDisasm implementation. + * + * This is intentionally table-light for now: it decodes the RV64I/M/FD + * forms that the assembler and backend already emit in smoke and frontend + * paths, and falls back to `.word` for unknown 32-bit instructions. 
*/ + +#include "arch/arch.h" + +ArchDisasm* rv64_disasm_new(Compiler*); + +#endif diff --git a/src/arch/rv64/regs.c b/src/arch/rv64/regs.c @@ -0,0 +1,97 @@ +/* RV64 register name table -- DWARF index <-> psABI assembler name. + * + * RISC-V DWARF numbering uses 0..31 for x-registers and 32..63 for + * f-registers. Canonical names use psABI spellings; xN/fN aliases are + * accepted by lookup. */ + +#include "arch/rv64/regs.h" + +#include <stdint.h> +#include <string.h> + +#include "core/core.h" + +typedef struct Rv64Reg { + uint32_t dwarf_idx; + const char* name; +} Rv64Reg; + +static const Rv64Reg RV64_REGS[] = { + {0, "zero"}, {1, "ra"}, {2, "sp"}, {3, "gp"}, {4, "tp"}, + {5, "t0"}, {6, "t1"}, {7, "t2"}, {8, "s0"}, {9, "s1"}, + {10, "a0"}, {11, "a1"}, {12, "a2"}, {13, "a3"}, {14, "a4"}, + {15, "a5"}, {16, "a6"}, {17, "a7"}, {18, "s2"}, {19, "s3"}, + {20, "s4"}, {21, "s5"}, {22, "s6"}, {23, "s7"}, {24, "s8"}, + {25, "s9"}, {26, "s10"}, {27, "s11"}, {28, "t3"}, {29, "t4"}, + {30, "t5"}, {31, "t6"}, + + {32, "ft0"}, {33, "ft1"}, {34, "ft2"}, {35, "ft3"}, {36, "ft4"}, + {37, "ft5"}, {38, "ft6"}, {39, "ft7"}, {40, "fs0"}, {41, "fs1"}, + {42, "fa0"}, {43, "fa1"}, {44, "fa2"}, {45, "fa3"}, {46, "fa4"}, + {47, "fa5"}, {48, "fa6"}, {49, "fa7"}, {50, "fs2"}, {51, "fs3"}, + {52, "fs4"}, {53, "fs5"}, {54, "fs6"}, {55, "fs7"}, {56, "fs8"}, + {57, "fs9"}, {58, "fs10"}, {59, "fs11"}, {60, "ft8"}, {61, "ft9"}, + {62, "ft10"}, {63, "ft11"}, +}; + +static const uint32_t RV64_REGS_N = (uint32_t)(sizeof RV64_REGS / + sizeof RV64_REGS[0]); + +static int parse_num_suffix(const char* name, char prefix, uint32_t max, + uint32_t* out) { + uint32_t v = 0; + const char* p; + if (!name || name[0] != prefix || name[1] == '\0') return 1; + p = name + 1; + while (*p) { + if (*p < '0' || *p > '9') return 1; + v = v * 10u + (uint32_t)(*p - '0'); + if (v > max) return 1; + ++p; + } + if (out) *out = v; + return 0; +} + +const char* rv64_register_name(uint32_t dwarf_idx) { + uint32_t i; + for (i = 
0; i < RV64_REGS_N; ++i) { + if (RV64_REGS[i].dwarf_idx == dwarf_idx) return RV64_REGS[i].name; + } + return NULL; +} + +int rv64_register_index(const char* name, uint32_t* idx_out) { + uint32_t i; + uint32_t n; + if (!name) return 1; + for (i = 0; i < RV64_REGS_N; ++i) { + if (!strcmp(RV64_REGS[i].name, name)) { + if (idx_out) *idx_out = RV64_REGS[i].dwarf_idx; + return 0; + } + } + if (!parse_num_suffix(name, 'x', 31, &n)) { + if (idx_out) *idx_out = n; + return 0; + } + if (!parse_num_suffix(name, 'f', 31, &n)) { + if (idx_out) *idx_out = 32u + n; + return 0; + } + if (!strcmp(name, "fp")) { + if (idx_out) *idx_out = 8u; + return 0; + } + return 1; +} + +uint32_t rv64_register_iter_size(void) { return RV64_REGS_N; } + +int rv64_register_iter_get(uint32_t i, uint32_t* dwarf_out, + const char** name_out) { + if (i >= RV64_REGS_N) return 1; + if (dwarf_out) *dwarf_out = RV64_REGS[i].dwarf_idx; + if (name_out) *name_out = RV64_REGS[i].name; + return 0; +} diff --git a/src/arch/rv64/regs.h b/src/arch/rv64/regs.h @@ -0,0 +1,12 @@ +#ifndef CFREE_ARCH_RV64_REGS_H +#define CFREE_ARCH_RV64_REGS_H + +#include <stdint.h> + +const char* rv64_register_name(uint32_t dwarf_idx); +int rv64_register_index(const char* name, uint32_t* idx_out); +uint32_t rv64_register_iter_size(void); +int rv64_register_iter_get(uint32_t i, uint32_t* dwarf_out, + const char** name_out); + +#endif diff --git a/test/asm/decode/rv64_nop_ret.expected.txt b/test/asm/decode/rv64_nop_ret.expected.txt @@ -0,0 +1,2 @@ +0: nop +4: ret diff --git a/test/asm/decode/rv64_nop_ret.hex b/test/asm/decode/rv64_nop_ret.hex @@ -0,0 +1 @@ +1300000067800000 diff --git a/test/asm/decode/rv64_nop_ret.targets b/test/asm/decode/rv64_nop_ret.targets @@ -0,0 +1 @@ +rv64 diff --git a/test/asm/encode/rv64_exit_zero.expected b/test/asm/encode/rv64_exit_zero.expected @@ -0,0 +1 @@ +0 diff --git a/test/asm/encode/rv64_exit_zero.expected.hex b/test/asm/encode/rv64_exit_zero.expected.hex @@ -0,0 +1 @@ +1305000067800000 diff --git 
a/test/asm/encode/rv64_exit_zero.s b/test/asm/encode/rv64_exit_zero.s
@@ -0,0 +1,6 @@
+// RV64 smoke case: defines test_main returning 0.
+.text
+.globl test_main
+test_main:
+ li a0, 0
+ ret
diff --git a/test/asm/encode/rv64_exit_zero.targets b/test/asm/encode/rv64_exit_zero.targets
@@ -0,0 +1 @@
+rv64
diff --git a/test/asm/listing/rv64_nop_ret.expected.lst b/test/asm/listing/rv64_nop_ret.expected.lst
@@ -0,0 +1,5 @@
+Disassembly of section .text:
+
+0000000000000000 <_start>:
+ 0: 00000013 nop
+ 4: 00008067 ret
diff --git a/test/asm/listing/rv64_nop_ret.in.bin b/test/asm/listing/rv64_nop_ret.in.bin
Binary files differ.
diff --git a/test/asm/listing/rv64_nop_ret.targets b/test/asm/listing/rv64_nop_ret.targets
@@ -0,0 +1 @@
+rv64