kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit f26028bd7df956414c3f1fc90638cd69ec15e7c8
parent 894150d6695638fa176f671a4b288675bedbd29b
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Sat, 30 May 2026 21:39:28 -0700

rv64: emit explicit fcvt rounding mode in cc -S so clang re-encodes it exactly

The disassembler dropped the fcvt/fsqrt rounding-mode field, so cc -S emitted a
bare `fcvt.w.s a0, fa0`. cfree-as inferred the mode (RTZ for fp->int), but clang,
assembling the same bare text, picks its own default (DYN, which defers to the
frm CSR — normally round-to-nearest) — so a float->int truncation re-assembled
by clang rounded to nearest instead of truncating (65_rounding_conversions
showed an exec mismatch on the clang lane).

Render the rounding mode explicitly, matching the llvm/objdump convention
(verified: clang's bare fcvt == DYN; the suffix is shown for any other mode):
- disasm (src/arch/rv64/isa.c print_fp_cvt): append `, <rm>` for fcvt/fsqrt
  when rm != dyn (rne/rtz/rdn/rup/rmm); fmv/fclass carry no rounding mode.
- asm (src/arch/rv64/asm.c): parse an optional explicit `, <rm>` operand on
  FP_CVT (rv_parse_rm_name); the per-family inference remains the fallback for
  a bare hand-written fcvt, so the encode goldens are unchanged.

Now cc -S emits e.g. `fcvt.l.d t6, ft10, rtz`, which clang and cfree-as both
encode identically (RTZ) -> exec matches. Updated the rv64 fp decode goldens
(now show the explicit mode, matching llvm-objdump). test-asm-rv64 43/0; rv64
clang -c sweep 312/312; rv64 cfree-as round-trip still byte-faithful.

Diffstat:
M src/arch/rv64/asm.c | 62 +++++++++++++++++++++++++++++++++++++++++++-------------------
M src/arch/rv64/isa.c | 15 +++++++++++++++
M test/asm/decode/rv64_fp.expected.txt | 4 ++--
M test/asm/decode/rv64_fp_cvt.expected.txt | 28 ++++++++++++++--------------
4 files changed, 74 insertions(+), 35 deletions(-)

diff --git a/src/arch/rv64/asm.c b/src/arch/rv64/asm.c @@ -162,6 +162,25 @@ static int rv_parse_mod_reloc(AsmDriver* d, RvModPos pos, ObjSymId* sym_out, return 1; } +/* Parse a RISC-V rounding-mode mnemonic (the comma is already consumed) into + * its 3-bit funct3 value. cc -S emits this suffix on fcvt/fsqrt when the mode + * isn't the default `dyn`, so the round-trip (and clang) re-encode the exact + * mode rather than guessing a default. */ +static u32 rv_parse_rm_name(AsmDriver* d) { + AsmTok t = asm_driver_next(d); + Slice s; + if (t.kind != ASM_TOK_IDENT) + asm_driver_panic(d, "rv64 asm: expected rounding mode"); + s = pool_slice(asm_driver_pool(d), t.v.ident); + if (slice_eq_cstr(s, "rne")) return 0u; + if (slice_eq_cstr(s, "rtz")) return 1u; + if (slice_eq_cstr(s, "rdn")) return 2u; + if (slice_eq_cstr(s, "rup")) return 3u; + if (slice_eq_cstr(s, "rmm")) return 4u; + if (slice_eq_cstr(s, "dyn")) return 7u; + asm_driver_panic(d, "rv64 asm: unknown rounding mode"); +} + /* Emit a relocation for a U-type / I-type immediate `%mod(sym)` operand at * the current instruction position; returns 1 if one was present. */ static int rv_emit_imm_mod_reloc(AsmDriver* d, RvModPos pos) { @@ -644,28 +663,33 @@ static u32 assemble_one(AsmDriver* d, const Rv64InsnDesc* desc) { expect_comma(d); rs1 = parse_freg(d); } - /* match already encodes rs2 (type selector); OR rd/rs1 and the rounding - * mode the disassembler dropped. The rm is fixed per conversion family - * (mirrors the rv_fcvt_* encoders in isa.h, the codegen source of - * truth): fp->int truncates (RTZ=1); int->fp and fp->fp use DYN=7; the - * fmv bit-moves carry no rounding (rm=0). Keyed on the funct7 in match. */ + /* match encodes rs2 (type selector); OR in rd/rs1 and the rounding mode. 
+ * An explicit `, <rm>` suffix (cc -S emits it for non-default modes, and + * clang/gas accept it) takes precedence; otherwise the rm is fixed per + * conversion family (mirrors the rv_fcvt_* encoders in isa.h, the codegen + * source of truth): fp->int truncates (RTZ=1); int->fp and fp->fp use the + * default DYN=7; fmv bit-moves carry no rounding (rm=0). */ { u32 funct7 = (m >> 25) & 0x7fu; u32 rm; - switch (funct7) { - case 0x60: /* fcvt.{w,wu,l,lu}.s */ - case 0x61: /* fcvt.{w,wu,l,lu}.d */ - rm = 0x1u; /* RTZ */ - break; - case 0x70: /* fmv.x.w */ - case 0x71: /* fmv.x.d */ - case 0x78: /* fmv.w.x */ - case 0x79: /* fmv.d.x */ - rm = 0x0u; - break; - default: /* int->fp (0x68/0x69) and fp<->fp (0x20/0x21): DYN */ - rm = 0x7u; - break; + if (asm_driver_eat_comma(d)) { + rm = rv_parse_rm_name(d); + } else { + switch (funct7) { + case 0x60: /* fcvt.{w,wu,l,lu}.s */ + case 0x61: /* fcvt.{w,wu,l,lu}.d */ + rm = 0x1u; /* RTZ */ + break; + case 0x70: /* fmv.x.w */ + case 0x71: /* fmv.x.d */ + case 0x78: /* fmv.w.x */ + case 0x79: /* fmv.d.x */ + rm = 0x0u; + break; + default: /* int->fp (0x68/0x69) and fp<->fp (0x20/0x21): DYN */ + rm = 0x7u; + break; + } } return m | (rm << 12) | ((rs1 & 0x1fu) << 15) | ((rd & 0x1fu) << 7); } diff --git a/src/arch/rv64/isa.c b/src/arch/rv64/isa.c @@ -1637,6 +1637,21 @@ static void print_fp_cvt(StrBuf* sb, u32 w, const Rv64InsnDesc* d) { p_freg(sb, f.rs1); else p_xreg(sb, f.rs1); + /* Explicit rounding mode for the rounding conversions (fcvt / fsqrt) when it + * isn't the default `dyn` — fmv and fclass carry no rounding mode. Matches + * the objdump/clang convention (an omitted suffix means dyn), so a third- + * party assembler re-encodes our fp->int truncation (rtz) exactly rather + * than substituting its own default. 
*/ + if (slice_has_prefix_cstr(d->mnemonic, "fcvt.", 5) || + slice_has_prefix_cstr(d->mnemonic, "fsqrt.", 6)) { + u32 rm = (w >> 12) & 7u; + static const char* const RMN[8] = {"rne", "rtz", "rdn", "rup", + "rmm", 0, 0, "dyn"}; + if (rm != 7u && RMN[rm]) { + p_sep(sb); + strbuf_puts(sb, RMN[rm]); + } + } } static void print_amo(StrBuf* sb, u32 w) { diff --git a/test/asm/decode/rv64_fp.expected.txt b/test/asm/decode/rv64_fp.expected.txt @@ -6,8 +6,8 @@ c: fdiv.d ft3, ft4, ft5 14: fmax.d fs0, fs1, fs2 18: feq.s a0, fa1, fa2 1c: flt.d t0, fa3, fa4 -20: fcvt.w.s a0, fa0 -24: fcvt.s.l fa0, a0 +20: fcvt.w.s a0, fa0, rne +24: fcvt.s.l fa0, a0, rne 28: fmv.x.w t0, ft0 2c: fmv.d.x fa0, a0 30: flw fa0, 0(sp) diff --git a/test/asm/decode/rv64_fp_cvt.expected.txt b/test/asm/decode/rv64_fp_cvt.expected.txt @@ -1,14 +1,14 @@ -0: fcvt.w.s a0, fa0 -4: fcvt.wu.s a1, fa1 -8: fcvt.l.s a2, fa2 -c: fcvt.lu.s a3, fa3 -10: fcvt.w.d a4, fa4 -14: fcvt.l.d a5, fa5 -18: fcvt.s.w fa0, a0 -1c: fcvt.s.wu fa1, a1 -20: fcvt.d.w fa2, a2 -24: fcvt.d.l fa3, a3 -28: fcvt.s.d fa4, fa5 -2c: fcvt.d.s fa6, fa7 -30: fsqrt.s fa0, fa1 -34: fsqrt.d fa2, fa3 +0: fcvt.w.s a0, fa0, rne +4: fcvt.wu.s a1, fa1, rne +8: fcvt.l.s a2, fa2, rne +c: fcvt.lu.s a3, fa3, rne +10: fcvt.w.d a4, fa4, rne +14: fcvt.l.d a5, fa5, rne +18: fcvt.s.w fa0, a0, rne +1c: fcvt.s.wu fa1, a1, rne +20: fcvt.d.w fa2, a2, rne +24: fcvt.d.l fa3, a3, rne +28: fcvt.s.d fa4, fa5, rne +2c: fcvt.d.s fa6, fa7, rne +30: fsqrt.s fa0, fa1, rne +34: fsqrt.d fa2, fa3, rne