commit f26028bd7df956414c3f1fc90638cd69ec15e7c8
parent 894150d6695638fa176f671a4b288675bedbd29b
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Sat, 30 May 2026 21:39:28 -0700
rv64: emit explicit fcvt rounding mode in cc -S so clang re-encodes it exactly
The disassembler dropped the fcvt/fsqrt rounding-mode field, so cc -S emitted a
bare `fcvt.w.s a0, fa0`. cfree-as inferred the mode (RTZ for fp->int), but clang
assembling the same bare text picks its own default (DYN) — so a float->int
truncation re-assembled by clang rounded-to-nearest instead of truncating
(65_rounding_conversions exec-mismatched on the clang lane).
Render the rounding mode explicitly, matching the llvm/objdump convention
(verified: clang's bare fcvt == DYN; the suffix is shown for any other mode):
- disasm (src/arch/rv64/isa.c print_fp_cvt): append `, <rm>` for fcvt/fsqrt
when rm != dyn (rne/rtz/rdn/rup/rmm); fmv/fclass carry no rounding mode.
- asm (src/arch/rv64/asm.c): parse an optional explicit `, <rm>` operand on
FP_CVT (rv_parse_rm_name); the per-family inference remains the fallback for
a bare hand-written fcvt, so the encode goldens are unchanged.
Now cc -S emits e.g. `fcvt.l.d t6, ft10, rtz`, which clang and cfree-as both
encode identically (RTZ) -> exec matches. Updated the rv64 fp decode goldens
(now show the explicit mode, matching llvm-objdump). test-asm-rv64 43/0; rv64
clang -c sweep 312/312; rv64 cfree-as round-trip still byte-faithful.
Diffstat:
4 files changed, 74 insertions(+), 35 deletions(-)
diff --git a/src/arch/rv64/asm.c b/src/arch/rv64/asm.c
@@ -162,6 +162,25 @@ static int rv_parse_mod_reloc(AsmDriver* d, RvModPos pos, ObjSymId* sym_out,
return 1;
}
+/* Parse the rounding-mode operand after an already-consumed comma into its
+ * 3-bit funct3 value; a non-identifier token or a name other than
+ * rne/rtz/rdn/rup/rmm/dyn goes to asm_driver_panic. cc -S prints the operand
+ * on fcvt/fsqrt when the mode is not `dyn`, so it is re-encoded exactly. */
+static u32 rv_parse_rm_name(AsmDriver* d) {
+  static const struct { const char* name; u32 rm; } modes[] = {
+      {"rne", 0u}, {"rtz", 1u}, {"rdn", 2u},
+      {"rup", 3u}, {"rmm", 4u}, {"dyn", 7u}};
+  AsmTok tok = asm_driver_next(d);
+  Slice text;
+  u32 i;
+  if (tok.kind != ASM_TOK_IDENT)
+    asm_driver_panic(d, "rv64 asm: expected rounding mode");
+  text = pool_slice(asm_driver_pool(d), tok.v.ident);
+  for (i = 0; i < sizeof modes / sizeof modes[0]; i++)
+    if (slice_eq_cstr(text, modes[i].name)) return modes[i].rm;
+  asm_driver_panic(d, "rv64 asm: unknown rounding mode");
+}
+
/* Emit a relocation for a U-type / I-type immediate `%mod(sym)` operand at
* the current instruction position; returns 1 if one was present. */
static int rv_emit_imm_mod_reloc(AsmDriver* d, RvModPos pos) {
@@ -644,28 +663,33 @@ static u32 assemble_one(AsmDriver* d, const Rv64InsnDesc* desc) {
expect_comma(d);
rs1 = parse_freg(d);
}
- /* match already encodes rs2 (type selector); OR rd/rs1 and the rounding
- * mode the disassembler dropped. The rm is fixed per conversion family
- * (mirrors the rv_fcvt_* encoders in isa.h, the codegen source of
- * truth): fp->int truncates (RTZ=1); int->fp and fp->fp use DYN=7; the
- * fmv bit-moves carry no rounding (rm=0). Keyed on the funct7 in match. */
+ /* match encodes rs2 (type selector); OR in rd/rs1 and the rounding mode.
+ * An explicit `, <rm>` suffix (cc -S emits it for non-default modes, and
+ * clang/gas accept it) takes precedence; otherwise the rm is fixed per
+ * conversion family (mirrors the rv_fcvt_* encoders in isa.h, the codegen
+ * source of truth): fp->int truncates (RTZ=1); int->fp and fp->fp use the
+ * default DYN=7; fmv bit-moves carry no rounding (rm=0). */
{
u32 funct7 = (m >> 25) & 0x7fu;
u32 rm;
- switch (funct7) {
- case 0x60: /* fcvt.{w,wu,l,lu}.s */
- case 0x61: /* fcvt.{w,wu,l,lu}.d */
- rm = 0x1u; /* RTZ */
- break;
- case 0x70: /* fmv.x.w */
- case 0x71: /* fmv.x.d */
- case 0x78: /* fmv.w.x */
- case 0x79: /* fmv.d.x */
- rm = 0x0u;
- break;
- default: /* int->fp (0x68/0x69) and fp<->fp (0x20/0x21): DYN */
- rm = 0x7u;
- break;
+ if (asm_driver_eat_comma(d)) {
+ rm = rv_parse_rm_name(d);
+ } else {
+ switch (funct7) {
+ case 0x60: /* fcvt.{w,wu,l,lu}.s */
+ case 0x61: /* fcvt.{w,wu,l,lu}.d */
+ rm = 0x1u; /* RTZ */
+ break;
+ case 0x70: /* fmv.x.w */
+ case 0x71: /* fmv.x.d */
+ case 0x78: /* fmv.w.x */
+ case 0x79: /* fmv.d.x */
+ rm = 0x0u;
+ break;
+ default: /* int->fp (0x68/0x69) and fp<->fp (0x20/0x21): DYN */
+ rm = 0x7u;
+ break;
+ }
}
return m | (rm << 12) | ((rs1 & 0x1fu) << 15) | ((rd & 0x1fu) << 7);
}
diff --git a/src/arch/rv64/isa.c b/src/arch/rv64/isa.c
@@ -1637,6 +1637,21 @@ static void print_fp_cvt(StrBuf* sb, u32 w, const Rv64InsnDesc* d) {
p_freg(sb, f.rs1);
else
p_xreg(sb, f.rs1);
+ /* Print the rounding mode of fcvt / fsqrt explicitly unless it is the
+ * default `dyn` (fmv and fclass carry none; the unnamed encodings 5 and 6
+ * also print nothing). This is the objdump/clang convention — an omitted
+ * suffix means dyn — so a third-party assembler re-encodes our fp->int
+ * truncation (rtz) exactly instead of substituting its own default. */
+ if (slice_has_prefix_cstr(d->mnemonic, "fcvt.", 5) ||
+ slice_has_prefix_cstr(d->mnemonic, "fsqrt.", 6)) {
+ u32 rm = (w >> 12) & 7u;
+ static const char* const RMN[8] = {"rne", "rtz", "rdn", "rup",
+ "rmm", 0, 0, "dyn"};
+ if (rm != 7u && RMN[rm]) {
+ p_sep(sb);
+ strbuf_puts(sb, RMN[rm]);
+ }
+ }
}
static void print_amo(StrBuf* sb, u32 w) {
diff --git a/test/asm/decode/rv64_fp.expected.txt b/test/asm/decode/rv64_fp.expected.txt
@@ -6,8 +6,8 @@ c: fdiv.d ft3, ft4, ft5
14: fmax.d fs0, fs1, fs2
18: feq.s a0, fa1, fa2
1c: flt.d t0, fa3, fa4
-20: fcvt.w.s a0, fa0
-24: fcvt.s.l fa0, a0
+20: fcvt.w.s a0, fa0, rne
+24: fcvt.s.l fa0, a0, rne
28: fmv.x.w t0, ft0
2c: fmv.d.x fa0, a0
30: flw fa0, 0(sp)
diff --git a/test/asm/decode/rv64_fp_cvt.expected.txt b/test/asm/decode/rv64_fp_cvt.expected.txt
@@ -1,14 +1,14 @@
-0: fcvt.w.s a0, fa0
-4: fcvt.wu.s a1, fa1
-8: fcvt.l.s a2, fa2
-c: fcvt.lu.s a3, fa3
-10: fcvt.w.d a4, fa4
-14: fcvt.l.d a5, fa5
-18: fcvt.s.w fa0, a0
-1c: fcvt.s.wu fa1, a1
-20: fcvt.d.w fa2, a2
-24: fcvt.d.l fa3, a3
-28: fcvt.s.d fa4, fa5
-2c: fcvt.d.s fa6, fa7
-30: fsqrt.s fa0, fa1
-34: fsqrt.d fa2, fa3
+0: fcvt.w.s a0, fa0, rne
+4: fcvt.wu.s a1, fa1, rne
+8: fcvt.l.s a2, fa2, rne
+c: fcvt.lu.s a3, fa3, rne
+10: fcvt.w.d a4, fa4, rne
+14: fcvt.l.d a5, fa5, rne
+18: fcvt.s.w fa0, a0, rne
+1c: fcvt.s.wu fa1, a1, rne
+20: fcvt.d.w fa2, a2, rne
+24: fcvt.d.l fa3, a3, rne
+28: fcvt.s.d fa4, fa5, rne
+2c: fcvt.d.s fa6, fa7, rne
+30: fsqrt.s fa0, fa1, rne
+34: fsqrt.d fa2, fa3, rne