commit 05bca5fe8e103711afaf75836d6aa5af3b483fe1
parent 74c0977fc5d7ae1c26c93b150264c4b617a1f164
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Fri, 29 May 2026 14:20:35 -0700
aa64 asm: encode scalar floating-point instructions
The aa64 standalone assembler's kTable had zero FP mnemonics, so the as tool
and inline asm() could not assemble any FP arithmetic/convert/move even though
the disassembler (just added) decodes them and codegen emits them. Add the
encode side:
fadd/fsub/fmul/fdiv, fneg/fabs/fsqrt, fmov (reg + gpr<->fp both widths),
fcmp, fcvt (single<->double), scvtf/ucvtf, fcvtzs/fcvtzu.
Bit-knowledge lives in isa.h (aa64_fp_dp2/dp1/fcmp_reg/fcvt_prec/fp_int_cvt
with named op/opcode constants) shared with the FP_* decode rows, plus an
Sn/Dn/Hn scalar register parser and fmov's class-dispatched operand parsing.
Verified: cfree's `as` output is byte-identical to clang's for all 20 FP forms.
Adds an aa64_fp encode corpus case to the default suite.
Diffstat:
5 files changed, 235 insertions(+), 0 deletions(-)
diff --git a/src/arch/aa64/asm.c b/src/arch/aa64/asm.c
@@ -1129,7 +1129,174 @@ static void p_b_gt(AsmDriver* d) { p_b_cond(d, 12); }
static void p_b_le(AsmDriver* d) { p_b_cond(d, 13); }
static void p_b_al(AsmDriver* d) { p_b_cond(d, 14); }
+/* ---- Scalar floating-point ----
+ * Sn/Dn/Hn are the single/double/half views of the FP register file; the
+ * 2-bit ftype (0=s,1=d,3=h) drives both the encoding and the operand text. */
+
+/* Try to read `ident` as a scalar FP register name: one class letter
+ * (s/d/h, either case) followed by a decimal register number in 0..31.
+ * On success stores the number in *num and the ftype in *ftype and returns 1.
+ * On failure returns 0 and leaves both outputs untouched.
+ * NOTE(review): leading zeros are accepted ("s01" parses as s1) — confirm
+ * that this leniency is intended. */
+static int parse_fp_scalar_from_ident(AsmDriver* d, Sym ident, u32* num,
+                                      u32* ftype) {
+  Slice name = pool_slice(asm_driver_pool(d), ident);
+  const char* text = name.s;
+  size_t len = name.len;
+  size_t pos = 1;
+  u32 kind;
+  u32 value = 0;
+
+  /* Need the class letter plus at least one digit. */
+  if (!text || len < 2) return 0;
+
+  switch (text[0]) {
+    case 's':
+    case 'S':
+      kind = 0u;
+      break;
+    case 'd':
+    case 'D':
+      kind = 1u;
+      break;
+    case 'h':
+    case 'H':
+      kind = 3u;
+      break;
+    default:
+      return 0;
+  }
+
+  while (pos < len) {
+    char ch = text[pos++];
+    if (ch < '0' || ch > '9') return 0;
+    value = value * 10u + (u32)(ch - '0');
+    /* Range-checked after every digit, so value stays small. */
+    if (value > 31u) return 0;
+  }
+
+  *num = value;
+  *ftype = kind;
+  return 1;
+}
+
+/* Consume the next token and require it to be a scalar FP register
+ * (Sn/Dn/Hn); the register number and ftype are written to *num / *ftype.
+ * Any other token is reported through asm_driver_panic. */
+static void parse_fp_scalar(AsmDriver* d, u32* num, u32* ftype) {
+  AsmTok tok = asm_driver_next(d);
+  int ok = tok.kind == ASM_TOK_IDENT &&
+           parse_fp_scalar_from_ident(d, tok.v.ident, num, ftype);
+  if (!ok) {
+    asm_driver_panic(d, "asm: expected FP register (Sn/Dn/Hn)");
+  }
+}
+
+/* A register operand that may be either a GPR or a scalar FP register — used
+ * by fmov, whose three forms differ only by operand class.
+ * `is_fp` selects which of `ftype` / `is64` is meaningful; the other field is
+ * left zeroed by parse_fp_or_gpr. */
+typedef struct FpOrGpr {
+ int is_fp; /* 1: scalar FP register, 0: general-purpose register */
+ u32 num; /* register number, either class */
+ u32 ftype; /* when is_fp: 0=s, 1=d, 3=h */
+ int is64; /* when !is_fp: copied from AA64Reg.is64 */
+} FpOrGpr;
+
+/* Consume one token and classify it as a scalar FP register or a GPR.
+ * The FP spelling is tried first, then parse_reg_from_ident. Anything else
+ * (including a non-identifier token) is reported through asm_driver_panic. */
+static FpOrGpr parse_fp_or_gpr(AsmDriver* d) {
+  FpOrGpr out;
+  AA64Reg gpr;
+  AsmTok tok = asm_driver_next(d);
+
+  memset(&out, 0, sizeof out);
+  memset(&gpr, 0, sizeof gpr);
+
+  if (tok.kind == ASM_TOK_IDENT) {
+    if (parse_fp_scalar_from_ident(d, tok.v.ident, &out.num, &out.ftype)) {
+      out.is_fp = 1;
+      return out;
+    }
+    if (parse_reg_from_ident(d, tok.v.ident, &gpr)) {
+      out.is_fp = 0;
+      out.num = gpr.num;
+      out.is64 = (int)gpr.is64;
+      return out;
+    }
+  }
+
+  asm_driver_panic(d, "asm: fmov: expected register");
+  return out; /* unreachable */
+}
+
+/* Shared parser for the three-operand scalar FP forms `Vd, Vn, Vm`.
+ * All three registers must be the same class (s/d/h); `op` is the
+ * AA64_FP_DP2_* value handed straight to aa64_fp_dp2. */
+static void p_fp_dp2(AsmDriver* d, u32 op) {
+  u32 dst, src1, src2;
+  u32 dst_ft, src1_ft, src2_ft;
+
+  parse_fp_scalar(d, &dst, &dst_ft);
+  expect_comma(d, "fp");
+  parse_fp_scalar(d, &src1, &src1_ft);
+  expect_comma(d, "fp");
+  parse_fp_scalar(d, &src2, &src2_ft);
+
+  if (!(dst_ft == src1_ft && dst_ft == src2_ft)) {
+    asm_driver_panic(d, "asm: fp: operand type mismatch");
+  }
+  emit32(d, aa64_fp_dp2(dst_ft, op, dst, src1, src2));
+}
+/* Shared parser for the two-operand scalar FP forms `Vd, Vn`.
+ * Both registers must be the same class (s/d/h); `op` is the
+ * AA64_FP_DP1_* value handed straight to aa64_fp_dp1. */
+static void p_fp_dp1(AsmDriver* d, u32 op) {
+  u32 dst, src;
+  u32 dst_ft, src_ft;
+
+  parse_fp_scalar(d, &dst, &dst_ft);
+  expect_comma(d, "fp");
+  parse_fp_scalar(d, &src, &src_ft);
+
+  if (dst_ft != src_ft) {
+    asm_driver_panic(d, "asm: fp: operand type mismatch");
+  }
+  emit32(d, aa64_fp_dp1(dst_ft, op, dst, src));
+}
+/* Mnemonic entry points: each binds one AA64_FP_DP2_* / AA64_FP_DP1_* op
+ * value to the shared three-operand or two-operand parser. */
+static void p_fadd(AsmDriver* d) { p_fp_dp2(d, AA64_FP_DP2_FADD); }
+static void p_fsub(AsmDriver* d) { p_fp_dp2(d, AA64_FP_DP2_FSUB); }
+static void p_fmul(AsmDriver* d) { p_fp_dp2(d, AA64_FP_DP2_FMUL); }
+static void p_fdiv(AsmDriver* d) { p_fp_dp2(d, AA64_FP_DP2_FDIV); }
+static void p_fneg(AsmDriver* d) { p_fp_dp1(d, AA64_FP_DP1_FNEG); }
+static void p_fabs(AsmDriver* d) { p_fp_dp1(d, AA64_FP_DP1_FABS); }
+static void p_fsqrt(AsmDriver* d) { p_fp_dp1(d, AA64_FP_DP1_FSQRT); }
+
+/* fcmp Vn, Vm: register-register compare. Both operands must be the same
+ * class (s/d/h). Only the two-register form is parsed here. */
+static void p_fcmp(AsmDriver* d) {
+  u32 lhs, rhs;
+  u32 lhs_ft, rhs_ft;
+
+  parse_fp_scalar(d, &lhs, &lhs_ft);
+  expect_comma(d, "fcmp");
+  parse_fp_scalar(d, &rhs, &rhs_ft);
+
+  if (lhs_ft != rhs_ft) {
+    asm_driver_panic(d, "asm: fcmp: operand type mismatch");
+  }
+  emit32(d, aa64_fcmp_reg(lhs_ft, lhs, rhs));
+}
+/* fcvt Vd, Vn: convert between scalar FP precisions. The source register
+ * class supplies the instruction's ftype and the destination class supplies
+ * the target-precision field, so the two must differ: a same-precision fcvt
+ * (e.g. `fcvt d0, d1`) has no allocated encoding and is rejected here rather
+ * than emitted as an undefined instruction word. */
+static void p_fcvt(AsmDriver* d) {
+  u32 rd, rn, ftd, ftn;
+  parse_fp_scalar(d, &rd, &ftd);
+  expect_comma(d, "fcvt");
+  parse_fp_scalar(d, &rn, &ftn);
+  if (ftd == ftn)
+    asm_driver_panic(
+        d, "asm: fcvt: source and destination precision must differ");
+  emit32(d, aa64_fcvt_prec(ftn /*src*/, ftd /*dst*/, rd, rn));
+}
+/* scvtf/ucvtf: FP dst, GPR src.
+ * The GPR's is64 flag becomes the sf bit and the FP register class becomes
+ * ftype; `opcode` is the AA64_FP_ICVT_* value for the mnemonic. */
+static void p_cvtf(AsmDriver* d, u32 opcode) {
+  u32 fp_dst, fp_type;
+  AA64Reg gpr_src;
+
+  parse_fp_scalar(d, &fp_dst, &fp_type);
+  expect_comma(d, "cvtf");
+  gpr_src = parse_reg(d);
+
+  emit32(d, aa64_fp_int_cvt((u32)gpr_src.is64, fp_type, opcode, fp_dst,
+                            gpr_src.num));
+}
+/* fcvtzs/fcvtzu: GPR dst, FP src.
+ * The GPR's is64 flag becomes the sf bit and the FP register class becomes
+ * ftype; `opcode` is the AA64_FP_ICVT_* value for the mnemonic. */
+static void p_fcvtz(AsmDriver* d, u32 opcode) {
+  AA64Reg gpr_dst;
+  u32 fp_src, fp_type;
+
+  gpr_dst = parse_reg(d);
+  expect_comma(d, "fcvtz");
+  parse_fp_scalar(d, &fp_src, &fp_type);
+
+  emit32(d, aa64_fp_int_cvt((u32)gpr_dst.is64, fp_type, opcode, gpr_dst.num,
+                            fp_src));
+}
+/* Mnemonic entry points for the int<->FP conversions: each binds one
+ * AA64_FP_ICVT_* opcode to the parser with the matching operand order
+ * (p_cvtf: FP dst, GPR src; p_fcvtz: GPR dst, FP src). */
+static void p_scvtf(AsmDriver* d) { p_cvtf(d, AA64_FP_ICVT_SCVTF); }
+static void p_ucvtf(AsmDriver* d) { p_cvtf(d, AA64_FP_ICVT_UCVTF); }
+static void p_fcvtzs(AsmDriver* d) { p_fcvtz(d, AA64_FP_ICVT_FCVTZS); }
+static void p_fcvtzu(AsmDriver* d) { p_fcvtz(d, AA64_FP_ICVT_FCVTZU); }
+
+/* Whether a GPR<->FP fmov pairing has an allocated encoding: Wn pairs with
+ * Sn, Xn pairs with Dn, and Hn pairs with either GPR width. */
+static int fmov_gpr_width_ok(int is64, u32 ftype) {
+  if (ftype == 3u) return 1; /* Hn <-> Wn or Xn */
+  return is64 ? ftype == 1u : ftype == 0u;
+}
+
+/* fmov: Vd,Vn (FP reg move) | Rd,Vn (fp->gpr) | Vd,Rn (gpr->fp).
+ * The form is chosen from the operand classes after both are parsed.
+ * FP<->FP requires matching register classes. GPR<->FP requires the GPR
+ * width to match the FP size (Wn<->Sn, Xn<->Dn, either width for Hn);
+ * a mismatched pair such as `fmov x0, s1` is rejected instead of being
+ * emitted as an unallocated instruction word. */
+static void p_fmov(AsmDriver* d) {
+  FpOrGpr a = parse_fp_or_gpr(d);
+  FpOrGpr b;
+  expect_comma(d, "fmov");
+  b = parse_fp_or_gpr(d);
+  if (a.is_fp && b.is_fp) {
+    if (a.ftype != b.ftype)
+      asm_driver_panic(d, "asm: fmov: operand type mismatch");
+    emit32(d, aa64_fp_dp1(a.ftype, AA64_FP_DP1_FMOV, a.num, b.num));
+  } else if (!a.is_fp && b.is_fp) {
+    if (!fmov_gpr_width_ok(a.is64, b.ftype))
+      asm_driver_panic(d, "asm: fmov: GPR width does not match FP register");
+    emit32(d, aa64_fp_int_cvt((u32)a.is64, b.ftype, AA64_FP_ICVT_FMOV_TO_GPR,
+                              a.num, b.num));
+  } else if (a.is_fp && !b.is_fp) {
+    if (!fmov_gpr_width_ok(b.is64, a.ftype))
+      asm_driver_panic(d, "asm: fmov: GPR width does not match FP register");
+    emit32(d, aa64_fp_int_cvt((u32)b.is64, a.ftype, AA64_FP_ICVT_FMOV_TO_FP,
+                              a.num, b.num));
+  } else {
+    asm_driver_panic(d, "asm: fmov: gpr,gpr form not supported (use mov)");
+  }
+}
+
static const AA64Mn kTable[] = {
+ {"fadd", p_fadd, 0},
+ {"fsub", p_fsub, 0},
+ {"fmul", p_fmul, 0},
+ {"fdiv", p_fdiv, 0},
+ {"fneg", p_fneg, 0},
+ {"fabs", p_fabs, 0},
+ {"fsqrt", p_fsqrt, 0},
+ {"fmov", p_fmov, 0},
+ {"fcmp", p_fcmp, 0},
+ {"fcvt", p_fcvt, 0},
+ {"scvtf", p_scvtf, 0},
+ {"ucvtf", p_ucvtf, 0},
+ {"fcvtzs", p_fcvtzs, 0},
+ {"fcvtzu", p_fcvtzu, 0},
{"nop", p_nop, 0},
{"dmb", p_dmb, 0},
{"dsb", p_dsb, 0},
diff --git a/src/arch/aa64/isa.h b/src/arch/aa64/isa.h
@@ -790,6 +790,48 @@ static inline u32 aa64_str64_uimm12(u32 Rt, u32 Rn, u32 imm12_scaled) {
.Rt = Rt});
}
+/* ---- Scalar floating-point encoders ----
+ * ftype: 0=single (Sn), 1=double (Dn), 3=half (Hn). The bit layouts match the
+ * FP_* decode rows in isa.c and the aa_* encoders in native.c, so encode and
+ * decode round-trip. The DP2/DP1 `op` and the FP_INT_CVT `opcode` are the
+ * named field values below. */
+/* Two-source ops, passed as `op` to aa64_fp_dp2. The encoder ORs the value
+ * into the instruction word unshifted, so these are already in position. */
+#define AA64_FP_DP2_FMUL 0x0800u
+#define AA64_FP_DP2_FDIV 0x1800u
+#define AA64_FP_DP2_FADD 0x2800u
+#define AA64_FP_DP2_FSUB 0x3800u
+/* One-source ops, passed as `op` to aa64_fp_dp1; likewise OR'ed in
+ * unshifted. */
+#define AA64_FP_DP1_FMOV 0x4000u
+#define AA64_FP_DP1_FABS 0xC000u
+#define AA64_FP_DP1_FNEG 0x14000u
+#define AA64_FP_DP1_FSQRT 0x1C000u
+/* FP<->integer ops, passed as `opcode` to aa64_fp_int_cvt, which masks the
+ * value to 5 bits and places it at bits [20:16]. */
+#define AA64_FP_ICVT_SCVTF 0x02u
+#define AA64_FP_ICVT_UCVTF 0x03u
+#define AA64_FP_ICVT_FCVTZS 0x18u
+#define AA64_FP_ICVT_FCVTZU 0x19u
+#define AA64_FP_ICVT_FMOV_TO_GPR 0x06u /* fmov Rd, Vn */
+#define AA64_FP_ICVT_FMOV_TO_FP 0x07u /* fmov Vd, Rn */
+
+/* Encode a two-source scalar FP instruction (Rd = Rn <op> Rm).
+ * ftype is masked to 2 bits and placed at bit 22; `op` is an
+ * AA64_FP_DP2_* value OR'ed in unshifted; register numbers are masked to
+ * 5 bits. */
+static inline u32 aa64_fp_dp2(u32 ftype, u32 op, u32 Rd, u32 Rn, u32 Rm) {
+  u32 insn = 0x1E200000u;
+  insn |= (ftype & 3u) << 22;
+  insn |= op;
+  insn |= (Rm & 0x1fu) << 16;
+  insn |= (Rn & 0x1fu) << 5;
+  insn |= Rd & 0x1fu;
+  return insn;
+}
+/* Encode a one-source scalar FP instruction (Rd = <op> Rn).
+ * ftype is masked to 2 bits and placed at bit 22; `op` is an
+ * AA64_FP_DP1_* value OR'ed in unshifted; register numbers are masked to
+ * 5 bits. */
+static inline u32 aa64_fp_dp1(u32 ftype, u32 op, u32 Rd, u32 Rn) {
+  u32 insn = 0x1E200000u;
+  insn |= (ftype & 3u) << 22;
+  insn |= op;
+  insn |= (Rn & 0x1fu) << 5;
+  insn |= Rd & 0x1fu;
+  return insn;
+}
+/* Encode the register-register fcmp Rn, Rm. There is no destination
+ * register, so the low five bits of the word stay zero. ftype is masked to
+ * 2 bits; register numbers are masked to 5 bits. */
+static inline u32 aa64_fcmp_reg(u32 ftype, u32 Rn, u32 Rm) {
+  u32 insn = 0x1E202000u;
+  insn |= (ftype & 3u) << 22;
+  insn |= (Rm & 0x1fu) << 16;
+  insn |= (Rn & 0x1fu) << 5;
+  return insn;
+}
+/* Encode a precision-converting fcvt Rd, Rn.
+ * src_ftype goes in the ftype field at bit 22 and dst_ftype in the 2-bit
+ * field at bit 15 (both masked to 2 bits); bit 17 is always set. The
+ * encoder does not check that the two precisions differ. */
+static inline u32 aa64_fcvt_prec(u32 src_ftype, u32 dst_ftype, u32 Rd, u32 Rn) {
+  u32 insn = 0x1E204000u;
+  insn |= (src_ftype & 3u) << 22;
+  insn |= 1u << 17;
+  insn |= (dst_ftype & 3u) << 15;
+  insn |= (Rn & 0x1fu) << 5;
+  insn |= Rd & 0x1fu;
+  return insn;
+}
+/* Encode an FP<->integer instruction (conversions and the GPR forms of
+ * fmov). sf (1 bit) goes to bit 31, ftype (2 bits) to bit 22, and `opcode`
+ * (an AA64_FP_ICVT_* value, masked to 5 bits) to bit 16. Rd and Rn are
+ * masked to 5 bits; which of them is the GPR depends on the opcode. */
+static inline u32 aa64_fp_int_cvt(u32 sf, u32 ftype, u32 opcode, u32 Rd,
+                                  u32 Rn) {
+  u32 insn = 0x1E200000u;
+  insn |= (sf & 1u) << 31;
+  insn |= (ftype & 3u) << 22;
+  insn |= (opcode & 0x1fu) << 16;
+  insn |= (Rn & 0x1fu) << 5;
+  insn |= Rd & 0x1fu;
+  return insn;
+}
+
/* ====================================================================
* Load/store register pair, pre-indexed (STP / LDP, 64-bit form)
* opc(2) 101 V(1) 010 L(1) imm7(7) Rt2(5) Rn(5) Rt(5)
diff --git a/test/asm/encode/aa64_fp.expected.hex b/test/asm/encode/aa64_fp.expected.hex
@@ -0,0 +1 @@
+2028621e8338251ee608681e49192b1eac41611eeec1201e30c2611e7242601e8022351ef6c2221e3843621e4003669e3b00679e8203261e7d00271e8000629ea100221ec200639e6700789e8800391ec0035fd6
diff --git a/test/asm/encode/aa64_fp.s b/test/asm/encode/aa64_fp.s
@@ -0,0 +1,23 @@
+.text
+t:
+ fadd d0, d1, d2
+ fsub s3, s4, s5
+ fmul d6, d7, d8
+ fdiv s9, s10, s11
+ fneg d12, d13
+ fabs s14, s15
+ fsqrt d16, d17
+ fmov d18, d19
+ fcmp s20, s21
+ fcvt d22, s23
+ fcvt s24, d25
+ fmov x0, d26
+ fmov d27, x1
+ fmov w2, s28
+ fmov s29, w3
+ scvtf d0, x4
+ scvtf s1, w5
+ ucvtf d2, x6
+ fcvtzs x7, d3
+ fcvtzu w8, s4
+ ret
diff --git a/test/asm/encode/aa64_fp.targets b/test/asm/encode/aa64_fp.targets
@@ -0,0 +1 @@
+aa64
+\ No newline at end of file