kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 05bca5fe8e103711afaf75836d6aa5af3b483fe1
parent 74c0977fc5d7ae1c26c93b150264c4b617a1f164
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Fri, 29 May 2026 14:20:35 -0700

aa64 asm: encode scalar floating-point instructions

The aa64 standalone assembler's kTable had no FP mnemonics, so the `as` tool
and inline asm() could not assemble any FP arithmetic/convert/move instruction,
even though the disassembler (just added) decodes them and codegen emits them.
Add the encode side:
  fadd/fsub/fmul/fdiv, fneg/fabs/fsqrt, fmov (reg + gpr<->fp both widths),
  fcmp, fcvt (single<->double), scvtf/ucvtf, fcvtzs/fcvtzu.

Bit-knowledge lives in isa.h (the aa64_fp_dp2, aa64_fp_dp1, aa64_fcmp_reg,
aa64_fcvt_prec and aa64_fp_int_cvt encoders, with named op/opcode constants)
and is shared with the FP_* decode rows. asm.c adds an Sn/Dn/Hn scalar register
parser and fmov's class-dispatched operand parsing.

Verified: `cfree as` output is byte-identical to clang's for all 20 FP forms.
Adds an aa64_fp encode corpus case to the default suite.

Diffstat:
Msrc/arch/aa64/asm.c | 167+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc/arch/aa64/isa.h | 42++++++++++++++++++++++++++++++++++++++++++
Atest/asm/encode/aa64_fp.expected.hex | 1+
Atest/asm/encode/aa64_fp.s | 23+++++++++++++++++++++++
Atest/asm/encode/aa64_fp.targets | 2++
5 files changed, 235 insertions(+), 0 deletions(-)

diff --git a/src/arch/aa64/asm.c b/src/arch/aa64/asm.c @@ -1129,7 +1129,174 @@ static void p_b_gt(AsmDriver* d) { p_b_cond(d, 12); } static void p_b_le(AsmDriver* d) { p_b_cond(d, 13); } static void p_b_al(AsmDriver* d) { p_b_cond(d, 14); } +/* ---- Scalar floating-point ---- + * Sn/Dn/Hn are the single/double/half views of the FP register file; the + * 2-bit ftype (0=s,1=d,3=h) drives both the encoding and the operand text. */ +static int parse_fp_scalar_from_ident(AsmDriver* d, Sym ident, u32* num, + u32* ftype) { + Slice sl = pool_slice(asm_driver_pool(d), ident); + const char* p = sl.s; + size_t n = sl.len; + u32 ft, r = 0; + size_t i; + if (!p || n < 2) return 0; + if (p[0] == 's' || p[0] == 'S') + ft = 0u; + else if (p[0] == 'd' || p[0] == 'D') + ft = 1u; + else if (p[0] == 'h' || p[0] == 'H') + ft = 3u; + else + return 0; + for (i = 1; i < n; ++i) { + char c = p[i]; + if (c < '0' || c > '9') return 0; + r = r * 10u + (u32)(c - '0'); + if (r > 31u) return 0; + } + *num = r; + *ftype = ft; + return 1; +} + +static void parse_fp_scalar(AsmDriver* d, u32* num, u32* ftype) { + AsmTok t = asm_driver_next(d); + if (t.kind != ASM_TOK_IDENT || + !parse_fp_scalar_from_ident(d, t.v.ident, num, ftype)) + asm_driver_panic(d, "asm: expected FP register (Sn/Dn/Hn)"); +} + +/* A register operand that may be either a GPR or a scalar FP register — used + * by fmov, whose three forms differ only by operand class. 
*/ +typedef struct FpOrGpr { + int is_fp; + u32 num; + u32 ftype; /* when is_fp */ + int is64; /* when !is_fp */ +} FpOrGpr; + +static FpOrGpr parse_fp_or_gpr(AsmDriver* d) { + AsmTok t = asm_driver_next(d); + FpOrGpr r; + AA64Reg g; + memset(&r, 0, sizeof r); + if (t.kind == ASM_TOK_IDENT && + parse_fp_scalar_from_ident(d, t.v.ident, &r.num, &r.ftype)) { + r.is_fp = 1; + return r; + } + memset(&g, 0, sizeof g); + if (t.kind == ASM_TOK_IDENT && parse_reg_from_ident(d, t.v.ident, &g)) { + r.is_fp = 0; + r.num = g.num; + r.is64 = (int)g.is64; + return r; + } + asm_driver_panic(d, "asm: fmov: expected register"); + return r; /* unreachable */ +} + +static void p_fp_dp2(AsmDriver* d, u32 op) { + u32 rd, rn, rm, ftd, ftn, ftm; + parse_fp_scalar(d, &rd, &ftd); + expect_comma(d, "fp"); + parse_fp_scalar(d, &rn, &ftn); + expect_comma(d, "fp"); + parse_fp_scalar(d, &rm, &ftm); + if (ftd != ftn || ftd != ftm) + asm_driver_panic(d, "asm: fp: operand type mismatch"); + emit32(d, aa64_fp_dp2(ftd, op, rd, rn, rm)); +} +static void p_fp_dp1(AsmDriver* d, u32 op) { + u32 rd, rn, ftd, ftn; + parse_fp_scalar(d, &rd, &ftd); + expect_comma(d, "fp"); + parse_fp_scalar(d, &rn, &ftn); + if (ftd != ftn) asm_driver_panic(d, "asm: fp: operand type mismatch"); + emit32(d, aa64_fp_dp1(ftd, op, rd, rn)); +} +static void p_fadd(AsmDriver* d) { p_fp_dp2(d, AA64_FP_DP2_FADD); } +static void p_fsub(AsmDriver* d) { p_fp_dp2(d, AA64_FP_DP2_FSUB); } +static void p_fmul(AsmDriver* d) { p_fp_dp2(d, AA64_FP_DP2_FMUL); } +static void p_fdiv(AsmDriver* d) { p_fp_dp2(d, AA64_FP_DP2_FDIV); } +static void p_fneg(AsmDriver* d) { p_fp_dp1(d, AA64_FP_DP1_FNEG); } +static void p_fabs(AsmDriver* d) { p_fp_dp1(d, AA64_FP_DP1_FABS); } +static void p_fsqrt(AsmDriver* d) { p_fp_dp1(d, AA64_FP_DP1_FSQRT); } + +static void p_fcmp(AsmDriver* d) { + u32 rn, rm, ftn, ftm; + parse_fp_scalar(d, &rn, &ftn); + expect_comma(d, "fcmp"); + parse_fp_scalar(d, &rm, &ftm); + if (ftn != ftm) asm_driver_panic(d, "asm: fcmp: operand 
type mismatch"); + emit32(d, aa64_fcmp_reg(ftn, rn, rm)); +} +static void p_fcvt(AsmDriver* d) { + u32 rd, rn, ftd, ftn; + parse_fp_scalar(d, &rd, &ftd); + expect_comma(d, "fcvt"); + parse_fp_scalar(d, &rn, &ftn); + emit32(d, aa64_fcvt_prec(ftn /*src*/, ftd /*dst*/, rd, rn)); +} +/* scvtf/ucvtf: FP dst, GPR src. */ +static void p_cvtf(AsmDriver* d, u32 opcode) { + u32 fd, ft; + AA64Reg rn; + parse_fp_scalar(d, &fd, &ft); + expect_comma(d, "cvtf"); + rn = parse_reg(d); + emit32(d, aa64_fp_int_cvt((u32)rn.is64, ft, opcode, fd, rn.num)); +} +/* fcvtzs/fcvtzu: GPR dst, FP src. */ +static void p_fcvtz(AsmDriver* d, u32 opcode) { + AA64Reg rd; + u32 fn, ft; + rd = parse_reg(d); + expect_comma(d, "fcvtz"); + parse_fp_scalar(d, &fn, &ft); + emit32(d, aa64_fp_int_cvt((u32)rd.is64, ft, opcode, rd.num, fn)); +} +static void p_scvtf(AsmDriver* d) { p_cvtf(d, AA64_FP_ICVT_SCVTF); } +static void p_ucvtf(AsmDriver* d) { p_cvtf(d, AA64_FP_ICVT_UCVTF); } +static void p_fcvtzs(AsmDriver* d) { p_fcvtz(d, AA64_FP_ICVT_FCVTZS); } +static void p_fcvtzu(AsmDriver* d) { p_fcvtz(d, AA64_FP_ICVT_FCVTZU); } + +/* fmov: Vd,Vn (FP reg move) | Rd,Vn (fp->gpr) | Vd,Rn (gpr->fp). 
*/ +static void p_fmov(AsmDriver* d) { + FpOrGpr a = parse_fp_or_gpr(d); + FpOrGpr b; + expect_comma(d, "fmov"); + b = parse_fp_or_gpr(d); + if (a.is_fp && b.is_fp) { + if (a.ftype != b.ftype) + asm_driver_panic(d, "asm: fmov: operand type mismatch"); + emit32(d, aa64_fp_dp1(a.ftype, AA64_FP_DP1_FMOV, a.num, b.num)); + } else if (!a.is_fp && b.is_fp) { + emit32(d, aa64_fp_int_cvt((u32)a.is64, b.ftype, AA64_FP_ICVT_FMOV_TO_GPR, + a.num, b.num)); + } else if (a.is_fp && !b.is_fp) { + emit32(d, aa64_fp_int_cvt((u32)b.is64, a.ftype, AA64_FP_ICVT_FMOV_TO_FP, + a.num, b.num)); + } else { + asm_driver_panic(d, "asm: fmov: gpr,gpr form not supported (use mov)"); + } +} + static const AA64Mn kTable[] = { + {"fadd", p_fadd, 0}, + {"fsub", p_fsub, 0}, + {"fmul", p_fmul, 0}, + {"fdiv", p_fdiv, 0}, + {"fneg", p_fneg, 0}, + {"fabs", p_fabs, 0}, + {"fsqrt", p_fsqrt, 0}, + {"fmov", p_fmov, 0}, + {"fcmp", p_fcmp, 0}, + {"fcvt", p_fcvt, 0}, + {"scvtf", p_scvtf, 0}, + {"ucvtf", p_ucvtf, 0}, + {"fcvtzs", p_fcvtzs, 0}, + {"fcvtzu", p_fcvtzu, 0}, {"nop", p_nop, 0}, {"dmb", p_dmb, 0}, {"dsb", p_dsb, 0}, diff --git a/src/arch/aa64/isa.h b/src/arch/aa64/isa.h @@ -790,6 +790,48 @@ static inline u32 aa64_str64_uimm12(u32 Rt, u32 Rn, u32 imm12_scaled) { .Rt = Rt}); } +/* ---- Scalar floating-point encoders ---- + * ftype: 0=single (Sn), 1=double (Dn), 3=half (Hn). The bit layouts match the + * FP_* decode rows in isa.c and the aa_* encoders in native.c, so encode and + * decode round-trip. The DP2/DP1 `op` and the FP_INT_CVT `opcode` are the + * named field values below. 
*/ +#define AA64_FP_DP2_FMUL 0x0800u +#define AA64_FP_DP2_FDIV 0x1800u +#define AA64_FP_DP2_FADD 0x2800u +#define AA64_FP_DP2_FSUB 0x3800u +#define AA64_FP_DP1_FMOV 0x4000u +#define AA64_FP_DP1_FABS 0xC000u +#define AA64_FP_DP1_FNEG 0x14000u +#define AA64_FP_DP1_FSQRT 0x1C000u +#define AA64_FP_ICVT_SCVTF 0x02u +#define AA64_FP_ICVT_UCVTF 0x03u +#define AA64_FP_ICVT_FCVTZS 0x18u +#define AA64_FP_ICVT_FCVTZU 0x19u +#define AA64_FP_ICVT_FMOV_TO_GPR 0x06u /* fmov Rd, Vn */ +#define AA64_FP_ICVT_FMOV_TO_FP 0x07u /* fmov Vd, Rn */ + +static inline u32 aa64_fp_dp2(u32 ftype, u32 op, u32 Rd, u32 Rn, u32 Rm) { + return 0x1E200000u | ((ftype & 3u) << 22) | op | ((Rm & 0x1fu) << 16) | + ((Rn & 0x1fu) << 5) | (Rd & 0x1fu); +} +static inline u32 aa64_fp_dp1(u32 ftype, u32 op, u32 Rd, u32 Rn) { + return 0x1E200000u | ((ftype & 3u) << 22) | op | ((Rn & 0x1fu) << 5) | + (Rd & 0x1fu); +} +static inline u32 aa64_fcmp_reg(u32 ftype, u32 Rn, u32 Rm) { + return 0x1E202000u | ((ftype & 3u) << 22) | ((Rm & 0x1fu) << 16) | + ((Rn & 0x1fu) << 5); +} +static inline u32 aa64_fcvt_prec(u32 src_ftype, u32 dst_ftype, u32 Rd, u32 Rn) { + return 0x1E204000u | ((src_ftype & 3u) << 22) | (1u << 17) | + ((dst_ftype & 3u) << 15) | ((Rn & 0x1fu) << 5) | (Rd & 0x1fu); +} +static inline u32 aa64_fp_int_cvt(u32 sf, u32 ftype, u32 opcode, u32 Rd, + u32 Rn) { + return ((sf & 1u) << 31) | 0x1E200000u | ((ftype & 3u) << 22) | + ((opcode & 0x1fu) << 16) | ((Rn & 0x1fu) << 5) | (Rd & 0x1fu); +} + /* ==================================================================== * Load/store register pair, pre-indexed (STP / LDP, 64-bit form) * opc(2) 101 V(1) 010 L(1) imm7(7) Rt2(5) Rn(5) Rt(5) diff --git a/test/asm/encode/aa64_fp.expected.hex b/test/asm/encode/aa64_fp.expected.hex @@ -0,0 +1 @@ +2028621e8338251ee608681e49192b1eac41611eeec1201e30c2611e7242601e8022351ef6c2221e3843621e4003669e3b00679e8203261e7d00271e8000629ea100221ec200639e6700789e8800391ec0035fd6 diff --git a/test/asm/encode/aa64_fp.s 
b/test/asm/encode/aa64_fp.s @@ -0,0 +1,23 @@ +.text +t: + fadd d0, d1, d2 + fsub s3, s4, s5 + fmul d6, d7, d8 + fdiv s9, s10, s11 + fneg d12, d13 + fabs s14, s15 + fsqrt d16, d17 + fmov d18, d19 + fcmp s20, s21 + fcvt d22, s23 + fcvt s24, d25 + fmov x0, d26 + fmov d27, x1 + fmov w2, s28 + fmov s29, w3 + scvtf d0, x4 + scvtf s1, w5 + ucvtf d2, x6 + fcvtzs x7, d3 + fcvtzu w8, s4 + ret diff --git a/test/asm/encode/aa64_fp.targets b/test/asm/encode/aa64_fp.targets @@ -0,0 +1 @@ +aa64 +\ No newline at end of file