kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

isa.h (27868B)


      1 /* RV64 instruction encoders + descriptor table — single source of truth
      2  * for every instruction the encoder, decoder, and disassembler need to
      3  * agree on. Mirrors the aa64_isa.[ch] pattern.
      4  *
      5  * The bottom of this header (after the `rv_*` inline encoders) declares
      6  * the format-kind enum and per-format pack/unpack helpers. The
      7  * descriptor table itself lives in isa.c. */
      8 
      9 #ifndef KIT_RV64_ISA_H
     10 #define KIT_RV64_ISA_H
     11 
     12 #include "core/core.h"
     13 #include "core/slice.h"
     14 #include "core/strbuf.h"
     15 
     16 /* ---- Named registers (DWARF / psABI numbering matches HW) ---- */
     17 enum {
     18   RV_X0 = 0,
     19   RV_ZERO = 0,
     20   RV_X1 = 1,
     21   RV_RA = 1,
     22   RV_X2 = 2,
     23   RV_SP = 2,
     24   RV_X3 = 3,
     25   RV_GP = 3,
     26   RV_X4 = 4,
     27   RV_TP = 4,
     28   RV_X5 = 5,
     29   RV_T0 = 5,
     30   RV_X6 = 6,
     31   RV_T1 = 6,
     32   RV_X7 = 7,
     33   RV_T2 = 7,
     34   RV_X8 = 8,
     35   RV_S0 = 8,
     36   RV_FP = 8,
     37   RV_X9 = 9,
     38   RV_S1 = 9,
     39   RV_X10 = 10,
     40   RV_A0 = 10,
     41   RV_X11 = 11,
     42   RV_A1 = 11,
     43   RV_X12 = 12,
     44   RV_A2 = 12,
     45   RV_X13 = 13,
     46   RV_A3 = 13,
     47   RV_X14 = 14,
     48   RV_A4 = 14,
     49   RV_X15 = 15,
     50   RV_A5 = 15,
     51   RV_X16 = 16,
     52   RV_A6 = 16,
     53   RV_X17 = 17,
     54   RV_A7 = 17,
     55   RV_X18 = 18,
     56   RV_S2 = 18,
     57   RV_X27 = 27,
     58   RV_S11 = 27,
     59   RV_X28 = 28,
     60   RV_T3 = 28,
     61   RV_X29 = 29,
     62   RV_T4 = 29,
     63   RV_X30 = 30,
     64   RV_T5 = 30,
     65   RV_X31 = 31,
     66   RV_T6 = 31,
     67 };
     68 
/* Canonical no-op word: the I-type encoding of ADDI x0, x0, 0 (all fields
 * zero apart from the OP-IMM major opcode 0x13). */
#define RV_NOP 0x00000013u /* ADDI x0, x0, 0 */
     70 
/* ---- Format helpers ----
 *
 * R-type: funct7(31:25) rs2(24:20) rs1(19:15) funct3(14:12) rd(11:7) op(6:0)
 * I-type: imm(31:20)    rs1(19:15) funct3(14:12) rd(11:7) op(6:0)
 * S-type: imm[11:5](31:25) rs2(24:20) rs1(19:15) funct3(14:12)
 *         imm[4:0](11:7) op(6:0)
 * B-type: imm[12](31) imm[10:5](30:25) rs2(24:20) rs1(19:15)
 *         funct3(14:12) imm[4:1](11:8) imm[11](7) op(6:0)
 * U-type: imm[31:12](31:12) rd(11:7) op(6:0)
 * J-type: imm[20](31) imm[10:1](30:21) imm[11](20)
 *         imm[19:12](19:12) rd(11:7) op(6:0)
 */
     81 
     82 static inline u32 rv_r(u32 funct7, u32 rs2, u32 rs1, u32 funct3, u32 rd,
     83                        u32 op) {
     84   return ((funct7 & 0x7fu) << 25) | ((rs2 & 0x1fu) << 20) |
     85          ((rs1 & 0x1fu) << 15) | ((funct3 & 0x7u) << 12) | ((rd & 0x1fu) << 7) |
     86          (op & 0x7fu);
     87 }
     88 static inline u32 rv_i(i32 imm12, u32 rs1, u32 funct3, u32 rd, u32 op) {
     89   return (((u32)imm12 & 0xfffu) << 20) | ((rs1 & 0x1fu) << 15) |
     90          ((funct3 & 0x7u) << 12) | ((rd & 0x1fu) << 7) | (op & 0x7fu);
     91 }
     92 static inline u32 rv_s(i32 imm12, u32 rs2, u32 rs1, u32 funct3, u32 op) {
     93   u32 ui = (u32)imm12 & 0xfffu;
     94   return ((ui >> 5) << 25) | ((rs2 & 0x1fu) << 20) | ((rs1 & 0x1fu) << 15) |
     95          ((funct3 & 0x7u) << 12) | ((ui & 0x1fu) << 7) | (op & 0x7fu);
     96 }
     97 static inline u32 rv_b(i32 imm13, u32 rs2, u32 rs1, u32 funct3, u32 op) {
     98   u32 ui = (u32)imm13;
     99   return (((ui >> 12) & 1u) << 31) | (((ui >> 5) & 0x3fu) << 25) |
    100          ((rs2 & 0x1fu) << 20) | ((rs1 & 0x1fu) << 15) |
    101          ((funct3 & 0x7u) << 12) | (((ui >> 1) & 0xfu) << 8) |
    102          (((ui >> 11) & 1u) << 7) | (op & 0x7fu);
    103 }
    104 static inline u32 rv_u(u32 imm32_hi20, u32 rd, u32 op) {
    105   return (imm32_hi20 & 0xfffff000u) | ((rd & 0x1fu) << 7) | (op & 0x7fu);
    106 }
    107 static inline u32 rv_j(i32 imm21, u32 rd, u32 op) {
    108   u32 ui = (u32)imm21;
    109   return (((ui >> 20) & 1u) << 31) | (((ui >> 1) & 0x3ffu) << 21) |
    110          (((ui >> 11) & 1u) << 20) | (((ui >> 12) & 0xffu) << 12) |
    111          ((rd & 0x1fu) << 7) | (op & 0x7fu);
    112 }
    113 
    114 /* ---- Integer ops (RV32I/RV64I) ---- */
    115 
/* 7-bit major opcodes. The format helpers above mask `op` with 0x7f and
 * place it in bits 6:0 of the instruction word. */
#define RV_OP 0x33u
#define RV_OP_IMM 0x13u
#define RV_OP_32 0x3bu
#define RV_OP_IMM_32 0x1bu
#define RV_LUI 0x37u
#define RV_AUIPC 0x17u
#define RV_LOAD 0x03u
#define RV_STORE 0x23u
#define RV_BRANCH 0x63u
#define RV_JAL 0x6fu
#define RV_JALR 0x67u
#define RV_LOAD_FP 0x07u
#define RV_STORE_FP 0x27u
#define RV_OP_FP 0x53u
/* NOTE(review): the four opcodes below have no inline encoder in this
 * header; presumably they serve the RV64_FMT_R4 descriptor rows — confirm. */
#define RV_MADD 0x43u
#define RV_MSUB 0x47u
#define RV_NMSUB 0x4bu
#define RV_NMADD 0x4fu
#define RV_AMO 0x2fu
#define RV_FENCE 0x0fu
#define RV_SYSTEM 0x73u
    137 
    138 static inline u32 rv_add(u32 rd, u32 rs1, u32 rs2) {
    139   return rv_r(0x00, rs2, rs1, 0x0, rd, RV_OP);
    140 }
    141 static inline u32 rv_sub(u32 rd, u32 rs1, u32 rs2) {
    142   return rv_r(0x20, rs2, rs1, 0x0, rd, RV_OP);
    143 }
    144 static inline u32 rv_sll(u32 rd, u32 rs1, u32 rs2) {
    145   return rv_r(0x00, rs2, rs1, 0x1, rd, RV_OP);
    146 }
    147 static inline u32 rv_slt(u32 rd, u32 rs1, u32 rs2) {
    148   return rv_r(0x00, rs2, rs1, 0x2, rd, RV_OP);
    149 }
    150 static inline u32 rv_sltu(u32 rd, u32 rs1, u32 rs2) {
    151   return rv_r(0x00, rs2, rs1, 0x3, rd, RV_OP);
    152 }
    153 static inline u32 rv_xor(u32 rd, u32 rs1, u32 rs2) {
    154   return rv_r(0x00, rs2, rs1, 0x4, rd, RV_OP);
    155 }
    156 static inline u32 rv_srl(u32 rd, u32 rs1, u32 rs2) {
    157   return rv_r(0x00, rs2, rs1, 0x5, rd, RV_OP);
    158 }
    159 static inline u32 rv_sra(u32 rd, u32 rs1, u32 rs2) {
    160   return rv_r(0x20, rs2, rs1, 0x5, rd, RV_OP);
    161 }
    162 static inline u32 rv_or(u32 rd, u32 rs1, u32 rs2) {
    163   return rv_r(0x00, rs2, rs1, 0x6, rd, RV_OP);
    164 }
    165 static inline u32 rv_and(u32 rd, u32 rs1, u32 rs2) {
    166   return rv_r(0x00, rs2, rs1, 0x7, rd, RV_OP);
    167 }
    168 
    169 static inline u32 rv_addw(u32 rd, u32 rs1, u32 rs2) {
    170   return rv_r(0x00, rs2, rs1, 0x0, rd, RV_OP_32);
    171 }
    172 static inline u32 rv_subw(u32 rd, u32 rs1, u32 rs2) {
    173   return rv_r(0x20, rs2, rs1, 0x0, rd, RV_OP_32);
    174 }
    175 static inline u32 rv_sllw(u32 rd, u32 rs1, u32 rs2) {
    176   return rv_r(0x00, rs2, rs1, 0x1, rd, RV_OP_32);
    177 }
    178 static inline u32 rv_srlw(u32 rd, u32 rs1, u32 rs2) {
    179   return rv_r(0x00, rs2, rs1, 0x5, rd, RV_OP_32);
    180 }
    181 static inline u32 rv_sraw(u32 rd, u32 rs1, u32 rs2) {
    182   return rv_r(0x20, rs2, rs1, 0x5, rd, RV_OP_32);
    183 }
    184 
    185 static inline u32 rv_addi(u32 rd, u32 rs1, i32 imm) {
    186   return rv_i(imm, rs1, 0x0, rd, RV_OP_IMM);
    187 }
    188 static inline u32 rv_slti(u32 rd, u32 rs1, i32 imm) {
    189   return rv_i(imm, rs1, 0x2, rd, RV_OP_IMM);
    190 }
    191 static inline u32 rv_sltiu(u32 rd, u32 rs1, i32 imm) {
    192   return rv_i(imm, rs1, 0x3, rd, RV_OP_IMM);
    193 }
    194 static inline u32 rv_xori(u32 rd, u32 rs1, i32 imm) {
    195   return rv_i(imm, rs1, 0x4, rd, RV_OP_IMM);
    196 }
    197 static inline u32 rv_ori(u32 rd, u32 rs1, i32 imm) {
    198   return rv_i(imm, rs1, 0x6, rd, RV_OP_IMM);
    199 }
    200 static inline u32 rv_andi(u32 rd, u32 rs1, i32 imm) {
    201   return rv_i(imm, rs1, 0x7, rd, RV_OP_IMM);
    202 }
    203 
    204 /* Shift-immediate forms. RV64I uses a 6-bit shamt in bits 25:20 and a
    205  * 6-bit funct6 in bits 31:26 (so the funct7-vs-shamt[5] split that
    206  * rv_r() does is wrong here — we hand-assemble these). */
    207 static inline u32 rv_slli(u32 rd, u32 rs1, u32 sh) {
    208   return (0x00u << 26) | ((sh & 0x3fu) << 20) | ((rs1 & 0x1fu) << 15) |
    209          (0x1u << 12) | ((rd & 0x1fu) << 7) | RV_OP_IMM;
    210 }
    211 static inline u32 rv_srli(u32 rd, u32 rs1, u32 sh) {
    212   return (0x00u << 26) | ((sh & 0x3fu) << 20) | ((rs1 & 0x1fu) << 15) |
    213          (0x5u << 12) | ((rd & 0x1fu) << 7) | RV_OP_IMM;
    214 }
    215 static inline u32 rv_srai(u32 rd, u32 rs1, u32 sh) {
    216   return (0x10u << 26) | ((sh & 0x3fu) << 20) | ((rs1 & 0x1fu) << 15) |
    217          (0x5u << 12) | ((rd & 0x1fu) << 7) | RV_OP_IMM;
    218 }
    219 
    220 static inline u32 rv_addiw(u32 rd, u32 rs1, i32 imm) {
    221   return rv_i(imm, rs1, 0x0, rd, RV_OP_IMM_32);
    222 }
    223 static inline u32 rv_slliw(u32 rd, u32 rs1, u32 sh) {
    224   return rv_r(0x00, sh & 0x1fu, rs1, 0x1, rd, RV_OP_IMM_32);
    225 }
    226 static inline u32 rv_srliw(u32 rd, u32 rs1, u32 sh) {
    227   return rv_r(0x00, sh & 0x1fu, rs1, 0x5, rd, RV_OP_IMM_32);
    228 }
    229 static inline u32 rv_sraiw(u32 rd, u32 rs1, u32 sh) {
    230   return rv_r(0x20, sh & 0x1fu, rs1, 0x5, rd, RV_OP_IMM_32);
    231 }
    232 
    233 static inline u32 rv_lui(u32 rd, u32 imm20) {
    234   return ((imm20 & 0xfffffu) << 12) | ((rd & 0x1fu) << 7) | RV_LUI;
    235 }
    236 static inline u32 rv_auipc(u32 rd, u32 imm20) {
    237   return ((imm20 & 0xfffffu) << 12) | ((rd & 0x1fu) << 7) | RV_AUIPC;
    238 }
    239 
    240 /* M extension */
    241 static inline u32 rv_mul(u32 rd, u32 rs1, u32 rs2) {
    242   return rv_r(0x01, rs2, rs1, 0x0, rd, RV_OP);
    243 }
    244 static inline u32 rv_mulh(u32 rd, u32 rs1, u32 rs2) {
    245   return rv_r(0x01, rs2, rs1, 0x1, rd, RV_OP);
    246 }
    247 static inline u32 rv_mulhsu(u32 rd, u32 rs1, u32 rs2) {
    248   return rv_r(0x01, rs2, rs1, 0x2, rd, RV_OP);
    249 }
    250 static inline u32 rv_mulhu(u32 rd, u32 rs1, u32 rs2) {
    251   return rv_r(0x01, rs2, rs1, 0x3, rd, RV_OP);
    252 }
    253 static inline u32 rv_div(u32 rd, u32 rs1, u32 rs2) {
    254   return rv_r(0x01, rs2, rs1, 0x4, rd, RV_OP);
    255 }
    256 static inline u32 rv_divu(u32 rd, u32 rs1, u32 rs2) {
    257   return rv_r(0x01, rs2, rs1, 0x5, rd, RV_OP);
    258 }
    259 static inline u32 rv_rem(u32 rd, u32 rs1, u32 rs2) {
    260   return rv_r(0x01, rs2, rs1, 0x6, rd, RV_OP);
    261 }
    262 static inline u32 rv_remu(u32 rd, u32 rs1, u32 rs2) {
    263   return rv_r(0x01, rs2, rs1, 0x7, rd, RV_OP);
    264 }
    265 static inline u32 rv_mulw(u32 rd, u32 rs1, u32 rs2) {
    266   return rv_r(0x01, rs2, rs1, 0x0, rd, RV_OP_32);
    267 }
    268 static inline u32 rv_divw(u32 rd, u32 rs1, u32 rs2) {
    269   return rv_r(0x01, rs2, rs1, 0x4, rd, RV_OP_32);
    270 }
    271 static inline u32 rv_divuw(u32 rd, u32 rs1, u32 rs2) {
    272   return rv_r(0x01, rs2, rs1, 0x5, rd, RV_OP_32);
    273 }
    274 static inline u32 rv_remw(u32 rd, u32 rs1, u32 rs2) {
    275   return rv_r(0x01, rs2, rs1, 0x6, rd, RV_OP_32);
    276 }
    277 static inline u32 rv_remuw(u32 rd, u32 rs1, u32 rs2) {
    278   return rv_r(0x01, rs2, rs1, 0x7, rd, RV_OP_32);
    279 }
    280 
    281 /* Zba (address-generation) subset — assumed available on rv64 targets.
    282  * SH{1,2,3}ADD rd, rs1, rs2 computes rd = (rs1 << {1,2,3}) + rs2 in one
    283  * instruction (funct7=0x10, opcode=OP). Used by load/store to fold an
    284  * indexed effective address `base + (index << log2_scale)` into a single
    285  * scratch register without an explicit shift+add pair. */
    286 static inline u32 rv_sh1add(u32 rd, u32 rs1, u32 rs2) {
    287   return rv_r(0x10, rs2, rs1, 0x2, rd, RV_OP);
    288 }
    289 static inline u32 rv_sh2add(u32 rd, u32 rs1, u32 rs2) {
    290   return rv_r(0x10, rs2, rs1, 0x4, rd, RV_OP);
    291 }
    292 static inline u32 rv_sh3add(u32 rd, u32 rs1, u32 rs2) {
    293   return rv_r(0x10, rs2, rs1, 0x6, rd, RV_OP);
    294 }
    295 
    296 /* Loads (funct3: 0=LB,1=LH,2=LW,3=LD,4=LBU,5=LHU,6=LWU) */
    297 static inline u32 rv_lb(u32 rd, u32 rs1, i32 imm) {
    298   return rv_i(imm, rs1, 0x0, rd, RV_LOAD);
    299 }
    300 static inline u32 rv_lh(u32 rd, u32 rs1, i32 imm) {
    301   return rv_i(imm, rs1, 0x1, rd, RV_LOAD);
    302 }
    303 static inline u32 rv_lw(u32 rd, u32 rs1, i32 imm) {
    304   return rv_i(imm, rs1, 0x2, rd, RV_LOAD);
    305 }
    306 static inline u32 rv_ld(u32 rd, u32 rs1, i32 imm) {
    307   return rv_i(imm, rs1, 0x3, rd, RV_LOAD);
    308 }
    309 static inline u32 rv_lbu(u32 rd, u32 rs1, i32 imm) {
    310   return rv_i(imm, rs1, 0x4, rd, RV_LOAD);
    311 }
    312 static inline u32 rv_lhu(u32 rd, u32 rs1, i32 imm) {
    313   return rv_i(imm, rs1, 0x5, rd, RV_LOAD);
    314 }
    315 static inline u32 rv_lwu(u32 rd, u32 rs1, i32 imm) {
    316   return rv_i(imm, rs1, 0x6, rd, RV_LOAD);
    317 }
    318 
    319 /* Stores (funct3: 0=SB,1=SH,2=SW,3=SD) */
    320 static inline u32 rv_sb(u32 rs2, u32 rs1, i32 imm) {
    321   return rv_s(imm, rs2, rs1, 0x0, RV_STORE);
    322 }
    323 static inline u32 rv_sh(u32 rs2, u32 rs1, i32 imm) {
    324   return rv_s(imm, rs2, rs1, 0x1, RV_STORE);
    325 }
    326 static inline u32 rv_sw(u32 rs2, u32 rs1, i32 imm) {
    327   return rv_s(imm, rs2, rs1, 0x2, RV_STORE);
    328 }
    329 static inline u32 rv_sd(u32 rs2, u32 rs1, i32 imm) {
    330   return rv_s(imm, rs2, rs1, 0x3, RV_STORE);
    331 }
    332 
    333 /* Branches */
    334 static inline u32 rv_beq(u32 rs1, u32 rs2, i32 imm) {
    335   return rv_b(imm, rs2, rs1, 0x0, RV_BRANCH);
    336 }
    337 static inline u32 rv_bne(u32 rs1, u32 rs2, i32 imm) {
    338   return rv_b(imm, rs2, rs1, 0x1, RV_BRANCH);
    339 }
    340 static inline u32 rv_blt(u32 rs1, u32 rs2, i32 imm) {
    341   return rv_b(imm, rs2, rs1, 0x4, RV_BRANCH);
    342 }
    343 static inline u32 rv_bge(u32 rs1, u32 rs2, i32 imm) {
    344   return rv_b(imm, rs2, rs1, 0x5, RV_BRANCH);
    345 }
    346 static inline u32 rv_bltu(u32 rs1, u32 rs2, i32 imm) {
    347   return rv_b(imm, rs2, rs1, 0x6, RV_BRANCH);
    348 }
    349 static inline u32 rv_bgeu(u32 rs1, u32 rs2, i32 imm) {
    350   return rv_b(imm, rs2, rs1, 0x7, RV_BRANCH);
    351 }
    352 
    353 /* Jumps */
    354 static inline u32 rv_jal(u32 rd, i32 imm21) { return rv_j(imm21, rd, RV_JAL); }
    355 static inline u32 rv_jalr(u32 rd, u32 rs1, i32 imm) {
    356   return rv_i(imm, rs1, 0x0, rd, RV_JALR);
    357 }
    358 
    359 /* Convenience: jr / ret / j / nop */
    360 static inline u32 rv_jr(u32 rs1) { return rv_jalr(RV_ZERO, rs1, 0); }
    361 static inline u32 rv_ret_(void) { return rv_jalr(RV_ZERO, RV_RA, 0); }
    362 static inline u32 rv_nop(void) { return RV_NOP; }
    363 
    364 /* System */
    365 static inline u32 rv_ecall(void) { return rv_i(0, 0, 0, 0, RV_SYSTEM); }
    366 static inline u32 rv_ebreak(void) { return rv_i(1, 0, 0, 0, RV_SYSTEM); }
    367 /* WFI: wait-for-interrupt, SYSTEM funct12=0x105 (privileged). */
    368 static inline u32 rv_wfi(void) { return 0x10500073u; }
    369 
/* FENCE: imm[11:8] = fm, imm[7:4] = pred, imm[3:0] = succ (4 bits each).
 * 0x033 therefore encodes fm=0, pred=RW, succ=RW. */
    371 static inline u32 rv_fence_rw_rw(void) {
    372   return rv_i((i32)0x033, 0, 0, 0, RV_FENCE);
    373 }
    374 /* FENCE.I: instruction-stream sync (Zifencei). funct3=1 in the MISC-MEM major
    375  * opcode (0x0F). Used to lower the ISB intrinsic. */
    376 static inline u32 rv_fence_i(void) { return 0x0000100Fu; }
    377 /* PAUSE (Zihintpause): a FENCE with pred=W, succ=none. Used for cpu_yield;
    378  * decodes as a plain FENCE on hardware lacking the extension, which is a safe
    379  * (stronger) no-op hint. */
    380 static inline u32 rv_pause(void) { return 0x0100000Fu; }
    381 
    382 /* ---- FP (F + D extensions) ----
    383  * funct7 layout: bits[6:2] op-major (e.g. 0x00 FADD, 0x01 FSUB, ...);
    384  * bits[1:0] = fmt (00=S, 01=D). rm (rounding mode) in funct3; 0x7 = DYN. */
    385 
/* 2-bit fmt selector OR-ed into the low bits of funct7 by the rv_f*(fmt, ...)
 * encoders below. */
#define RV_FMT_S 0u
#define RV_FMT_D 1u
    388 
    389 static inline u32 rv_fadd(u32 fmt, u32 rd, u32 rs1, u32 rs2) {
    390   return rv_r((0x00u << 2) | fmt, rs2, rs1, 0x7, rd, RV_OP_FP);
    391 }
    392 static inline u32 rv_fsub(u32 fmt, u32 rd, u32 rs1, u32 rs2) {
    393   return rv_r((0x01u << 2) | fmt, rs2, rs1, 0x7, rd, RV_OP_FP);
    394 }
    395 static inline u32 rv_fmul(u32 fmt, u32 rd, u32 rs1, u32 rs2) {
    396   return rv_r((0x02u << 2) | fmt, rs2, rs1, 0x7, rd, RV_OP_FP);
    397 }
    398 static inline u32 rv_fdiv(u32 fmt, u32 rd, u32 rs1, u32 rs2) {
    399   return rv_r((0x03u << 2) | fmt, rs2, rs1, 0x7, rd, RV_OP_FP);
    400 }
    401 /* FSGNJ.fmt rd, rs1, rs2 — used to implement FMV.fmt rd, rs (sgnj rs, rs). */
    402 static inline u32 rv_fsgnj(u32 fmt, u32 rd, u32 rs1, u32 rs2) {
    403   return rv_r((0x04u << 2) | fmt, rs2, rs1, 0x0, rd, RV_OP_FP);
    404 }
    405 static inline u32 rv_fsgnjn(u32 fmt, u32 rd, u32 rs1, u32 rs2) {
    406   return rv_r((0x04u << 2) | fmt, rs2, rs1, 0x1, rd, RV_OP_FP);
    407 }
/* FCVT — integer/FP conversions. funct7 = (funct5 << 2) | fmt, where funct5
 * is 0x18 for FP -> integer and 0x1a for integer -> FP:
 *   0x60(int <- S)  0x61(int <- D)
 *   0x68(S <- int)  0x69(D <- int)
 * rs2 selects the integer type: 0=W, 1=WU, 2=L, 3=LU.
 * We assemble explicitly via rv_r to be obvious.  */
    413 static inline u32 rv_fcvt(u32 funct7, u32 rs2_sel, u32 rd, u32 rs1, u32 rm) {
    414   return rv_r(funct7, rs2_sel, rs1, rm, rd, RV_OP_FP);
    415 }
    416 /* FCVT.W.S  rd, rs1   (signed i32 from f32, rtz=001)  : funct7=0x60 rs2=0 */
    417 static inline u32 rv_fcvt_w_s(u32 rd, u32 rs1) {
    418   return rv_fcvt(0x60, 0x0, rd, rs1, 0x1);
    419 }
    420 static inline u32 rv_fcvt_wu_s(u32 rd, u32 rs1) {
    421   return rv_fcvt(0x60, 0x1, rd, rs1, 0x1);
    422 }
    423 static inline u32 rv_fcvt_l_s(u32 rd, u32 rs1) {
    424   return rv_fcvt(0x60, 0x2, rd, rs1, 0x1);
    425 }
    426 static inline u32 rv_fcvt_lu_s(u32 rd, u32 rs1) {
    427   return rv_fcvt(0x60, 0x3, rd, rs1, 0x1);
    428 }
    429 static inline u32 rv_fcvt_w_d(u32 rd, u32 rs1) {
    430   return rv_fcvt(0x61, 0x0, rd, rs1, 0x1);
    431 }
    432 static inline u32 rv_fcvt_wu_d(u32 rd, u32 rs1) {
    433   return rv_fcvt(0x61, 0x1, rd, rs1, 0x1);
    434 }
    435 static inline u32 rv_fcvt_l_d(u32 rd, u32 rs1) {
    436   return rv_fcvt(0x61, 0x2, rd, rs1, 0x1);
    437 }
    438 static inline u32 rv_fcvt_lu_d(u32 rd, u32 rs1) {
    439   return rv_fcvt(0x61, 0x3, rd, rs1, 0x1);
    440 }
    441 static inline u32 rv_fcvt_s_w(u32 rd, u32 rs1) {
    442   return rv_fcvt(0x68, 0x0, rd, rs1, 0x7);
    443 }
    444 static inline u32 rv_fcvt_s_wu(u32 rd, u32 rs1) {
    445   return rv_fcvt(0x68, 0x1, rd, rs1, 0x7);
    446 }
    447 static inline u32 rv_fcvt_s_l(u32 rd, u32 rs1) {
    448   return rv_fcvt(0x68, 0x2, rd, rs1, 0x7);
    449 }
    450 static inline u32 rv_fcvt_s_lu(u32 rd, u32 rs1) {
    451   return rv_fcvt(0x68, 0x3, rd, rs1, 0x7);
    452 }
    453 static inline u32 rv_fcvt_d_w(u32 rd, u32 rs1) {
    454   return rv_fcvt(0x69, 0x0, rd, rs1, 0x7);
    455 }
    456 static inline u32 rv_fcvt_d_wu(u32 rd, u32 rs1) {
    457   return rv_fcvt(0x69, 0x1, rd, rs1, 0x7);
    458 }
    459 static inline u32 rv_fcvt_d_l(u32 rd, u32 rs1) {
    460   return rv_fcvt(0x69, 0x2, rd, rs1, 0x7);
    461 }
    462 static inline u32 rv_fcvt_d_lu(u32 rd, u32 rs1) {
    463   return rv_fcvt(0x69, 0x3, rd, rs1, 0x7);
    464 }
    465 /* FCVT.S.D / FCVT.D.S */
    466 static inline u32 rv_fcvt_s_d(u32 rd, u32 rs1) {
    467   return rv_fcvt(0x20, 0x1, rd, rs1, 0x7);
    468 }
    469 static inline u32 rv_fcvt_d_s(u32 rd, u32 rs1) {
    470   return rv_fcvt(0x21, 0x0, rd, rs1, 0x7);
    471 }
    472 
    473 /* FMV.X.W / FMV.W.X / FMV.X.D / FMV.D.X — bitcast between GPR and FPR. */
    474 static inline u32 rv_fmv_x_w(u32 rd, u32 rs1) {
    475   return rv_fcvt(0x70, 0x0, rd, rs1, 0x0);
    476 }
    477 static inline u32 rv_fmv_w_x(u32 rd, u32 rs1) {
    478   return rv_fcvt(0x78, 0x0, rd, rs1, 0x0);
    479 }
    480 static inline u32 rv_fmv_x_d(u32 rd, u32 rs1) {
    481   return rv_fcvt(0x71, 0x0, rd, rs1, 0x0);
    482 }
    483 static inline u32 rv_fmv_d_x(u32 rd, u32 rs1) {
    484   return rv_fcvt(0x79, 0x0, rd, rs1, 0x0);
    485 }
    486 
    487 /* FP compares — rd is integer GPR. funct7 = 0x50/0x51 (S/D). rm: 0=LE, 1=LT,
    488  * 2=EQ. */
    489 static inline u32 rv_feq_s(u32 rd, u32 rs1, u32 rs2) {
    490   return rv_r(0x50, rs2, rs1, 0x2, rd, RV_OP_FP);
    491 }
    492 static inline u32 rv_flt_s(u32 rd, u32 rs1, u32 rs2) {
    493   return rv_r(0x50, rs2, rs1, 0x1, rd, RV_OP_FP);
    494 }
    495 static inline u32 rv_fle_s(u32 rd, u32 rs1, u32 rs2) {
    496   return rv_r(0x50, rs2, rs1, 0x0, rd, RV_OP_FP);
    497 }
    498 static inline u32 rv_feq_d(u32 rd, u32 rs1, u32 rs2) {
    499   return rv_r(0x51, rs2, rs1, 0x2, rd, RV_OP_FP);
    500 }
    501 static inline u32 rv_flt_d(u32 rd, u32 rs1, u32 rs2) {
    502   return rv_r(0x51, rs2, rs1, 0x1, rd, RV_OP_FP);
    503 }
    504 static inline u32 rv_fle_d(u32 rd, u32 rs1, u32 rs2) {
    505   return rv_r(0x51, rs2, rs1, 0x0, rd, RV_OP_FP);
    506 }
    507 
    508 static inline u32 rv_flw(u32 rd, u32 rs1, i32 imm) {
    509   return rv_i(imm, rs1, 0x2, rd, RV_LOAD_FP);
    510 }
    511 static inline u32 rv_fld(u32 rd, u32 rs1, i32 imm) {
    512   return rv_i(imm, rs1, 0x3, rd, RV_LOAD_FP);
    513 }
    514 static inline u32 rv_fsw(u32 rs2, u32 rs1, i32 imm) {
    515   return rv_s(imm, rs2, rs1, 0x2, RV_STORE_FP);
    516 }
    517 static inline u32 rv_fsd(u32 rs2, u32 rs1, i32 imm) {
    518   return rv_s(imm, rs2, rs1, 0x3, RV_STORE_FP);
    519 }
    520 
    521 /* ---- A extension (LR/SC + AMO) ----
    522  * AMO funct7 layout: aq(26) rl(25) funct5(31:27) op-specific.
    523  * funct3 selects width: 0x2 = W (32-bit), 0x3 = D (64-bit). */
    524 static inline u32 rv_amo(u32 funct5, u32 aq, u32 rl, u32 rd, u32 rs1, u32 rs2,
    525                          u32 funct3) {
    526   u32 funct7 = (funct5 << 2) | ((aq & 1u) << 1) | (rl & 1u);
    527   return rv_r(funct7, rs2, rs1, funct3, rd, RV_AMO);
    528 }
    529 static inline u32 rv_lr_w(u32 rd, u32 rs1, u32 aq, u32 rl) {
    530   return rv_amo(0x02, aq, rl, rd, rs1, 0, 0x2);
    531 }
    532 static inline u32 rv_lr_d(u32 rd, u32 rs1, u32 aq, u32 rl) {
    533   return rv_amo(0x02, aq, rl, rd, rs1, 0, 0x3);
    534 }
    535 static inline u32 rv_sc_w(u32 rd, u32 rs1, u32 rs2, u32 aq, u32 rl) {
    536   return rv_amo(0x03, aq, rl, rd, rs1, rs2, 0x2);
    537 }
    538 static inline u32 rv_sc_d(u32 rd, u32 rs1, u32 rs2, u32 aq, u32 rl) {
    539   return rv_amo(0x03, aq, rl, rd, rs1, rs2, 0x3);
    540 }
    541 
    542 /* Other A-extension AMO funct5 codes (W and D widths via funct3). */
/* Values for the funct5 argument of rv_amo(); pair with funct3 = 0x2 (W)
 * or 0x3 (D). */
#define RV_AMO_SWAP 0x01u
#define RV_AMO_ADD 0x00u
#define RV_AMO_XOR 0x04u
#define RV_AMO_AND 0x0Cu
#define RV_AMO_OR 0x08u
#define RV_AMO_MIN 0x10u
#define RV_AMO_MAX 0x14u
#define RV_AMO_MINU 0x18u
#define RV_AMO_MAXU 0x1Cu
    552 
    553 /* Zicsr — CSR instructions. csr in imm[11:0]; funct3 selects op.
    554  *   csrrw=1, csrrs=2, csrrc=3, csrrwi=5, csrrsi=6, csrrci=7 */
    555 static inline u32 rv_csrrw(u32 rd, u32 csr, u32 rs1) {
    556   return rv_i((i32)(csr & 0xfffu), rs1, 0x1, rd, RV_SYSTEM);
    557 }
    558 static inline u32 rv_csrrs(u32 rd, u32 csr, u32 rs1) {
    559   return rv_i((i32)(csr & 0xfffu), rs1, 0x2, rd, RV_SYSTEM);
    560 }
    561 static inline u32 rv_csrrc(u32 rd, u32 csr, u32 rs1) {
    562   return rv_i((i32)(csr & 0xfffu), rs1, 0x3, rd, RV_SYSTEM);
    563 }
    564 static inline u32 rv_csrrwi(u32 rd, u32 csr, u32 uimm) {
    565   return rv_i((i32)(csr & 0xfffu), uimm & 0x1fu, 0x5, rd, RV_SYSTEM);
    566 }
    567 static inline u32 rv_csrrsi(u32 rd, u32 csr, u32 uimm) {
    568   return rv_i((i32)(csr & 0xfffu), uimm & 0x1fu, 0x6, rd, RV_SYSTEM);
    569 }
    570 static inline u32 rv_csrrci(u32 rd, u32 csr, u32 uimm) {
    571   return rv_i((i32)(csr & 0xfffu), uimm & 0x1fu, 0x7, rd, RV_SYSTEM);
    572 }
    573 
    574 /* ===================================================================
    575  * Format kinds — one per encoding family the descriptor table dispatches
    576  * on. R-type splits by funct3/funct7 selectors; I/S/B/U/J each carry a
    577  * distinct immediate layout. The C-extension formats (CR/CI/CSS/CIW/CL/
    578  * CS/CB/CJ) are 16-bit; the disassembler picks 16 vs 32 by checking the
    579  * bottom two bits of the first halfword (00/01/10 → compressed, 11 → 32).
    580  * =================================================================== */
/* Operand-layout tag for a descriptor row. Stored in Rv64InsnDesc.fmt as a
 * u8, so the enumerator count must stay below 256. Values are implicit and
 * sequential from 0. */
typedef enum Rv64Format {
  RV64_FMT_R,        /* funct7 rs2 rs1 funct3 rd op — most ALU ops */
  RV64_FMT_R4,       /* fused FMA: rs3 funct2 rs2 rs1 funct3 rd op */
  RV64_FMT_I,        /* imm[11:0] rs1 funct3 rd op — ALU-imm, loads, jalr */
  RV64_FMT_I_SHIFT,  /* shift-imm (shamt6/funct6) — RV64 SLLI/SRLI/SRAI */
  RV64_FMT_I_SHIFTW, /* RV32 word-shift (shamt5/funct7) — SLLIW/SRLIW/SRAIW */
  RV64_FMT_S,        /* store */
  RV64_FMT_B,        /* branch */
  RV64_FMT_U,        /* LUI/AUIPC */
  RV64_FMT_J,        /* JAL */
  RV64_FMT_LOAD,    /* I-type load: rd, imm(rs1) — printer uses memory syntax */
  RV64_FMT_STORE,   /* S-type store: rs2, imm(rs1) */
  RV64_FMT_JALR,    /* JALR: rd, imm(rs1) — memory-style operand syntax */
  RV64_FMT_FENCE,   /* FENCE pred,succ */
  RV64_FMT_SYSTEM,  /* ECALL/EBREAK — no operands */
  RV64_FMT_FP_RM,   /* FP arithmetic with rm: funct7 rs2 rs1 rm rd op */
  RV64_FMT_FP_R,    /* FP R-type without rm-as-mnemonic-suffix (cmp/sgnj) */
  RV64_FMT_FP_CVT,  /* FP conversion: rs2 is type selector, rs1 is src */
  RV64_FMT_FP_LOAD, /* fld/flw — rd[FP], imm(rs1) */
  RV64_FMT_FP_STORE, /* fsd/fsw — rs2[FP], imm(rs1) */
  RV64_FMT_AMO,      /* atomic: rd, rs2, (rs1) */
  RV64_FMT_LR,       /* LR.W/D: rd, (rs1) — no rs2 */
  RV64_FMT_CSR,      /* csrr*: rd, csr, rs1 */
  RV64_FMT_CSRI,     /* csrr*i: rd, csr, uimm5 */
  /* ---- Compressed (16-bit) formats ---- */
  RV64_FMT_CR,     /* funct4 rd/rs1 rs2 op (e.g. C.MV, C.ADD, C.JR, C.JALR) */
  RV64_FMT_CI,     /* funct3 imm rd/rs1 imm op (e.g. C.ADDI, C.LI, C.LUI) */
  RV64_FMT_CSS,    /* funct3 imm rs2 op (stack store: C.SDSP, C.SWSP) */
  RV64_FMT_CIW,    /* funct3 imm rd' op (C.ADDI4SPN) */
  RV64_FMT_CL,     /* funct3 imm rs1' imm rd' op (C.LD, C.LW) */
  RV64_FMT_CS,     /* funct3 imm rs1' imm rs2' op (C.SD, C.SW) */
  RV64_FMT_CA,     /* funct6 rd'/rs1' funct2 rs2' op (C.AND, C.OR, ...) */
  RV64_FMT_CB,     /* branch: funct3 imm rs1' imm op (C.BEQZ, C.BNEZ) */
  RV64_FMT_CJ,     /* jump: funct3 imm op (C.J, C.JAL_unused on RV64) */
  RV64_FMT_C_NONE, /* known opcode with no operands (C.NOP, C.EBREAK) */
  /* Assembler-only multi-word pseudo-instruction (call/tail/la/lla). The
   * descriptor's `match` is unused; the assembler dispatches on mnemonic
   * and emits the AUIPC+JALR / AUIPC+ADDI expansion directly. */
  RV64_FMT_PSEUDO,
} Rv64Format;
    621 
/* Small set of decoded-instruction tags; 0 is reserved for "not recognised".
 * NOTE(review): nothing else in this header references this enum —
 * presumably a decoder in isa.c or a client produces these; confirm. */
typedef enum Rv64DecodedOpcode {
  RV64_DEC_UNKNOWN = 0,
  RV64_DEC_ADDI,
  RV64_DEC_ADD,
  RV64_DEC_AUIPC,
  RV64_DEC_LD,
  RV64_DEC_SD,
  RV64_DEC_JALR,
  RV64_DEC_ECALL,
  RV64_DEC_EBREAK,
} Rv64DecodedOpcode;
    633 
    634 /* ---- AsmFlags column on Rv64InsnDesc ---- */
/* Single-bit flags, OR-able together; they must fit the u8 `flags` field of
 * Rv64InsnDesc. */
#define RV64_ASMFL_ALIAS 0x01u /* row is an alias (preferred print form) */
#define RV64_ASMFL_FP 0x02u    /* operands take f-register prefix */
#define RV64_ASMFL_NORM 0x04u  /* FP_RM row prints without rm suffix */
#define RV64_ASMFL_C16 0x08u   /* 16-bit compressed instruction */
/* Assembler-only multi-word pseudo (call/tail/la/lla). These expand to
 * several 32-bit words and never participate in disassembly — the decoder
 * sees the individual auipc/jalr/addi words instead. rv64_disasm_find
 * skips rows carrying this flag. */
#define RV64_ASMFL_PSEUDO 0x10u
    644 
    645 /* ===================================================================
    646  * Per-format field structs + pack/unpack pure functions.
    647  * =================================================================== */
    648 
    649 typedef struct Rv64R {
    650   u32 funct7, rs2, rs1, funct3, rd, op;
    651 } Rv64R;
    652 typedef struct Rv64I {
    653   u32 imm12, rs1, funct3, rd, op;
    654 } Rv64I;
    655 typedef struct Rv64S {
    656   u32 imm12, rs2, rs1, funct3, op;
    657 } Rv64S;
    658 typedef struct Rv64B {
    659   u32 imm13, rs2, rs1, funct3, op;
    660 } Rv64B;
    661 typedef struct Rv64U {
    662   u32 imm32_hi20, rd, op;
    663 } Rv64U;
    664 typedef struct Rv64J {
    665   u32 imm21, rd, op;
    666 } Rv64J;
    667 
    668 static inline Rv64R rv64_r_unpack(u32 w) {
    669   Rv64R f;
    670   f.funct7 = (w >> 25) & 0x7fu;
    671   f.rs2 = (w >> 20) & 0x1fu;
    672   f.rs1 = (w >> 15) & 0x1fu;
    673   f.funct3 = (w >> 12) & 0x7u;
    674   f.rd = (w >> 7) & 0x1fu;
    675   f.op = w & 0x7fu;
    676   return f;
    677 }
    678 static inline Rv64I rv64_i_unpack(u32 w) {
    679   Rv64I f;
    680   f.imm12 = (w >> 20) & 0xfffu;
    681   f.rs1 = (w >> 15) & 0x1fu;
    682   f.funct3 = (w >> 12) & 0x7u;
    683   f.rd = (w >> 7) & 0x1fu;
    684   f.op = w & 0x7fu;
    685   return f;
    686 }
    687 static inline Rv64S rv64_s_unpack(u32 w) {
    688   Rv64S f;
    689   f.imm12 = (((w >> 25) & 0x7fu) << 5) | ((w >> 7) & 0x1fu);
    690   f.rs2 = (w >> 20) & 0x1fu;
    691   f.rs1 = (w >> 15) & 0x1fu;
    692   f.funct3 = (w >> 12) & 0x7u;
    693   f.op = w & 0x7fu;
    694   return f;
    695 }
    696 static inline Rv64B rv64_b_unpack(u32 w) {
    697   Rv64B f;
    698   f.imm13 = (((w >> 31) & 1u) << 12) | (((w >> 7) & 1u) << 11) |
    699             (((w >> 25) & 0x3fu) << 5) | (((w >> 8) & 0xfu) << 1);
    700   f.rs2 = (w >> 20) & 0x1fu;
    701   f.rs1 = (w >> 15) & 0x1fu;
    702   f.funct3 = (w >> 12) & 0x7u;
    703   f.op = w & 0x7fu;
    704   return f;
    705 }
    706 static inline Rv64U rv64_u_unpack(u32 w) {
    707   Rv64U f;
    708   f.imm32_hi20 = w & 0xfffff000u;
    709   f.rd = (w >> 7) & 0x1fu;
    710   f.op = w & 0x7fu;
    711   return f;
    712 }
    713 static inline Rv64J rv64_j_unpack(u32 w) {
    714   Rv64J f;
    715   f.imm21 = (((w >> 31) & 1u) << 20) | (((w >> 12) & 0xffu) << 12) |
    716             (((w >> 20) & 1u) << 11) | (((w >> 21) & 0x3ffu) << 1);
    717   f.rd = (w >> 7) & 0x1fu;
    718   f.op = w & 0x7fu;
    719   return f;
    720 }
    721 
    722 /* Sign-extend an n-bit value held in the low bits of v to i64. */
    723 static inline i64 rv64_sext(u64 v, u32 nbits) {
    724   u64 mask = (nbits >= 64u) ? ~0ull : ((1ull << nbits) - 1ull);
    725   v &= mask;
    726   u64 sign = (nbits == 0u) ? 0ull : (1ull << (nbits - 1u));
    727   if (v & sign) v |= ~mask;
    728   return (i64)v;
    729 }
    730 
    731 /* ===================================================================
    732  * Compressed (RV64C) helpers — 16-bit instructions.
    733  *
    734  * Layout (per RVC quadrant): bits[1:0] (op) select the quadrant:
    735  *   00 → Q0 (stack-relative & load/store narrow),
    736  *   01 → Q1 (constant/branch),
    737  *   10 → Q2 (stack pointer access & jumps & MV/ADD).
    738  * 11 is reserved for 32-bit (uncompressed) instructions, so the
    739  * disassembler picks 16-bit when (halfword & 3) != 3.
    740  *
    741  * The "narrow" register fields rs1' / rs2' / rd' are 3-bit and encode
    742  * x8..x15; macro RVC_REG3 unfolds: r' → 8 + r'. */
/* Map a 3-bit compressed register field to a full register number: only the
 * low three bits of r3 are used and 8 is added. The argument is evaluated
 * exactly once. */
#define RVC_REG3(r3) ((u32)(8u + ((r3) & 7u)))
    744 
/* Wrapper for one compressed instruction.
 * NOTE(review): no helper in this header consumes this type; presumably
 * used by the compressed decode path in isa.c — confirm. */
typedef struct Rv64C {
  u32 word;
} Rv64C; /* 16-bit halfword in low 16 bits */
    748 
    749 /* ===================================================================
    750  * Descriptor table.
    751  * =================================================================== */
    752 
/* One row of the instruction descriptor table. */
typedef struct Rv64InsnDesc {
  Slice mnemonic; /* instruction name; callers of rv64_print_operands write it
                     themselves before the operands */
  /* NOTE(review): presumably a word matches when (word & mask) == match —
   * confirm in isa.c. `match` is unused for RV64_FMT_PSEUDO rows. */
  u32 match;
  u32 mask;
  u8 fmt;   /* Rv64Format */
  u8 flags; /* RV64_ASMFL_* */
  u8 pad[2];
} Rv64InsnDesc;
    761 
/* The descriptor table; per the file header its definition lives in isa.c.
 * NOTE(review): rv64_insn_table_n is presumably the number of rows in
 * rv64_insn_table — confirm against isa.c. */
extern const Rv64InsnDesc rv64_insn_table[];
extern const u32 rv64_insn_table_n;
    764 
    765 /* Linear-scan lookup. Returns the matching descriptor or NULL. First
    766  * match wins; ordering puts more-specific entries (aliases, fixed-Rd
    767  * forms) before broader ones. */
    768 const Rv64InsnDesc* rv64_disasm_find(u32 word);
    769 
    770 /* Compressed-instruction (16-bit) variant. Pass the halfword in the low
    771  * 16 bits of `word`. Returns NULL if no descriptor matches. */
    772 const Rv64InsnDesc* rv64_disasm_find_c(u32 word);
    773 
    774 /* Mnemonic → descriptor for the assembler. Returns NULL if not found.
    775  * Ignores ALIAS-only rows when those would produce ambiguous parses
    776  * (the canonical form is always reachable). */
    777 const Rv64InsnDesc* rv64_asm_find(Slice mnemonic);
    778 
    779 /* ===================================================================
    780  * Operand print / parse dispatch.
    781  *
    782  * rv64_print_operands renders the operand text (everything after the
    783  * mnemonic) for `word` into `sb`, using `desc->fmt` to dispatch.
    784  * Mnemonic itself is in `desc->mnemonic`; the caller writes it before
    785  * calling this helper. `vaddr` is the instruction's virtual address for
    786  * PC-relative formats; pass 0 if not known. */
    787 void rv64_print_operands(StrBuf* sb, const Rv64InsnDesc* desc, u32 word,
    788                          u64 vaddr);
    789 
    790 #endif /* KIT_RV64_ISA_H */