kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

abi_rv64.c (14160B)


      1 /* RISC-V ABI dispatch (shared LP64* / ILP32* classifier).
      2  *
      3  * One descriptor-parameterized classifier serves both XLENs. The descriptor is
      4  * derived per call from a->c->target:
      5  *   gpr_bytes           = target.ptr_size (4 on rv32, 8 on rv64)
      6  *   aggregate_gpr_bytes = 2 * gpr_bytes (8 on rv32, 16 on rv64)
      7  *   flen                = FP register width in bytes from target.float_abi:
      8  *                           DOUBLE -> 8, SINGLE -> 4, SOFT -> 0,
      9  *                           DEFAULT(unset) -> gpr_bytes (preserves the old
     10  *                           rv64 LP64D behavior byte-for-byte).
     11  *
     12  * Covers the subset the cg test harness exercises plus the RISC-V psABI
     13  * floating-point aggregate refinements:
     14  *   void          -> IGNORE
     15  *   integer ≤ XLEN -> DIRECT, one INT part (a0..a7 for args; a0 for return)
     16  *   pointer       -> DIRECT, one INT part
     17  *   float/double  -> DIRECT, one FP part when FP-eligible (fa0..fa7 for args;
     18  *                    fa0 for return); otherwise INT (and a 2*XLEN scalar
     19  *                    becomes a GPR pair).
     20  *   small struct  -> DIRECT:
     21  *                    * homogeneous FP aggregate (1 or 2 same-kind FP fields,
     22  *                      ignoring empty/zero-size fields and zero-length arrays)
     23  *                      -> FP parts (fa pair) when FP-eligible;
     24  *                    * one FP + one INT scalar (in either order, ≤ 2*XLEN)
     25  *                      -> (fa, a) or (a, fa) pair;
     26  *                    * otherwise INT parts up to 2*XLEN (passed in up to 2 GPRs).
     27  *   large struct  -> INDIRECT (sret for return; byval for args)
     28  *
     29  * Long double is IEEE-754 binary128 (quad) and __int128 are 16-byte scalars
     30  * passed/returned in an aligned pair of integer registers (low-order half in
     31  * the lower-numbered register). On rv64 this is the size==2*gpr_bytes pair
     32  * path; there are no 128-bit FP registers. On rv32 a 64-bit scalar (i64, or a
     33  * soft-float double) is the size==2*gpr_bytes even-GPR pair.
     34  *
     35  * Variadic args bypass these rules entirely and always go through the
     36  * integer register file / stack (handled at the caller / callee sites). */
     37 
     38 #include <string.h>
     39 
     40 #include "abi/abi_internal.h"
     41 #include "cg/type.h"
     42 #include "core/arena.h"
     43 #include "core/core.h"
     44 
/* Per-call ABI descriptor derived from the target spec.
 *
 * Filled in by riscv_abi_desc() and passed around by value / const pointer;
 * it carries the register widths that the classifier needs, so the same
 * classification code can serve both XLENs. */
typedef struct RiscvAbiDesc {
  u32 gpr_bytes;           /* XLEN in bytes: 4 (rv32) or 8 (rv64) */
  u32 aggregate_gpr_bytes; /* 2 * gpr_bytes: the small-struct register cap */
  u32 flen;                /* FP register width in bytes: 0, 4, or 8 */
} RiscvAbiDesc;
     51 
     52 static RiscvAbiDesc riscv_abi_desc(TargetABI* a) {
     53   RiscvAbiDesc d;
     54   d.gpr_bytes = a->c->target.ptr_size ? a->c->target.ptr_size : 8u;
     55   d.aggregate_gpr_bytes = 2u * d.gpr_bytes;
     56   switch (a->c->target.float_abi) {
     57     case KIT_FLOAT_ABI_DOUBLE:
     58       d.flen = 8u;
     59       break;
     60     case KIT_FLOAT_ABI_SINGLE:
     61       d.flen = 4u;
     62       break;
     63     case KIT_FLOAT_ABI_SOFT:
     64       d.flen = 0u;
     65       break;
     66     case KIT_FLOAT_ABI_DEFAULT:
     67     default:
     68       /* Unset: preserve the historical rv64 LP64D behavior, i.e. treat the FP
     69        * register width as the GPR width (flen == 8 on rv64). */
     70       d.flen = d.gpr_bytes;
     71       break;
     72   }
     73   return d;
     74 }
     75 
     76 /* An FP scalar of `size` bytes can be carried in an FP register iff the float
     77  * ABI is hard and the value fits: float (4) needs flen>=4; double (8) needs
     78  * flen>=8. With soft float (flen==0) nothing is FP-eligible. */
     79 static int riscv_fp_eligible(u32 flen, u32 size) {
     80   return flen != 0u && size <= flen;
     81 }
     82 
/* One leaf scalar of an aggregate, as recorded by riscv_collect_leaves().
 *
 * That walker visits a record collecting the leaf scalars in ABI order,
 * skipping zero-size members (empty structs, zero-length arrays, zero-size
 * scalars). It returns the number of leaves collected, or > cap if the
 * record cannot be described within the cap (caller falls back to GPR
 * pair).
 * NOTE(review): the walker only tests bit_width != 0 (and bails on it); how
 * a zero-width bitfield is represented is not visible here - confirm it
 * resolves to a zero-size member. */
typedef struct AbiLeaf {
  u32 offset;     /* byte offset within the outermost aggregate */
  u32 size;       /* leaf scalar size in bytes */
  u8 scalar_kind; /* ABIScalarKind */
} AbiLeaf;
     92 
     93 static u32 riscv_collect_leaves(TargetABI* a, KitCgTypeId tid, u32 base_off,
     94                                 AbiLeaf* out, u32 cap, u32 written) {
     95   const CgType* t = cg_type_get(a->c, tid);
     96   if (!t) return written + 1u; /* poison: treat as too-many */
     97   if (t->kind == KIT_CG_TYPE_ALIAS)
     98     return riscv_collect_leaves(a, t->alias.base, base_off, out, cap, written);
     99   if (t->kind == KIT_CG_TYPE_RECORD) {
    100     if (t->record.is_union) return cap + 1u; /* unions: bail */
    101     for (u32 i = 0; i < t->record.nfields; ++i) {
    102       const CgTypeField* f = &t->record.fields[i];
    103       /* Skip bitfields explicitly: a bitfield with bit_width 0 is a layout
    104        * barrier, a non-zero bitfield kills FP-aggregate classification per
    105        * the psABI (treat the whole record as GPR-pair). */
    106       if (f->bit_width != 0) return cap + 1u;
    107       u32 off = base_off + (u32)f->offset;
    108       written = riscv_collect_leaves(a, f->type, off, out, cap, written);
    109       if (written > cap) return written;
    110     }
    111     return written;
    112   }
    113   if (t->kind == KIT_CG_TYPE_ARRAY) {
    114     if (t->array.count == 0) return written; /* zero-length array: skip */
    115     ABITypeInfo elem = abi_internal_type_info(a, t->array.elem);
    116     if (elem.size == 0) return written;
    117     for (u64 i = 0; i < t->array.count; ++i) {
    118       u32 off = base_off + (u32)(i * elem.size);
    119       written = riscv_collect_leaves(a, t->array.elem, off, out, cap, written);
    120       if (written > cap) return written;
    121     }
    122     return written;
    123   }
    124   /* Scalar leaf (including pointer). */
    125   ABITypeInfo ti = abi_internal_type_info(a, tid);
    126   if (ti.size == 0) return written;
    127   if (written >= cap) return written + 1u;
    128   out[written].offset = base_off;
    129   out[written].size = ti.size;
    130   out[written].scalar_kind = ti.scalar_kind;
    131   return written + 1u;
    132 }
    133 
    134 static void classify_scalar(TargetABI* a, KitCgTypeId t, ABIArgInfo* out) {
    135   RiscvAbiDesc d = riscv_abi_desc(a);
    136   ABITypeInfo ti = abi_internal_type_info(a, t);
    137   /* A scalar twice the GPR width that lives in the integer/long-double space
    138    * (or a soft-float double) is carried as an aligned pair of GPRs. On rv64
    139    * this is the 16-byte long double / __int128 pair; on rv32 it is the 8-byte
    140    * i64 / soft-double pair. A double is only excluded from the pair here when
    141    * it is FP-eligible (handled by the single-FP-part path below). */
    142   int fp_part = (ti.scalar_kind == ABI_SC_FLOAT) &&
    143                 riscv_fp_eligible(d.flen, ti.size);
    144   if (ti.size == 2u * d.gpr_bytes && !fp_part &&
    145       (ti.scalar_kind == ABI_SC_INT || ti.scalar_kind == ABI_SC_FLOAT)) {
    146     ABIArgPart* parts = arena_array(a->c->tu, ABIArgPart, 2);
    147     memset(parts, 0, sizeof(ABIArgPart) * 2);
    148     parts[0].cls = ABI_CLASS_INT;
    149     parts[0].loc = ABI_LOC_REG;
    150     parts[0].size = d.gpr_bytes;
    151     parts[0].align = d.gpr_bytes;
    152     parts[0].src_offset = 0;
    153     parts[1].cls = ABI_CLASS_INT;
    154     parts[1].loc = ABI_LOC_REG;
    155     parts[1].size = d.gpr_bytes;
    156     parts[1].align = d.gpr_bytes;
    157     parts[1].src_offset = d.gpr_bytes;
    158     out->kind = ABI_ARG_DIRECT;
    159     out->flags = ABI_AF_NONE;
    160     out->parts = parts;
    161     out->nparts = 2;
    162     out->indirect_align = 0;
    163     return;
    164   }
    165   out->kind = ABI_ARG_DIRECT;
    166   out->flags = ABI_AF_NONE;
    167   out->indirect_align = 0;
    168 
    169   ABIArgPart* parts = arena_new(a->c->tu, ABIArgPart);
    170   memset(parts, 0, sizeof *parts);
    171   parts->cls = fp_part ? ABI_CLASS_FP : ABI_CLASS_INT;
    172   parts->loc = ABI_LOC_REG;
    173   parts->size = ti.size;
    174   parts->align = ti.align;
    175   parts->src_offset = 0;
    176 
    177   out->parts = parts;
    178   out->nparts = 1;
    179 }
    180 
    181 static u32 riscv32_scalar_split_lane_size(TargetABI* a, KitCgTypeId t) {
    182   RiscvAbiDesc d = riscv_abi_desc(a);
    183   ABITypeInfo ti = abi_internal_type_info(a, t);
    184   int fp_part;
    185   if (d.gpr_bytes != 4u) return 0;
    186   fp_part = (ti.scalar_kind == ABI_SC_FLOAT) &&
    187             riscv_fp_eligible(d.flen, ti.size);
    188   if (ti.size == 2u * d.gpr_bytes && !fp_part &&
    189       (ti.scalar_kind == ABI_SC_INT || ti.scalar_kind == ABI_SC_FLOAT))
    190     return d.gpr_bytes;
    191   return 0;
    192 }
    193 
    194 static void classify_void(ABIArgInfo* out) {
    195   memset(out, 0, sizeof *out);
    196   out->kind = ABI_ARG_IGNORE;
    197 }
    198 
    199 /* Try the psABI floating-point aggregate refinements. Returns 1 if `out`
    200  * was populated, 0 to fall back to the generic GPR-pair packing. */
    201 static int riscv_classify_fp_aggregate(TargetABI* a, KitCgTypeId t,
    202                                        const RiscvAbiDesc* d, ABIArgInfo* out) {
    203   AbiLeaf leaves[2];
    204   u32 n = riscv_collect_leaves(a, t, 0, leaves, /*cap=*/2u, /*written=*/0u);
    205   /* n > 2: bail; n == 0: caller already handled zero-size aggregates. */
    206   if (n == 0 || n > 2) return 0;
    207 
    208   u32 nfp = 0;
    209   for (u32 i = 0; i < n; ++i) {
    210     if (leaves[i].scalar_kind == ABI_SC_FLOAT) {
    211       /* An FP leaf only stays in the FP file when it is FP-eligible. With
    212        * soft float, or a double wider than flen, the aggregate must fall
    213        * back to the GPR-pair path. */
    214       if (!riscv_fp_eligible(d->flen, leaves[i].size)) return 0;
    215       ++nfp;
    216     }
    217     /* ABI_SC_INT, ABI_SC_BOOL, ABI_SC_PTR all go to the GPR side. */
    218   }
    219   if (nfp == 0) return 0; /* pure-INT goes through the GPR-pair path. */
    220 
    221   /* Build the part list in source order so that downstream codegen can
    222    * align src_offset with the record's field layout. */
    223   ABIArgPart* parts = arena_array(a->c->tu, ABIArgPart, n);
    224   memset(parts, 0, sizeof(ABIArgPart) * n);
    225   for (u32 i = 0; i < n; ++i) {
    226     parts[i].loc = ABI_LOC_REG;
    227     parts[i].size = leaves[i].size;
    228     parts[i].align = leaves[i].size ? leaves[i].size : 1u;
    229     parts[i].src_offset = leaves[i].offset;
    230     parts[i].cls =
    231         (leaves[i].scalar_kind == ABI_SC_FLOAT) ? ABI_CLASS_FP : ABI_CLASS_INT;
    232   }
    233   out->kind = ABI_ARG_DIRECT;
    234   out->flags = ABI_AF_NONE;
    235   out->parts = parts;
    236   out->nparts = (u16)n;
    237   out->indirect_align = 0;
    238   return 1;
    239 }
    240 
    241 static void classify_aggregate(TargetABI* a, KitCgTypeId t, ABIArgInfo* out,
    242                                int is_return) {
    243   RiscvAbiDesc d = riscv_abi_desc(a);
    244   ABITypeInfo ti = abi_internal_type_info(a, t);
    245   if (ti.size == 0) {
    246     classify_void(out);
    247     return;
    248   }
    249   if (ti.size <= d.aggregate_gpr_bytes) {
    250     /* Per psABI: try the FP-aware refinement first (HFA / fp+int pair). */
    251     if (riscv_classify_fp_aggregate(a, t, &d, out)) return;
    252     u32 nparts = (ti.size + d.gpr_bytes - 1u) / d.gpr_bytes;
    253     ABIArgPart* parts = arena_array(a->c->tu, ABIArgPart, nparts);
    254     memset(parts, 0, sizeof(ABIArgPart) * nparts);
    255     u32 off = 0;
    256     for (u32 i = 0; i < nparts; ++i) {
    257       u32 chunk =
    258           (ti.size - off > d.gpr_bytes) ? d.gpr_bytes : (ti.size - off);
    259       parts[i].cls = ABI_CLASS_INT;
    260       parts[i].loc = ABI_LOC_REG;
    261       parts[i].size = chunk;
    262       parts[i].align = d.gpr_bytes;
    263       parts[i].src_offset = off;
    264       off += chunk;
    265     }
    266     out->kind = ABI_ARG_DIRECT;
    267     out->flags = ABI_AF_NONE;
    268     out->parts = parts;
    269     out->nparts = (u16)nparts;
    270     out->indirect_align = 0;
    271   } else {
    272     out->kind = ABI_ARG_INDIRECT;
    273     out->flags = is_return ? ABI_AF_SRET : ABI_AF_BYVAL;
    274     out->indirect_align = ti.align ? ti.align : d.gpr_bytes;
    275     out->parts = NULL;
    276     out->nparts = 0;
    277   }
    278   (void)is_return;
    279 }
    280 
    281 static void classify_one(TargetABI* a, KitCgTypeId t, ABIArgInfo* out,
    282                          int is_return) {
    283   const CgType* ty = cg_type_get(a->c, t);
    284   if (!ty || ty->kind == KIT_CG_TYPE_VOID) {
    285     classify_void(out);
    286     return;
    287   }
    288   switch (ty->kind) {
    289     case KIT_CG_TYPE_RECORD:
    290       classify_aggregate(a, t, out, is_return);
    291       return;
    292     case KIT_CG_TYPE_ALIAS:
    293       classify_one(a, ty->alias.base, out, is_return);
    294       return;
    295     default:
    296       classify_scalar(a, t, out);
    297       return;
    298   }
    299 }
    300 
    301 static ABIFuncInfo* riscv_compute_func_info(TargetABI* a, KitCgTypeId fn) {
    302   ABIFuncInfo* info = arena_new(a->c->tu, ABIFuncInfo);
    303   const CgType* fnty = cg_type_get(a->c, fn);
    304   memset(info, 0, sizeof *info);
    305 
    306   classify_one(a, cg_func_ret_type(fnty), &info->ret, /*is_return=*/1);
    307   info->has_sret = (info->ret.kind == ABI_ARG_INDIRECT) ? 1 : 0;
    308   /* RISC-V passes the sret pointer in a0 (the first integer arg register),
    309    * consuming that slot. */
    310   info->sret_consumes_int_arg = info->has_sret;
    311   info->variadic = fnty->func.abi_variadic;
    312 
    313   info->nparams = (u16)fnty->func.nparams;
    314   if (fnty->func.nparams) {
    315     ABIArgInfo* arr = arena_array(a->c->tu, ABIArgInfo, fnty->func.nparams);
    316     memset(arr, 0, sizeof(ABIArgInfo) * fnty->func.nparams);
    317     for (u32 i = 0; i < fnty->func.nparams; ++i) {
    318       classify_one(a, fnty->func.params[i].type, &arr[i], /*is_return=*/0);
    319     }
    320     info->params = arr;
    321   } else {
    322     info->params = NULL;
    323   }
    324   return info;
    325 }
    326 
/* ABI vtable for rv64 targets. Function classification is shared with rv32
 * through riscv_compute_func_info; no scalar_split_lane_size hook is
 * installed here, so that member keeps its static zero initialization. */
const ABIVtable rv64_vtable = {
    .compute_func_info = riscv_compute_func_info,
    /* NOTE(review): positional initializer; presumably {size 8, align 8,
     * pointer scalar kind, ...} - confirm against the ABITypeInfo field
     * order. */
    .va_list_info = {8, 8, ABI_SC_PTR, 0, 0, 0},
    /* LP64D va_list is a plain pointer, but the variadic register-save area is
     * the 8 integer arg registers (a0..a7) spilled contiguously = 64 bytes; FP
     * varargs are passed in GPRs, so there is no separate FP save area. The
     * gp_reg_count/gp_slot_size fields let native_frame_va_save_bytes size that
     * area from the ABI rather than a backend constant. */
    .va_list_layout = {.type = {8, 8, ABI_SC_PTR, 0, 0, 0},
                       .kind = ABI_VA_LIST_POINTER,
                       .gp_reg_count = 8, /* a0..a7 */
                       .fp_reg_count = 0, /* no FP save area */
                       .gp_slot_size = 8, /* 8 regs * 8 bytes = 64 bytes */
                       .fp_slot_size = 0},
};
    342 
/* ABI vtable for rv32 targets. Shares riscv_compute_func_info with rv64 and
 * additionally installs riscv32_scalar_split_lane_size, which reports the
 * lane size for scalars that are split across a GPR pair. */
const ABIVtable rv32_vtable = {
    .compute_func_info = riscv_compute_func_info,
    .scalar_split_lane_size = riscv32_scalar_split_lane_size,
    /* NOTE(review): positional initializer; presumably {size 4, align 4,
     * pointer scalar kind, ...} - confirm against the ABITypeInfo field
     * order. */
    .va_list_info = {4, 4, ABI_SC_PTR, 0, 0, 0},
    /* ILP32* va_list is a plain 4-byte pointer; the variadic register-save
     * area is the 8 integer arg registers (a0..a7) spilled contiguously =
     * 32 bytes. FP varargs are passed in GPRs, so there is no FP save area. */
    .va_list_layout = {.type = {4, 4, ABI_SC_PTR, 0, 0, 0},
                       .kind = ABI_VA_LIST_POINTER,
                       .gp_reg_count = 8, /* a0..a7 */
                       .fp_reg_count = 0, /* no FP save area */
                       .gp_slot_size = 4, /* 8 regs * 4 bytes = 32 bytes */
                       .fp_slot_size = 0},
};