abi_rv64.c (14160B)
1 /* RISC-V ABI dispatch (shared LP64* / ILP32* classifier). 2 * 3 * One descriptor-parameterized classifier serves both XLENs. The descriptor is 4 * derived per call from a->c->target: 5 * gpr_bytes = target.ptr_size (4 on rv32, 8 on rv64) 6 * aggregate_gpr_bytes = 2 * gpr_bytes (8 on rv32, 16 on rv64) 7 * flen = FP register width in bytes from target.float_abi: 8 * DOUBLE -> 8, SINGLE -> 4, SOFT -> 0, 9 * DEFAULT(unset) -> gpr_bytes (preserves the old 10 * rv64 LP64D behavior byte-for-byte). 11 * 12 * Covers the subset the cg test harness exercises plus the RISC-V psABI 13 * floating-point aggregate refinements: 14 * void -> IGNORE 15 * integer ≤ XLEN -> DIRECT, one INT part (a0..a7 for args; a0 for return) 16 * pointer -> DIRECT, one INT part 17 * float/double -> DIRECT, one FP part when FP-eligible (fa0..fa7 for args; 18 * fa0 for return); otherwise INT (and a 2*XLEN scalar 19 * becomes a GPR pair). 20 * small struct -> DIRECT: 21 * * homogeneous FP aggregate (1 or 2 same-kind FP fields, 22 * ignoring empty/zero-size fields and zero-length arrays) 23 * -> FP parts (fa pair) when FP-eligible; 24 * * one FP + one INT scalar (in either order, ≤ 2*XLEN) 25 * -> (fa, a) or (a, fa) pair; 26 * * otherwise INT parts up to 2*XLEN (passed in up to 2 GPRs). 27 * large struct -> INDIRECT (sret for return; byval for args) 28 * 29 * Long double is IEEE-754 binary128 (quad) and __int128 are 16-byte scalars 30 * passed/returned in an aligned pair of integer registers (low-order half in 31 * the lower-numbered register). On rv64 this is the size==2*gpr_bytes pair 32 * path; there are no 128-bit FP registers. On rv32 a 64-bit scalar (i64, or a 33 * soft-float double) is the size==2*gpr_bytes even-GPR pair. 34 * 35 * Variadic args bypass these rules entirely and always go through the 36 * integer register file / stack (handled at the caller / callee sites). */ 37 38 #include <string.h> 39 40 #include "abi/abi_internal.h" 41 #include "cg/type.h" 42 #include "core/arena.h" 43 #include "core/core.h" 44 45 /* Per-call ABI descriptor derived from the target spec. */ 46 typedef struct RiscvAbiDesc { 47 u32 gpr_bytes; /* XLEN in bytes: 4 (rv32) or 8 (rv64) */ 48 u32 aggregate_gpr_bytes; /* 2 * gpr_bytes: the small-struct register cap */ 49 u32 flen; /* FP register width in bytes: 0, 4, or 8 */ 50 } RiscvAbiDesc; 51 52 static RiscvAbiDesc riscv_abi_desc(TargetABI* a) { 53 RiscvAbiDesc d; 54 d.gpr_bytes = a->c->target.ptr_size ? a->c->target.ptr_size : 8u; 55 d.aggregate_gpr_bytes = 2u * d.gpr_bytes; 56 switch (a->c->target.float_abi) { 57 case KIT_FLOAT_ABI_DOUBLE: 58 d.flen = 8u; 59 break; 60 case KIT_FLOAT_ABI_SINGLE: 61 d.flen = 4u; 62 break; 63 case KIT_FLOAT_ABI_SOFT: 64 d.flen = 0u; 65 break; 66 case KIT_FLOAT_ABI_DEFAULT: 67 default: 68 /* Unset: preserve the historical rv64 LP64D behavior, i.e. treat the FP 69 * register width as the GPR width (flen == 8 on rv64). */ 70 d.flen = d.gpr_bytes; 71 break; 72 } 73 return d; 74 } 75 76 /* An FP scalar of `size` bytes can be carried in an FP register iff the float 77 * ABI is hard and the value fits: float (4) needs flen>=4; double (8) needs 78 * flen>=8. With soft float (flen==0) nothing is FP-eligible. */ 79 static int riscv_fp_eligible(u32 flen, u32 size) { 80 return flen != 0u && size <= flen; 81 } 82 83 /* Walk a record collecting the leaf scalars in ABI order, skipping 84 * zero-size members (empty structs, zero-length arrays, zero-width 85 * bitfields). Returns the number of leaves collected, or > cap if the 86 * record has too many leaves to inspect (caller falls back to GPR pair). */ 87 typedef struct AbiLeaf { 88 u32 offset; /* byte offset within the outermost aggregate */ 89 u32 size; /* leaf scalar size in bytes */ 90 u8 scalar_kind; /* ABIScalarKind */ 91 } AbiLeaf; 92 93 static u32 riscv_collect_leaves(TargetABI* a, KitCgTypeId tid, u32 base_off, 94 AbiLeaf* out, u32 cap, u32 written) { 95 const CgType* t = cg_type_get(a->c, tid); 96 if (!t) return written + 1u; /* poison: treat as too-many */ 97 if (t->kind == KIT_CG_TYPE_ALIAS) 98 return riscv_collect_leaves(a, t->alias.base, base_off, out, cap, written); 99 if (t->kind == KIT_CG_TYPE_RECORD) { 100 if (t->record.is_union) return cap + 1u; /* unions: bail */ 101 for (u32 i = 0; i < t->record.nfields; ++i) { 102 const CgTypeField* f = &t->record.fields[i]; 103 /* Skip bitfields explicitly: a bitfield with bit_width 0 is a layout 104 * barrier, a non-zero bitfield kills FP-aggregate classification per 105 * the psABI (treat the whole record as GPR-pair). */ 106 if (f->bit_width != 0) return cap + 1u; 107 u32 off = base_off + (u32)f->offset; 108 written = riscv_collect_leaves(a, f->type, off, out, cap, written); 109 if (written > cap) return written; 110 } 111 return written; 112 } 113 if (t->kind == KIT_CG_TYPE_ARRAY) { 114 if (t->array.count == 0) return written; /* zero-length array: skip */ 115 ABITypeInfo elem = abi_internal_type_info(a, t->array.elem); 116 if (elem.size == 0) return written; 117 for (u64 i = 0; i < t->array.count; ++i) { 118 u32 off = base_off + (u32)(i * elem.size); 119 written = riscv_collect_leaves(a, t->array.elem, off, out, cap, written); 120 if (written > cap) return written; 121 } 122 return written; 123 } 124 /* Scalar leaf (including pointer). */ 125 ABITypeInfo ti = abi_internal_type_info(a, tid); 126 if (ti.size == 0) return written; 127 if (written >= cap) return written + 1u; 128 out[written].offset = base_off; 129 out[written].size = ti.size; 130 out[written].scalar_kind = ti.scalar_kind; 131 return written + 1u; 132 } 133 134 static void classify_scalar(TargetABI* a, KitCgTypeId t, ABIArgInfo* out) { 135 RiscvAbiDesc d = riscv_abi_desc(a); 136 ABITypeInfo ti = abi_internal_type_info(a, t); 137 /* A scalar twice the GPR width that lives in the integer/long-double space 138 * (or a soft-float double) is carried as an aligned pair of GPRs. On rv64 139 * this is the 16-byte long double / __int128 pair; on rv32 it is the 8-byte 140 * i64 / soft-double pair. A double is only excluded from the pair here when 141 * it is FP-eligible (handled by the single-FP-part path below). */ 142 int fp_part = (ti.scalar_kind == ABI_SC_FLOAT) && 143 riscv_fp_eligible(d.flen, ti.size); 144 if (ti.size == 2u * d.gpr_bytes && !fp_part && 145 (ti.scalar_kind == ABI_SC_INT || ti.scalar_kind == ABI_SC_FLOAT)) { 146 ABIArgPart* parts = arena_array(a->c->tu, ABIArgPart, 2); 147 memset(parts, 0, sizeof(ABIArgPart) * 2); 148 parts[0].cls = ABI_CLASS_INT; 149 parts[0].loc = ABI_LOC_REG; 150 parts[0].size = d.gpr_bytes; 151 parts[0].align = d.gpr_bytes; 152 parts[0].src_offset = 0; 153 parts[1].cls = ABI_CLASS_INT; 154 parts[1].loc = ABI_LOC_REG; 155 parts[1].size = d.gpr_bytes; 156 parts[1].align = d.gpr_bytes; 157 parts[1].src_offset = d.gpr_bytes; 158 out->kind = ABI_ARG_DIRECT; 159 out->flags = ABI_AF_NONE; 160 out->parts = parts; 161 out->nparts = 2; 162 out->indirect_align = 0; 163 return; 164 } 165 out->kind = ABI_ARG_DIRECT; 166 out->flags = ABI_AF_NONE; 167 out->indirect_align = 0; 168 169 ABIArgPart* parts = arena_new(a->c->tu, ABIArgPart); 170 memset(parts, 0, sizeof *parts); 171 parts->cls = fp_part ? ABI_CLASS_FP : ABI_CLASS_INT; 172 parts->loc = ABI_LOC_REG; 173 parts->size = ti.size; 174 parts->align = ti.align; 175 parts->src_offset = 0; 176 177 out->parts = parts; 178 out->nparts = 1; 179 } 180 181 static u32 riscv32_scalar_split_lane_size(TargetABI* a, KitCgTypeId t) { 182 RiscvAbiDesc d = riscv_abi_desc(a); 183 ABITypeInfo ti = abi_internal_type_info(a, t); 184 int fp_part; 185 if (d.gpr_bytes != 4u) return 0; 186 fp_part = (ti.scalar_kind == ABI_SC_FLOAT) && 187 riscv_fp_eligible(d.flen, ti.size); 188 if (ti.size == 2u * d.gpr_bytes && !fp_part && 189 (ti.scalar_kind == ABI_SC_INT || ti.scalar_kind == ABI_SC_FLOAT)) 190 return d.gpr_bytes; 191 return 0; 192 } 193 194 static void classify_void(ABIArgInfo* out) { 195 memset(out, 0, sizeof *out); 196 out->kind = ABI_ARG_IGNORE; 197 } 198 199 /* Try the psABI floating-point aggregate refinements. Returns 1 if `out` 200 * was populated, 0 to fall back to the generic GPR-pair packing. */ 201 static int riscv_classify_fp_aggregate(TargetABI* a, KitCgTypeId t, 202 const RiscvAbiDesc* d, ABIArgInfo* out) { 203 AbiLeaf leaves[2]; 204 u32 n = riscv_collect_leaves(a, t, 0, leaves, /*cap=*/2u, /*written=*/0u); 205 /* n > 2: bail; n == 0: caller already handled zero-size aggregates. */ 206 if (n == 0 || n > 2) return 0; 207 208 u32 nfp = 0; 209 for (u32 i = 0; i < n; ++i) { 210 if (leaves[i].scalar_kind == ABI_SC_FLOAT) { 211 /* An FP leaf only stays in the FP file when it is FP-eligible. With 212 * soft float, or a double wider than flen, the aggregate must fall 213 * back to the GPR-pair path. */ 214 if (!riscv_fp_eligible(d->flen, leaves[i].size)) return 0; 215 ++nfp; 216 } 217 /* ABI_SC_INT, ABI_SC_BOOL, ABI_SC_PTR all go to the GPR side. */ 218 } 219 if (nfp == 0) return 0; /* pure-INT goes through the GPR-pair path. */ 220 221 /* Build the part list in source order so that downstream codegen can 222 * align src_offset with the record's field layout. */ 223 ABIArgPart* parts = arena_array(a->c->tu, ABIArgPart, n); 224 memset(parts, 0, sizeof(ABIArgPart) * n); 225 for (u32 i = 0; i < n; ++i) { 226 parts[i].loc = ABI_LOC_REG; 227 parts[i].size = leaves[i].size; 228 parts[i].align = leaves[i].size ? leaves[i].size : 1u; 229 parts[i].src_offset = leaves[i].offset; 230 parts[i].cls = 231 (leaves[i].scalar_kind == ABI_SC_FLOAT) ? ABI_CLASS_FP : ABI_CLASS_INT; 232 } 233 out->kind = ABI_ARG_DIRECT; 234 out->flags = ABI_AF_NONE; 235 out->parts = parts; 236 out->nparts = (u16)n; 237 out->indirect_align = 0; 238 return 1; 239 } 240 241 static void classify_aggregate(TargetABI* a, KitCgTypeId t, ABIArgInfo* out, 242 int is_return) { 243 RiscvAbiDesc d = riscv_abi_desc(a); 244 ABITypeInfo ti = abi_internal_type_info(a, t); 245 if (ti.size == 0) { 246 classify_void(out); 247 return; 248 } 249 if (ti.size <= d.aggregate_gpr_bytes) { 250 /* Per psABI: try the FP-aware refinement first (HFA / fp+int pair). */ 251 if (riscv_classify_fp_aggregate(a, t, &d, out)) return; 252 u32 nparts = (ti.size + d.gpr_bytes - 1u) / d.gpr_bytes; 253 ABIArgPart* parts = arena_array(a->c->tu, ABIArgPart, nparts); 254 memset(parts, 0, sizeof(ABIArgPart) * nparts); 255 u32 off = 0; 256 for (u32 i = 0; i < nparts; ++i) { 257 u32 chunk = 258 (ti.size - off > d.gpr_bytes) ? d.gpr_bytes : (ti.size - off); 259 parts[i].cls = ABI_CLASS_INT; 260 parts[i].loc = ABI_LOC_REG; 261 parts[i].size = chunk; 262 parts[i].align = d.gpr_bytes; 263 parts[i].src_offset = off; 264 off += chunk; 265 } 266 out->kind = ABI_ARG_DIRECT; 267 out->flags = ABI_AF_NONE; 268 out->parts = parts; 269 out->nparts = (u16)nparts; 270 out->indirect_align = 0; 271 } else { 272 out->kind = ABI_ARG_INDIRECT; 273 out->flags = is_return ? ABI_AF_SRET : ABI_AF_BYVAL; 274 out->indirect_align = ti.align ? ti.align : d.gpr_bytes; 275 out->parts = NULL; 276 out->nparts = 0; 277 } 278 (void)is_return; 279 } 280 281 static void classify_one(TargetABI* a, KitCgTypeId t, ABIArgInfo* out, 282 int is_return) { 283 const CgType* ty = cg_type_get(a->c, t); 284 if (!ty || ty->kind == KIT_CG_TYPE_VOID) { 285 classify_void(out); 286 return; 287 } 288 switch (ty->kind) { 289 case KIT_CG_TYPE_RECORD: 290 classify_aggregate(a, t, out, is_return); 291 return; 292 case KIT_CG_TYPE_ALIAS: 293 classify_one(a, ty->alias.base, out, is_return); 294 return; 295 default: 296 classify_scalar(a, t, out); 297 return; 298 } 299 } 300 301 static ABIFuncInfo* riscv_compute_func_info(TargetABI* a, KitCgTypeId fn) { 302 ABIFuncInfo* info = arena_new(a->c->tu, ABIFuncInfo); 303 const CgType* fnty = cg_type_get(a->c, fn); 304 memset(info, 0, sizeof *info); 305 306 classify_one(a, cg_func_ret_type(fnty), &info->ret, /*is_return=*/1); 307 info->has_sret = (info->ret.kind == ABI_ARG_INDIRECT) ? 1 : 0; 308 /* RISC-V passes the sret pointer in a0 (the first integer arg register), 309 * consuming that slot. */ 310 info->sret_consumes_int_arg = info->has_sret; 311 info->variadic = fnty->func.abi_variadic; 312 313 info->nparams = (u16)fnty->func.nparams; 314 if (fnty->func.nparams) { 315 ABIArgInfo* arr = arena_array(a->c->tu, ABIArgInfo, fnty->func.nparams); 316 memset(arr, 0, sizeof(ABIArgInfo) * fnty->func.nparams); 317 for (u32 i = 0; i < fnty->func.nparams; ++i) { 318 classify_one(a, fnty->func.params[i].type, &arr[i], /*is_return=*/0); 319 } 320 info->params = arr; 321 } else { 322 info->params = NULL; 323 } 324 return info; 325 } 326 327 const ABIVtable rv64_vtable = { 328 .compute_func_info = riscv_compute_func_info, 329 .va_list_info = {8, 8, ABI_SC_PTR, 0, 0, 0}, 330 /* LP64D va_list is a plain pointer, but the variadic register-save area is 331 * the 8 integer arg registers (a0..a7) spilled contiguously = 64 bytes; FP 332 * varargs are passed in GPRs, so there is no separate FP save area. The 333 * gp_reg_count/gp_slot_size fields let native_frame_va_save_bytes size that 334 * area from the ABI rather than a backend constant. */ 335 .va_list_layout = {.type = {8, 8, ABI_SC_PTR, 0, 0, 0}, 336 .kind = ABI_VA_LIST_POINTER, 337 .gp_reg_count = 8, 338 .fp_reg_count = 0, 339 .gp_slot_size = 8, 340 .fp_slot_size = 0}, 341 }; 342 343 const ABIVtable rv32_vtable = { 344 .compute_func_info = riscv_compute_func_info, 345 .scalar_split_lane_size = riscv32_scalar_split_lane_size, 346 .va_list_info = {4, 4, ABI_SC_PTR, 0, 0, 0}, 347 /* ILP32* va_list is a plain 4-byte pointer; the variadic register-save 348 * area is the 8 integer arg registers (a0..a7) spilled contiguously = 349 * 32 bytes. FP varargs are passed in GPRs, so there is no FP save area. */ 350 .va_list_layout = {.type = {4, 4, ABI_SC_PTR, 0, 0, 0}, 351 .kind = ABI_VA_LIST_POINTER, 352 .gp_reg_count = 8, 353 .fp_reg_count = 0, 354 .gp_slot_size = 4, 355 .fp_slot_size = 0}, 356 };