arith.c (74104B)
1 #include "cg/internal.h" 2 3 static int api_try_fold_int_convert(KitCg* g, ConvKind ck, KitCgTypeId sty, 4 KitCgTypeId dty, i64 in, i64* out) { 5 u32 sw; 6 u32 dw; 7 u64 r; 8 if (!g || !out || !api_foldable_int_like_type(g->c, sty, &sw) || 9 !api_foldable_int_like_type(g->c, dty, &dw)) { 10 return 0; 11 } 12 switch (ck) { 13 case CV_SEXT: 14 r = (u64)api_sign_extend_width((u64)in, sw); 15 break; 16 case CV_ZEXT: 17 r = api_mask_width((u64)in, sw); 18 break; 19 case CV_TRUNC: 20 r = api_mask_width((u64)in, dw); 21 break; 22 default: 23 return 0; 24 } 25 *out = api_fold_result(g->c, dty, r, dw); 26 return 1; 27 } 28 29 void api_cg_binop(KitCg* g, BinOp iop, u32 flags) { 30 ApiSValue b, a; 31 CgTarget* T; 32 KitCgTypeId ty; 33 Operand ra, rb; 34 CGLocal rr; 35 Operand dst; 36 ApiSValue folded_sv; 37 i64 folded; 38 if (!g) return; 39 T = g->target; 40 b = api_pop(g); 41 a = api_pop(g); 42 ty = a.type ? a.type : b.type; 43 44 if (!flags && api_sv_op_is(&a, OPK_IMM) && api_sv_op_is(&b, OPK_IMM) && 45 api_try_fold_int_binop(g, iop, ty, a.op.v.imm, b.op.v.imm, &folded)) { 46 api_release(g, &a); 47 api_release(g, &b); 48 api_push(g, api_make_sv(api_op_imm(folded, ty), ty)); 49 return; 50 } 51 52 /* Strength-reduce mul/udiv/urem by a power of two into shift/and. Rewrites 53 * iop and the operands in place; the result flows through the same delay / 54 * identity / fallback machinery as any other shift or and. */ 55 if (!flags) api_try_strength_reduce(g, &iop, ty, &a, &b); 56 57 if (api_can_delay_int_arith(g, ty, flags) && 58 api_try_fold_arith_chain(g, iop, ty, &a, &b, &folded_sv)) { 59 api_release(g, &a); 60 api_release(g, &b); 61 api_push(g, folded_sv); 62 return; 63 } 64 65 if (api_type_is_float(g->c, ty)) { 66 ra = api_force_local(g, &a, ty); 67 rb = api_force_local(g, &b, ty); 68 } else { 69 ra = api_force_local_unless_imm(g, &a, ty); 70 rb = api_force_local_unless_imm(g, &b, ty); 71 } 72 73 if (api_can_delay_int_arith(g, ty, flags) && 74 api_try_collapse_binop_identity(g, iop, ty, &a, &b, &folded_sv)) { 75 api_release(g, &a); 76 api_release(g, &b); 77 api_push(g, folded_sv); 78 return; 79 } 80 81 if (api_can_delay_int_arith(g, ty, flags) && 82 (ra.kind == OPK_LOCAL || rb.kind == OPK_LOCAL) && 83 (ra.kind == OPK_LOCAL || ra.kind == OPK_IMM) && 84 (rb.kind == OPK_LOCAL || rb.kind == OPK_IMM)) { 85 int a_owned = api_sv_owns_operand_local(&a, &ra); 86 int b_owned = api_sv_owns_operand_local(&b, &rb); 87 api_push(g, api_make_arith_binop(iop, ra, rb, ty, a_owned, b_owned)); 88 if (a_owned) a.res = RES_INHERENT; 89 if (b_owned) b.res = RES_INHERENT; 90 api_release(g, &a); 91 api_release(g, &b); 92 return; 93 } 94 95 rr = api_alloc_temp_local(g, ty); 96 dst = api_op_local(rr, ty); 97 T->binop(T, iop, dst, ra, rb); 98 api_release(g, &a); 99 api_release(g, &b); 100 api_push(g, api_make_sv(dst, ty)); 101 } 102 103 void api_cg_unop(KitCg* g, UnOp iop, u32 flags) { 104 ApiSValue a; 105 CgTarget* T; 106 KitCgTypeId ty; 107 Operand ra; 108 CGLocal rr; 109 Operand dst; 110 ApiSValue folded_sv; 111 i64 folded; 112 if (!g) return; 113 T = g->target; 114 a = api_pop(g); 115 ty = a.type ? a.type : a.op.type; 116 117 if (iop == UO_FNEG) { 118 if (!api_type_is_float(g->c, ty)) { 119 compiler_panic(g->c, g->cur_loc, 120 "KitCg: FP negation requires floating operand"); 121 } 122 ra = api_force_local(g, &a, ty); 123 rr = api_alloc_temp_local(g, ty); 124 dst = api_op_local(rr, ty); 125 T->unop(T, iop, dst, ra); 126 api_release(g, &a); 127 api_push(g, api_make_sv(dst, ty)); 128 return; 129 } 130 131 /* Logical NOT of a delayed compare stays delayed: invert the predicate in 132 * place. For FP this flips ordered<->unordered as well as the relation (via 133 * api_invert_cmp), so `!(a<b)` becomes UGE (NaN -> true), matching IEEE 134 * negation. The inverted compare keeps the same i32 result type. */ 135 if (iop == UO_NOT && a.kind == SV_CMP) { 136 a.delayed.cmp.op = api_invert_cmp(a.delayed.cmp.op); 137 api_push(g, a); 138 return; 139 } 140 141 if (!flags && api_sv_op_is(&a, OPK_IMM) && 142 api_try_fold_int_unop(g, iop, ty, a.op.v.imm, &folded)) { 143 api_release(g, &a); 144 api_push(g, api_make_sv(api_op_imm(folded, ty), ty)); 145 return; 146 } 147 148 if (api_can_delay_int_arith(g, ty, flags) && 149 api_try_fold_unary_chain(&a, iop, ty, &folded_sv)) { 150 api_release(g, &a); 151 api_push(g, folded_sv); 152 return; 153 } 154 155 ra = api_force_local_unless_imm(g, &a, ty); 156 if (api_can_delay_int_arith(g, ty, flags) && ra.kind == OPK_LOCAL) { 157 int a_owned = api_sv_owns_operand_local(&a, &ra); 158 api_push(g, api_make_arith_unop(iop, ra, ty, a_owned)); 159 if (a_owned) a.res = RES_INHERENT; 160 api_release(g, &a); 161 return; 162 } 163 rr = api_alloc_temp_local(g, ty); 164 dst = api_op_local(rr, ty); 165 T->unop(T, iop, dst, ra); 166 api_release(g, &a); 167 api_push(g, api_make_sv(dst, ty)); 168 } 169 170 void api_cg_cmp(KitCg* g, CmpOp cop) { 171 ApiSValue b, a; 172 KitCgTypeId opty; 173 KitCgTypeId i32; 174 Operand ra, rb; 175 i64 folded; 176 if (!g) return; 177 b = api_pop(g); 178 a = api_pop(g); 179 opty = a.type ? a.type : b.type; 180 i32 = builtin_id(KIT_CG_BUILTIN_I32); 181 182 if (api_sv_op_is(&a, OPK_IMM) && api_sv_op_is(&b, OPK_IMM) && 183 api_try_fold_int_cmp(g, cop, opty, a.op.v.imm, b.op.v.imm, &folded)) { 184 api_release(g, &a); 185 api_release(g, &b); 186 api_push(g, api_make_sv(api_op_imm(folded, i32), i32)); 187 return; 188 } 189 190 ra = api_force_local_unless_imm(g, &a, opty); 191 rb = api_force_local_unless_imm(g, &b, opty); 192 /* Both integer and FP compares are produced as delayed SV_CMP values. 193 * Delaying is what lets api_branch_if (and api_cg_unop's UO_NOT) invert 194 * the compare via api_invert_cmp, reaching the unordered FP duals 195 * (UGE/UGT/ULE/ULT/UEQ/UNE) from `!(a<b)` etc. with NaN-correct semantics. 196 * If the compare instead escapes into value context it is materialized 197 * unchanged via api_materialize_cmp_to, which calls T->cmp with the same 198 * opcode the eager path used to. */ 199 api_push(g, api_make_cmp(cop, ra, rb, i32, api_sv_owns_operand_local(&a, &ra), 200 api_sv_owns_operand_local(&b, &rb))); 201 } 202 203 int api_try_i128_convert(KitCg* g, ConvKind ck, KitCgTypeId sty, 204 KitCgTypeId dty, ApiSValue* v); 205 int api_try_wide8_convert(KitCg* g, ConvKind ck, KitCgTypeId sty, 206 KitCgTypeId dty, ApiSValue* v); 207 208 void api_cg_convert_kind(KitCg* g, KitCgTypeId dst_type, ConvKind ck) { 209 ApiSValue v; 210 CgTarget* T; 211 KitCgTypeId sty; 212 KitCgTypeId dty; 213 Operand src; 214 CGLocal rr; 215 Operand dst; 216 if (!g) return; 217 T = g->target; 218 dty = resolve_type(g->c, dst_type); 219 if (!dty) return; 220 v = api_pop(g); 221 dty = api_unalias_type(g->c, dty); 222 sty = api_unalias_type(g->c, v.type ? v.type : v.op.type); 223 if (!sty) { 224 api_release(g, &v); 225 return; 226 } 227 if (sty == dty) { 228 v.type = dty; 229 v.op.type = dty; 230 api_push(g, v); 231 return; 232 } 233 if (api_sv_op_is(&v, OPK_IMM)) { 234 i64 folded; 235 if (api_try_fold_int_convert(g, ck, sty, dty, v.op.v.imm, &folded)) { 236 api_release(g, &v); 237 /* A folded split-lane 8-byte result must be memory-resident, not a bare 238 * i64 immediate the backend would truncate. */ 239 if (api_is_wide8_scalar_type(g->c, dty)) 240 api_push(g, api_make_wide8_int_const(g, folded, dty)); 241 else 242 api_push(g, api_make_sv(api_op_imm(folded, dty), dty)); 243 return; 244 } 245 } 246 if (api_try_i128_convert(g, ck, sty, dty, &v)) return; 247 if (api_try_wide8_convert(g, ck, sty, dty, &v)) return; 248 if (ck == CV_BITCAST && abi_cg_sizeof(g->c->abi, sty) == 16 && 249 abi_cg_sizeof(g->c->abi, dty) == 16 && 250 (api_is_f128_type(g->c, sty) || api_is_f128_type(g->c, dty))) { 251 CGLocal local = api_f128_temp_local(g, dty); 252 Operand dst_lv = api_op_local(local, dty); 253 if (api_is_lvalue_sv(&v) || v.op.kind == OPK_LOCAL || 254 v.op.kind == OPK_INDIRECT || v.op.kind == OPK_GLOBAL) { 255 KitCgTypeId ptr_ty = cg_type_ptr_to(g->c, dty); 256 ApiSValue src_lv = v; 257 Operand dst_addr; 258 Operand src_addr; 259 AggregateAccess agg; 260 src_lv.lvalue = 1; 261 dst_addr = api_lvalue_addr( 262 g, 263 &(ApiSValue){ 264 .op = dst_lv, .type = dty, .kind = SV_OPERAND, .lvalue = 1}, 265 ptr_ty); 266 src_addr = api_lvalue_addr(g, &src_lv, cg_type_ptr_to(g->c, sty)); 267 memset(&agg, 0, sizeof agg); 268 agg.size = 16; 269 agg.align = 16; 270 g->target->copy_bytes(g->target, dst_addr, src_addr, agg); 271 api_release_temp_local(g, dst_addr.v.local); 272 api_release_temp_local(g, src_addr.v.local); 273 } else if (v.op.kind == OPK_LOCAL) { 274 g->target->store(g->target, dst_lv, v.op, 275 api_mem_for_lvalue(g, &dst_lv, sty)); 276 } else if (v.op.kind == OPK_IMM) { 277 u8 bytes[16]; 278 u64 lo = (u64)v.op.v.imm; 279 memset(bytes, 0, sizeof bytes); 280 for (u32 i = 0; i < 8; ++i) { 281 u32 idx = g->c->target.big_endian ? 15u - i : i; 282 bytes[idx] = (u8)(lo >> (i * 8u)); 283 } 284 api_store_f128_bytes(g, local, dty, bytes); 285 } else { 286 compiler_panic(g->c, g->cur_loc, 287 "KitCg: unsupported 16-byte bitcast source"); 288 } 289 api_release(g, &v); 290 api_push(g, api_make_lv(dst_lv, dty)); 291 return; 292 } 293 294 src = api_force_local(g, &v, sty); 295 rr = api_alloc_temp_local(g, dty); 296 dst = api_op_local(rr, dty); 297 T->convert(T, ck, dst, src); 298 api_release(g, &v); 299 api_push(g, api_make_sv(dst, dty)); 300 } 301 302 /* ============================================================ 303 * 128-bit integer lowering 304 * 305 * i128/u128 are 16-byte memory-resident scalars (see api_is_wide16 306 * and src/cg/wide.c). The native backends only model <=64-bit 307 * register ops, so every i128 arithmetic/compare/convert is lowered 308 * here to a compiler-rt-style runtime call (rt/lib/int64). This 309 * mirrors the f128 dispatch in kit_cg_fp_*. 310 * ============================================================ */ 311 312 int api_i128_stack_top(KitCg* g, u32 depth) { 313 if (!g || g->sp <= depth) return 0; 314 return api_is_i128_type(g->c, api_sv_type(&g->stack[g->sp - 1u - depth])); 315 } 316 317 /* 64-bit integer split into two 32-bit lanes by the selected ABI. The native 318 * backend handles add/sub/and/or/xor on such values as register pairs, but 319 * mul/div/shift must be lowered to a __*di3 runtime call (see 320 * api_wideint64_binop). i128 routes through its own ti3 path (api_i128_*), so 321 * it is explicitly excluded here. */ 322 static int api_int_is_wide64(KitCg* g, KitCgTypeId ty) { 323 if (!g) return 0; 324 if (api_is_i128_type(g->c, ty)) return 0; 325 if (kit_cg_type_int_width((KitCompiler*)g->c, ty) == 0) return 0; 326 return api_is_wide8_scalar_type(g->c, ty); 327 } 328 329 static int api_wide64_stack_top(KitCg* g, u32 depth) { 330 if (!g || g->sp <= depth) return 0; 331 return api_int_is_wide64(g, api_sv_type(&g->stack[g->sp - 1u - depth])); 332 } 333 334 static int api_binop_is_shift(BinOp iop) { 335 return iop == BO_SHL || iop == BO_SHR_U || iop == BO_SHR_S; 336 } 337 338 static int api_is_bool_type(Compiler* c, KitCgTypeId ty) { 339 const CgType* cg = cg_type_get(c, api_unalias_type(c, ty)); 340 return cg && cg->kind == KIT_CG_TYPE_BOOL; 341 } 342 343 /* Materialize an i128 value as an lvalue and return a pointer local to it. */ 344 static Operand api_i128_addr(KitCg* g, ApiSValue* v) { 345 KitCgTypeId i128 = builtin_id(KIT_CG_BUILTIN_I128); 346 ApiSValue lv = api_wide16_materialize_lvalue(g, v, i128); 347 return api_lvalue_addr(g, &lv, cg_type_ptr_to(g->c, i128)); 348 } 349 350 /* Load a 64-bit lane of an i128 (addressed by `addr`) into a fresh i64. */ 351 static Operand api_i128_load_lane(KitCg* g, Operand addr, i32 off) { 352 KitCgTypeId i64 = builtin_id(KIT_CG_BUILTIN_I64); 353 CGLocal rr = api_alloc_temp_local(g, i64); 354 Operand dst = api_op_local(rr, i64); 355 MemAccess ma; 356 memset(&ma, 0, sizeof ma); 357 ma.type = i64; 358 ma.size = 8; 359 ma.align = 8; 360 g->target->load(g->target, dst, api_op_indirect(addr.v.local, off, i64), ma); 361 return dst; 362 } 363 364 static void api_i128_binop(KitCg* g, BinOp iop) { 365 KitCgTypeId i128 = builtin_id(KIT_CG_BUILTIN_I128); 366 KitCgTypeId i32 = builtin_id(KIT_CG_BUILTIN_I32); 367 const char* name = api_i128_binop_helper(iop); 368 KitCgTypeId ps[2]; 369 ApiSValue args[2]; 370 if (!name) { 371 compiler_panic(g->c, g->cur_loc, "KitCg: unsupported i128 binop"); 372 return; 373 } 374 args[1] = api_pop(g); 375 args[0] = api_pop(g); 376 ps[0] = i128; 377 ps[1] = api_binop_is_shift(iop) ? i32 : i128; 378 api_runtime_call_values(g, name, i128, ps, 2, args); 379 } 380 381 /* Runtime helper name for a 64-bit-integer mul/div/rem/shift on a 32-bit 382 * target. Mirrors api_i128_binop_helper but with the compiler-rt *di3 names. 383 * Returns NULL for ops the inline backend handles (add/sub/and/or/xor). */ 384 static const char* api_wideint64_binop_helper(BinOp op) { 385 switch (op) { 386 case BO_IMUL: 387 return "__muldi3"; 388 case BO_SDIV: 389 return "__divdi3"; 390 case BO_UDIV: 391 return "__udivdi3"; 392 case BO_SREM: 393 return "__moddi3"; 394 case BO_UREM: 395 return "__umoddi3"; 396 case BO_SHL: 397 return "__ashldi3"; 398 case BO_SHR_U: 399 return "__lshrdi3"; 400 case BO_SHR_S: 401 return "__ashrdi3"; 402 default: 403 return NULL; 404 } 405 } 406 407 /* Lower a 64-bit mul/div/rem/shift to a runtime call. Mirrors api_i128_binop 408 * but ret/params are builtin i64; the shift-count param is i32 (the __ashldi3 409 * family takes (i64 value, i32 count) per compiler-rt). */ 410 static void api_wideint64_binop(KitCg* g, BinOp iop) { 411 KitCgTypeId i64 = builtin_id(KIT_CG_BUILTIN_I64); 412 KitCgTypeId i32 = builtin_id(KIT_CG_BUILTIN_I32); 413 const char* name = api_wideint64_binop_helper(iop); 414 KitCgTypeId ps[2]; 415 ApiSValue args[2]; 416 if (!name) { 417 compiler_panic(g->c, g->cur_loc, "KitCg: unsupported wide i64 binop"); 418 return; 419 } 420 args[1] = api_pop(g); 421 args[0] = api_pop(g); 422 ps[0] = i64; 423 ps[1] = api_binop_is_shift(iop) ? i32 : i64; 424 api_runtime_call_values(g, name, i64, ps, 2, args); 425 } 426 427 /* ============================================================ 428 * wide8 inline 2-word lane arithmetic 429 * 430 * Some 32-bit ABIs represent a 64-bit integer as a memory-resident 8-byte 431 * scalar split into two 32-bit lanes. add/sub/and/or/xor/neg/not and compares 432 * have no compiler-rt helper (they would recurse), so they are emitted INLINE 433 * here as lane ops. mul/div/rem/shift route to __*di3 (api_wideint64_*). 434 * ============================================================ */ 435 436 static i32 wide8_lo_off(KitCg* g) { return g->c->target.big_endian ? 4 : 0; } 437 static i32 wide8_hi_off(KitCg* g) { return g->c->target.big_endian ? 0 : 4; } 438 439 /* Emit one i32 binop into a fresh temp and return it. */ 440 static Operand wide8_i32_binop(KitCg* g, BinOp op, Operand a, Operand b) { 441 KitCgTypeId i32 = builtin_id(KIT_CG_BUILTIN_I32); 442 CGLocal r = api_alloc_temp_local(g, i32); 443 Operand d = api_op_local(r, i32); 444 g->target->binop(g->target, op, d, a, b); 445 return d; 446 } 447 448 /* Emit one i32 compare (0/1 result) into a fresh temp and return it. */ 449 static Operand wide8_i32_cmp(KitCg* g, CmpOp op, Operand a, Operand b) { 450 KitCgTypeId i32 = builtin_id(KIT_CG_BUILTIN_I32); 451 CGLocal r = api_alloc_temp_local(g, i32); 452 Operand d = api_op_local(r, i32); 453 g->target->cmp(g->target, op, d, a, b); 454 return d; 455 } 456 457 /* (lo | hi) of the 8-byte value `v` as an i32, for a truthiness test. Consumes 458 * nothing on the value stack (caller owns *v). */ 459 Operand api_wide8_or_lanes(KitCg* g, ApiSValue* v, KitCgTypeId ty) { 460 Operand addr = api_wide8_addr(g, v, ty); 461 Operand lo = api_wide8_load_lane(g, addr, wide8_lo_off(g)); 462 Operand hi = api_wide8_load_lane(g, addr, wide8_hi_off(g)); 463 return wide8_i32_binop(g, BO_OR, lo, hi); 464 } 465 466 /* add/sub/and/or/xor on two 8-byte ints, result pushed as a fresh 8-byte value. 467 * add/sub carry/borrow through the high lane via an sltu (CMP_LT_U). */ 468 static void api_wide64_binop_inline(KitCg* g, BinOp iop) { 469 ApiSValue b = api_pop(g); 470 ApiSValue a = api_pop(g); 471 KitCgTypeId ty = a.type ? a.type : b.type; 472 int lo = wide8_lo_off(g), hi = wide8_hi_off(g); 473 Operand aa = api_wide8_addr(g, &a, ty); 474 Operand ab = api_wide8_addr(g, &b, ty); 475 Operand alo = api_wide8_load_lane(g, aa, lo); 476 Operand ahi = api_wide8_load_lane(g, aa, hi); 477 Operand blo = api_wide8_load_lane(g, ab, lo); 478 Operand bhi = api_wide8_load_lane(g, ab, hi); 479 CGLocal res = api_wide8_temp_local(g, ty); 480 ApiSValue res_lv = api_make_lv(api_op_local(res, ty), ty); 481 Operand ar = api_lvalue_addr(g, &res_lv, cg_type_ptr_to(g->c, ty)); 482 Operand rlo; 483 Operand rhi; 484 switch (iop) { 485 case BO_AND: 486 case BO_OR: 487 case BO_XOR: 488 rlo = wide8_i32_binop(g, iop, alo, blo); 489 rhi = wide8_i32_binop(g, iop, ahi, bhi); 490 break; 491 case BO_IADD: { 492 Operand carry; 493 rlo = wide8_i32_binop(g, BO_IADD, alo, blo); 494 carry = wide8_i32_cmp(g, CMP_LT_U, rlo, alo); /* unsigned wrap -> carry */ 495 rhi = wide8_i32_binop(g, BO_IADD, ahi, bhi); 496 rhi = wide8_i32_binop(g, BO_IADD, rhi, carry); 497 break; 498 } 499 case BO_ISUB: { 500 Operand borrow = wide8_i32_cmp(g, CMP_LT_U, alo, blo); 501 rlo = wide8_i32_binop(g, BO_ISUB, alo, blo); 502 rhi = wide8_i32_binop(g, BO_ISUB, ahi, bhi); 503 rhi = wide8_i32_binop(g, BO_ISUB, rhi, borrow); 504 break; 505 } 506 default: 507 compiler_panic(g->c, g->cur_loc, "KitCg: unsupported wide i64 inline binop"); 508 return; 509 } 510 api_wide8_store_lane(g, ar, lo, rlo); 511 api_wide8_store_lane(g, ar, hi, rhi); 512 api_release(g, &a); 513 api_release(g, &b); 514 api_push(g, api_make_sv(api_op_local(res, ty), ty)); 515 } 516 517 /* neg / bnot on an 8-byte int. NEG is two's complement: lo = 0-lo with borrow 518 * into hi = 0-hi-borrow. BNOT is lane-wise xor -1. */ 519 static void api_wide64_unop_inline(KitCg* g, UnOp iop) { 520 KitCgTypeId i32 = builtin_id(KIT_CG_BUILTIN_I32); 521 ApiSValue a = api_pop(g); 522 KitCgTypeId ty = a.type ? a.type : a.op.type; 523 int lo = wide8_lo_off(g), hi = wide8_hi_off(g); 524 Operand aa = api_wide8_addr(g, &a, ty); 525 Operand alo = api_wide8_load_lane(g, aa, lo); 526 Operand ahi = api_wide8_load_lane(g, aa, hi); 527 CGLocal res = api_wide8_temp_local(g, ty); 528 ApiSValue res_lv = api_make_lv(api_op_local(res, ty), ty); 529 Operand ar = api_lvalue_addr(g, &res_lv, cg_type_ptr_to(g->c, ty)); 530 Operand rlo; 531 Operand rhi; 532 if (iop == UO_BNOT) { 533 rlo = wide8_i32_binop(g, BO_XOR, alo, api_op_imm(-1, i32)); 534 rhi = wide8_i32_binop(g, BO_XOR, ahi, api_op_imm(-1, i32)); 535 } else { /* UO_NEG: 0 - value */ 536 Operand zero = api_op_imm(0, i32); 537 Operand borrow = wide8_i32_cmp(g, CMP_LT_U, zero, alo); /* 0<lo -> borrow */ 538 rlo = wide8_i32_binop(g, BO_ISUB, zero, alo); 539 rhi = wide8_i32_binop(g, BO_ISUB, zero, ahi); 540 rhi = wide8_i32_binop(g, BO_ISUB, rhi, borrow); 541 } 542 api_wide8_store_lane(g, ar, lo, rlo); 543 api_wide8_store_lane(g, ar, hi, rhi); 544 api_release(g, &a); 545 api_push(g, api_make_sv(api_op_local(res, ty), ty)); 546 } 547 548 /* a < b over 8-byte lanes: (a_hi <{s,u} b_hi) | (a_hi==b_hi & a_lo <u b_lo). 549 * The high lane uses the signed/unsigned relation; the low lane is always 550 * unsigned. Returns an i32 0/1. */ 551 static Operand wide8_lt(KitCg* g, int is_signed, Operand alo, Operand ahi, 552 Operand blo, Operand bhi) { 553 Operand hi_lt = wide8_i32_cmp(g, is_signed ? CMP_LT_S : CMP_LT_U, ahi, bhi); 554 Operand hi_eq = wide8_i32_cmp(g, CMP_EQ, ahi, bhi); 555 Operand lo_lt = wide8_i32_cmp(g, CMP_LT_U, alo, blo); 556 Operand t = wide8_i32_binop(g, BO_AND, hi_eq, lo_lt); 557 return wide8_i32_binop(g, BO_OR, hi_lt, t); 558 } 559 560 static Operand wide8_eq(KitCg* g, Operand alo, Operand ahi, Operand blo, 561 Operand bhi) { 562 KitCgTypeId i32 = builtin_id(KIT_CG_BUILTIN_I32); 563 Operand dlo = wide8_i32_binop(g, BO_XOR, alo, blo); 564 Operand dhi = wide8_i32_binop(g, BO_XOR, ahi, bhi); 565 Operand diff = wide8_i32_binop(g, BO_OR, dlo, dhi); 566 return wide8_i32_cmp(g, CMP_EQ, diff, api_op_imm(0, i32)); 567 } 568 569 static int cmp_is_signed_rel(CmpOp op) { 570 return op == CMP_LT_S || op == CMP_LE_S || op == CMP_GT_S || op == CMP_GE_S; 571 } 572 573 /* 8-byte int compare -> eager i32 0/1 value (not a delayed SV_CMP). */ 574 static void api_wide64_cmp_inline(KitCg* g, CmpOp cop) { 575 KitCgTypeId i32 = builtin_id(KIT_CG_BUILTIN_I32); 576 int sg = cmp_is_signed_rel(cop); 577 ApiSValue b = api_pop(g); 578 ApiSValue a = api_pop(g); 579 KitCgTypeId ty = a.type ? a.type : b.type; 580 int lo = wide8_lo_off(g), hi = wide8_hi_off(g); 581 Operand aa = api_wide8_addr(g, &a, ty); 582 Operand ab = api_wide8_addr(g, &b, ty); 583 Operand alo = api_wide8_load_lane(g, aa, lo); 584 Operand ahi = api_wide8_load_lane(g, aa, hi); 585 Operand blo = api_wide8_load_lane(g, ab, lo); 586 Operand bhi = api_wide8_load_lane(g, ab, hi); 587 Operand one = api_op_imm(1, i32); 588 Operand res; 589 switch (cop) { 590 case CMP_EQ: 591 res = wide8_eq(g, alo, ahi, blo, bhi); 592 break; 593 case CMP_NE: 594 res = wide8_i32_binop(g, BO_XOR, wide8_eq(g, alo, ahi, blo, bhi), one); 595 break; 596 case CMP_LT_S: 597 case CMP_LT_U: 598 res = wide8_lt(g, sg, alo, ahi, blo, bhi); 599 break; 600 case CMP_GT_S: 601 case CMP_GT_U: /* a>b == b<a */ 602 res = wide8_lt(g, sg, blo, bhi, alo, ahi); 603 break; 604 case CMP_LE_S: 605 case CMP_LE_U: /* a<=b == !(b<a) */ 606 res = wide8_i32_binop(g, BO_XOR, wide8_lt(g, sg, blo, bhi, alo, ahi), one); 607 break; 608 case CMP_GE_S: 609 case CMP_GE_U: /* a>=b == !(a<b) */ 610 res = wide8_i32_binop(g, BO_XOR, wide8_lt(g, sg, alo, ahi, blo, bhi), one); 611 break; 612 default: 613 compiler_panic(g->c, g->cur_loc, "KitCg: unsupported wide i64 compare"); 614 return; 615 } 616 api_release(g, &a); 617 api_release(g, &b); 618 api_push(g, api_make_sv(res, i32)); 619 } 620 621 /* ============================================================ 622 * wide64 __builtin_*_overflow on split-lane 64-bit values 623 * 624 * The native backends only model single-register overflow, so a 64-bit 625 * operand traps there. Here we legalize the 6 overflow intrinsics for a 626 * 64-bit operand pair into 32-bit lane ops, computing both the 627 * 64-bit wrapped value (stored to a fresh 8-byte temp) and the boolean 628 * overflow flag, then pushing [value, ok] exactly as the native path does. 629 * add/sub reuse the carry/borrow lane logic; mul builds the full 128-bit 630 * product from 32x32->64 partials (no MULHU opcode exists, so each partial 631 * is itself synthesized from 16-bit halves). 632 * ============================================================ */ 633 634 /* Unsigned 32x32 -> 64 product of i32 lanes a,b, returned as (*plo,*phi) i32 635 * via the 16-bit-halves schoolbook method (the target has no high-multiply 636 * opcode, and a plain BO_IMUL only yields the low 32 bits). 637 * 638 * a = ah*2^16 + al, b = bh*2^16 + bl 639 * a*b = ah*bh*2^32 + (ah*bl + al*bh)*2^16 + al*bl 640 */ 641 static void wide8_umul32(KitCg* g, Operand a, Operand b, Operand* plo, 642 Operand* phi) { 643 KitCgTypeId i32 = builtin_id(KIT_CG_BUILTIN_I32); 644 Operand mask = api_op_imm(0xffff, i32); 645 Operand sh16 = api_op_imm(16, i32); 646 Operand al = wide8_i32_binop(g, BO_AND, a, mask); 647 Operand ah = wide8_i32_binop(g, BO_SHR_U, a, sh16); 648 Operand bl = wide8_i32_binop(g, BO_AND, b, mask); 649 Operand bh = wide8_i32_binop(g, BO_SHR_U, b, sh16); 650 Operand ll = wide8_i32_binop(g, BO_IMUL, al, bl); /* bits 0..31 (<=32 bits) */ 651 Operand lh = wide8_i32_binop(g, BO_IMUL, al, bh); /* bits 16..47 */ 652 Operand hl = wide8_i32_binop(g, BO_IMUL, ah, bl); /* bits 16..47 */ 653 Operand hh = wide8_i32_binop(g, BO_IMUL, ah, bh); /* bits 32..63 */ 654 /* mid = lh + hl + (ll >> 16); a 33-bit sum -> track carry into bit 32. */ 655 Operand ll_hi = wide8_i32_binop(g, BO_SHR_U, ll, sh16); 656 Operand mid = wide8_i32_binop(g, BO_IADD, lh, hl); 657 /* carry out of (lh+hl) into bit 48 (i.e. +2^32 in the high word). */ 658 Operand c0 = wide8_i32_cmp(g, CMP_LT_U, mid, lh); 659 Operand mid2 = wide8_i32_binop(g, BO_IADD, mid, ll_hi); 660 Operand c1 = wide8_i32_cmp(g, CMP_LT_U, mid2, mid); 661 Operand carry32 = wide8_i32_binop(g, BO_IADD, c0, c1); /* into high word */ 662 /* lo = (mid2 << 16) | (ll & 0xffff) */ 663 Operand mid2_lo = wide8_i32_binop(g, BO_AND, mid2, mask); 664 Operand mid2_loshift = wide8_i32_binop(g, BO_SHL, mid2_lo, sh16); 665 Operand ll_lo = wide8_i32_binop(g, BO_AND, ll, mask); 666 *plo = wide8_i32_binop(g, BO_OR, mid2_loshift, ll_lo); 667 /* hi = hh + (mid2 >> 16) + carry32*2^16 */ 668 Operand mid2_hi = wide8_i32_binop(g, BO_SHR_U, mid2, sh16); 669 Operand carry_word = wide8_i32_binop(g, BO_SHL, carry32, sh16); 670 Operand hi = wide8_i32_binop(g, BO_IADD, hh, mid2_hi); 671 *phi = wide8_i32_binop(g, BO_IADD, hi, carry_word); 672 } 673 674 /* Add three i32 columns acc += addend, threading carry: returns the new sum and 675 * adds the unsigned-wrap carry (0/1) into *carry. */ 676 static Operand wide8_addc(KitCg* g, Operand acc, Operand addend, 677 Operand* carry) { 678 Operand sum = wide8_i32_binop(g, BO_IADD, acc, addend); 679 Operand c = wide8_i32_cmp(g, CMP_LT_U, sum, acc); 680 *carry = wide8_i32_binop(g, BO_IADD, *carry, c); 681 return sum; 682 } 683 684 /* The 6 __builtin_*_overflow intrinsics for a split-lane wide64 operand pair. 685 * Pops the two 8-byte args, computes the wrapped 64-bit value into a fresh 686 * 8-byte temp and the bool overflow flag into an i32, then pushes [value, ok] 687 * matching the contract of the native overflow path. */ 688 static void api_wide64_overflow_inline(KitCg* g, KitCgIntrinsic intrin) { 689 KitCgTypeId i32 = builtin_id(KIT_CG_BUILTIN_I32); 690 KitCgTypeId bool_ty = builtin_id(KIT_CG_BUILTIN_BOOL); 691 Operand sh31 = api_op_imm(31, i32); 692 ApiSValue b = api_pop(g); 693 ApiSValue a = api_pop(g); 694 KitCgTypeId ty = a.type ? a.type : b.type; 695 int lo = wide8_lo_off(g), hi = wide8_hi_off(g); 696 Operand aa = api_wide8_addr(g, &a, ty); 697 Operand ab = api_wide8_addr(g, &b, ty); 698 Operand alo = api_wide8_load_lane(g, aa, lo); 699 Operand ahi = api_wide8_load_lane(g, aa, hi); 700 Operand blo = api_wide8_load_lane(g, ab, lo); 701 Operand bhi = api_wide8_load_lane(g, ab, hi); 702 CGLocal res = api_wide8_temp_local(g, ty); 703 ApiSValue res_lv = api_make_lv(api_op_local(res, ty), ty); 704 Operand ar = api_lvalue_addr(g, &res_lv, cg_type_ptr_to(g->c, ty)); 705 Operand rlo; 706 Operand rhi; 707 Operand ok; 708 switch (intrin) { 709 case KIT_CG_INTRIN_UADD_OVERFLOW: 710 case KIT_CG_INTRIN_SADD_OVERFLOW: { 711 Operand carry; 712 rlo = wide8_i32_binop(g, BO_IADD, alo, blo); 713 carry = wide8_i32_cmp(g, CMP_LT_U, rlo, alo); 714 rhi = wide8_i32_binop(g, BO_IADD, ahi, bhi); 715 /* carry-out of the high lane = (rhi<ahi) before +carry, OR wrap on +carry. 716 * Compute rhi step by step so we can detect the final carry-out. */ 717 Operand c_hi0 = wide8_i32_cmp(g, CMP_LT_U, rhi, ahi); 718 rhi = wide8_i32_binop(g, BO_IADD, rhi, carry); 719 Operand c_hi1 = wide8_i32_cmp(g, CMP_LT_U, rhi, carry); 720 if (intrin == KIT_CG_INTRIN_UADD_OVERFLOW) { 721 /* unsigned: ok = carry-out of the high lane */ 722 ok = wide8_i32_binop(g, BO_OR, c_hi0, c_hi1); 723 } else { 724 /* signed: ok = ((a_hi ^ r_hi) & (b_hi ^ r_hi)) sign bit (bit 31) */ 725 Operand ar_x = wide8_i32_binop(g, BO_XOR, ahi, rhi); 726 Operand br_x = wide8_i32_binop(g, BO_XOR, bhi, rhi); 727 Operand both = wide8_i32_binop(g, BO_AND, ar_x, br_x); 728 ok = wide8_i32_binop(g, BO_SHR_U, both, sh31); 729 } 730 break; 731 } 732 case KIT_CG_INTRIN_USUB_OVERFLOW: 733 case KIT_CG_INTRIN_SSUB_OVERFLOW: { 734 Operand borrow = wide8_i32_cmp(g, CMP_LT_U, alo, blo); 735 rlo = wide8_i32_binop(g, BO_ISUB, alo, blo); 736 Operand t = wide8_i32_binop(g, BO_ISUB, ahi, bhi); 737 /* high-lane borrow-out: (ahi < bhi) OR (t < borrow after subtracting). */ 738 Operand b_hi0 = wide8_i32_cmp(g, CMP_LT_U, ahi, bhi); 739 Operand b_hi1 = wide8_i32_cmp(g, CMP_LT_U, t, borrow); 740 rhi = wide8_i32_binop(g, BO_ISUB, t, borrow); 741 if (intrin == KIT_CG_INTRIN_USUB_OVERFLOW) { 742 ok = wide8_i32_binop(g, BO_OR, b_hi0, b_hi1); 743 } else { 744 /* signed: ok = ((a_hi ^ b_hi) & (a_hi ^ r_hi)) sign bit (bit 31) */ 745 Operand ab_x = wide8_i32_binop(g, BO_XOR, ahi, bhi); 746 Operand ar_x = wide8_i32_binop(g, BO_XOR, ahi, rhi); 747 Operand both = wide8_i32_binop(g, BO_AND, ab_x, ar_x); 748 ok = wide8_i32_binop(g, BO_SHR_U, both, sh31); 749 } 750 break; 751 } 752 case KIT_CG_INTRIN_UMUL_OVERFLOW: 753 case KIT_CG_INTRIN_SMUL_OVERFLOW: { 754 int is_signed = (intrin == KIT_CG_INTRIN_SMUL_OVERFLOW); 755 /* For signed, compute |a|,|b| as unsigned 64-bit, do the unsigned 128-bit 756 * product, then apply the result sign. Overflow tests below use the 757 * unsigned magnitude product plus the expected sign. */ 758 Operand ua_lo = alo, ua_hi = ahi, ub_lo = blo, ub_hi = bhi; 759 Operand sgn = (Operand){0}; 760 if (is_signed) { 761 /* a_sign = ahi >> 31 (0 or 1 in i32, but as a mask we want -1/0). */ 762 Operand am = wide8_i32_binop(g, BO_SHR_S, ahi, sh31); /* 0 or -1 */ 763 Operand bm = wide8_i32_binop(g, BO_SHR_S, bhi, sh31); 764 /* |a| = (a ^ am) - am (two's-complement abs), lane-wise w/ borrow. */ 765 Operand axl = wide8_i32_binop(g, BO_XOR, alo, am); 766 Operand axh = wide8_i32_binop(g, BO_XOR, ahi, am); 767 Operand brwa = wide8_i32_cmp(g, CMP_LT_U, axl, am); 768 ua_lo = wide8_i32_binop(g, BO_ISUB, axl, am); 769 Operand tah = wide8_i32_binop(g, BO_ISUB, axh, am); 770 ua_hi = wide8_i32_binop(g, BO_ISUB, tah, brwa); 771 Operand bxl = wide8_i32_binop(g, BO_XOR, blo, bm); 772 Operand bxh = wide8_i32_binop(g, BO_XOR, bhi, bm); 773 Operand brwb = wide8_i32_cmp(g, CMP_LT_U, bxl, bm); 774 ub_lo = wide8_i32_binop(g, BO_ISUB, bxl, bm); 775 Operand tbh = wide8_i32_binop(g, BO_ISUB, bxh, bm); 776 ub_hi = wide8_i32_binop(g, BO_ISUB, tbh, brwb); 777 sgn = wide8_i32_binop(g, BO_XOR, am, bm); /* result sign mask -1/0 */ 778 } 779 /* Unsigned 128-bit product of (ua_hi:ua_lo) * (ub_hi:ub_lo). 780 * P00 = ua_lo*ub_lo -> columns 0,1 781 * P01 = ua_lo*ub_hi -> columns 1,2 782 * P10 = ua_hi*ub_lo -> columns 1,2 783 * P11 = ua_hi*ub_hi -> columns 2,3 */ 784 Operand p00l, p00h, p01l, p01h, p10l, p10h, p11l, p11h; 785 wide8_umul32(g, ua_lo, ub_lo, &p00l, &p00h); 786 wide8_umul32(g, ua_lo, ub_hi, &p01l, &p01h); 787 wide8_umul32(g, ua_hi, ub_lo, &p10l, &p10h); 788 wide8_umul32(g, ua_hi, ub_hi, &p11l, &p11h); 789 Operand zero = api_op_imm(0, i32); 790 /* column 0 */ 791 Operand r0 = p00l; 792 /* column 1 = p00h + p01l + p10l */ 793 Operand c1 = zero; 794 Operand r1 = p00h; 795 r1 = wide8_addc(g, r1, p01l, &c1); 796 r1 = wide8_addc(g, r1, p10l, &c1); 797 /* column 2 = p01h + p10h + p11l + c1 */ 798 Operand c2 = zero; 799 Operand r2 = p01h; 800 r2 = wide8_addc(g, r2, p10h, &c2); 801 r2 = wide8_addc(g, r2, p11l, &c2); 802 r2 = wide8_addc(g, r2, c1, &c2); 803 /* column 3 = p11h + c2 */ 804 Operand r3 = wide8_i32_binop(g, BO_IADD, p11h, c2); 805 /* low 64 bits = (r1:r0); high 64 bits = (r3:r2). */ 806 Operand mlo = r0, mhi = r1; 807 Operand hi_lo = r2, hi_hi = r3; 808 if (is_signed) { 809 /* Apply result sign: negate the 128-bit magnitude if sgn==-1. 810 * negated = (x ^ sgn) - sgn across all 4 words with borrow. */ 811 Operand w0 = wide8_i32_binop(g, BO_XOR, mlo, sgn); 812 Operand w1 = wide8_i32_binop(g, BO_XOR, mhi, sgn); 813 Operand w2 = wide8_i32_binop(g, BO_XOR, hi_lo, sgn); 814 Operand w3 = wide8_i32_binop(g, BO_XOR, hi_hi, sgn); 815 Operand bor0 = wide8_i32_cmp(g, CMP_LT_U, w0, sgn); 816 mlo = wide8_i32_binop(g, BO_ISUB, w0, sgn); 817 Operand t1 = wide8_i32_binop(g, BO_ISUB, w1, sgn); 818 Operand bor1a = wide8_i32_cmp(g, CMP_LT_U, w1, sgn); 819 Operand bor1b = wide8_i32_cmp(g, CMP_LT_U, t1, bor0); 820 mhi = wide8_i32_binop(g, BO_ISUB, t1, bor0); 821 Operand bor1 = wide8_i32_binop(g, BO_OR, bor1a, bor1b); 822 Operand t2 = wide8_i32_binop(g, BO_ISUB, w2, sgn); 823 Operand bor2a = wide8_i32_cmp(g, CMP_LT_U, w2, sgn); 824 Operand bor2b = wide8_i32_cmp(g, CMP_LT_U, t2, bor1); 825 hi_lo = wide8_i32_binop(g, BO_ISUB, t2, bor1); 826 Operand bor2 = wide8_i32_binop(g, BO_OR, bor2a, bor2b); 827 Operand t3 = wide8_i32_binop(g, BO_ISUB, w3, sgn); 828 hi_hi = wide8_i32_binop(g, BO_ISUB, t3, bor2); 829 } 830 rlo = mlo; 831 rhi = mhi; 832 if (!is_signed) { 833 /* unsigned overflow: high 64 bits nonzero. */ 834 Operand t = wide8_i32_binop(g, BO_OR, hi_lo, hi_hi); 835 ok = wide8_i32_cmp(g, CMP_NE, t, zero); 836 } else { 837 /* signed overflow: the 128-bit result is not the sign-extension of its 838 * low 64 bits. sext = (rhi >> 31) replicated; overflow if 839 * (hi_lo != sext) | (hi_hi != sext) where sext = arithmetic sign of 840 * the signed low-64 result (bit 63 = rhi sign). */ 841 Operand sext = wide8_i32_binop(g, BO_SHR_S, rhi, sh31); /* 0 or -1 */ 842 Operand d2 = wide8_i32_binop(g, BO_XOR, hi_lo, sext); 843 Operand d3 = wide8_i32_binop(g, BO_XOR, hi_hi, sext); 844 Operand d = wide8_i32_binop(g, BO_OR, d2, d3); 845 ok = wide8_i32_cmp(g, CMP_NE, d, zero); 846 } 847 break; 848 } 849 default: 850 compiler_panic(g->c, g->cur_loc, 851 "KitCg: unsupported wide i64 overflow intrinsic"); 852 api_release(g, &a); 853 api_release(g, &b); 854 return; 855 } 856 api_wide8_store_lane(g, ar, lo, rlo); 857 api_wide8_store_lane(g, ar, hi, rhi); 858 api_release(g, &a); 859 api_release(g, &b); 860 /* Materialize ok as a fresh bool temp so it has a stable home. */ 861 { 862 CGLocal okl = api_alloc_temp_local(g, bool_ty); 863 Operand okd = api_op_local(okl, bool_ty); 864 g->target->binop(g->target, BO_AND, okd, ok, api_op_imm(1, i32)); 865 api_push(g, api_make_sv(api_op_local(res, ty), ty)); 866 api_push(g, api_make_sv(okd, bool_ty)); 867 } 868 } 869 870 /* int<->split-i64 conversions (sext/zext/trunc/bitcast across the 4<->8 871 * boundary, and i64->bool). Returns 1 if it handled (and consumed) *v. The 872 * i64<->float conversions are routed to libcalls in kit_cg_*_to_float / 873 * kit_cg_float_to_* and never reach here. */ 874 int api_try_wide8_convert(KitCg* g, ConvKind ck, KitCgTypeId sty, 875 KitCgTypeId dty, ApiSValue* v) { 876 KitCgTypeId i32 = builtin_id(KIT_CG_BUILTIN_I32); 877 int s_wide = api_is_wide8_scalar_type(g->c, sty); 878 int d_wide = api_is_wide8_scalar_type(g->c, dty); 879 int lo = wide8_lo_off(g), hi = wide8_hi_off(g); 880 if (!s_wide && !d_wide) return 0; 881 if (s_wide && d_wide) { 882 /* i64<->soft-double reinterpret (same 8-byte layout) or i64<->u64. */ 883 v->type = dty; 884 v->op.type = dty; 885 api_push(g, *v); 886 return 1; 887 } 888 if (d_wide) { 889 /* narrower int -> i64: low lane is the (converted-to-i32) source; high lane 890 * is the sign-extension (CV_SEXT) or zero (CV_ZEXT/CV_BITCAST of a ptr). */ 891 int sext = (ck == CV_SEXT); 892 Operand src32; 893 CGLocal res; 894 ApiSValue res_lv; 895 Operand ar; 896 Operand hival; 897 if (api_unalias_type(g->c, sty) != i32) { 898 api_push(g, *v); 899 api_cg_convert_kind(g, i32, ck == CV_SEXT ? CV_SEXT : CV_ZEXT); 900 *v = api_pop(g); 901 } 902 src32 = api_force_local(g, v, i32); 903 res = api_wide8_temp_local(g, dty); 904 res_lv = api_make_lv(api_op_local(res, dty), dty); 905 ar = api_lvalue_addr(g, &res_lv, cg_type_ptr_to(g->c, dty)); 906 api_wide8_store_lane(g, ar, lo, src32); 907 if (sext) 908 hival = wide8_i32_binop(g, BO_SHR_S, src32, api_op_imm(31, i32)); 909 else 910 hival = api_op_imm(0, i32); 911 api_wide8_store_lane(g, ar, hi, hival); 912 api_release(g, v); 913 api_push(g, api_make_sv(api_op_local(res, dty), dty)); 914 return 1; 915 } 916 /* s_wide: i64 -> narrower. _Bool is "any bit set"; else take the low lane and 917 * truncate/convert further. */ 918 if (api_is_bool_type(g->c, dty)) { 919 Operand orl = api_wide8_or_lanes(g, v, sty); 920 api_release(g, v); 921 api_push(g, api_make_sv(orl, i32)); 922 kit_cg_push_int(g, 0, i32); 923 api_cg_cmp(g, CMP_NE); 924 api_cg_convert_kind(g, dty, CV_TRUNC); 925 return 1; 926 } 927 { 928 Operand addr = api_wide8_addr(g, v, sty); 929 Operand lolane = api_wide8_load_lane(g, addr, lo); 930 api_release(g, v); 931 api_push(g, api_make_sv(lolane, i32)); 932 if (api_unalias_type(g->c, dty) != i32) api_cg_convert_kind(g, dty, CV_TRUNC); 933 return 1; 934 } 935 } 936 937 static void api_i128_unop(KitCg* g, UnOp iop) { 938 KitCgTypeId i128 = builtin_id(KIT_CG_BUILTIN_I128); 939 const char* name = NULL; 940 ApiSValue args[1]; 941 KitCgTypeId ps[1]; 942 if (iop == UO_NEG) 943 name = "__negti2"; 944 else if (iop == UO_BNOT) 945 name = "__kit_notti3"; 946 else { 947 compiler_panic(g->c, g->cur_loc, "KitCg: unsupported i128 unop"); 948 return; 949 } 950 args[0] = api_pop(g); 951 ps[0] = i128; 952 api_runtime_call_values(g, name, i128, ps, 1, args); 953 } 954 955 /* Map a relational op to the form used to compare a __kit_*cmpti2 956 * result (-1/0/1, a signed i32) against zero. */ 957 static CmpOp api_i128_cmp_vs_zero(CmpOp cop) { 958 switch (cop) { 959 case CMP_EQ: 960 return CMP_EQ; 961 case CMP_NE: 962 return CMP_NE; 963 case CMP_LT_S: 964 case CMP_LT_U: 965 return CMP_LT_S; 966 case CMP_LE_S: 967 case CMP_LE_U: 968 return CMP_LE_S; 969 case CMP_GT_S: 970 case CMP_GT_U: 971 return CMP_GT_S; 972 case CMP_GE_S: 973 case CMP_GE_U: 974 return CMP_GE_S; 975 default: 976 return CMP_NE; 977 } 978 } 979 980 static void api_i128_cmp(KitCg* g, CmpOp cop) { 981 KitCgTypeId i128 = builtin_id(KIT_CG_BUILTIN_I128); 982 KitCgTypeId i32 = builtin_id(KIT_CG_BUILTIN_I32); 983 const char* name = 984 api_i128_cmp_is_unsigned(cop) ? "__kit_ucmpti2" : "__kit_cmpti2"; 985 KitCgTypeId ps[2] = {i128, i128}; 986 ApiSValue args[2]; 987 args[1] = api_pop(g); 988 args[0] = api_pop(g); 989 api_runtime_call_values(g, name, i32, ps, 2, args); 990 kit_cg_push_int(g, 0, i32); 991 api_cg_cmp(g, api_i128_cmp_vs_zero(cop)); 992 } 993 994 /* int<->i128 conversions. Returns 1 if it handled the conversion and 995 * consumed *v, 0 to fall through to the generic path. */ 996 int api_try_i128_convert(KitCg* g, ConvKind ck, KitCgTypeId sty, 997 KitCgTypeId dty, ApiSValue* v) { 998 KitCgTypeId i128 = builtin_id(KIT_CG_BUILTIN_I128); 999 KitCgTypeId i64 = builtin_id(KIT_CG_BUILTIN_I64); 1000 int s_is_128 = api_is_i128_type(g->c, sty); 1001 int d_is_128 = api_is_i128_type(g->c, dty); 1002 if (!s_is_128 && !d_is_128) return 0; 1003 if (s_is_128 && d_is_128) { 1004 /* signed<->unsigned i128 reinterpret: identical layout. */ 1005 v->type = dty; 1006 v->op.type = dty; 1007 api_push(g, *v); 1008 return 1; 1009 } 1010 if (d_is_128) { 1011 u32 sw = kit_cg_type_int_width((KitCompiler*)g->c, sty); 1012 const char* name = (ck == CV_SEXT) ? "__kit_sext64ti" : "__kit_zext64ti"; 1013 ApiSValue arg; 1014 KitCgTypeId ps[1]; 1015 if (sw == 0) return 0; /* float->i128 unsupported here */ 1016 if (sw >= 64) { 1017 arg = *v; 1018 arg.type = i64; 1019 arg.op.type = i64; 1020 } else { 1021 api_push(g, *v); 1022 api_cg_convert_kind(g, i64, ck); 1023 arg = api_pop(g); 1024 } 1025 ps[0] = i64; 1026 api_runtime_call_values(g, name, i128, ps, 1, &arg); 1027 return 1; 1028 } 1029 /* s_is_128, dty is _Bool: "value != 0" over the full 128 bits, not a 1030 * low-lane truncation (a value whose only set bits are above bit 63 must 1031 * still become 1). Reuse the runtime i128 compare. */ 1032 if (api_is_bool_type(g->c, dty)) { 1033 api_push(g, *v); 1034 kit_cg_push_int(g, 0, i128); 1035 api_i128_cmp(g, CMP_NE); /* leaves i32 0/1 */ 1036 api_cg_convert_kind(g, dty, CV_TRUNC); 1037 return 1; 1038 } 1039 /* s_is_128, dty is a narrower integer: take the low 64 bits, then 1040 * truncate further if needed. */ 1041 { 1042 u32 dw = kit_cg_type_int_width((KitCompiler*)g->c, dty); 1043 i32 lo_off = g->c->target.big_endian ? 8 : 0; 1044 Operand addr; 1045 Operand lo; 1046 if (dw == 0) return 0; /* i128->float unsupported here */ 1047 addr = api_i128_addr(g, v); 1048 lo = api_i128_load_lane(g, addr, lo_off); 1049 api_release_temp_local(g, addr.v.local); 1050 api_release(g, v); 1051 if (dw >= 64) { 1052 api_push(g, api_make_sv(lo, dty)); 1053 } else { 1054 api_push(g, api_make_sv(lo, i64)); 1055 api_cg_convert_kind(g, dty, CV_TRUNC); 1056 } 1057 return 1; 1058 } 1059 } 1060 1061 void kit_cg_int_binop(KitCg* g, KitCgIntBinOp op, uint32_t flags) { 1062 BinOp iop = api_map_int_binop(op); 1063 if (g && (api_i128_stack_top(g, 0) || api_i128_stack_top(g, 1))) { 1064 api_i128_binop(g, iop); 1065 return; 1066 } 1067 /* 64-bit int split into 32-bit lanes: mul/div/rem/shift become __*di3 1068 * runtime calls; add/sub/and/or/xor are emitted inline as 2-word lane ops 1069 * (no compiler-rt helper exists for them). Both keep the value memory-resident 1070 * so the allocator never tries to put 8 bytes in one 4-byte value slot. */ 1071 if (g && (api_wide64_stack_top(g, 0) || api_wide64_stack_top(g, 1))) { 1072 if (api_wideint64_binop_helper(iop)) 1073 api_wideint64_binop(g, iop); 1074 else 1075 api_wide64_binop_inline(g, iop); 1076 return; 1077 } 1078 api_cg_binop(g, iop, flags); 1079 } 1080 1081 void kit_cg_int_unop(KitCg* g, KitCgIntUnOp op, uint32_t flags) { 1082 UnOp iop = api_map_int_unop(op); 1083 if (g && api_i128_stack_top(g, 0) && (iop == UO_NEG || iop == UO_BNOT)) { 1084 api_i128_unop(g, iop); 1085 return; 1086 } 1087 /* Split 64-bit int: neg/bnot are inline 2-word lane ops; logical-not (!x) is 1088 * the full-value truthiness test (lo|hi)==0. */ 1089 if (g && api_wide64_stack_top(g, 0)) { 1090 if (iop == UO_NEG || iop == UO_BNOT) { 1091 api_wide64_unop_inline(g, iop); 1092 return; 1093 } 1094 if (iop == UO_NOT) { 1095 KitCgTypeId i32 = builtin_id(KIT_CG_BUILTIN_I32); 1096 ApiSValue v = api_pop(g); 1097 KitCgTypeId ty = v.type ? v.type : v.op.type; 1098 Operand orl = api_wide8_or_lanes(g, &v, ty); 1099 api_release(g, &v); 1100 api_push(g, api_make_sv(orl, i32)); 1101 kit_cg_push_int(g, 0, i32); 1102 api_cg_cmp(g, CMP_EQ); 1103 api_cg_convert_kind(g, ty, CV_ZEXT); 1104 return; 1105 } 1106 } 1107 api_cg_unop(g, iop, flags); 1108 } 1109 1110 void kit_cg_int_cmp(KitCg* g, KitCgIntCmpOp op) { 1111 CmpOp cop = api_map_int_cmp(op); 1112 if (g && (api_i128_stack_top(g, 0) || api_i128_stack_top(g, 1))) { 1113 api_i128_cmp(g, cop); 1114 return; 1115 } 1116 if (g && (api_wide64_stack_top(g, 0) || api_wide64_stack_top(g, 1))) { 1117 api_wide64_cmp_inline(g, cop); 1118 return; 1119 } 1120 api_cg_cmp(g, cop); 1121 } 1122 1123 const char* api_i128_binop_helper(BinOp op) { 1124 switch (op) { 1125 case BO_IADD: 1126 return "__kit_addti3"; 1127 case BO_ISUB: 1128 return "__kit_subti3"; 1129 case BO_IMUL: 1130 return "__multi3"; 1131 case BO_SDIV: 1132 return "__divti3"; 1133 case BO_UDIV: 1134 return "__udivti3"; 1135 case BO_SREM: 1136 return "__modti3"; 1137 case BO_UREM: 1138 return "__umodti3"; 1139 case BO_AND: 1140 return "__kit_andti3"; 1141 case BO_OR: 1142 return "__kit_orti3"; 1143 case BO_XOR: 1144 return "__kit_xorti3"; 1145 case BO_SHL: 1146 return "__ashlti3"; 1147 case BO_SHR_U: 1148 return "__lshrti3"; 1149 case BO_SHR_S: 1150 return "__ashrti3"; 1151 case BO_FADD: 1152 case BO_FSUB: 1153 case BO_FMUL: 1154 case BO_FDIV: 1155 default: 1156 return NULL; 1157 } 1158 } 1159 1160 int api_i128_cmp_is_unsigned(CmpOp op) { 1161 return op == CMP_LT_U || op == CMP_LE_U || op == CMP_GT_U || op == CMP_GE_U; 1162 } 1163 1164 const char* api_f128_binop_helper(KitCgFpBinOp op) { 1165 switch (op) { 1166 case KIT_CG_FP_ADD: 1167 return "__addtf3"; 1168 case KIT_CG_FP_SUB: 1169 return "__subtf3"; 1170 case KIT_CG_FP_MUL: 1171 return "__multf3"; 1172 case KIT_CG_FP_DIV: 1173 return "__divtf3"; 1174 } 1175 return NULL; 1176 } 1177 1178 /* Runtime helper name for double (f64) arithmetic on a target that lacks a 1179 * hardware double unit. Mirrors api_f128_binop_helper with the __*df3 names. */ 1180 static const char* api_softdf_binop_helper(KitCgFpBinOp op) { 1181 switch (op) { 1182 case KIT_CG_FP_ADD: 1183 return "__adddf3"; 1184 case KIT_CG_FP_SUB: 1185 return "__subdf3"; 1186 case KIT_CG_FP_MUL: 1187 return "__muldf3"; 1188 case KIT_CG_FP_DIV: 1189 return "__divdf3"; 1190 } 1191 return NULL; 1192 } 1193 1194 int api_f128_stack_top(KitCg* g, u32 depth) { 1195 if (!g || g->sp <= depth) return 0; 1196 return api_is_f128_type(g->c, api_sv_type(&g->stack[g->sp - 1u - depth])); 1197 } 1198 1199 /* True when the target has no hardware double: float_abi is SOFT (ilp32/lp64, 1200 * no FP regs) or SINGLE (ilp32f/lp64f, only float in FP regs — double is always 1201 * soft). DOUBLE (rv64 lp64d) and DEFAULT (x64/aa64 hardware-double targets that 1202 * never set float_abi) keep the inline hardware path, so existing rv64/x64/aa64 1203 * codegen is unchanged. */ 1204 static int api_target_double_is_soft(KitCg* g) { 1205 if (!g) return 0; 1206 return g->c->target.float_abi == KIT_FLOAT_ABI_SOFT || 1207 g->c->target.float_abi == KIT_FLOAT_ABI_SINGLE; 1208 } 1209 1210 /* True when ty is a 64-bit float (double) AND the target lacks hardware double. 1211 * f128 is handled by the separate api_f128_* path, so width must be exactly 64. */ 1212 static int api_type_is_soft_double(KitCg* g, KitCgTypeId ty) { 1213 if (!api_target_double_is_soft(g)) return 0; 1214 return kit_cg_type_float_width((KitCompiler*)g->c, ty) == 64; 1215 } 1216 1217 static int api_soft_double_stack_top(KitCg* g, u32 depth) { 1218 if (!g || g->sp <= depth) return 0; 1219 return api_type_is_soft_double(g, api_sv_type(&g->stack[g->sp - 1u - depth])); 1220 } 1221 1222 /* f32 under pure-soft ilp32/lp64 (float_abi SOFT, no FP unit): single-precision 1223 * arithmetic/compare/convert is also a libcall. Under SINGLE (ilp32f) float is 1224 * hardware (fadd.s etc.) so this is false; DOUBLE/DEFAULT keep hardware too. */ 1225 static int api_type_is_soft_single(KitCg* g, KitCgTypeId ty) { 1226 if (!g || g->c->target.float_abi != KIT_FLOAT_ABI_SOFT) return 0; 1227 return kit_cg_type_float_width((KitCompiler*)g->c, ty) == 32; 1228 } 1229 1230 static int api_soft_single_stack_top(KitCg* g, u32 depth) { 1231 if (!g || g->sp <= depth) return 0; 1232 return api_type_is_soft_single(g, api_sv_type(&g->stack[g->sp - 1u - depth])); 1233 } 1234 1235 /* Runtime helper for f32 arithmetic on a soft-float target (mirrors 1236 * api_softdf_binop_helper with the __*sf3 names). */ 1237 static const char* api_softsf_binop_helper(KitCgFpBinOp op) { 1238 switch (op) { 1239 case KIT_CG_FP_ADD: return "__addsf3"; 1240 case KIT_CG_FP_SUB: return "__subsf3"; 1241 case KIT_CG_FP_MUL: return "__mulsf3"; 1242 case KIT_CG_FP_DIV: return "__divsf3"; 1243 } 1244 return NULL; 1245 } 1246 1247 void api_f128_call_unary(KitCg* g, const char* name, KitCgTypeId ret, 1248 KitCgTypeId param) { 1249 ApiSValue args[1]; 1250 KitCgTypeId ps[1]; 1251 args[0] = api_pop(g); 1252 ps[0] = param; 1253 api_runtime_call_values(g, name, ret, ps, 1, args); 1254 } 1255 1256 void kit_cg_fp_binop(KitCg* g, KitCgFpBinOp op, uint32_t flags) { 1257 (void)flags; 1258 if (api_f128_stack_top(g, 0) || api_f128_stack_top(g, 1)) { 1259 KitCgTypeId f128 = builtin_id(KIT_CG_BUILTIN_F128); 1260 KitCgTypeId ps[2]; 1261 ApiSValue args[2]; 1262 const char* name = api_f128_binop_helper(op); 1263 if (!name) 1264 compiler_panic(g->c, g->cur_loc, "KitCg: unsupported f128 binop"); 1265 args[1] = api_pop(g); 1266 args[0] = api_pop(g); 1267 ps[0] = f128; 1268 ps[1] = f128; 1269 api_runtime_call_values(g, name, f128, ps, 2, args); 1270 return; 1271 } 1272 if (api_soft_double_stack_top(g, 0) || api_soft_double_stack_top(g, 1)) { 1273 KitCgTypeId f64 = builtin_id(KIT_CG_BUILTIN_F64); 1274 KitCgTypeId ps[2]; 1275 ApiSValue args[2]; 1276 const char* name = api_softdf_binop_helper(op); 1277 if (!name) 1278 compiler_panic(g->c, g->cur_loc, "KitCg: unsupported soft double binop"); 1279 args[1] = api_pop(g); 1280 args[0] = api_pop(g); 1281 ps[0] = f64; 1282 ps[1] = f64; 1283 api_runtime_call_values(g, name, f64, ps, 2, args); 1284 return; 1285 } 1286 if (api_soft_single_stack_top(g, 0) || api_soft_single_stack_top(g, 1)) { 1287 KitCgTypeId f32 = builtin_id(KIT_CG_BUILTIN_F32); 1288 KitCgTypeId ps[2]; 1289 ApiSValue args[2]; 1290 const char* name = api_softsf_binop_helper(op); 1291 if (!name) 1292 compiler_panic(g->c, g->cur_loc, "KitCg: unsupported soft single binop"); 1293 args[1] = api_pop(g); 1294 args[0] = api_pop(g); 1295 ps[0] = f32; 1296 ps[1] = f32; 1297 api_runtime_call_values(g, name, f32, ps, 2, args); 1298 return; 1299 } 1300 api_cg_binop(g, api_map_fp_binop(op), 0); 1301 } 1302 1303 void kit_cg_fp_unop(KitCg* g, KitCgFpUnOp op, uint32_t flags) { 1304 (void)flags; 1305 if (!g) return; 1306 if (op != KIT_CG_FP_NEG) { 1307 compiler_panic(g->c, g->cur_loc, "KitCg: FP unary op unsupported"); 1308 } 1309 if (api_f128_stack_top(g, 0)) { 1310 KitCgTypeId f128 = builtin_id(KIT_CG_BUILTIN_F128); 1311 api_f128_call_unary(g, "__negtf2", f128, f128); 1312 return; 1313 } 1314 /* Soft float has no FP unit, so negation is a libcall too (the inline FNEG 1315 * path emits fsgnj on an FP register, which does not exist here). */ 1316 if (api_soft_double_stack_top(g, 0)) { 1317 KitCgTypeId f64 = builtin_id(KIT_CG_BUILTIN_F64); 1318 api_f128_call_unary(g, "__negdf2", f64, f64); 1319 return; 1320 } 1321 if (api_soft_single_stack_top(g, 0)) { 1322 KitCgTypeId f32 = builtin_id(KIT_CG_BUILTIN_F32); 1323 api_f128_call_unary(g, "__negsf2", f32, f32); 1324 return; 1325 } 1326 api_cg_unop(g, UO_FNEG, 0); 1327 } 1328 1329 /* Soft-float single-libcall comparison: call `name(a,b)` (both operands of type 1330 * `opty`) and test its i32 three-way result against 0 with `icmp`. Consumes the 1331 * two operands on the stack and pushes the i32 boolean. Shared by the f128 (tf) 1332 * and soft-double (df) paths — only the helper name and operand type differ; the 1333 * compiler-rt NaN-sign convention is identical for both. */ 1334 static void api_softfp_cmp_call(KitCg* g, const char* name, KitCgTypeId opty, 1335 CmpOp icmp) { 1336 KitCgTypeId i32 = builtin_id(KIT_CG_BUILTIN_I32); 1337 KitCgTypeId ps[2]; 1338 ApiSValue args[2]; 1339 ps[0] = opty; 1340 ps[1] = opty; 1341 args[1] = api_pop(g); 1342 args[0] = api_pop(g); 1343 api_runtime_call_values(g, name, i32, ps, 2, args); 1344 kit_cg_push_int(g, 0, i32); 1345 api_cg_cmp(g, icmp); 1346 } 1347 1348 /* UEQ and ONE are the only soft-float predicates that cannot be a single 1349 * libcall: "equal" and "unordered" both yield a nonzero magnitude from 1350 * __eq*2/__ne*2, so they need a separate __unord*2 to split them. 1351 * UEQ = (__eq*2(a,b) == 0) || (__unord*2(a,b) != 0) 1352 * ONE = (__ne*2(a,b) != 0) && (__unord*2(a,b) == 0) 1353 * `suffix` is "tf" (f128) or "df" (double); `opty` the matching operand type. 1354 * The operands are dup'd (kit_cg_dup copies into a fresh owned local) so each 1355 * libcall consumes its own copy. */ 1356 static void api_softfp_cmp_with_unord(KitCg* g, KitCgFpCmpOp op, 1357 const char* suffix, KitCgTypeId opty) { 1358 char relname[16]; 1359 char unordname[16]; 1360 CmpOp relcmp = (op == KIT_CG_FP_UEQ) ? CMP_EQ : CMP_NE; 1361 const char* rel = (op == KIT_CG_FP_UEQ) ? "eq" : "ne"; 1362 snprintf(relname, sizeof relname, "__%s%s2", rel, suffix); 1363 snprintf(unordname, sizeof unordname, "__unord%s2", suffix); 1364 /* [a, b] -> [a, b, a, b] */ 1365 kit_cg_dup2(g); 1366 /* relation on the top (dup'd) copy: [a, b, R] */ 1367 api_softfp_cmp_call(g, relname, opty, relcmp); 1368 /* bring the original a, b back to TOS with R underneath: [R, a, b] */ 1369 kit_cg_rot3(g); 1370 kit_cg_rot3(g); 1371 if (op == KIT_CG_FP_UEQ) { 1372 api_softfp_cmp_call(g, unordname, opty, CMP_NE); /* [R, unordered?] */ 1373 api_cg_binop(g, BO_OR, 0); /* R || unordered */ 1374 } else { 1375 api_softfp_cmp_call(g, unordname, opty, CMP_EQ); /* [R, ordered?] */ 1376 api_cg_binop(g, BO_AND, 0); /* R && ordered */ 1377 } 1378 } 1379 1380 /* Emit a soft-float comparison for either f128 (suffix "tf", opty f128) or 1381 * soft double (suffix "df", opty f64). The predicate->helper mapping and the 1382 * compiler-rt NaN-sign convention are XLEN/width-neutral, so a single body 1383 * serves both — only the suffix and operand type vary. */ 1384 static void api_softfp_cmp(KitCg* g, KitCgFpCmpOp op, const char* suffix, 1385 KitCgTypeId opty) { 1386 char name[16]; 1387 switch (op) { 1388 case KIT_CG_FP_OEQ: 1389 snprintf(name, sizeof name, "__eq%s2", suffix); 1390 api_softfp_cmp_call(g, name, opty, CMP_EQ); 1391 return; 1392 case KIT_CG_FP_UNE: 1393 snprintf(name, sizeof name, "__ne%s2", suffix); 1394 api_softfp_cmp_call(g, name, opty, CMP_NE); 1395 return; 1396 case KIT_CG_FP_OLT: 1397 snprintf(name, sizeof name, "__lt%s2", suffix); 1398 api_softfp_cmp_call(g, name, opty, CMP_LT_S); 1399 return; 1400 case KIT_CG_FP_OLE: 1401 snprintf(name, sizeof name, "__le%s2", suffix); 1402 api_softfp_cmp_call(g, name, opty, CMP_LE_S); 1403 return; 1404 case KIT_CG_FP_OGT: 1405 snprintf(name, sizeof name, "__gt%s2", suffix); 1406 api_softfp_cmp_call(g, name, opty, CMP_GT_S); 1407 return; 1408 case KIT_CG_FP_OGE: 1409 snprintf(name, sizeof name, "__ge%s2", suffix); 1410 api_softfp_cmp_call(g, name, opty, CMP_GE_S); 1411 return; 1412 /* unordered duals via the opposite-sign helper (NaN flips the test): */ 1413 case KIT_CG_FP_UGE: 1414 snprintf(name, sizeof name, "__lt%s2", suffix); 1415 api_softfp_cmp_call(g, name, opty, CMP_GE_S); 1416 return; 1417 case KIT_CG_FP_UGT: 1418 snprintf(name, sizeof name, "__le%s2", suffix); 1419 api_softfp_cmp_call(g, name, opty, CMP_GT_S); 1420 return; 1421 case KIT_CG_FP_ULT: 1422 snprintf(name, sizeof name, "__ge%s2", suffix); 1423 api_softfp_cmp_call(g, name, opty, CMP_LT_S); 1424 return; 1425 case KIT_CG_FP_ULE: 1426 snprintf(name, sizeof name, "__gt%s2", suffix); 1427 api_softfp_cmp_call(g, name, opty, CMP_LE_S); 1428 return; 1429 case KIT_CG_FP_UEQ: 1430 case KIT_CG_FP_ONE: 1431 api_softfp_cmp_with_unord(g, op, suffix, opty); 1432 return; 1433 } 1434 } 1435 1436 void kit_cg_fp_cmp(KitCg* g, KitCgFpCmpOp op) { 1437 /* f128/long double and soft double are both soft-float: the comparison is a 1438 * libcall returning a three-way i32 we test against 0. kit's runtime uses the 1439 * standard compiler-rt sign convention (rt/lib/impl/fp_compare_impl.inc): 1440 * __le-family (__eq*2/__ne*2/__lt*2/__le*2): NaN -> +1 1441 * __ge-family (__ge*2/__gt*2): NaN -> -1 1442 * so each ordered predicate AND its unordered dual maps to one libcall, 1443 * choosing the helper whose NaN sign makes the integer test fall the right 1444 * way (ordered: NaN must fail; unordered: NaN must pass). Only UEQ/ONE, which 1445 * must split "equal" from "unordered", need a second (__unord*2) call. The 1446 * convention is width-neutral, so the same logic drives the tf and df 1447 * suffixes via api_softfp_cmp. */ 1448 if (api_f128_stack_top(g, 0) || api_f128_stack_top(g, 1)) { 1449 api_softfp_cmp(g, op, "tf", builtin_id(KIT_CG_BUILTIN_F128)); 1450 return; 1451 } 1452 if (api_soft_double_stack_top(g, 0) || api_soft_double_stack_top(g, 1)) { 1453 api_softfp_cmp(g, op, "df", builtin_id(KIT_CG_BUILTIN_F64)); 1454 return; 1455 } 1456 if (api_soft_single_stack_top(g, 0) || api_soft_single_stack_top(g, 1)) { 1457 api_softfp_cmp(g, op, "sf", builtin_id(KIT_CG_BUILTIN_F32)); 1458 return; 1459 } 1460 api_cg_cmp(g, api_map_fp_cmp(op)); 1461 } 1462 1463 void kit_cg_sext(KitCg* g, KitCgTypeId dst) { 1464 api_cg_convert_kind(g, dst, CV_SEXT); 1465 } 1466 1467 void kit_cg_zext(KitCg* g, KitCgTypeId dst) { 1468 api_cg_convert_kind(g, dst, CV_ZEXT); 1469 } 1470 1471 void kit_cg_trunc(KitCg* g, KitCgTypeId dst) { 1472 api_cg_convert_kind(g, dst, CV_TRUNC); 1473 } 1474 1475 void kit_cg_ptr_to_int(KitCg* g, KitCgTypeId dst) { 1476 api_cg_convert_kind(g, dst, CV_BITCAST); 1477 } 1478 1479 void kit_cg_int_to_ptr(KitCg* g, KitCgTypeId dst) { 1480 api_cg_convert_kind(g, dst, CV_BITCAST); 1481 } 1482 1483 void kit_cg_bitcast(KitCg* g, KitCgTypeId dst) { 1484 api_cg_convert_kind(g, dst, CV_BITCAST); 1485 } 1486 1487 void kit_cg_fpext(KitCg* g, KitCgTypeId dst) { 1488 KitCgTypeId dty = resolve_type(g->c, dst); 1489 if (api_is_f128_type(g->c, dty)) { 1490 ApiSValue v = api_pop(g); 1491 KitCgTypeId sty = api_unalias_type(g->c, api_sv_type(&v)); 1492 const char* name = sty == builtin_id(KIT_CG_BUILTIN_F32) ? "__extendsftf2" 1493 : "__extenddftf2"; 1494 api_push(g, v); 1495 api_f128_call_unary(g, name, dty, sty); 1496 return; 1497 } 1498 /* float -> soft double: runtime widen via __extendsfdf2. */ 1499 if (api_type_is_soft_double(g, dty)) { 1500 ApiSValue v = api_pop(g); 1501 KitCgTypeId sty = api_unalias_type(g->c, api_sv_type(&v)); 1502 api_push(g, v); 1503 api_f128_call_unary(g, "__extendsfdf2", dty, sty); 1504 return; 1505 } 1506 api_cg_convert_kind(g, dst, CV_FEXT); 1507 } 1508 1509 void kit_cg_fptrunc(KitCg* g, KitCgTypeId dst) { 1510 KitCgTypeId dty = resolve_type(g->c, dst); 1511 if (api_f128_stack_top(g, 0)) { 1512 ApiSValue v = api_pop(g); 1513 KitCgTypeId f128 = builtin_id(KIT_CG_BUILTIN_F128); 1514 const char* name = 1515 dty == builtin_id(KIT_CG_BUILTIN_F32) ? "__trunctfsf2" : "__trunctfdf2"; 1516 api_push(g, v); 1517 api_f128_call_unary(g, name, dty, f128); 1518 return; 1519 } 1520 /* soft double -> float: runtime narrow via __truncdfsf2. */ 1521 if (api_soft_double_stack_top(g, 0)) { 1522 ApiSValue v = api_pop(g); 1523 KitCgTypeId sty = api_unalias_type(g->c, api_sv_type(&v)); 1524 api_push(g, v); 1525 api_f128_call_unary(g, "__truncdfsf2", dty, sty); 1526 return; 1527 } 1528 api_cg_convert_kind(g, dst, CV_FTRUNC); 1529 } 1530 1531 void kit_cg_sint_to_float(KitCg* g, KitCgTypeId dst, KitCgRounding rounding) { 1532 (void)rounding; 1533 if (api_is_f128_type(g->c, resolve_type(g->c, dst))) { 1534 ApiSValue v = api_pop(g); 1535 KitCgTypeId sty = api_unalias_type(g->c, api_sv_type(&v)); 1536 u32 sz = (u32)abi_cg_sizeof(g->c->abi, sty); 1537 KitCgTypeId pty = sz > 8 ? builtin_id(KIT_CG_BUILTIN_I128) 1538 : (sz > 4 ? builtin_id(KIT_CG_BUILTIN_I64) 1539 : builtin_id(KIT_CG_BUILTIN_I32)); 1540 const char* name = 1541 sz > 8 ? "__floattitf" : (sz > 4 ? "__floatditf" : "__floatsitf"); 1542 api_push(g, v); 1543 api_f128_call_unary(g, name, resolve_type(g->c, dst), pty); 1544 return; 1545 } 1546 /* signed int -> soft double: __floatsidf (i32) / __floatdidf (i64). */ 1547 if (api_type_is_soft_double(g, resolve_type(g->c, dst))) { 1548 ApiSValue v = api_pop(g); 1549 KitCgTypeId sty = api_unalias_type(g->c, api_sv_type(&v)); 1550 u32 sz = (u32)abi_cg_sizeof(g->c->abi, sty); 1551 KitCgTypeId pty = sz > 8 ? builtin_id(KIT_CG_BUILTIN_I128) 1552 : (sz > 4 ? builtin_id(KIT_CG_BUILTIN_I64) 1553 : builtin_id(KIT_CG_BUILTIN_I32)); 1554 const char* name = 1555 sz > 8 ? "__floattidf" : (sz > 4 ? "__floatdidf" : "__floatsidf"); 1556 api_push(g, v); 1557 api_f128_call_unary(g, name, resolve_type(g->c, dst), pty); 1558 return; 1559 } 1560 /* signed split-i64 -> hardware single float: use __floatdisf. */ 1561 if (api_wide64_stack_top(g, 0)) { 1562 api_f128_call_unary(g, "__floatdisf", resolve_type(g->c, dst), 1563 builtin_id(KIT_CG_BUILTIN_I64)); 1564 return; 1565 } 1566 /* i32 -> soft single float (ilp32, no FPU): __floatsisf. */ 1567 if (api_type_is_soft_single(g, resolve_type(g->c, dst))) { 1568 api_f128_call_unary(g, "__floatsisf", resolve_type(g->c, dst), 1569 builtin_id(KIT_CG_BUILTIN_I32)); 1570 return; 1571 } 1572 api_cg_convert_kind(g, dst, CV_ITOF_S); 1573 } 1574 1575 void kit_cg_uint_to_float(KitCg* g, KitCgTypeId dst, KitCgRounding rounding) { 1576 (void)rounding; 1577 if (api_is_f128_type(g->c, resolve_type(g->c, dst))) { 1578 ApiSValue v = api_pop(g); 1579 KitCgTypeId sty = api_unalias_type(g->c, api_sv_type(&v)); 1580 u32 sz = (u32)abi_cg_sizeof(g->c->abi, sty); 1581 KitCgTypeId pty = sz > 8 ? builtin_id(KIT_CG_BUILTIN_I128) 1582 : (sz > 4 ? builtin_id(KIT_CG_BUILTIN_I64) 1583 : builtin_id(KIT_CG_BUILTIN_I32)); 1584 const char* name = 1585 sz > 8 ? "__floatuntitf" : (sz > 4 ? "__floatunditf" : "__floatunsitf"); 1586 api_push(g, v); 1587 api_f128_call_unary(g, name, resolve_type(g->c, dst), pty); 1588 return; 1589 } 1590 /* unsigned int -> soft double: __floatunsidf (i32) / __floatundidf (i64). */ 1591 if (api_type_is_soft_double(g, resolve_type(g->c, dst))) { 1592 ApiSValue v = api_pop(g); 1593 KitCgTypeId sty = api_unalias_type(g->c, api_sv_type(&v)); 1594 u32 sz = (u32)abi_cg_sizeof(g->c->abi, sty); 1595 KitCgTypeId pty = sz > 8 ? builtin_id(KIT_CG_BUILTIN_I128) 1596 : (sz > 4 ? builtin_id(KIT_CG_BUILTIN_I64) 1597 : builtin_id(KIT_CG_BUILTIN_I32)); 1598 const char* name = 1599 sz > 8 ? "__floatuntidf" : (sz > 4 ? "__floatundidf" : "__floatunsidf"); 1600 api_push(g, v); 1601 api_f128_call_unary(g, name, resolve_type(g->c, dst), pty); 1602 return; 1603 } 1604 /* unsigned i64 -> hardware single float: __floatundisf. */ 1605 if (api_wide64_stack_top(g, 0)) { 1606 api_f128_call_unary(g, "__floatundisf", resolve_type(g->c, dst), 1607 builtin_id(KIT_CG_BUILTIN_I64)); 1608 return; 1609 } 1610 /* u32 -> soft single float (ilp32, no FPU): __floatunsisf. */ 1611 if (api_type_is_soft_single(g, resolve_type(g->c, dst))) { 1612 api_f128_call_unary(g, "__floatunsisf", resolve_type(g->c, dst), 1613 builtin_id(KIT_CG_BUILTIN_I32)); 1614 return; 1615 } 1616 api_cg_convert_kind(g, dst, CV_ITOF_U); 1617 } 1618 1619 void kit_cg_float_to_sint(KitCg* g, KitCgTypeId dst, KitCgRounding rounding) { 1620 (void)rounding; 1621 if (api_f128_stack_top(g, 0)) { 1622 KitCgTypeId dty = resolve_type(g->c, dst); 1623 u32 sz = (u32)abi_cg_sizeof(g->c->abi, dty); 1624 KitCgTypeId rty = sz > 8 ? builtin_id(KIT_CG_BUILTIN_I128) 1625 : (sz > 4 ? builtin_id(KIT_CG_BUILTIN_I64) 1626 : builtin_id(KIT_CG_BUILTIN_I32)); 1627 const char* name = 1628 sz > 8 ? "__fixtfti" : (sz > 4 ? "__fixtfdi" : "__fixtfsi"); 1629 api_f128_call_unary(g, name, rty, builtin_id(KIT_CG_BUILTIN_F128)); 1630 if (rty != dty) api_cg_convert_kind(g, dty, CV_TRUNC); 1631 return; 1632 } 1633 /* soft double -> signed int: __fixdfsi (i32) / __fixdfdi (i64). */ 1634 if (api_soft_double_stack_top(g, 0)) { 1635 KitCgTypeId dty = resolve_type(g->c, dst); 1636 KitCgTypeId f64 = builtin_id(KIT_CG_BUILTIN_F64); 1637 u32 sz = (u32)abi_cg_sizeof(g->c->abi, dty); 1638 KitCgTypeId rty = sz > 8 ? builtin_id(KIT_CG_BUILTIN_I128) 1639 : (sz > 4 ? builtin_id(KIT_CG_BUILTIN_I64) 1640 : builtin_id(KIT_CG_BUILTIN_I32)); 1641 const char* name = 1642 sz > 8 ? "__fixdfti" : (sz > 4 ? "__fixdfdi" : "__fixdfsi"); 1643 api_f128_call_unary(g, name, rty, f64); 1644 if (rty != dty) api_cg_convert_kind(g, dty, CV_TRUNC); 1645 return; 1646 } 1647 /* hardware single float -> split-i64: use __fixsfdi. */ 1648 if (api_is_wide8_scalar_type(g->c, resolve_type(g->c, dst))) { 1649 api_f128_call_unary(g, "__fixsfdi", resolve_type(g->c, dst), 1650 builtin_id(KIT_CG_BUILTIN_F32)); 1651 return; 1652 } 1653 /* soft single float -> signed int <=32 (ilp32, no FPU): __fixsfsi. */ 1654 if (api_soft_single_stack_top(g, 0)) { 1655 KitCgTypeId dty = resolve_type(g->c, dst); 1656 KitCgTypeId i32 = builtin_id(KIT_CG_BUILTIN_I32); 1657 api_f128_call_unary(g, "__fixsfsi", i32, builtin_id(KIT_CG_BUILTIN_F32)); 1658 if (i32 != dty) api_cg_convert_kind(g, dty, CV_TRUNC); 1659 return; 1660 } 1661 api_cg_convert_kind(g, dst, CV_FTOI_S); 1662 } 1663 1664 void kit_cg_float_to_uint(KitCg* g, KitCgTypeId dst, KitCgRounding rounding) { 1665 (void)rounding; 1666 if (api_f128_stack_top(g, 0)) { 1667 KitCgTypeId dty = resolve_type(g->c, dst); 1668 u32 sz = (u32)abi_cg_sizeof(g->c->abi, dty); 1669 KitCgTypeId rty = sz > 8 ? builtin_id(KIT_CG_BUILTIN_I128) 1670 : (sz > 4 ? builtin_id(KIT_CG_BUILTIN_I64) 1671 : builtin_id(KIT_CG_BUILTIN_I32)); 1672 const char* name = 1673 sz > 8 ? "__fixunstfti" : (sz > 4 ? "__fixunstfdi" : "__fixunstfsi"); 1674 api_f128_call_unary(g, name, rty, builtin_id(KIT_CG_BUILTIN_F128)); 1675 if (rty != dty) api_cg_convert_kind(g, dty, CV_TRUNC); 1676 return; 1677 } 1678 /* soft double -> unsigned int: __fixunsdfsi (i32) / __fixunsdfdi (i64). */ 1679 if (api_soft_double_stack_top(g, 0)) { 1680 KitCgTypeId dty = resolve_type(g->c, dst); 1681 KitCgTypeId f64 = builtin_id(KIT_CG_BUILTIN_F64); 1682 u32 sz = (u32)abi_cg_sizeof(g->c->abi, dty); 1683 KitCgTypeId rty = sz > 8 ? builtin_id(KIT_CG_BUILTIN_I128) 1684 : (sz > 4 ? builtin_id(KIT_CG_BUILTIN_I64) 1685 : builtin_id(KIT_CG_BUILTIN_I32)); 1686 const char* name = 1687 sz > 8 ? "__fixunsdfti" : (sz > 4 ? "__fixunsdfdi" : "__fixunsdfsi"); 1688 api_f128_call_unary(g, name, rty, f64); 1689 if (rty != dty) api_cg_convert_kind(g, dty, CV_TRUNC); 1690 return; 1691 } 1692 /* hardware single float -> split-u64: use __fixunssfdi. */ 1693 if (api_is_wide8_scalar_type(g->c, resolve_type(g->c, dst))) { 1694 api_f128_call_unary(g, "__fixunssfdi", resolve_type(g->c, dst), 1695 builtin_id(KIT_CG_BUILTIN_F32)); 1696 return; 1697 } 1698 /* soft single float -> unsigned int <=32 (ilp32, no FPU): __fixunssfsi. */ 1699 if (api_soft_single_stack_top(g, 0)) { 1700 KitCgTypeId dty = resolve_type(g->c, dst); 1701 KitCgTypeId i32 = builtin_id(KIT_CG_BUILTIN_I32); 1702 api_f128_call_unary(g, "__fixunssfsi", i32, builtin_id(KIT_CG_BUILTIN_F32)); 1703 if (i32 != dty) api_cg_convert_kind(g, dty, CV_TRUNC); 1704 return; 1705 } 1706 api_cg_convert_kind(g, dst, CV_FTOI_U); 1707 } 1708 1709 /* ============================================================ 1710 * Intrinsics (stub) 1711 * ============================================================ */ 1712 1713 /* One descriptor per KitCgIntrinsic, indexed by the enum value. The four 1714 * accessors below are field reads off this single source of truth; unmapped 1715 * intrinsics (FMA/cache/coro) use an INTRIN_NONE row. The table is laid 1716 * out in enum order; the _Static_assert guards its length so a new enumerator 1717 * is a compile error rather than a silently truncated index. */ 1718 typedef struct IntrinDesc { 1719 IntrinKind kind; 1720 const char* name; 1721 bool is_void; 1722 bool is_overflow; 1723 } IntrinDesc; 1724 1725 static const IntrinDesc kIntrinTable[] = { 1726 [KIT_CG_INTRIN_TRAP] = {INTRIN_TRAP, "trap", true, false}, 1727 [KIT_CG_INTRIN_CLZ] = {INTRIN_CLZ, "clz", false, false}, 1728 [KIT_CG_INTRIN_CTZ] = {INTRIN_CTZ, "ctz", false, false}, 1729 [KIT_CG_INTRIN_POPCOUNT] = {INTRIN_POPCOUNT, "popcount", false, false}, 1730 [KIT_CG_INTRIN_BSWAP] = {INTRIN_BSWAP, "bswap", false, false}, 1731 [KIT_CG_INTRIN_SETJMP] = {INTRIN_SETJMP, "setjmp", false, false}, 1732 [KIT_CG_INTRIN_LONGJMP] = {INTRIN_LONGJMP, "longjmp", true, false}, 1733 [KIT_CG_INTRIN_SADD_OVERFLOW] = 1734 {INTRIN_SADD_OVERFLOW, "sadd_overflow", false, true}, 1735 [KIT_CG_INTRIN_UADD_OVERFLOW] = 1736 {INTRIN_UADD_OVERFLOW, "uadd_overflow", false, true}, 1737 [KIT_CG_INTRIN_SSUB_OVERFLOW] = 1738 {INTRIN_SSUB_OVERFLOW, "ssub_overflow", false, true}, 1739 [KIT_CG_INTRIN_USUB_OVERFLOW] = 1740 {INTRIN_USUB_OVERFLOW, "usub_overflow", false, true}, 1741 [KIT_CG_INTRIN_SMUL_OVERFLOW] = 1742 {INTRIN_SMUL_OVERFLOW, "smul_overflow", false, true}, 1743 [KIT_CG_INTRIN_UMUL_OVERFLOW] = 1744 {INTRIN_UMUL_OVERFLOW, "umul_overflow", false, true}, 1745 [KIT_CG_INTRIN_FMA] = {INTRIN_NONE, "fma", false, false}, 1746 [KIT_CG_INTRIN_PREFETCH] = {INTRIN_PREFETCH, "prefetch", true, false}, 1747 [KIT_CG_INTRIN_EXPECT] = {INTRIN_EXPECT, "expect", false, false}, 1748 [KIT_CG_INTRIN_ASSUME_ALIGNED] = 1749 {INTRIN_ASSUME_ALIGNED, "assume_aligned", false, false}, 1750 [KIT_CG_INTRIN_SYSCALL] = {INTRIN_SYSCALL, "syscall", false, false}, 1751 [KIT_CG_INTRIN_IRQ_SAVE] = {INTRIN_IRQ_SAVE, "irq_save", false, false}, 1752 [KIT_CG_INTRIN_IRQ_RESTORE] = 1753 {INTRIN_IRQ_RESTORE, "irq_restore", true, false}, 1754 [KIT_CG_INTRIN_IRQ_DISABLE] = 1755 {INTRIN_IRQ_DISABLE, "irq_disable", true, false}, 1756 [KIT_CG_INTRIN_IRQ_ENABLE] = {INTRIN_IRQ_ENABLE, "irq_enable", true, false}, 1757 [KIT_CG_INTRIN_DMB] = {INTRIN_DMB, "dmb", true, false}, 1758 [KIT_CG_INTRIN_DSB] = {INTRIN_DSB, "dsb", true, false}, 1759 [KIT_CG_INTRIN_ISB] = {INTRIN_ISB, "isb", true, false}, 1760 [KIT_CG_INTRIN_DCACHE_CLEAN] = {INTRIN_NONE, "dcache_clean", false, false}, 1761 [KIT_CG_INTRIN_DCACHE_INVALIDATE] = 1762 {INTRIN_NONE, "dcache_invalidate", false, false}, 1763 [KIT_CG_INTRIN_DCACHE_CLEAN_INVALIDATE] = 1764 {INTRIN_NONE, "dcache_clean_invalidate", false, false}, 1765 [KIT_CG_INTRIN_ICACHE_INVALIDATE] = 1766 {INTRIN_NONE, "icache_invalidate", false, false}, 1767 [KIT_CG_INTRIN_CPU_NOP] = {INTRIN_CPU_NOP, "cpu_nop", true, false}, 1768 [KIT_CG_INTRIN_CPU_YIELD] = {INTRIN_CPU_YIELD, "cpu_yield", true, false}, 1769 [KIT_CG_INTRIN_WFI] = {INTRIN_WFI, "wfi", true, false}, 1770 [KIT_CG_INTRIN_WFE] = {INTRIN_WFE, "wfe", true, false}, 1771 [KIT_CG_INTRIN_SEV] = {INTRIN_SEV, "sev", true, false}, 1772 [KIT_CG_INTRIN_CORO_SWITCH] = {INTRIN_NONE, "coro_switch", false, false}, 1773 [KIT_CG_INTRIN_FRAME_ADDRESS] = 1774 {INTRIN_FRAME_ADDRESS, "frame_address", false, false}, 1775 [KIT_CG_INTRIN_RETURN_ADDRESS] = 1776 {INTRIN_RETURN_ADDRESS, "return_address", false, false}, 1777 }; 1778 1779 _Static_assert(sizeof(kIntrinTable) / sizeof(kIntrinTable[0]) == 1780 KIT_CG_INTRIN_RETURN_ADDRESS + 1, 1781 "kIntrinTable must have exactly one row per KitCgIntrinsic"); 1782 1783 /* Bounds-guarded row lookup: an out-of-range intrinsic falls back to the NONE 1784 * row, preserving the defensive `default:` behavior the four switches carried 1785 * before they collapsed into kIntrinTable. */ 1786 static const IntrinDesc* intrin_desc(KitCgIntrinsic intrin) { 1787 static const IntrinDesc none = {INTRIN_NONE, NULL, false, false}; 1788 unsigned i = (unsigned)intrin; 1789 return i < sizeof(kIntrinTable) / sizeof(kIntrinTable[0]) ? &kIntrinTable[i] 1790 : &none; 1791 } 1792 1793 IntrinKind api_map_intrinsic(KitCg* g, KitCgIntrinsic intrin, 1794 KitCgTypeId result_type) { 1795 /* Width-by-type: backends derive operand width from the result type, so the 1796 * mapping no longer needs the size here. */ 1797 (void)g; 1798 (void)result_type; 1799 return intrin_desc(intrin)->kind; 1800 } 1801 1802 int api_intrinsic_is_void(KitCgIntrinsic intrin) { 1803 return intrin_desc(intrin)->is_void; 1804 } 1805 1806 int api_intrinsic_is_overflow(KitCgIntrinsic intrin) { 1807 return intrin_desc(intrin)->is_overflow; 1808 } 1809 1810 const char* api_intrinsic_name(KitCgIntrinsic intrin) { 1811 const char* name = intrin_desc(intrin)->name; 1812 return name ? name : "intrinsic"; 1813 } 1814 1815 void kit_cg_intrinsic(KitCg* g, KitCgIntrinsic intrin, uint32_t nargs, 1816 KitCgTypeId result_type) { 1817 CgTarget* T; 1818 KitCgTypeId rty; 1819 KitCgTypeId int_ty; 1820 IntrinKind kind; 1821 ApiSValue* svs; 1822 Operand* args; 1823 Operand dsts[2]; 1824 u32 ndst = 0; 1825 Heap* h; 1826 if (!g) return; 1827 /* clz/ctz/popcount/bswap on a split 64-bit value cannot use the backend's 1828 * single-register software sequence. Route them to the compiler-rt __*di2 1829 * helpers, which decompose into 32-bit operations. (32-bit forms still lower 1830 * inline.) */ 1831 if (nargs == 1 && api_wide64_stack_top(g, 0)) { 1832 const char* name = NULL; 1833 KitCgTypeId i32 = builtin_id(KIT_CG_BUILTIN_I32); 1834 KitCgTypeId i64 = builtin_id(KIT_CG_BUILTIN_I64); 1835 KitCgTypeId ret = i32; 1836 switch (intrin) { 1837 case KIT_CG_INTRIN_CLZ: name = "__clzdi2"; break; 1838 case KIT_CG_INTRIN_CTZ: name = "__ctzdi2"; break; 1839 case KIT_CG_INTRIN_POPCOUNT: name = "__popcountdi2"; break; 1840 case KIT_CG_INTRIN_BSWAP: name = "__bswapdi2"; ret = i64; break; 1841 default: break; 1842 } 1843 if (name) { 1844 ApiSValue arg = api_pop(g); 1845 KitCgTypeId ps[1] = {i64}; 1846 api_runtime_call_values(g, name, ret, ps, 1, &arg); 1847 if (ret == i32 && api_unalias_type(g->c, result_type) != i32) 1848 api_cg_convert_kind(g, result_type, CV_ZEXT); 1849 return; 1850 } 1851 } 1852 /* __builtin_*_overflow on a split 64-bit operand pair traps in the native 1853 * backend (it only models single-register overflow). Legalize all 6 forms 1854 * inline as 2-lane / 4-lane ops, pushing [value, ok] like the native path. 1855 * Gated on both operands being wide64 so other targets are unchanged. */ 1856 if (nargs == 2 && api_intrinsic_is_overflow(intrin) && 1857 api_wide64_stack_top(g, 0) && api_wide64_stack_top(g, 1)) { 1858 api_wide64_overflow_inline(g, intrin); 1859 return; 1860 } 1861 if (nargs == 2 && intrin == KIT_CG_INTRIN_EXPECT && 1862 api_wide64_stack_top(g, 1)) { 1863 ApiSValue expected = api_pop(g); 1864 ApiSValue val = api_pop(g); 1865 api_release(g, &expected); 1866 api_push(g, val); 1867 return; 1868 } 1869 T = g->target; 1870 h = g->c->ctx->heap; 1871 rty = resolve_type(g->c, result_type); 1872 int_ty = builtin_id(KIT_CG_BUILTIN_I32); 1873 kind = api_map_intrinsic(g, intrin, result_type); 1874 if (!kit_cg_target_supports_intrinsic(g->c, intrin) || kind == INTRIN_NONE) { 1875 compiler_panic( 1876 g->c, g->cur_loc, "KitCg: target '%s' does not support intrinsic '%s'", 1877 arch_kind_name(g->c->target.arch), api_intrinsic_name(intrin)); 1878 return; 1879 } 1880 1881 svs = NULL; 1882 args = NULL; 1883 if (nargs) { 1884 svs = (ApiSValue*)h->alloc(h, sizeof(*svs) * nargs, _Alignof(ApiSValue)); 1885 args = (Operand*)h->alloc(h, sizeof(*args) * nargs, _Alignof(Operand)); 1886 memset(args, 0, sizeof(*args) * nargs); 1887 for (u32 i = 0; i < nargs; ++i) { 1888 u32 idx = nargs - 1u - i; 1889 KitCgTypeId aty; 1890 svs[idx] = api_pop(g); 1891 aty = api_sv_type(&svs[idx]); 1892 if (api_sv_op_is(&svs[idx], OPK_IMM) && 1893 (intrin == KIT_CG_INTRIN_EXPECT || 1894 intrin == KIT_CG_INTRIN_ASSUME_ALIGNED || 1895 intrin == KIT_CG_INTRIN_PREFETCH || intrin == KIT_CG_INTRIN_DMB || 1896 intrin == KIT_CG_INTRIN_DSB || 1897 intrin == KIT_CG_INTRIN_FRAME_ADDRESS || 1898 intrin == KIT_CG_INTRIN_RETURN_ADDRESS)) { 1899 args[idx] = svs[idx].op; 1900 } else { 1901 args[idx] = api_force_local(g, &svs[idx], aty); 1902 } 1903 } 1904 } 1905 1906 if (api_intrinsic_is_overflow(intrin)) { 1907 KitCgTypeId vty = rty ? rty : (nargs ? api_sv_type(&svs[0]) : int_ty); 1908 KitCgTypeId bool_ty = builtin_id(KIT_CG_BUILTIN_BOOL); 1909 CGLocal rr = api_alloc_temp_local(g, vty); 1910 CGLocal ok = api_alloc_temp_local(g, bool_ty); 1911 dsts[0] = api_op_local(rr, vty); 1912 dsts[1] = api_op_local(ok, bool_ty); 1913 ndst = 2; 1914 } else if (!api_intrinsic_is_void(intrin) && !cg_type_is_void(g->c, rty)) { 1915 CGLocal rr = api_alloc_temp_local(g, rty); 1916 dsts[0] = api_op_local(rr, rty); 1917 ndst = 1; 1918 } 1919 1920 T->intrinsic(T, kind, ndst ? dsts : NULL, ndst, args, nargs); 1921 1922 for (u32 i = 0; i < nargs; ++i) api_release(g, &svs[i]); 1923 if (svs) h->free(h, svs, sizeof(*svs) * nargs); 1924 if (args) h->free(h, args, sizeof(*args) * nargs); 1925 1926 if (api_intrinsic_is_overflow(intrin)) { 1927 api_push(g, api_make_sv(dsts[0], dsts[0].type)); 1928 api_push(g, api_make_sv(dsts[1], dsts[1].type)); 1929 } else if (ndst == 1) { 1930 api_push(g, api_make_sv(dsts[0], rty)); 1931 } 1932 } 1933 1934 /* ============================================================ 1935 * Atomics (stub) 1936 * ============================================================ */ 1937 1938 KitCgTypeId api_atomic_pointee(KitCg* g, KitCgTypeId pty, const char* who) { 1939 KitCgTypeId pointee = cg_type_pointee(g->c, pty); 1940 if (!pointee) { 1941 compiler_panic(g->c, g->cur_loc, "%.*s: operand is not a pointer", 1942 SLICE_ARG(slice_from_cstr(who))); 1943 return builtin_id(KIT_CG_BUILTIN_I32); 1944 } 1945 return pointee; 1946 }