kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 01aa94a3053b9d39aa5660b4b15b51ff228dc200
parent 22ad0e948be159d0c73851107fc3ac6534b3936e
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Tue, 19 May 2026 07:32:54 -0700

Add binary128 long double target tests

Diffstat:
Adoc/C11_LONG_DOUBLE_CHECKLIST.md | 96+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Minclude/cfree/cg.h | 1+
Mlang/c/pp/pp.c | 46+++++++++++++++++++++++++++++++---------------
Mlang/c/type/type.c | 7+++++++
Mrt/lib/fp_tf/fp_tf.c | 44++++++++++++++++++++++++++++++++++++++++++++
Msrc/abi/abi_rv64.c | 20++++++++++++++++++++
Msrc/api/cg.c | 506+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
Msrc/arch/aa64/emit.c | 53++++++++++++++++++++++++++++++++++++++++-------------
Msrc/arch/aa64/internal.h | 22++++++++++++++++++++++
Msrc/arch/aa64/ops.c | 122+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------
Msrc/arch/rv64/internal.h | 2++
Msrc/arch/x64/internal.h | 2++
Atest/parse/cases/i128_01_layout.c | 19+++++++++++++++++++
Atest/parse/cases/i128_01_layout.expected | 1+
Atest/parse/cases/i128_02_literal_storage.c | 11+++++++++++
Atest/parse/cases/i128_02_literal_storage.expected | 1+
Atest/parse/cases/i128_03_add_sub_carry.c | 12++++++++++++
Atest/parse/cases/i128_03_add_sub_carry.expected | 1+
Atest/parse/cases/i128_04_mul_high_half.c | 9+++++++++
Atest/parse/cases/i128_04_mul_high_half.expected | 1+
Atest/parse/cases/i128_05_div_mod.c | 12++++++++++++
Atest/parse/cases/i128_05_div_mod.expected | 1+
Atest/parse/cases/i128_06_shifts_bitwise.c | 11+++++++++++
Atest/parse/cases/i128_06_shifts_bitwise.expected | 1+
Atest/parse/cases/i128_07_compare.c | 17+++++++++++++++++
Atest/parse/cases/i128_07_compare.expected | 1+
Atest/parse/cases/i128_08_signed_shift_convert.c | 9+++++++++
Atest/parse/cases/i128_08_signed_shift_convert.expected | 1+
Atest/parse/cases/i128_09_call_return.c | 17+++++++++++++++++
Atest/parse/cases/i128_09_call_return.expected | 1+
Atest/parse/cases/i128_10_struct_storage.c | 16++++++++++++++++
Atest/parse/cases/i128_10_struct_storage.expected | 1+
Atest/parse/cases/i128_11_union_lanes.c | 15+++++++++++++++
Atest/parse/cases/i128_11_union_lanes.expected | 1+
Atest/parse/cases/i128_12_global_init.c | 8++++++++
Atest/parse/cases/i128_12_global_init.expected | 1+
Atest/parse/cases/ldbl128_01_layout_macros.c | 14++++++++++++++
Atest/parse/cases/ldbl128_01_layout_macros.expected | 1+
Atest/parse/cases/ldbl128_02_literal_to_int.c | 6++++++
Atest/parse/cases/ldbl128_02_literal_to_int.expected | 1+
Atest/parse/cases/ldbl128_03_arith.c | 7+++++++
Atest/parse/cases/ldbl128_03_arith.expected | 1+
Atest/parse/cases/ldbl128_04_conversions.c | 11+++++++++++
Atest/parse/cases/ldbl128_04_conversions.expected | 1+
Atest/parse/cases/ldbl128_05_compare.c | 13+++++++++++++
Atest/parse/cases/ldbl128_05_compare.expected | 1+
Atest/parse/cases/ldbl128_06_call_return.c | 13+++++++++++++
Atest/parse/cases/ldbl128_06_call_return.expected | 1+
Atest/parse/cases/ldbl128_07_struct_storage.c | 14++++++++++++++
Atest/parse/cases/ldbl128_07_struct_storage.expected | 1+
Atest/parse/cases/ldbl128_08_literal_bits.c | 13+++++++++++++
Atest/parse/cases/ldbl128_08_literal_bits.expected | 1+
Atest/parse/cases/ldbl128_09_global_init.c | 7+++++++
Atest/parse/cases/ldbl128_09_global_init.expected | 1+
Atest/parse/cases/ldbl128_10_unary_neg.c | 6++++++
Atest/parse/cases/ldbl128_10_unary_neg.expected | 1+
Atest/parse/cases/ldbl128_11_array_copy.c | 8++++++++
Atest/parse/cases/ldbl128_11_array_copy.expected | 1+
Atest/parse/cases/ldbl128_12_stack_args.c | 12++++++++++++
Atest/parse/cases/ldbl128_12_stack_args.expected | 1+
Atest/parse/cases/ldbl128_13_mixed_arith.c | 7+++++++
Atest/parse/cases/ldbl128_13_mixed_arith.expected | 1+
Atest/parse/cases/ldbl128_14_struct_return.c | 17+++++++++++++++++
Atest/parse/cases/ldbl128_14_struct_return.expected | 1+
Mtest/parse/harness/parse_runner.c | 82+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
Mtest/parse/run.sh | 14++++++++++++--
66 files changed, 1277 insertions(+), 70 deletions(-)

diff --git a/doc/C11_LONG_DOUBLE_CHECKLIST.md b/doc/C11_LONG_DOUBLE_CHECKLIST.md @@ -0,0 +1,96 @@ +# C11 `long double` support checklist + +Status snapshot: 2026-05-19. + +Goal: make `long double` target-correct instead of aliasing it to `double`. +Keep this red-green: add the smallest target-scoped case first, then make the +implementation pass it on the target that owns that format. + +## Target profiles + +- [ ] AArch64 Linux: IEEE binary128 `long double`. + ABI: passed and returned in SIMD/FP `q` registers when register slots are + available. Arithmetic and conversions lower to compiler-rt `*tf*` + helpers. +- [ ] RV64 Linux LP64D: IEEE binary128 `long double`. + ABI: passed and returned as two integer XLEN eightbytes because FLEN is + 64. Arithmetic and conversions lower to compiler-rt `*tf*` helpers. +- [ ] AArch64 Darwin: `long double == double`. + Keep the current binary64 behavior and predefined macros for this OS. +- [ ] x86-64 SysV/Darwin: x87 80-bit extended precision in 16-byte storage. + Defer as a separate backend slice; it needs x87 load/store/arithmetic, + x87 return handling, and `LDBL_*` macro updates. Do not block the + binary128 work on this. + +## Support target for the binary128 slice + +- [ ] Complete the 16-byte scalar `__int128` path before treating binary128 as + green: layout, locals/globals, constants, arithmetic, shifts, compares, + calls/returns, aggregate fields, unions, and static initialization. +- [ ] Add a target long-double profile query used by both the frontend and CG: + format, storage size, alignment, macro values, and ABI classification. +- [ ] Add a distinct CG type for binary128 `long double`; `TY_LDOUBLE` must not + map to `F64` on AArch64/RV64 Linux. +- [ ] Emit target-correct `__LDBL_*` and `__DECIMAL_DIG__` predefined macros + for binary128 targets. +- [ ] Encode `L` floating constants as binary128 bytes without narrowing their + storage type to `double`. +- [ ] Support binary128 local/global storage, assignment, struct fields, and + return values. +- [ ] Lower binary128 arithmetic to runtime helpers: + `__addtf3`, `__subtf3`, `__multf3`, and `__divtf3`. +- [ ] Lower binary128 comparisons through compiler-rt compare helpers. +- [ ] Lower integer, float, and double conversions through compiler-rt helpers: + `__float*tf`, `__fix*tf*`, `__extend{s,d}ftf2`, and + `__trunctf{s,d}f2`. +- [ ] Teach AArch64 codegen to move 16-byte FP values through Q-register + load/store/copy paths. +- [ ] Teach RV64 ABI movement to pass/return binary128 values as two integer + parts, backed by memory in CG. +- [ ] Keep runtime linkage using the existing `rt/lib/fp_tf/fp_tf.c` and + `rt/lib/fp_ti/fp_ti.c` objects for the binary128 runtime variants. + +## Red tests + +The support-target tests live under `test/parse/cases/i128_*.c` and +`test/parse/cases/ldbl128_*.c`. Run the `i128` group first; those cases isolate +the 16-byte integer substrate needed by compiler-rt binary128 helpers and by +the memory-backed long-double lowering. + +```sh +CFREE_TEST_ARCH=aa64 CFREE_TEST_FILTER=i128 CFREE_OPT_LEVELS=0 make test-parse +CFREE_TEST_ARCH=rv64 CFREE_TEST_FILTER=i128 CFREE_OPT_LEVELS=0 make test-parse +CFREE_TEST_ARCH=aa64 CFREE_TEST_FILTER=ldbl128 make test-parse +CFREE_TEST_ARCH=rv64 CFREE_TEST_FILTER=ldbl128 make test-parse +``` + +The `ldbl128` cases intentionally return success on non-binary128 targets so +x87 work can land later without hiding the binary128 regression signal. + +Coverage intent: + +- `i128_01` through `i128_12`: target layout/alignment, literal storage, + add/sub carry, multiply high-half behavior, div/mod, shifts/bitwise + operations, signed and unsigned compares, signed shifts/conversions, + calls/returns, aggregate fields, union lane visibility, and global + initialization. +- `ldbl128_01` through `ldbl128_14`: target macros/layout, literal decoding, + arithmetic helpers, conversions, comparisons, calls/returns, struct and + array storage, raw binary128 bits, globals, unary negation, stack + arguments, mixed arithmetic, and aggregate return. + +## Done criteria + +- [ ] `CFREE_TEST_ARCH=aa64 CFREE_TEST_FILTER=ldbl128 make test-parse` passes + with `CFREE_TEST_ALLOW_SKIP` unset. +- [ ] `CFREE_TEST_ARCH=rv64 CFREE_TEST_FILTER=ldbl128 make test-parse` passes + with `CFREE_TEST_ALLOW_SKIP` unset. +- [ ] `CFREE_TEST_ARCH=aa64 CFREE_TEST_FILTER=i128 make test-parse` passes + with `CFREE_TEST_ALLOW_SKIP` unset. +- [ ] `CFREE_TEST_ARCH=rv64 CFREE_TEST_FILTER=i128 make test-parse` passes + with `CFREE_TEST_ALLOW_SKIP` unset. +- [ ] `CFREE_TEST_FILTER=6_7_2_12_long_double make test-parse` passes on + AArch64 Linux and RV64 Linux without a `.skip` sidecar. +- [ ] `make rt` still builds the default runtime archives. +- [ ] `make test-rt-headers test-rt-runtime` stays green for the default + runtime targets. diff --git a/include/cfree/cg.h b/include/cfree/cg.h @@ -35,6 +35,7 @@ typedef enum CfreeCgBuiltinType { CFREE_CG_BUILTIN_I128, CFREE_CG_BUILTIN_F32, CFREE_CG_BUILTIN_F64, + CFREE_CG_BUILTIN_F128, CFREE_CG_BUILTIN_VARARG_STATE, CFREE_CG_BUILTIN_COUNT, } CfreeCgBuiltinType; diff --git a/lang/c/pp/pp.c b/lang/c/pp/pp.c @@ -464,22 +464,38 @@ static void pp_register_target_predefined(Pp* pp) { pp_define(pp, "__ATOMIC_LLONG_LOCK_FREE", "2"); pp_define(pp, "__ATOMIC_POINTER_LOCK_FREE", "2"); - /* The C frontend currently lowers long double as binary64. Keep the - * compiler-predefined floating macros aligned with that implementation. */ pp_define(pp, "__FLT_EVAL_METHOD__", "0"); - pp_define(pp, "__LDBL_HAS_DENORM__", "1"); - pp_define(pp, "__LDBL_MANT_DIG__", "53"); - pp_define(pp, "__LDBL_DECIMAL_DIG__", "17"); - pp_define(pp, "__LDBL_DIG__", "15"); - pp_define(pp, "__LDBL_MIN_EXP__", "(-1021)"); - pp_define(pp, "__LDBL_MIN_10_EXP__", "(-307)"); - pp_define(pp, "__LDBL_MAX_EXP__", "1024"); - pp_define(pp, "__LDBL_MAX_10_EXP__", "308"); - pp_define(pp, "__LDBL_MAX__", "0x1.fffffffffffffp+1023L"); - pp_define(pp, "__LDBL_EPSILON__", "0x1p-52L"); - pp_define(pp, "__LDBL_MIN__", "0x1p-1022L"); - pp_define(pp, "__LDBL_DENORM_MIN__", "0x1p-1074L"); - pp_define(pp, "__DECIMAL_DIG__", "17"); + if ((target.arch == CFREE_ARCH_ARM_64 || target.arch == CFREE_ARCH_RV64) && + target.os == CFREE_OS_LINUX) { + pp_define(pp, "__LDBL_HAS_DENORM__", "1"); + pp_define(pp, "__LDBL_MANT_DIG__", "113"); + pp_define(pp, "__LDBL_DECIMAL_DIG__", "36"); + pp_define(pp, "__LDBL_DIG__", "33"); + pp_define(pp, "__LDBL_MIN_EXP__", "(-16381)"); + pp_define(pp, "__LDBL_MIN_10_EXP__", "(-4931)"); + pp_define(pp, "__LDBL_MAX_EXP__", "16384"); + pp_define(pp, "__LDBL_MAX_10_EXP__", "4932"); + pp_define(pp, "__LDBL_MAX__", + "0x1.ffffffffffffffffffffffffffffp+16383L"); + pp_define(pp, "__LDBL_EPSILON__", "0x1p-112L"); + pp_define(pp, "__LDBL_MIN__", "0x1p-16382L"); + pp_define(pp, "__LDBL_DENORM_MIN__", "0x1p-16494L"); + pp_define(pp, "__DECIMAL_DIG__", "36"); + } else { + pp_define(pp, "__LDBL_HAS_DENORM__", "1"); + pp_define(pp, "__LDBL_MANT_DIG__", "53"); + pp_define(pp, "__LDBL_DECIMAL_DIG__", "17"); + pp_define(pp, "__LDBL_DIG__", "15"); + pp_define(pp, "__LDBL_MIN_EXP__", "(-1021)"); + pp_define(pp, "__LDBL_MIN_10_EXP__", "(-307)"); + pp_define(pp, "__LDBL_MAX_EXP__", "1024"); + pp_define(pp, "__LDBL_MAX_10_EXP__", "308"); + pp_define(pp, "__LDBL_MAX__", "0x1.fffffffffffffp+1023L"); + pp_define(pp, "__LDBL_EPSILON__", "0x1p-52L"); + pp_define(pp, "__LDBL_MIN__", "0x1p-1022L"); + pp_define(pp, "__LDBL_DENORM_MIN__", "0x1p-1074L"); + pp_define(pp, "__DECIMAL_DIG__", "17"); + } } Pp* pp_new(Compiler* c) { diff --git a/lang/c/type/type.c b/lang/c/type/type.c @@ -463,6 +463,7 @@ static int type_is_signed_integer_for_cg(const Type* t) { static CfreeCgTypeId type_cg_builtin(CfreeCompiler* c, TypeKind kind) { CfreeCgBuiltinTypes b = cfree_cg_builtin_types(c); + CfreeTarget target = cfree_compiler_target(c); switch (kind) { case TY_VOID: return b.id[CFREE_CG_BUILTIN_VOID]; @@ -489,7 +490,13 @@ static CfreeCgTypeId type_cg_builtin(CfreeCompiler* c, TypeKind kind) { case TY_FLOAT: return b.id[CFREE_CG_BUILTIN_F32]; case TY_DOUBLE: + return b.id[CFREE_CG_BUILTIN_F64]; case TY_LDOUBLE: + if ((target.arch == CFREE_ARCH_ARM_64 || + target.arch == CFREE_ARCH_RV64) && + target.os == CFREE_OS_LINUX) { + return b.id[CFREE_CG_BUILTIN_F128]; + } return b.id[CFREE_CG_BUILTIN_F64]; default: break; diff --git a/rt/lib/fp_tf/fp_tf.c b/rt/lib/fp_tf/fp_tf.c @@ -13,6 +13,50 @@ #include "tf_supplement.h" #endif +// cfree's freestanding aarch64/RV64 backend may lower 32-bit divisions in +// this runtime TU back into compiler-rt spellings. Keep these leaf helpers in +// the same archive member as the quad helpers so demand-loaded links resolve +// the complete member closure. +COMPILER_RT_ABI su_int __udivsi3(su_int n, su_int d) { + const unsigned bits = sizeof(su_int) * CHAR_BIT; + su_int q = 0; + su_int r = 0; + for (unsigned i = bits; i > 0; --i) { + r = (su_int)((r << 1) | ((n >> (i - 1u)) & 1u)); + if (r >= d) { + r = (su_int)(r - d); + q |= (su_int)1u << (i - 1u); + } + } + return q; +} + +COMPILER_RT_ABI su_int __udivmodsi4(su_int n, su_int d, su_int* rem) { + const unsigned bits = sizeof(su_int) * CHAR_BIT; + su_int q = 0; + su_int r = 0; + for (unsigned i = bits; i > 0; --i) { + r = (su_int)((r << 1) | ((n >> (i - 1u)) & 1u)); + if (r >= d) { + r = (su_int)(r - d); + q |= (su_int)1u << (i - 1u); + } + } + if (rem) + *rem = r; + return q; +} + +COMPILER_RT_ABI si_int __divsi3(si_int a, si_int b) { + su_int sa = (su_int)(a >> 31); + su_int sb = (su_int)(b >> 31); + su_int ua = ((su_int)a ^ sa) - sa; + su_int ub = ((su_int)b ^ sb) - sb; + su_int sign = sa ^ sb; + su_int q = __udivsi3(ua, ub); + return (si_int)((q ^ sign) - sign); +} + // ---- addtf3.c ---- #define QUAD_PRECISION #include "fp_add_impl.inc" diff --git a/src/abi/abi_rv64.c b/src/abi/abi_rv64.c @@ -20,6 +20,26 @@ static void classify_scalar(TargetABI* a, CfreeCgTypeId t, ABIArgInfo* out) { ABITypeInfo ti = abi_internal_type_info(a, t); + if (ti.scalar_kind == ABI_SC_FLOAT && ti.size == 16) { + ABIArgPart* parts = arena_array(a->c->tu, ABIArgPart, 2); + memset(parts, 0, sizeof(ABIArgPart) * 2); + parts[0].cls = ABI_CLASS_INT; + parts[0].loc = ABI_LOC_REG; + parts[0].size = 8; + parts[0].align = 8; + parts[0].src_offset = 0; + parts[1].cls = ABI_CLASS_INT; + parts[1].loc = ABI_LOC_REG; + parts[1].size = 8; + parts[1].align = 8; + parts[1].src_offset = 8; + out->kind = ABI_ARG_DIRECT; + out->flags = ABI_AF_NONE; + out->parts = parts; + out->nparts = 2; + out->indirect_align = 0; + return; + } out->kind = ABI_ARG_DIRECT; out->flags = ABI_AF_NONE; out->indirect_align = 0; diff --git a/src/api/cg.c b/src/api/cg.c @@ -150,6 +150,12 @@ static void builtin_cg_type_init(Compiler *c, CgType *out, out->align = 8; out->fp.width = 64; break; + case CFREE_CG_BUILTIN_F128: + out->kind = CFREE_CG_TYPE_FLOAT; + out->size = 16; + out->align = 16; + out->fp.width = 128; + break; case CFREE_CG_BUILTIN_VARARG_STATE: { ABITypeInfo info = abi_va_list_info(c->abi); out->kind = CFREE_CG_TYPE_VARARG_STATE; @@ -1283,12 +1289,20 @@ static DebugTypeId api_debug_type(CfreeCg *g, CfreeCgTypeId id) { static u8 api_type_class(CfreeCgTypeId ty) { if (ty == builtin_id(CFREE_CG_BUILTIN_F32) || - ty == builtin_id(CFREE_CG_BUILTIN_F64)) { + ty == builtin_id(CFREE_CG_BUILTIN_F64) || + ty == builtin_id(CFREE_CG_BUILTIN_F128)) { return RC_FP; } return RC_INT; } +static int api_is_f128_type(Compiler *c, CfreeCgTypeId ty) { + const CgType *cg; + ty = api_unalias_type(c, ty); + cg = cg_type_get(c, ty); + return cg && cg->kind == CFREE_CG_TYPE_FLOAT && cg->fp.width == 128; +} + static Operand api_op_imm(i64 v, CfreeCgTypeId ty) { Operand o; memset(&o, 0, sizeof o); @@ -2091,7 +2105,7 @@ static void api_release_arg_storage(CfreeCg *g, Operand *storage) { api_free_reg(g, storage->v.reg, storage->cls); } else if (storage->kind == OPK_LOCAL && storage->cls < 3) { CfreeCgTypeId ty = storage->type; - if (cg_type_is_aggregate(g->c, ty)) + if (cg_type_is_aggregate(g->c, ty) || api_is_f128_type(g->c, ty)) return; api_return_spill_slot(g, storage->v.frame_slot, storage->cls); } else if (storage->kind == OPK_INDIRECT) { @@ -3123,6 +3137,196 @@ void cfree_cg_func_end(CfreeCg *g) { memset(g->scopes, 0, sizeof g->scopes); } +static void api_call_symbol_common(CfreeCg *g, CfreeCgSym sym, uint32_t nargs, + CfreeCgCallAttrs attrs); + +static FrameSlot api_f128_temp_slot(CfreeCg *g, CfreeCgTypeId ty) { + FrameSlotDesc fsd; + memset(&fsd, 0, sizeof fsd); + fsd.type = ty; + fsd.size = 16; + fsd.align = 16; + fsd.kind = FS_LOCAL; + fsd.flags = FSF_ADDR_TAKEN; + return g->target->frame_slot(g->target, &fsd); +} + +static u64 api_u64_from_target_bytes(CfreeCg *g, const u8 *bytes) { + u64 v = 0; + for (u32 i = 0; i < 8; ++i) { + u32 shift = g->c->target.big_endian ? (7u - i) * 8u : i * 8u; + v |= (u64)bytes[i] << shift; + } + return v; +} + +static void api_store_f128_bytes(CfreeCg *g, FrameSlot slot, + CfreeCgTypeId ty, const u8 bytes[16]) { + CfreeCgTypeId i64_ty = builtin_id(CFREE_CG_BUILTIN_I64); + CfreeCgTypeId ptr_ty = cg_type_ptr_to(g->c, ty); + Reg ar = api_alloc_reg_or_spill(g, RC_INT, ptr_ty); + Operand base = api_op_reg(ar, ptr_ty); + MemAccess ma; + memset(&ma, 0, sizeof ma); + ma.type = i64_ty; + ma.size = 8; + ma.align = 8; + g->target->addr_of(g->target, base, api_op_local(slot, ty)); + g->target->store(g->target, api_op_indirect(ar, 0, i64_ty), + api_op_imm((i64)api_u64_from_target_bytes(g, bytes), + i64_ty), + ma); + g->target->store(g->target, api_op_indirect(ar, 8, i64_ty), + api_op_imm((i64)api_u64_from_target_bytes(g, bytes + 8), + i64_ty), + ma); + api_free_reg(g, ar, RC_INT); +} + +static void api_encode_binary128_from_double(CfreeCg *g, double value, + u8 out[16]) { + union { + double d; + u64 u; + } in; + unsigned __int128 rep = 0; + u64 frac; + u32 sign; + u32 exp; + in.d = value; + sign = (u32)(in.u >> 63); + exp = (u32)((in.u >> 52) & 0x7ffu); + frac = in.u & 0x000fffffffffffffull; + if (sign) + rep |= ((unsigned __int128)1) << 127; + if (exp == 0x7ffu) { + rep |= ((unsigned __int128)0x7fffu) << 112; + if (frac) { + rep |= ((unsigned __int128)frac) << (112u - 52u); + rep |= ((unsigned __int128)1) << 111; + } + } else if (exp != 0 || frac != 0) { + i32 e; + u64 sig; + if (exp == 0) { + e = -1022; + sig = frac; + while ((sig & (1ull << 52)) == 0) { + sig <<= 1; + --e; + } + frac = sig & 0x000fffffffffffffull; + } else { + e = (i32)exp - 1023; + } + rep |= ((unsigned __int128)(u32)(e + 16383)) << 112; + rep |= ((unsigned __int128)frac) << (112u - 52u); + } + for (u32 i = 0; i < 16; ++i) { + u32 shift = g->c->target.big_endian ? (15u - i) * 8u : i * 8u; + out[i] = (u8)(rep >> shift); + } +} + +static ApiSValue api_make_f128_const(CfreeCg *g, double value, + CfreeCgTypeId ty) { + u8 bytes[16]; + FrameSlot slot; + api_encode_binary128_from_double(g, value, bytes); + slot = api_f128_temp_slot(g, ty); + api_store_f128_bytes(g, slot, ty, bytes); + return api_make_lv(api_op_local(slot, ty), ty); +} + +static ApiSValue api_f128_materialize_lvalue(CfreeCg *g, ApiSValue *v, + CfreeCgTypeId ty) { + if (v->op.kind == OPK_LOCAL || v->op.kind == OPK_INDIRECT) { + v->type = ty; + v->op.type = ty; + v->lvalue = 1; + return *v; + } + if (v->op.kind == OPK_GLOBAL) { + FrameSlot slot = api_f128_temp_slot(g, ty); + Operand dst_lv = api_op_local(slot, ty); + Operand dst_addr; + Operand src_addr; + AggregateAccess agg; + ApiSValue tmp = api_make_lv(dst_lv, ty); + ApiSValue src = api_make_lv(v->op, ty); + dst_addr = api_lvalue_addr(g, &tmp, cg_type_ptr_to(g->c, ty)); + src_addr = api_lvalue_addr(g, &src, cg_type_ptr_to(g->c, ty)); + memset(&agg, 0, sizeof agg); + agg.size = 16; + agg.align = 16; + g->target->copy_bytes(g->target, dst_addr, src_addr, agg); + api_free_reg(g, dst_addr.v.reg, RC_INT); + api_free_reg(g, src_addr.v.reg, RC_INT); + return api_make_lv(dst_lv, ty); + } + if (v->op.kind == OPK_REG) { + FrameSlot slot = api_f128_temp_slot(g, ty); + Operand dst = api_op_local(slot, ty); + g->target->store(g->target, dst, v->op, api_mem_for_lvalue(g, &dst, ty)); + return api_make_lv(dst, ty); + } + if (v->op.kind == OPK_IMM) { + u8 bytes[16]; + u64 lo = (u64)v->op.v.imm; + memset(bytes, 0, sizeof bytes); + for (u32 i = 0; i < 8; ++i) { + u32 idx = g->c->target.big_endian ? 15u - i : i; + bytes[idx] = (u8)(lo >> (i * 8u)); + } + FrameSlot slot = api_f128_temp_slot(g, ty); + api_store_f128_bytes(g, slot, ty, bytes); + return api_make_lv(api_op_local(slot, ty), ty); + } + compiler_panic(g->c, g->cur_loc, + "CfreeCg: binary128 value is not addressable (kind %u, op %u)", + (unsigned)v->kind, (unsigned)v->op.kind); + return *v; +} + +static CfreeCgSym api_runtime_helper(CfreeCg *g, const char *name, + CfreeCgTypeId ret, + const CfreeCgTypeId *params, + u32 nparams) { + CfreeCgFuncParam ps[3]; + CfreeCgFuncSig sig; + CfreeCgDecl decl; + if (nparams > 3) + return CFREE_CG_SYM_NONE; + memset(ps, 0, sizeof ps); + for (u32 i = 0; i < nparams; ++i) + ps[i].type = params[i]; + memset(&sig, 0, sizeof sig); + sig.ret = ret; + sig.params = ps; + sig.nparams = nparams; + sig.call_conv = CFREE_CG_CC_TARGET_C; + memset(&decl, 0, sizeof decl); + decl.kind = CFREE_CG_DECL_FUNC; + decl.linkage_name = pool_intern_cstr(g->c->global, name); + decl.display_name = decl.linkage_name; + decl.type = cfree_cg_type_func((CfreeCompiler *)g->c, sig); + decl.sym.bind = CFREE_SB_GLOBAL; + decl.sym.visibility = CFREE_CG_VIS_DEFAULT; + return cfree_cg_decl(g, decl); +} + +static void api_runtime_call_values(CfreeCg *g, const char *name, + CfreeCgTypeId ret, + const CfreeCgTypeId *params, + u32 nparams, ApiSValue *args) { + CfreeCgCallAttrs attrs; + CfreeCgSym sym = api_runtime_helper(g, name, ret, params, nparams); + memset(&attrs, 0, sizeof attrs); + for (u32 i = 0; i < nparams; ++i) + api_push(g, args[i]); + api_call_symbol_common(g, sym, nparams, attrs); +} + /* ============================================================ * Locals and params * ============================================================ */ @@ -3135,6 +3339,8 @@ static int api_local_requires_memory(CfreeCg *g, CfreeCgTypeId ty, CfreeCgLocalAttrs attrs) { if (api_source_flags_addr_taken(attrs.flags)) return 1; + if (api_is_f128_type(g->c, ty)) + return 1; return !(cg_type_is_int(g->c, ty) || cg_type_is_float(g->c, ty) || cg_type_is_ptr(g->c, ty)); } @@ -3336,6 +3542,10 @@ void cfree_cg_push_float(CfreeCg *g, double value, CfreeCgTypeId type) { ty = resolve_type(g->c, type); if (!ty) return; + if (api_is_f128_type(g->c, ty)) { + api_push(g, api_make_f128_const(g, value, ty)); + return; + } T = g->target; cb.type = ty; cb.size = (u32)abi_cg_sizeof(g->c->abi, type); @@ -3595,6 +3805,12 @@ void cfree_cg_load(CfreeCg *g, CfreeCgMemAccess access) { return; } api_require_scalar_mem_type(g, "load", ty); + if (api_is_f128_type(g->c, ty)) { + v.type = ty; + v.op.type = ty; + api_push(g, v); + return; + } if (v.source_local != CFREE_CG_LOCAL_NONE && api_local_const_load(g, v.source_local, access, &dst)) { api_release(g, &v); @@ -3752,6 +3968,32 @@ void cfree_cg_store(CfreeCg *g, CfreeCgMemAccess access) { return; } api_validate_memory_value(g, "store", ty, api_sv_type(&rv)); + if (api_is_f128_type(g->c, ty)) { + if (lv.source_local != CFREE_CG_LOCAL_NONE) { + api_local_const_clear(api_local_from_handle(g, lv.source_local)); + } else if (lv.op.kind == OPK_INDIRECT || lv.op.kind == OPK_GLOBAL || + (access.flags & CFREE_CG_MEM_VOLATILE)) { + api_local_const_memory_boundary(g); + } + if (api_is_lvalue_sv(&rv)) { + CfreeCgTypeId ptr_ty = cg_type_ptr_to(g->c, ty); + Operand dst_addr = api_lvalue_addr(g, &lv, ptr_ty); + Operand src_addr = api_lvalue_addr(g, &rv, ptr_ty); + AggregateAccess agg; + memset(&agg, 0, sizeof agg); + agg.size = 16; + agg.align = access.align ? access.align : 16; + T->copy_bytes(T, dst_addr, src_addr, agg); + api_free_reg(g, dst_addr.v.reg, RC_INT); + api_free_reg(g, src_addr.v.reg, RC_INT); + } else { + src = api_force_reg(g, &rv, ty); + T->store(T, lv.op, src, api_mem_from_access(g, &lv.op, access)); + } + api_release(g, &lv); + api_release(g, &rv); + return; + } api_ensure_reg(g, &lv); api_ensure_reg(g, &rv); if (api_sv_op_is_reg_or_imm(&rv)) { @@ -4060,6 +4302,52 @@ static void api_cg_convert_kind(CfreeCg *g, CfreeCgTypeId dst_type, api_push(g, v); return; } + if (ck == CV_BITCAST && abi_cg_sizeof(g->c->abi, sty) == 16 && + abi_cg_sizeof(g->c->abi, dty) == 16 && + (api_is_f128_type(g->c, sty) || api_is_f128_type(g->c, dty))) { + FrameSlot slot = api_f128_temp_slot(g, dty); + Operand dst_lv = api_op_local(slot, dty); + if (api_is_lvalue_sv(&v) || + v.op.kind == OPK_LOCAL || v.op.kind == OPK_INDIRECT || + v.op.kind == OPK_GLOBAL) { + CfreeCgTypeId ptr_ty = cg_type_ptr_to(g->c, dty); + ApiSValue src_lv = v; + Operand dst_addr; + Operand src_addr; + AggregateAccess agg; + src_lv.lvalue = 1; + dst_addr = api_lvalue_addr(g, &(ApiSValue){.op = dst_lv, + .type = dty, + .kind = SV_OPERAND, + .lvalue = 1}, + ptr_ty); + src_addr = api_lvalue_addr(g, &src_lv, cg_type_ptr_to(g->c, sty)); + memset(&agg, 0, sizeof agg); + agg.size = 16; + agg.align = 16; + g->target->copy_bytes(g->target, dst_addr, src_addr, agg); + api_free_reg(g, dst_addr.v.reg, RC_INT); + api_free_reg(g, src_addr.v.reg, RC_INT); + } else if (v.op.kind == OPK_REG) { + g->target->store(g->target, dst_lv, v.op, + api_mem_for_lvalue(g, &dst_lv, sty)); + } else if (v.op.kind == OPK_IMM) { + u8 bytes[16]; + u64 lo = (u64)v.op.v.imm; + memset(bytes, 0, sizeof bytes); + for (u32 i = 0; i < 8; ++i) { + u32 idx = g->c->target.big_endian ? 15u - i : i; + bytes[idx] = (u8)(lo >> (i * 8u)); + } + api_store_f128_bytes(g, slot, dty, bytes); + } else { + compiler_panic(g->c, g->cur_loc, + "CfreeCg: unsupported 16-byte bitcast source"); + } + api_release(g, &v); + api_push(g, api_make_lv(dst_lv, dty)); + return; + } src = api_force_reg(g, &v, sty); rr = api_alloc_reg_or_spill(g, api_type_class(dty), dty); @@ -4081,22 +4369,121 @@ void cfree_cg_int_cmp(CfreeCg *g, CfreeCgIntCmpOp op) { api_cg_cmp(g, api_map_int_cmp(op)); } +static const char *api_f128_binop_helper(CfreeCgFpBinOp op) { + switch (op) { + case CFREE_CG_FP_ADD: return "__addtf3"; + case CFREE_CG_FP_SUB: return "__subtf3"; + case CFREE_CG_FP_MUL: return "__multf3"; + case CFREE_CG_FP_DIV: return "__divtf3"; + case CFREE_CG_FP_REM: return NULL; + } + return NULL; +} + +static int api_f128_stack_top(CfreeCg *g, u32 depth) { + if (!g || g->sp <= depth) + return 0; + return api_is_f128_type(g->c, api_sv_type(&g->stack[g->sp - 1u - depth])); +} + +static void api_f128_call_unary(CfreeCg *g, const char *name, + CfreeCgTypeId ret, CfreeCgTypeId param) { + ApiSValue args[1]; + CfreeCgTypeId ps[1]; + args[0] = api_pop(g); + ps[0] = param; + api_runtime_call_values(g, name, ret, ps, 1, args); +} + void cfree_cg_fp_binop(CfreeCg *g, CfreeCgFpBinOp op, uint32_t flags) { (void)flags; if (op == CFREE_CG_FP_REM) { compiler_panic(g->c, g->cur_loc, "CfreeCg: FP remainder is unsupported"); return; } + if (api_f128_stack_top(g, 0) || api_f128_stack_top(g, 1)) { + CfreeCgTypeId f128 = builtin_id(CFREE_CG_BUILTIN_F128); + CfreeCgTypeId ps[2]; + ApiSValue args[2]; + const char *name = api_f128_binop_helper(op); + if (!name) + compiler_panic(g->c, g->cur_loc, "CfreeCg: FP remainder is unsupported"); + args[1] = api_pop(g); + args[0] = api_pop(g); + ps[0] = f128; + ps[1] = f128; + api_runtime_call_values(g, name, f128, ps, 2, args); + return; + } api_cg_binop(g, api_map_fp_binop(op), 0); } void cfree_cg_fp_unop(CfreeCg *g, CfreeCgFpUnOp op, uint32_t flags) { (void)flags; (void)op; + if (api_f128_stack_top(g, 0)) { + CfreeCgTypeId f128 = builtin_id(CFREE_CG_BUILTIN_F128); + CfreeCgTypeId ps[2]; + ApiSValue args[2]; + args[1] = api_pop(g); + args[0] = api_make_f128_const(g, 0.0, f128); + ps[0] = f128; + ps[1] = f128; + api_runtime_call_values(g, "__subtf3", f128, ps, 2, args); + return; + } api_cg_unop(g, UO_NEG, 0); } void cfree_cg_fp_cmp(CfreeCg *g, CfreeCgFpCmpOp op) { + if (api_f128_stack_top(g, 0) || api_f128_stack_top(g, 1)) { + CfreeCgTypeId f128 = builtin_id(CFREE_CG_BUILTIN_F128); + CfreeCgTypeId i32 = builtin_id(CFREE_CG_BUILTIN_I32); + CfreeCgTypeId ps[2]; + ApiSValue args[2]; + const char *name = "__eqtf2"; + CmpOp cmp = CMP_EQ; + switch (op) { + case CFREE_CG_FP_OEQ: + case CFREE_CG_FP_UEQ: + name = "__eqtf2"; + cmp = CMP_EQ; + break; + case CFREE_CG_FP_ONE: + case CFREE_CG_FP_UNE: + name = "__netf2"; + cmp = CMP_NE; + break; + case CFREE_CG_FP_OLT: + case CFREE_CG_FP_ULT: + name = "__lttf2"; + cmp = CMP_LT_S; + break; + case CFREE_CG_FP_OLE: + case CFREE_CG_FP_ULE: + name = "__letf2"; + cmp = CMP_LE_S; + break; + case CFREE_CG_FP_OGT: + case CFREE_CG_FP_UGT: + name = "__gttf2"; + cmp = CMP_GT_S; + break; + case CFREE_CG_FP_OGE: + case CFREE_CG_FP_UGE: + name = "__getf2"; + cmp = CMP_GE_S; + break; + } + args[1] = api_pop(g); + args[0] = api_pop(g); + ps[0] = f128; + ps[1] = f128; + api_runtime_call_values(g, name, i32, ps, 2, args); + cfree_cg_push_int(g, 0, i32); + api_cg_cmp(g, cmp); + return; + } api_cg_cmp(g, api_map_fp_cmp(op)); } @@ -4125,34 +4512,108 @@ void cfree_cg_bitcast(CfreeCg *g, CfreeCgTypeId dst) { } void cfree_cg_fpext(CfreeCg *g, CfreeCgTypeId dst) { + CfreeCgTypeId dty = resolve_type(g->c, dst); + if (api_is_f128_type(g->c, dty)) { + ApiSValue v = api_pop(g); + CfreeCgTypeId sty = api_unalias_type(g->c, api_sv_type(&v)); + const char *name = sty == builtin_id(CFREE_CG_BUILTIN_F32) + ? "__extendsftf2" + : "__extenddftf2"; + api_push(g, v); + api_f128_call_unary(g, name, dty, sty); + return; + } api_cg_convert_kind(g, dst, CV_FEXT); } void cfree_cg_fptrunc(CfreeCg *g, CfreeCgTypeId dst) { + CfreeCgTypeId dty = resolve_type(g->c, dst); + if (api_f128_stack_top(g, 0)) { + ApiSValue v = api_pop(g); + CfreeCgTypeId f128 = builtin_id(CFREE_CG_BUILTIN_F128); + const char *name = dty == builtin_id(CFREE_CG_BUILTIN_F32) + ? "__trunctfsf2" + : "__trunctfdf2"; + api_push(g, v); + api_f128_call_unary(g, name, dty, f128); + return; + } api_cg_convert_kind(g, dst, CV_FTRUNC); } void cfree_cg_sint_to_float(CfreeCg *g, CfreeCgTypeId dst, CfreeCgRounding rounding) { (void)rounding; + if (api_is_f128_type(g->c, resolve_type(g->c, dst))) { + ApiSValue v = api_pop(g); + CfreeCgTypeId sty = api_unalias_type(g->c, api_sv_type(&v)); + u32 sz = (u32)abi_cg_sizeof(g->c->abi, sty); + CfreeCgTypeId pty = sz > 8 ? builtin_id(CFREE_CG_BUILTIN_I128) + : (sz > 4 ? builtin_id(CFREE_CG_BUILTIN_I64) + : builtin_id(CFREE_CG_BUILTIN_I32)); + const char *name = sz > 8 ? "__floattitf" + : (sz > 4 ? "__floatditf" : "__floatsitf"); + api_push(g, v); + api_f128_call_unary(g, name, resolve_type(g->c, dst), pty); + return; + } api_cg_convert_kind(g, dst, CV_ITOF_S); } void cfree_cg_uint_to_float(CfreeCg *g, CfreeCgTypeId dst, CfreeCgRounding rounding) { (void)rounding; + if (api_is_f128_type(g->c, resolve_type(g->c, dst))) { + ApiSValue v = api_pop(g); + CfreeCgTypeId sty = api_unalias_type(g->c, api_sv_type(&v)); + u32 sz = (u32)abi_cg_sizeof(g->c->abi, sty); + CfreeCgTypeId pty = sz > 8 ? builtin_id(CFREE_CG_BUILTIN_I128) + : (sz > 4 ? builtin_id(CFREE_CG_BUILTIN_I64) + : builtin_id(CFREE_CG_BUILTIN_I32)); + const char *name = sz > 8 ? "__floatuntitf" + : (sz > 4 ? "__floatunditf" : "__floatunsitf"); + api_push(g, v); + api_f128_call_unary(g, name, resolve_type(g->c, dst), pty); + return; + } api_cg_convert_kind(g, dst, CV_ITOF_U); } void cfree_cg_float_to_sint(CfreeCg *g, CfreeCgTypeId dst, CfreeCgRounding rounding) { (void)rounding; + if (api_f128_stack_top(g, 0)) { + CfreeCgTypeId dty = resolve_type(g->c, dst); + u32 sz = (u32)abi_cg_sizeof(g->c->abi, dty); + CfreeCgTypeId rty = sz > 8 ? builtin_id(CFREE_CG_BUILTIN_I128) + : (sz > 4 ? builtin_id(CFREE_CG_BUILTIN_I64) + : builtin_id(CFREE_CG_BUILTIN_I32)); + const char *name = sz > 8 ? "__fixtfti" + : (sz > 4 ? "__fixtfdi" : "__fixtfsi"); + api_f128_call_unary(g, name, rty, builtin_id(CFREE_CG_BUILTIN_F128)); + if (rty != dty) + api_cg_convert_kind(g, dty, CV_TRUNC); + return; + } api_cg_convert_kind(g, dst, CV_FTOI_S); } void cfree_cg_float_to_uint(CfreeCg *g, CfreeCgTypeId dst, CfreeCgRounding rounding) { (void)rounding; + if (api_f128_stack_top(g, 0)) { + CfreeCgTypeId dty = resolve_type(g->c, dst); + u32 sz = (u32)abi_cg_sizeof(g->c->abi, dty); + CfreeCgTypeId rty = sz > 8 ? builtin_id(CFREE_CG_BUILTIN_I128) + : (sz > 4 ? builtin_id(CFREE_CG_BUILTIN_I64) + : builtin_id(CFREE_CG_BUILTIN_I32)); + const char *name = sz > 8 ? "__fixunstfti" + : (sz > 4 ? "__fixunstfdi" : "__fixunstfsi"); + api_f128_call_unary(g, name, rty, builtin_id(CFREE_CG_BUILTIN_F128)); + if (rty != dty) + api_cg_convert_kind(g, dty, CV_TRUNC); + return; + } api_cg_convert_kind(g, dst, CV_FTOI_U); } @@ -5639,7 +6100,6 @@ void cfree_cg_call(CfreeCg *g, uint32_t nargs, CfreeCgTypeId fn_type, for (u32 i = 0; i < nargs; ++i) { u32 idx = nargs - 1u - i; ApiSValue arg = api_pop(g); - api_ensure_reg(g, &arg); int is_vararg = (idx >= abi->nparams); CfreeCgTypeId aty; if (is_vararg) { @@ -5652,12 +6112,19 @@ void cfree_cg_call(CfreeCg *g, uint32_t nargs, CfreeCgTypeId fn_type, avs[idx].type = aty; avs[idx].abi = is_vararg ? NULL : &abi->params[idx]; int is_aggregate = cg_type_is_aggregate(g->c, aty); - if (is_aggregate) { + if (api_is_f128_type(g->c, aty)) { + ApiSValue lv = api_f128_materialize_lvalue(g, &arg, aty); + avs[idx].storage = lv.op; + avs[idx].storage.type = aty; + avs[idx].size = 16; + } else if (is_aggregate) { + api_ensure_reg(g, &arg); Operand st = arg.op; st.type = aty; avs[idx].storage = st; avs[idx].size = abi_cg_sizeof(g->c->abi, aty); } else { + api_ensure_reg(g, &arg); avs[idx].storage = (api_is_lvalue_sv(&arg) || arg.op.kind == OPK_GLOBAL) ? api_force_reg(g, &arg, aty) @@ -5683,14 +6150,15 @@ void cfree_cg_call(CfreeCg *g, uint32_t nargs, CfreeCgTypeId fn_type, if (has_result) { int ret_is_aggregate = cg_type_is_aggregate(g->c, ret_ty); - if (ret_is_aggregate) { + if (ret_is_aggregate || api_is_f128_type(g->c, ret_ty)) { FrameSlotDesc fsd; memset(&fsd, 0, sizeof fsd); fsd.type = ret_ty; fsd.size = abi_cg_sizeof(g->c->abi, ret_ty); fsd.align = abi_cg_alignof(g->c->abi, ret_ty); fsd.kind = FS_LOCAL; - fsd.flags = FSF_ADDR_TAKEN; + if (ret_is_aggregate || api_is_f128_type(g->c, ret_ty)) + fsd.flags = FSF_ADDR_TAKEN; FrameSlot ret_slot = T->frame_slot(T, &fsd); desc.ret.storage = api_op_local(ret_slot, ret_ty); } else { @@ -5826,19 +6294,25 @@ static void api_call_symbol_common(CfreeCg *g, CfreeCgSym sym, uint32_t nargs, ApiSValue arg = api_pop(g); int is_vararg = (idx >= abi->nparams); CfreeCgTypeId aty; - api_ensure_reg(g, &arg); aty = is_vararg ? (arg.type ? arg.type : api_sv_type(&arg)) : cg_type_func_param_id(g->c, fty, idx); if (!aty) aty = arg.type; avs[idx].type = aty; avs[idx].abi = is_vararg ? NULL : &abi->params[idx]; - if (cg_type_is_aggregate(g->c, aty)) { + if (api_is_f128_type(g->c, aty)) { + ApiSValue lv = api_f128_materialize_lvalue(g, &arg, aty); + avs[idx].storage = lv.op; + avs[idx].storage.type = aty; + avs[idx].size = 16; + } else if (cg_type_is_aggregate(g->c, aty)) { + api_ensure_reg(g, &arg); Operand st = arg.op; st.type = aty; avs[idx].storage = st; avs[idx].size = abi_cg_sizeof(g->c->abi, aty); } else { + api_ensure_reg(g, &arg); avs[idx].storage = (api_is_lvalue_sv(&arg) || arg.op.kind == OPK_GLOBAL) ? api_force_reg(g, &arg, aty) @@ -5856,7 +6330,7 @@ static void api_call_symbol_common(CfreeCg *g, CfreeCgSym sym, uint32_t nargs, desc.ret.type = ret_ty; desc.ret.abi = &abi->ret; if (has_result) { - if (cg_type_is_aggregate(g->c, ret_ty)) { + if (cg_type_is_aggregate(g->c, ret_ty) || api_is_f128_type(g->c, ret_ty)) { FrameSlotDesc fsd; FrameSlot ret_slot; memset(&fsd, 0, sizeof fsd); @@ -5924,6 +6398,14 @@ void cfree_cg_ret(CfreeCg *g) { T->ret(T, &av); return; } + if (api_is_f128_type(g->c, rty)) { + ApiSValue lv = api_f128_materialize_lvalue(g, &v, rty); + av.storage = lv.op; + av.storage.type = rty; + av.size = 16; + T->ret(T, &av); + return; + } if (api_sv_op_is(&v, OPK_IMM)) { ret_op = v.op; ret_op.type = rty; @@ -6106,6 +6588,12 @@ void cfree_cg_data_float(CfreeCg *g, double value, CfreeCgTypeId type) { ty = resolve_type(g->c, type); if (!ty) return; + if (api_is_f128_type(g->c, ty)) { + u8 bytes[16]; + api_encode_binary128_from_double(g, value, bytes); + cfree_cg_data_bytes(g, bytes, sizeof bytes); + return; + } if (ty == builtin_id(CFREE_CG_BUILTIN_F32)) { u.f = (float)value; if (g->c->target.big_endian) { diff --git a/src/arch/aa64/emit.c b/src/arch/aa64/emit.c @@ -35,6 +35,8 @@ u32 type_byte_size(CfreeCgTypeId t) { if (t == CG_BUILTIN_ID(CFREE_CG_BUILTIN_I32) || t == CG_BUILTIN_ID(CFREE_CG_BUILTIN_F32)) return 4; + if (t == CG_BUILTIN_ID(CFREE_CG_BUILTIN_F128)) + return 16; return 8; } @@ -48,6 +50,8 @@ u32 size_idx_for_bytes(u32 nbytes) { return 2; case 8: return 3; + case 16: + return 4; default: return 3; } @@ -533,7 +537,7 @@ static void aa_consume_param_location(AAImpl *a, const ABIArgInfo *ai) { if (a->next_param_fp < 8) ++a->next_param_fp; else - a->next_param_stack += 8; + a->next_param_stack += pt->size > 8 ? pt->size : 8; } } } @@ -592,13 +596,23 @@ CGLocalStorage aa_param(CGTarget *t, const CGParamDesc *p) { u32 dst = reg_num((Operand){.kind = OPK_REG, .v.reg = st.v.reg}); if (a->next_param_fp < 8) { u32 src = a->next_param_fp++; - u32 type = (sz == 8) ? 1u : 0u; - if (dst != src) aa64_emit32(t->mc, aa64_fmov_reg(type, dst, src)); + if (sz == 16) { + if (dst != src) aa64_emit32(t->mc, aa64_mov_v16b(dst, src)); + } else { + u32 type = (sz == 8) ? 1u : 0u; + if (dst != src) aa64_emit32(t->mc, aa64_fmov_reg(type, dst, src)); + } } else { u32 caller_off = a->next_param_stack; - a->next_param_stack += 8; - aa64_emit_ldur_fp_off(t->mc, sidx, dst, incoming_stack_base, - incoming_stack_bias + (i32)caller_off, AA_TMP0); + a->next_param_stack += sz > 8 ? sz : 8; + if (sz == 16) + aa64_emit32(t->mc, aa64_ldur_q(dst, incoming_stack_base, + incoming_stack_bias + + (i32)caller_off)); + else + aa64_emit_ldur_fp_off(t->mc, sidx, dst, incoming_stack_base, + incoming_stack_bias + (i32)caller_off, + AA_TMP0); } } else { compiler_panic(t->c, a->loc, "aarch64 param: ABI class %d unimpl", @@ -667,15 +681,28 @@ CGLocalStorage aa_param(CGTarget *t, const CGParamDesc *p) { } else if (pt->cls == ABI_CLASS_FP) { if (a->next_param_fp < 8) { u32 reg = a->next_param_fp++; - aa64_emit_stur_fp_off(t->mc, sidx, reg, 29, - -(i32)s->off + (i32)part_off, AA_TMP0); + if (sz == 16) + aa64_emit32(t->mc, aa64_stur_q(reg, 29, + -(i32)s->off + (i32)part_off)); + else + aa64_emit_stur_fp_off(t->mc, sidx, reg, 29, + -(i32)s->off + (i32)part_off, AA_TMP0); } else { u32 caller_off = a->next_param_stack; - a->next_param_stack += 8; - aa64_emit_ldur_fp_off(t->mc, sidx, AA_FP_TMP0, incoming_stack_base, - incoming_stack_bias + (i32)caller_off, AA_TMP0); - aa64_emit_stur_fp_off(t->mc, sidx, AA_FP_TMP0, 29, - -(i32)s->off + (i32)part_off, AA_TMP0); + a->next_param_stack += sz > 8 ? sz : 8; + if (sz == 16) { + aa64_emit32(t->mc, aa64_ldur_q(AA_FP_TMP0, incoming_stack_base, + incoming_stack_bias + + (i32)caller_off)); + aa64_emit32(t->mc, aa64_stur_q(AA_FP_TMP0, 29, + -(i32)s->off + (i32)part_off)); + } else { + aa64_emit_ldur_fp_off(t->mc, sidx, AA_FP_TMP0, incoming_stack_base, + incoming_stack_bias + (i32)caller_off, + AA_TMP0); + aa64_emit_stur_fp_off(t->mc, sidx, AA_FP_TMP0, 29, + -(i32)s->off + (i32)part_off, AA_TMP0); + } } } else { compiler_panic(t->c, a->loc, "aarch64 param: ABI class %d unimpl", diff --git a/src/arch/aa64/internal.h b/src/arch/aa64/internal.h @@ -66,6 +66,14 @@ static inline u32 aa64_ldur_fp(u32 size, u32 Rt, u32 Rn, i32 simm9) { return 0x3C400000u | (size << 30) | (((u32)simm9 & 0x1ffu) << 12) | ((Rn & 0x1f) << 5) | (Rt & 0x1f); } +static inline u32 aa64_stur_q(u32 Rt, u32 Rn, i32 simm9) { + return 0x3C800000u | (((u32)simm9 & 0x1ffu) << 12) | + ((Rn & 0x1f) << 5) | (Rt & 0x1f); +} +static inline u32 aa64_ldur_q(u32 Rt, u32 Rn, i32 simm9) { + return 0x3CC00000u | (((u32)simm9 & 0x1ffu) << 12) | + ((Rn & 0x1f) << 5) | (Rt & 0x1f); +} static inline u32 aa64_str_uimm(u32 size, u32 Rt, u32 Rn, u32 byte_off) { u32 sc = byte_off >> size; @@ -82,6 +90,11 @@ static inline u32 aa64_str_fp_uimm(u32 size, u32 Rt, u32 Rn, u32 byte_off) { return 0x3D000000u | (size << 30) | ((sc & 0xfffu) << 10) | ((Rn & 0x1f) << 5) | (Rt & 0x1f); } +static inline u32 aa64_str_q_uimm(u32 Rt, u32 Rn, u32 byte_off) { + u32 sc = byte_off >> 4; + return 0x3D800000u | ((sc & 0xfffu) << 10) | + ((Rn & 0x1f) << 5) | (Rt & 0x1f); +} static inline u32 aa64_mrs_tpidr_el0(u32 Rt) { return 0xD53BD040u | (Rt & 0x1fu); @@ -96,10 +109,19 @@ static inline u32 aa64_ldr_fp_uimm(u32 size, u32 Rt, u32 Rn, u32 byte_off) { return 0x3D400000u | (size << 30) | ((sc & 0xfffu) << 10) | ((Rn & 0x1f) << 5) | (Rt & 0x1f); } +static inline u32 aa64_ldr_q_uimm(u32 Rt, u32 Rn, u32 byte_off) { + u32 sc = byte_off >> 4; + return 0x3DC00000u | ((sc & 0xfffu) << 10) | + ((Rn & 0x1f) << 5) | (Rt & 0x1f); +} static inline u32 aa64_fmov_reg(u32 type, u32 Rd, u32 Rn) { return 0x1E204000u | ((type & 3) << 22) | ((Rn & 0x1f) << 5) | (Rd & 0x1f); } +static inline u32 aa64_mov_v16b(u32 Rd, u32 Rn) { + return 0x4EA01C00u | ((Rn & 0x1f) << 16) | ((Rn & 0x1f) << 5) | + (Rd & 0x1f); +} static inline u32 aa64_subs_imm(u32 sf, u32 Rd, u32 Rn, u32 imm12) { return 0x71000000u | (sf << 31) | ((imm12 & 0xfff) << 10) | diff --git a/src/arch/aa64/ops.c b/src/arch/aa64/ops.c @@ -7,6 +7,8 @@ * Data movement * ============================================================ */ +static RelocKind ldst_lo12_reloc_for(u32 nbytes); + static void aa_load_imm(CGTarget* t, Operand dst, i64 imm) { u32 sf = type_is_64(dst.type) ? 1u : 0u; aa64_emit_load_imm(t->mc, sf, reg_num(dst), imm); @@ -58,17 +60,23 @@ static void aa_load_const(CGTarget* t, Operand dst, ConstBytes cb) { sym, 0, 0, 0); u32 ldr_pos = t->mc->pos(t->mc); - u32 sidx = (cb.size == 8) ? 3u : 2u; - aa64_emit32(t->mc, aa64_ldr_fp_uimm(sidx, reg_num(dst), AA_TMP0, 0)); - RelocKind lo12 = (cb.size == 8) ? R_AARCH64_LDST64_ABS_LO12_NC - : R_AARCH64_LDST32_ABS_LO12_NC; + u32 sidx = size_idx_for_bytes(cb.size); + if (cb.size == 16) + aa64_emit32(t->mc, aa64_ldr_q_uimm(reg_num(dst), AA_TMP0, 0)); + else + aa64_emit32(t->mc, aa64_ldr_fp_uimm(sidx, reg_num(dst), AA_TMP0, 0)); + RelocKind lo12 = ldst_lo12_reloc_for(cb.size); t->mc->emit_reloc_at(t->mc, cur_section, ldr_pos, lo12, sym, 0, 0, 0); } static void aa_copy(CGTarget* t, Operand dst, Operand src) { if (dst.cls == RC_FP || src.cls == RC_FP) { - u32 type = type_is_fp_double(dst.type) ? 1u : 0u; - aa64_emit32(t->mc, aa64_fmov_reg(type, reg_num(dst), reg_num(src))); + if (type_byte_size(dst.type) == 16) { + aa64_emit32(t->mc, aa64_mov_v16b(reg_num(dst), reg_num(src))); + } else { + u32 type = type_is_fp_double(dst.type) ? 1u : 0u; + aa64_emit32(t->mc, aa64_fmov_reg(type, reg_num(dst), reg_num(src))); + } return; } u32 sf = type_is_64(dst.type) ? 1u : 0u; @@ -85,10 +93,43 @@ static RelocKind ldst_lo12_reloc_for(u32 nbytes) { case 2: return R_AARCH64_LDST16_ABS_LO12_NC; case 4: return R_AARCH64_LDST32_ABS_LO12_NC; case 8: return R_AARCH64_LDST64_ABS_LO12_NC; + case 16: return R_AARCH64_LDST128_ABS_LO12_NC; default: return R_AARCH64_LDST64_ABS_LO12_NC; } } +static void aa_emit_ldr_fp_any(MCEmitter* mc, u32 sidx, u32 rt, u32 rn, + i32 off) { + if (sidx == 4) + aa64_emit32(mc, aa64_ldur_q(rt, rn, off)); + else + aa64_emit32(mc, aa64_ldur_fp(sidx, rt, rn, off)); +} + +static void aa_emit_str_fp_any(MCEmitter* mc, u32 sidx, u32 rt, u32 rn, + i32 off) { + if (sidx == 4) + aa64_emit32(mc, aa64_stur_q(rt, rn, off)); + else + aa64_emit32(mc, aa64_stur_fp(sidx, rt, rn, off)); +} + +static void aa_emit_ldr_fp_uimm_any(MCEmitter* mc, u32 sidx, u32 rt, u32 rn, + u32 off) { + if (sidx == 4) + aa64_emit32(mc, aa64_ldr_q_uimm(rt, rn, off)); + else + aa64_emit32(mc, aa64_ldr_fp_uimm(sidx, rt, rn, off)); +} + +static void aa_emit_str_fp_uimm_any(MCEmitter* mc, u32 sidx, u32 rt, u32 rn, + u32 off) { + if (sidx == 4) + aa64_emit32(mc, aa64_str_q_uimm(rt, rn, off)); + else + aa64_emit32(mc, aa64_str_fp_uimm(sidx, rt, rn, off)); +} + static int use_got_for_sym(CGTarget* t, ObjSymId sym) { return obj_symbol_extern_via_got(t->c, t->obj, sym); } @@ -139,7 +180,7 @@ void aa_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma) { if (use_got_for_sym(t, sym)) { aa64_emit_got_load_addr(t, AA_TMP0, sym); if (dst.cls == RC_FP) { - aa64_emit32(mc, aa64_ldur_fp(sidx, reg_num(dst), AA_TMP0, (i32)add)); + aa_emit_ldr_fp_any(mc, sidx, reg_num(dst), AA_TMP0, (i32)add); } else { aa64_emit32(mc, aa64_ldur(sidx, reg_num(dst), AA_TMP0, (i32)add)); } @@ -151,7 +192,7 @@ void aa_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma) { 0, 0); u32 ld_pos = mc->pos(mc); if (dst.cls == RC_FP) { - aa64_emit32(mc, aa64_ldr_fp_uimm(sidx, reg_num(dst), AA_TMP0, 0)); + aa_emit_ldr_fp_uimm_any(mc, sidx, reg_num(dst), AA_TMP0, 0); } else { aa64_emit32(mc, aa64_ldr_uimm(sidx, reg_num(dst), AA_TMP0, 0)); } @@ -162,7 +203,7 @@ void aa_load(CGTarget* t, Operand dst, Operand addr, MemAccess ma) { i32 off; u32 base = addr_base(t, addr, &off, AA_TMP0); if (dst.cls == RC_FP) { - aa64_emit32(t->mc, aa64_ldur_fp(sidx, reg_num(dst), base, off)); + aa_emit_ldr_fp_any(t->mc, sidx, reg_num(dst), base, off); } else { aa64_emit32(t->mc, aa64_ldur(sidx, reg_num(dst), base, off)); } @@ -194,7 +235,7 @@ void aa_store(CGTarget* t, Operand addr, Operand src, MemAccess ma) { if (use_got_for_sym(t, sym)) { aa64_emit_got_load_addr(t, base, sym); if (src_is_fp) { - aa64_emit32(mc, aa64_stur_fp(sidx, src_reg, base, (i32)add)); + aa_emit_str_fp_any(mc, sidx, src_reg, base, (i32)add); } else { aa64_emit32(mc, aa64_stur(sidx, src_reg, base, (i32)add)); } @@ -206,7 +247,7 @@ void aa_store(CGTarget* t, Operand addr, Operand src, MemAccess ma) { 0, 0); u32 st_pos = mc->pos(mc); if (src_is_fp) { - aa64_emit32(mc, aa64_str_fp_uimm(sidx, src_reg, base, 0)); + aa_emit_str_fp_uimm_any(mc, sidx, src_reg, base, 0); } else { aa64_emit32(mc, aa64_str_uimm(sidx, src_reg, base, 0)); } @@ -225,7 +266,7 @@ void aa_store(CGTarget* t, Operand addr, Operand src, MemAccess ma) { return; } if (src.cls == RC_FP) { - aa64_emit32(t->mc, aa64_stur_fp(sidx, reg_num(src), base, off)); + aa_emit_str_fp_any(t->mc, sidx, reg_num(src), base, off); } else { aa64_emit32(t->mc, aa64_stur(sidx, reg_num(src), base, off)); } @@ -928,8 +969,20 @@ static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi, u32 dst_reg = (*next_fp)++; switch (av->storage.kind) { case OPK_REG: { - u32 type = (sz == 8) ? 1u : 0u; - aa64_emit32(t->mc, aa64_fmov_reg(type, dst_reg, reg_num(av->storage))); + if (sz == 16) + aa64_emit32(t->mc, aa64_mov_v16b(dst_reg, reg_num(av->storage))); + else { + u32 type = (sz == 8) ? 1u : 0u; + aa64_emit32(t->mc, aa64_fmov_reg(type, dst_reg, + reg_num(av->storage))); + } + break; + } + case OPK_LOCAL: { + AASlot* s = aa64_slot_get(a, av->storage.v.frame_slot); + if (!s) compiler_panic(t->c, a->loc, "aarch64 call: bad FP arg slot"); + i32 off = -(i32)s->off + (i32)pt->src_offset; + aa_emit_ldr_fp_any(t->mc, sidx, dst_reg, 29, off); break; } case OPK_INDIRECT: { @@ -940,7 +993,7 @@ static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi, src.v.ind.ofs = av->storage.v.ind.ofs + (i32)pt->src_offset; i32 off; u32 base = addr_base(t, src, &off, AA_TMP0); - aa64_emit32(t->mc, aa64_ldur_fp(sidx, dst_reg, base, off)); + aa_emit_ldr_fp_any(t->mc, sidx, dst_reg, base, off); break; } default: @@ -954,6 +1007,15 @@ static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi, aa_store_stack_reg(t, reg_num(av->storage), RC_FP, av->type, sz, *stack_off, tail); break; + case OPK_LOCAL: { + AASlot* s = aa64_slot_get(a, av->storage.v.frame_slot); + if (!s) compiler_panic(t->c, a->loc, "aarch64 call: bad FP arg slot"); + i32 off = -(i32)s->off + (i32)pt->src_offset; + aa_emit_ldr_fp_any(t->mc, sidx, AA_FP_TMP0, 29, off); + aa_store_stack_reg(t, AA_FP_TMP0, RC_FP, av->type, sz, + *stack_off, tail); + break; + } case OPK_INDIRECT: { Operand src; memset(&src, 0, sizeof src); @@ -962,7 +1024,7 @@ static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi, src.v.ind.ofs = av->storage.v.ind.ofs + (i32)pt->src_offset; i32 off; u32 base = addr_base(t, src, &off, AA_TMP0); - aa64_emit32(t->mc, aa64_ldur_fp(sidx, AA_FP_TMP0, base, off)); + aa_emit_ldr_fp_any(t->mc, sidx, AA_FP_TMP0, base, off); aa_store_stack_reg(t, AA_FP_TMP0, RC_FP, av->type, sz, *stack_off, tail); break; @@ -973,7 +1035,7 @@ static void emit_arg_value(CGTarget* t, const ABIFuncInfo* fi, "aarch64 call: FP stack-arg storage kind %d unsupported", (int)av->storage.kind); } - *stack_off += 8; + *stack_off += sz > 8 ? sz : 8; } } else { compiler_panic(t->c, a->loc, "aarch64 call: ABI class %d unimpl", @@ -1023,7 +1085,7 @@ static void count_arg_stack(const ABIFuncInfo* fi, const CGABIValue* av, if (*next_fp < 8) ++*next_fp; else - *stack_off += 8; + *stack_off += pt->size > 8 ? pt->size : 8; } } } @@ -1187,8 +1249,12 @@ static void aa_call(CGTarget* t, const CGCallDesc* d) { u32 sf = (p->size == 8) ? 1u : 0u; aa64_emit32(mc, aa64_mov_reg(sf, reg_num(rs), src_reg)); } else { - u32 type = (p->size == 8) ? 1u : 0u; - aa64_emit32(mc, aa64_fmov_reg(type, reg_num(rs), src_reg)); + if (p->size == 16) + aa64_emit32(mc, aa64_mov_v16b(reg_num(rs), src_reg)); + else { + u32 type = (p->size == 8) ? 1u : 0u; + aa64_emit32(mc, aa64_fmov_reg(type, reg_num(rs), src_reg)); + } } } else if (rs.kind == OPK_LOCAL || rs.kind == OPK_INDIRECT) { u32 base_reg; @@ -1207,7 +1273,7 @@ static void aa_call(CGTarget* t, const CGCallDesc* d) { if (p->cls == ABI_CLASS_INT) { aa64_emit32(mc, aa64_stur(sidx, src_reg, base_reg, off)); } else { - aa64_emit32(mc, aa64_stur_fp(sidx, src_reg, base_reg, off)); + aa_emit_str_fp_any(mc, sidx, src_reg, base_reg, off); } } else if (rs.kind == OPK_IMM && rs.type == CG_BUILTIN_ID(CFREE_CG_BUILTIN_VOID)) { /* void return placeholder */ @@ -1402,9 +1468,15 @@ static void aa_ret(CGTarget* t, const CGABIValue* val) { } } else if (val->storage.kind == OPK_REG) { if (val->storage.cls == RC_FP) { - u32 type = type_is_fp_double(val->storage.type) ? 1u : 0u; - if (reg_num(val->storage) != 0) - aa64_emit32(mc, aa64_fmov_reg(type, /*Rd=*/0, reg_num(val->storage))); + if (type_byte_size(val->storage.type) == 16) { + if (reg_num(val->storage) != 0) + aa64_emit32(mc, aa64_mov_v16b(/*Rd=*/0, reg_num(val->storage))); + } else { + u32 type = type_is_fp_double(val->storage.type) ? 1u : 0u; + if (reg_num(val->storage) != 0) + aa64_emit32(mc, aa64_fmov_reg(type, /*Rd=*/0, + reg_num(val->storage))); + } } else { u32 sf = type_is_64(val->storage.type) ? 1u : 0u; if (reg_num(val->storage) != 0) @@ -1434,7 +1506,7 @@ static void aa_ret(CGTarget* t, const CGABIValue* val) { if (pt->cls == ABI_CLASS_INT) { aa64_emit_ldur_off(mc, sidx, /*Rt=*/i, base_reg, off, AA_TMP0); } else if (pt->cls == ABI_CLASS_FP) { - aa64_emit_ldur_fp_off(mc, sidx, /*Rt=*/i, base_reg, off, AA_TMP0); + aa_emit_ldr_fp_any(mc, sidx, /*Rt=*/i, base_reg, off); } else { compiler_panic(t->c, a->loc, "aarch64 ret: ret part cls %d unimpl", (int)pt->cls); diff --git a/src/arch/rv64/internal.h b/src/arch/rv64/internal.h @@ -105,6 +105,8 @@ static inline u32 type_byte_size(CfreeCgTypeId t) { if (t == CG_BUILTIN_ID(CFREE_CG_BUILTIN_I32) || t == CG_BUILTIN_ID(CFREE_CG_BUILTIN_F32)) return 4; + if (t == CG_BUILTIN_ID(CFREE_CG_BUILTIN_F128)) + return 16; return 8; } static inline int type_is_signed(CfreeCgTypeId t) { diff --git a/src/arch/x64/internal.h b/src/arch/x64/internal.h @@ -123,6 +123,8 @@ static inline u32 type_byte_size(CfreeCgTypeId t) { if (t == CG_BUILTIN_ID(CFREE_CG_BUILTIN_I32) || t == CG_BUILTIN_ID(CFREE_CG_BUILTIN_F32)) return 4; + if (t == CG_BUILTIN_ID(CFREE_CG_BUILTIN_F128)) + return 16; return 8; } static inline int type_is_signed(CfreeCgTypeId t) { diff --git a/test/parse/cases/i128_01_layout.c b/test/parse/cases/i128_01_layout.c @@ -0,0 +1,19 @@ +typedef __int128 i128; +typedef unsigned __int128 u128; + +struct S { + char c; + u128 x; + char d; +}; + +int test_main(void) { + if (sizeof(i128) != 16) return 11; + if (sizeof(u128) != 16) return 12; + if (_Alignof(i128) != 16) return 13; + if (_Alignof(u128) != 16) return 14; + if (sizeof(struct S) != 48) return 15; + if (__builtin_offsetof(struct S, x) != 16) return 16; + if (__builtin_offsetof(struct S, d) != 32) return 17; + return 0; +} diff --git a/test/parse/cases/i128_01_layout.expected b/test/parse/cases/i128_01_layout.expected @@ -0,0 +1 @@ +0 diff --git a/test/parse/cases/i128_02_literal_storage.c b/test/parse/cases/i128_02_literal_storage.c @@ -0,0 +1,11 @@ +typedef unsigned __int128 u128; + +int test_main(void) { + u128 x = ((u128)0x1122334455667788ULL << 64) | + (u128)0x99aabbccddeeff00ULL; + unsigned long long lo = (unsigned long long)x; + unsigned long long hi = (unsigned long long)(x >> 64); + if (lo != 0x99aabbccddeeff00ULL) return 11; + if (hi != 0x1122334455667788ULL) return 12; + return 42; +} diff --git a/test/parse/cases/i128_02_literal_storage.expected b/test/parse/cases/i128_02_literal_storage.expected @@ -0,0 +1 @@ +42 diff --git a/test/parse/cases/i128_03_add_sub_carry.c b/test/parse/cases/i128_03_add_sub_carry.c @@ -0,0 +1,12 @@ +typedef unsigned __int128 u128; + +int test_main(void) { + u128 x = (((u128)1 << 64) - 1) + 3; + if ((unsigned long long)x != 2ULL) return 11; + if ((unsigned long long)(x >> 64) != 1ULL) return 12; + + x -= 3; + if ((unsigned long long)x != 0xffffffffffffffffULL) return 13; + if ((unsigned long long)(x >> 64) != 0ULL) return 14; + return 29; +} diff --git a/test/parse/cases/i128_03_add_sub_carry.expected b/test/parse/cases/i128_03_add_sub_carry.expected @@ -0,0 +1 @@ +29 diff --git a/test/parse/cases/i128_04_mul_high_half.c b/test/parse/cases/i128_04_mul_high_half.c @@ -0,0 +1,9 @@ +typedef unsigned __int128 u128; + +int test_main(void) { + u128 a = (u128)0x100000000ULL; + u128 b = a * a; + if ((unsigned long long)b != 0ULL) return 11; + if ((unsigned long long)(b >> 64) != 1ULL) return 12; + return 31; +} diff --git a/test/parse/cases/i128_04_mul_high_half.expected b/test/parse/cases/i128_04_mul_high_half.expected @@ -0,0 +1 @@ +31 diff --git a/test/parse/cases/i128_05_div_mod.c b/test/parse/cases/i128_05_div_mod.c @@ -0,0 +1,12 @@ +typedef unsigned __int128 u128; + +int test_main(void) { + u128 n = (u128)1 << 96; + u128 d = (u128)1 << 32; + u128 q = n / d; + u128 r = n % d; + if (r != 0) return 11; + if ((unsigned long long)q != 0ULL) return 12; + if ((unsigned long long)(q >> 64) != 1ULL) return 13; + return 37; +} diff --git a/test/parse/cases/i128_05_div_mod.expected b/test/parse/cases/i128_05_div_mod.expected @@ -0,0 +1 @@ +37 diff --git a/test/parse/cases/i128_06_shifts_bitwise.c b/test/parse/cases/i128_06_shifts_bitwise.c @@ -0,0 +1,11 @@ +typedef unsigned __int128 u128; + +int test_main(void) { + u128 x = (u128)0xf0ULL << 68; + u128 y = x >> 64; + u128 z = (x | ((u128)0x55ULL << 4)) ^ ((u128)0x5ULL << 4); + if ((unsigned long long)y != 0xf00ULL) return 11; + if ((unsigned long long)z != 0x500ULL) return 12; + if ((unsigned long long)(z >> 64) != 0xf00ULL) return 13; + return 41; +} diff --git a/test/parse/cases/i128_06_shifts_bitwise.expected b/test/parse/cases/i128_06_shifts_bitwise.expected @@ -0,0 +1 @@ +41 diff --git a/test/parse/cases/i128_07_compare.c b/test/parse/cases/i128_07_compare.c @@ -0,0 +1,17 @@ +typedef __int128 i128; +typedef unsigned __int128 u128; + +int test_main(void) { + u128 big = (u128)1 << 100; + u128 small = ((u128)1 << 99) + 9; + i128 neg = -((i128)1 << 70); + i128 pos = (i128)1 << 70; + int r = 0; + if (big > small) r += 1; + if (small < big) r += 2; + if (big != small) r += 4; + if (neg < pos) r += 8; + if (neg <= -1) r += 16; + if (pos >= 0) r += 32; + return r; +} diff --git a/test/parse/cases/i128_07_compare.expected b/test/parse/cases/i128_07_compare.expected @@ -0,0 +1 @@ +63 diff --git a/test/parse/cases/i128_08_signed_shift_convert.c b/test/parse/cases/i128_08_signed_shift_convert.c @@ -0,0 +1,9 @@ +typedef __int128 i128; + +int test_main(void) { + i128 x = -((i128)1 << 70); + i128 y = x >> 68; + if ((long long)y != -4LL) return 11; + if ((int)(i128)12345 != 12345) return 12; + return 45; +} diff --git a/test/parse/cases/i128_08_signed_shift_convert.expected b/test/parse/cases/i128_08_signed_shift_convert.expected @@ -0,0 +1 @@ +45 diff --git a/test/parse/cases/i128_09_call_return.c b/test/parse/cases/i128_09_call_return.c @@ -0,0 +1,17 @@ +typedef unsigned __int128 u128; + +static u128 add128(u128 a, u128 b) { + return a + b; +} + +static int low_byte(u128 x) { + return (int)(x & 255); +} + +int test_main(void) { + u128 a = ((u128)1 << 80) + 40; + u128 b = ((u128)2 << 80) + 7; + u128 c = add128(a, b); + if ((unsigned long long)(c >> 80) != 3ULL) return 11; + return low_byte(c); +} diff --git a/test/parse/cases/i128_09_call_return.expected b/test/parse/cases/i128_09_call_return.expected @@ -0,0 +1 @@ +47 diff --git a/test/parse/cases/i128_10_struct_storage.c b/test/parse/cases/i128_10_struct_storage.c @@ -0,0 +1,16 @@ +typedef unsigned __int128 u128; + +struct Box { + int a; + u128 x; + int b; +}; + +int test_main(void) { + struct Box box; + box.a = 5; + box.x = ((u128)0x1234ULL << 64) | 39; + box.b = 8; + if ((unsigned long long)(box.x >> 64) != 0x1234ULL) return 11; + return box.a + (int)box.x + box.b; +} diff --git a/test/parse/cases/i128_10_struct_storage.expected b/test/parse/cases/i128_10_struct_storage.expected @@ -0,0 +1 @@ +52 diff --git a/test/parse/cases/i128_11_union_lanes.c b/test/parse/cases/i128_11_union_lanes.c @@ -0,0 +1,15 @@ +typedef unsigned __int128 u128; + +union U { + u128 x; + unsigned long long lane[2]; +}; + +int test_main(void) { + union U u; + u.x = ((u128)0x0102030405060708ULL << 64) | + (u128)0x1112131415161718ULL; + if (u.lane[0] != 0x1112131415161718ULL) return 11; + if (u.lane[1] != 0x0102030405060708ULL) return 12; + return 55; +} diff --git a/test/parse/cases/i128_11_union_lanes.expected b/test/parse/cases/i128_11_union_lanes.expected @@ -0,0 +1 @@ +55 diff --git a/test/parse/cases/i128_12_global_init.c b/test/parse/cases/i128_12_global_init.c @@ -0,0 +1,8 @@ +typedef unsigned __int128 u128; + +static u128 g = ((u128)7 << 64) | 61; + +int test_main(void) { + if ((unsigned long long)(g >> 64) != 7ULL) return 11; + return (int)g; +} diff --git a/test/parse/cases/i128_12_global_init.expected b/test/parse/cases/i128_12_global_init.expected @@ -0,0 +1 @@ +61 diff --git a/test/parse/cases/ldbl128_01_layout_macros.c b/test/parse/cases/ldbl128_01_layout_macros.c @@ -0,0 +1,14 @@ +#include <float.h> + +int test_main(void) { + if (__LDBL_MANT_DIG__ != 113) return 0; + if (sizeof(long double) != 16) return 11; + if (_Alignof(long double) != 16) return 12; + if (LDBL_MANT_DIG != 113) return 13; + if (LDBL_MAX_EXP != 16384) return 14; + if (LDBL_MIN_EXP != (-16381)) return 15; + if (LDBL_DIG != 33) return 16; + if (LDBL_DECIMAL_DIG != 36) return 17; + if (DECIMAL_DIG != 36) return 18; + return 0; +} diff --git a/test/parse/cases/ldbl128_01_layout_macros.expected b/test/parse/cases/ldbl128_01_layout_macros.expected @@ -0,0 +1 @@ +0 diff --git a/test/parse/cases/ldbl128_02_literal_to_int.c b/test/parse/cases/ldbl128_02_literal_to_int.c @@ -0,0 +1,6 @@ +int test_main(void) { + if (__LDBL_MANT_DIG__ != 113) return 0; + long double a = 42.0L; + long double b = 5.0L; + return (int)(a + b); +} diff --git a/test/parse/cases/ldbl128_02_literal_to_int.expected b/test/parse/cases/ldbl128_02_literal_to_int.expected @@ -0,0 +1 @@ +47 diff --git a/test/parse/cases/ldbl128_03_arith.c b/test/parse/cases/ldbl128_03_arith.c @@ -0,0 +1,7 @@ +int test_main(void) { + if (__LDBL_MANT_DIG__ != 113) return 0; + long double a = 21.0L; + long double b = 6.0L; + long double c = ((a + b) * 2.0L - 9.0L) / 3.0L; + return (int)c; +} diff --git a/test/parse/cases/ldbl128_03_arith.expected b/test/parse/cases/ldbl128_03_arith.expected @@ -0,0 +1 @@ +15 diff --git a/test/parse/cases/ldbl128_04_conversions.c b/test/parse/cases/ldbl128_04_conversions.c @@ -0,0 +1,11 @@ +int test_main(void) { + if (__LDBL_MANT_DIG__ != 113) return 0; + int i = 19; + double d = 8.0; + float f = 5.0f; + long double a = (long double)i; + long double b = (long double)d; + long double c = (long double)f; + double back = (double)(a + b + c); + return (int)back; +} diff --git a/test/parse/cases/ldbl128_04_conversions.expected b/test/parse/cases/ldbl128_04_conversions.expected @@ -0,0 +1 @@ +32 diff --git a/test/parse/cases/ldbl128_05_compare.c b/test/parse/cases/ldbl128_05_compare.c @@ -0,0 +1,13 @@ +int test_main(void) { + if (__LDBL_MANT_DIG__ != 113) return 0; + long double a = 10.0L; + long double b = 20.0L; + int r = 0; + if (a < b) r += 1; + if (b > a) r += 2; + if (a <= 10.0L) r += 4; + if (b >= 20.0L) r += 8; + if (a != b) r += 16; + if (a == 10.0L) r += 32; + return r; +} diff --git a/test/parse/cases/ldbl128_05_compare.expected b/test/parse/cases/ldbl128_05_compare.expected @@ -0,0 +1 @@ +63 diff --git a/test/parse/cases/ldbl128_06_call_return.c b/test/parse/cases/ldbl128_06_call_return.c @@ -0,0 +1,13 @@ +static long double add_ld(long double a, long double b) { + return a + b; +} + +static int take_ld(long double x) { + return (int)x; +} + +int test_main(void) { + if (__LDBL_MANT_DIG__ != 113) return 0; + long double v = add_ld(30.0L, 12.0L); + return take_ld(v); +} diff --git a/test/parse/cases/ldbl128_06_call_return.expected b/test/parse/cases/ldbl128_06_call_return.expected @@ -0,0 +1 @@ +42 diff --git a/test/parse/cases/ldbl128_07_struct_storage.c b/test/parse/cases/ldbl128_07_struct_storage.c @@ -0,0 +1,14 @@ +struct Box { + int a; + long double x; + int b; +}; + +int test_main(void) { + if (__LDBL_MANT_DIG__ != 113) return 0; + struct Box box; + box.a = 7; + box.x = 35.0L; + box.b = 9; + return box.a + (int)box.x + box.b; +} diff --git a/test/parse/cases/ldbl128_07_struct_storage.expected b/test/parse/cases/ldbl128_07_struct_storage.expected @@ -0,0 +1 @@ +51 diff --git a/test/parse/cases/ldbl128_08_literal_bits.c b/test/parse/cases/ldbl128_08_literal_bits.c @@ -0,0 +1,13 @@ +union U { + long double f; + unsigned long long lane[2]; +}; + +int test_main(void) { + if (__LDBL_MANT_DIG__ != 113) return 0; + union U u; + u.f = 1.0L; + if (u.lane[0] != 0ULL) return 11; + if (u.lane[1] != 0x3fff000000000000ULL) return 12; + return 23; +} diff --git a/test/parse/cases/ldbl128_08_literal_bits.expected b/test/parse/cases/ldbl128_08_literal_bits.expected @@ -0,0 +1 @@ +23 diff --git a/test/parse/cases/ldbl128_09_global_init.c b/test/parse/cases/ldbl128_09_global_init.c @@ -0,0 +1,7 @@ +static long double g = 17.0L; +static long double h = 4.0L; + +int test_main(void) { + if (__LDBL_MANT_DIG__ != 113) return 0; + return (int)(g + h); +} diff --git a/test/parse/cases/ldbl128_09_global_init.expected b/test/parse/cases/ldbl128_09_global_init.expected @@ -0,0 +1 @@ +21 diff --git a/test/parse/cases/ldbl128_10_unary_neg.c b/test/parse/cases/ldbl128_10_unary_neg.c @@ -0,0 +1,6 @@ +int test_main(void) { + if (__LDBL_MANT_DIG__ != 113) return 0; + long double x = 19.0L; + long double y = -x; + return (int)(y + 50.0L); +} diff --git a/test/parse/cases/ldbl128_10_unary_neg.expected b/test/parse/cases/ldbl128_10_unary_neg.expected @@ -0,0 +1 @@ +31 diff --git a/test/parse/cases/ldbl128_11_array_copy.c b/test/parse/cases/ldbl128_11_array_copy.c @@ -0,0 +1,8 @@ +int test_main(void) { + if (__LDBL_MANT_DIG__ != 113) return 0; + long double xs[3]; + xs[0] = 5.0L; + xs[1] = 7.0L; + xs[2] = xs[0] + xs[1]; + return (int)xs[2]; +} diff --git a/test/parse/cases/ldbl128_11_array_copy.expected b/test/parse/cases/ldbl128_11_array_copy.expected @@ -0,0 +1 @@ +12 diff --git a/test/parse/cases/ldbl128_12_stack_args.c b/test/parse/cases/ldbl128_12_stack_args.c @@ -0,0 +1,12 @@ +static long double sum10(long double a0, long double a1, long double a2, + long double a3, long double a4, long double a5, + long double a6, long double a7, long double a8, + long double a9) { + return a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8 + a9; +} + +int test_main(void) { + if (__LDBL_MANT_DIG__ != 113) return 0; + return (int)sum10(1.0L, 2.0L, 3.0L, 4.0L, 5.0L, + 6.0L, 7.0L, 8.0L, 9.0L, 10.0L); +} diff --git a/test/parse/cases/ldbl128_12_stack_args.expected b/test/parse/cases/ldbl128_12_stack_args.expected @@ -0,0 +1 @@ +55 diff --git a/test/parse/cases/ldbl128_13_mixed_arith.c b/test/parse/cases/ldbl128_13_mixed_arith.c @@ -0,0 +1,7 @@ +int test_main(void) { + if (__LDBL_MANT_DIG__ != 113) return 0; + long double a = 3.0L; + double b = 4.0; + float c = 5.0f; + return (int)(a + b + c); +} diff --git a/test/parse/cases/ldbl128_13_mixed_arith.expected b/test/parse/cases/ldbl128_13_mixed_arith.expected @@ -0,0 +1 @@ +12 diff --git a/test/parse/cases/ldbl128_14_struct_return.c b/test/parse/cases/ldbl128_14_struct_return.c @@ -0,0 +1,17 @@ +struct Pair { + long double a; + long double b; +}; + +static struct Pair make_pair(long double a, long double b) { + struct Pair p; + p.a = a; + p.b = b; + return p; +} + +int test_main(void) { + if (__LDBL_MANT_DIG__ != 113) return 0; + struct Pair p = make_pair(14.0L, 9.0L); + return (int)(p.a + p.b); +} diff --git a/test/parse/cases/ldbl128_14_struct_return.expected b/test/parse/cases/ldbl128_14_struct_return.expected @@ -0,0 +1 @@ +23 diff --git a/test/parse/harness/parse_runner.c b/test/parse/harness/parse_runner.c @@ -249,10 +249,35 @@ static int read_file(const char* path, uint8_t** out, size_t* out_len) { return 0; } +static int test_read_all(void* user, const char* path, CfreeFileData* out) { + uint8_t* data = NULL; + size_t len = 0; + (void)user; + if (!out || read_file(path, &data, &len) != 0) + return 0; + out->data = data; + out->size = len; + out->token = data; + return 1; +} + +static void test_release(void* user, CfreeFileData* d) { + (void)user; + if (!d) + return; + free(d->token); + d->data = NULL; + d->size = 0; + d->token = NULL; +} + +static CfreeFileIO g_file_io = {test_read_all, test_release, NULL, NULL}; + static void env_init(CfreeEnv* env) { memset(env, 0, sizeof *env); env->heap = &g_heap; env->diag = &g_diag; + env->file_io = &g_file_io; env->execmem = &g_execmem; env->now = -1; } @@ -267,6 +292,35 @@ static int opt_level_from_env(void) { exit(2); } +static void add_test_system_includes(CfreeCompileOptions* opts) { + static const char* dirs[] = {"rt/include"}; + opts->pp.system_include_dirs = dirs; + opts->pp.nsystem_include_dirs = 1; +} + +static int add_runtime_archive(CfreeLinkOptions* opts, uint8_t** rt_data_out) { + static CfreeBytesInputArchive rt_archive; + uint8_t* data = NULL; + size_t len = 0; + const char* arch = cfree_test_arch_name(); + const char* path = "rt/build/aarch64-linux/libcfree_rt.a"; + if (!strcmp(arch, "rv64") || !strcmp(arch, "riscv64")) + path = "rt/build/riscv64-linux/libcfree_rt.a"; + else if (!strcmp(arch, "x64") || !strcmp(arch, "x86_64") || + !strcmp(arch, "amd64")) + path = "rt/build/x86_64-linux/libcfree_rt.a"; + if (read_file(path, &data, &len) != 0) + return 0; + memset(&rt_archive, 0, sizeof rt_archive); + rt_archive.input.name = path; + rt_archive.input.data = data; + rt_archive.input.len = len; + opts->inputs.archives = &rt_archive; + opts->inputs.narchives = 1; + *rt_data_out = data; + return 1; +} + /* ---- modes ---- */ static int mode_emit(const char* src_path, const char* out_path) { @@ -304,6 +358,7 @@ static int mode_emit(const char* src_path, const char* out_path) { memset(&opts, 0, sizeof opts); opts.opt_level = opt_level_from_env(); + add_test_system_includes(&opts); w = cfree_writer_mem(&g_heap); if (cfree_compile_obj_emit(c, &opts, &in, w) != 0) { @@ -353,10 +408,13 @@ static int mode_jit(const char* src_path) { CfreeEnv env; CfreeCompiler* c; CfreeBytesInput in; + CfreeBytesInput obj_in; CfreeCompileOptions opts; - CfreeObjBuilder* ob = NULL; CfreeLinkOptions lopts; - CfreeObjBuilder* arr[1]; + CfreeWriter* obj_w = NULL; + size_t obj_len = 0; + const uint8_t* obj_data = NULL; + uint8_t* rt_data = NULL; CfreeJit* jit = NULL; int (*fn)(void); int result; @@ -381,21 +439,31 @@ static int mode_jit(const char* src_path) { in.lang = CFREE_LANG_C; memset(&opts, 0, sizeof opts); opts.opt_level = opt_level_from_env(); + add_test_system_includes(&opts); - if (cfree_compile_obj(c, &opts, &in, &ob) != 0 || !ob) { + obj_w = cfree_writer_mem(&g_heap); + if (cfree_compile_obj_emit(c, &opts, &in, obj_w) != 0) { + cfree_writer_close(obj_w); cfree_compiler_free(c); free(src); return 1; } + obj_data = cfree_writer_mem_bytes(obj_w, &obj_len); + memset(&obj_in, 0, sizeof obj_in); + obj_in.name = src_path; + obj_in.data = obj_data; + obj_in.len = obj_len; memset(&lopts, 0, sizeof lopts); - arr[0] = ob; - lopts.inputs.objs = arr; - lopts.inputs.nobjs = 1; + lopts.inputs.obj_bytes = &obj_in; + lopts.inputs.nobj_bytes = 1; lopts.inputs.entry = "test_main"; + add_runtime_archive(&lopts, &rt_data); if (cfree_link_jit(c, &lopts, &jit) != 0 || !jit) { + cfree_writer_close(obj_w); cfree_compiler_free(c); + free(rt_data); free(src); return 1; } @@ -427,7 +495,9 @@ static int mode_jit(const char* src_path) { } cfree_jit_free(jit); + cfree_writer_close(obj_w); cfree_compiler_free(c); + free(rt_data); free(src); return result; } diff --git a/test/parse/run.sh b/test/parse/run.sh @@ -66,6 +66,16 @@ case "$CFREE_TEST_ARCH" in esac export CFREE_TEST_ARCH +case "$TEST_ARCH" in + aa64) RT_AR="$ROOT/rt/build/aarch64-linux/libcfree_rt.a" ;; + x64) RT_AR="$ROOT/rt/build/x86_64-linux/libcfree_rt.a" ;; + rv64) RT_AR="$ROOT/rt/build/riscv64-linux/libcfree_rt.a" ;; +esac +RT_LINK_ARGS=() +if [ -f "$RT_AR" ]; then + RT_LINK_ARGS=(--archive "$RT_AR") +fi + CLANG_TARGET="--target=$CLANG_TRIPLE" CC="${CC:-cc}" HARNESS_CFLAGS="-std=c11 -Wall -Wextra -I$ROOT/include -I$ROOT/test" @@ -427,7 +437,7 @@ run_parse_case() { && [ $have_start_obj -eq 1 ]; then t0=$(now_ms) exe="$work/linked.exe" - if ! "$LINK_EXE_RUNNER" -o "$exe" "$obj" "$START_OBJ" \ + if ! "$LINK_EXE_RUNNER" -o "$exe" "$obj" "$START_OBJ" "${RT_LINK_ARGS[@]}" \ >"$work/exec_link.out" 2>"$work/exec_link.err"; then dt=$(( $(now_ms) - t0 )) emit_event "$event" TIME E "$dt" @@ -447,7 +457,7 @@ run_parse_case() { if [ $RUN_J -eq 1 ]; then if [ $have_jit_runner -eq 1 ]; then t0=$(now_ms) - "$JIT_RUNNER" "$obj" >"$work/jit.out" 2>"$work/jit.err" + "$JIT_RUNNER" "$obj" "${RT_LINK_ARGS[@]}" >"$work/jit.out" 2>"$work/jit.err" j_rc=$? dt=$(( $(now_ms) - t0 )) emit_event "$event" TIME J "$dt"