commit bbe0c3e30b210857cf082638b7cc4ecfa2b3e022
parent 28e75424c3c6c828f561cb5a6f216235f1dd5ad7
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Tue, 19 May 2026 09:30:02 -0700
Complete i128 and binary128 long double support
Diffstat:
25 files changed, 1493 insertions(+), 395 deletions(-)
diff --git a/doc/C11_LONG_DOUBLE_CHECKLIST.md b/doc/C11_LONG_DOUBLE_CHECKLIST.md
@@ -8,11 +8,11 @@ implementation pass it on the target that owns that format.
## Target profiles
-- [ ] AArch64 Linux: IEEE binary128 `long double`.
+- [x] AArch64 Linux: IEEE binary128 `long double`.
ABI: passed and returned in SIMD/FP `q` registers when register slots are
available. Arithmetic and conversions lower to compiler-rt `*tf*`
helpers.
-- [ ] RV64 Linux LP64D: IEEE binary128 `long double`.
+- [x] RV64 Linux LP64D: IEEE binary128 `long double`.
ABI: passed and returned as two integer XLEN eightbytes because FLEN is
64. Arithmetic and conversions lower to compiler-rt `*tf*` helpers.
- [ ] AArch64 Darwin: `long double == double`.
@@ -24,30 +24,30 @@ implementation pass it on the target that owns that format.
## Support target for the binary128 slice
-- [ ] Complete the 16-byte scalar `__int128` path before treating binary128 as
+- [x] Complete the 16-byte scalar `__int128` path before treating binary128 as
green: layout, locals/globals, constants, arithmetic, shifts, compares,
calls/returns, aggregate fields, unions, and static initialization.
-- [ ] Add a target long-double profile query used by both the frontend and CG:
+- [x] Add a target long-double profile query used by both the frontend and CG:
format, storage size, alignment, macro values, and ABI classification.
-- [ ] Add a distinct CG type for binary128 `long double`; `TY_LDOUBLE` must not
+- [x] Add a distinct CG type for binary128 `long double`; `TY_LDOUBLE` must not
map to `F64` on AArch64/RV64 Linux.
-- [ ] Emit target-correct `__LDBL_*` and `__DECIMAL_DIG__` predefined macros
+- [x] Emit target-correct `__LDBL_*` and `__DECIMAL_DIG__` predefined macros
for binary128 targets.
-- [ ] Encode `L` floating constants as binary128 bytes without narrowing their
+- [x] Encode `L` floating constants as binary128 bytes without narrowing their
storage type to `double`.
-- [ ] Support binary128 local/global storage, assignment, struct fields, and
+- [x] Support binary128 local/global storage, assignment, struct fields, and
return values.
-- [ ] Lower binary128 arithmetic to runtime helpers:
+- [x] Lower binary128 arithmetic to runtime helpers:
`__addtf3`, `__subtf3`, `__multf3`, and `__divtf3`.
-- [ ] Lower binary128 comparisons through compiler-rt compare helpers.
-- [ ] Lower integer, float, and double conversions through compiler-rt helpers:
+- [x] Lower binary128 comparisons through compiler-rt compare helpers.
+- [x] Lower integer, float, and double conversions through compiler-rt helpers:
`__float*tf`, `__fix*tf*`, `__extend{s,d}ftf2`, and
`__trunctf{s,d}f2`.
-- [ ] Teach AArch64 codegen to move 16-byte FP values through Q-register
+- [x] Teach AArch64 codegen to move 16-byte FP values through Q-register
load/store/copy paths.
-- [ ] Teach RV64 ABI movement to pass/return binary128 values as two integer
+- [x] Teach RV64 ABI movement to pass/return binary128 values as two integer
parts, backed by memory in CG.
-- [ ] Keep runtime linkage using the existing `rt/lib/fp_tf/fp_tf.c` and
+- [x] Keep runtime linkage using the existing `rt/lib/fp_tf/fp_tf.c` and
`rt/lib/fp_ti/fp_ti.c` objects for the binary128 runtime variants.
## Red tests
@@ -69,28 +69,42 @@ x87 work can land later without hiding the binary128 regression signal.
Coverage intent:
-- `i128_01` through `i128_12`: target layout/alignment, literal storage,
+- `i128_01` through `i128_14`: target layout/alignment, literal storage,
add/sub carry, multiply high-half behavior, div/mod, shifts/bitwise
operations, signed and unsigned compares, signed shifts/conversions,
calls/returns, aggregate fields, union lane visibility, and global
- initialization.
-- `ldbl128_01` through `ldbl128_14`: target macros/layout, literal decoding,
+ initialization, arbitrary signed div/mod, and arbitrary signed/unsigned
+ multiplication.
+- `ldbl128_01` through `ldbl128_15`: target macros/layout, literal decoding,
arithmetic helpers, conversions, comparisons, calls/returns, struct and
array storage, raw binary128 bits, globals, unary negation, stack
- arguments, mixed arithmetic, and aggregate return.
+ arguments, mixed arithmetic, aggregate return, and arbitrary binary128
+ multiplication.
+
+Known remaining limits:
+
+- The binary128 support target is Linux AArch64/RV64. Darwin `long double`
+ target rules and x87 80-bit `long double` are still separate follow-up
+ targets.
+- Decimal `L` literal coverage currently exercises representable values and
+ raw canonical encodings; it does not yet prove full decimal-to-binary128
+ precision for non-representable literals.
+- ABI aggregate classification currently covers only the implemented scalar
+ and simple aggregate paths; it does not yet cover the full AArch64 HFA/HVA
+ rules or every RV64 aggregate-flattening edge case.
## Done criteria
-- [ ] `CFREE_TEST_ARCH=aa64 CFREE_TEST_FILTER=ldbl128 make test-parse` passes
+- [x] `CFREE_TEST_ARCH=aa64 CFREE_TEST_FILTER=ldbl128 make test-parse` passes
with `CFREE_TEST_ALLOW_SKIP` unset.
-- [ ] `CFREE_TEST_ARCH=rv64 CFREE_TEST_FILTER=ldbl128 make test-parse` passes
+- [x] `CFREE_TEST_ARCH=rv64 CFREE_TEST_FILTER=ldbl128 make test-parse` passes
with `CFREE_TEST_ALLOW_SKIP` unset.
-- [ ] `CFREE_TEST_ARCH=aa64 CFREE_TEST_FILTER=i128 make test-parse` passes
+- [x] `CFREE_TEST_ARCH=aa64 CFREE_TEST_FILTER=i128 make test-parse` passes
with `CFREE_TEST_ALLOW_SKIP` unset.
-- [ ] `CFREE_TEST_ARCH=rv64 CFREE_TEST_FILTER=i128 make test-parse` passes
+- [x] `CFREE_TEST_ARCH=rv64 CFREE_TEST_FILTER=i128 make test-parse` passes
with `CFREE_TEST_ALLOW_SKIP` unset.
-- [ ] `CFREE_TEST_FILTER=6_7_2_12_long_double make test-parse` passes on
+- [x] `CFREE_TEST_FILTER=6_7_2_12_long_double make test-parse` passes on
AArch64 Linux and RV64 Linux without a `.skip` sidecar.
-- [ ] `make rt` still builds the default runtime archives.
-- [ ] `make test-rt-headers test-rt-runtime` stays green for the default
+- [x] `make rt` still builds the default runtime archives.
+- [x] `make test-rt-headers test-rt-runtime` stays green for the default
runtime targets.
diff --git a/lang/c/parse/cg_adapter.c b/lang/c/parse/cg_adapter.c
@@ -422,7 +422,8 @@ void pcg_unop(Parser* p, UnOp op) {
}
void pcg_cmp(Parser* p, CmpOp op) {
- if (op == CMP_LT_F || op == CMP_LE_F || op == CMP_GT_F || op == CMP_GE_F) {
+ if (op == CMP_LT_F || op == CMP_LE_F || op == CMP_GT_F || op == CMP_GE_F ||
+ ((op == CMP_EQ || op == CMP_NE) && pcg_type_is_fp(pcg_top_type(p)))) {
if (pcg_emit_enabled(p)) cfree_cg_fp_cmp(p->cg, pcg_fp_cmp(op));
} else {
if (pcg_emit_enabled(p)) cfree_cg_int_cmp(p->cg, pcg_int_cmp(op));
diff --git a/lang/c/parse/parse_expr.c b/lang/c/parse/parse_expr.c
@@ -197,7 +197,7 @@ static const Type* int_literal_type(Parser* p, const Tok* t) {
}
}
-static double parse_float_literal(Parser* p, const Tok* t) {
+double parse_float_literal(Parser* p, const Tok* t) {
size_t len = 0;
const char* s = pool_str(p->pool, t->spelling, &len);
size_t i = 0;
@@ -463,33 +463,112 @@ static u32 cint_bits(Parser* p, const Type* ty) {
return sz * 8u;
}
-static u64 cint_mask_for_bits(u32 bits) {
- if (bits >= 64) return ~0ull;
- return (1ull << bits) - 1ull;
-}
-
static int cint_signed(Parser* p, const Type* ty) {
if (!ty) return 1;
return c_abi_type_info(p->abi, ty).signed_ != 0;
}
-static CConstInt cint_make(Parser* p, const Type* ty, u64 bits) {
+/* Clear every bit of *v at position `bits` and above, treating lo/hi as one
+ * 128-bit value. Widths of 128 or more leave *v unchanged. */
+static void cint_mask_to_bits(CConstInt* v, u32 bits) {
+  if (bits >= 128) return;
+  if (bits >= 64) {
+    u64 hi_keep = (1ull << (bits - 64u)) - 1ull;
+    v->hi &= hi_keep;
+    return;
+  }
+  v->lo &= (1ull << bits) - 1ull;
+  v->hi = 0;
+}
+
+static CConstInt cint_make_u64(Parser* p, const Type* ty, u64 bits) {
CConstInt v;
u32 nb;
if (!ty) ty = ty_int(p);
nb = cint_bits(p, ty);
v.type = ty;
- v.bits = bits & cint_mask_for_bits(nb);
- if (ty->kind == TY_BOOL) v.bits = v.bits ? 1u : 0u;
+ v.lo = bits;
+ v.hi = 0;
+ cint_mask_to_bits(&v, nb);
+ if (ty->kind == TY_BOOL) {
+ v.lo = (v.lo || v.hi) ? 1u : 0u;
+ v.hi = 0;
+ }
return v;
}
+/* Build a constant of type `ty` (int when `ty` is NULL) from a lo/hi pair.
+ * The value is truncated to the width of the type; a _Bool constant becomes
+ * 1 when any remaining bit is set and 0 otherwise. */
+static CConstInt cint_make_pair(Parser* p, const Type* ty, u64 lo, u64 hi) {
+  CConstInt out;
+  out.type = ty ? ty : ty_int(p);
+  out.lo = lo;
+  out.hi = hi;
+  cint_mask_to_bits(&out, cint_bits(p, out.type));
+  if (out.type->kind != TY_BOOL) return out;
+  out.lo = (out.lo != 0 || out.hi != 0) ? 1u : 0u;
+  out.hi = 0;
+  return out;
+}
+
+/* True when any bit of the 128-bit value is set. */
+static int cint_nonzero(CConstInt v) {
+  return (v.lo | v.hi) != 0;
+}
+
+/* Bitwise equality of two constants; the `type` fields are not compared. */
+static int cint_eq(CConstInt a, CConstInt b) {
+  if (a.hi != b.hi) return 0;
+  return a.lo == b.lo;
+}
+
+/* Three-way unsigned comparison of two 128-bit values: returns -1, 0, or 1
+ * when `a` is below, equal to, or above `b`. The high halves decide first. */
+static int cint_cmp_u(CConstInt a, CConstInt b) {
+  if (a.hi < b.hi) return -1;
+  if (a.hi > b.hi) return 1;
+  if (a.lo < b.lo) return -1;
+  return a.lo > b.lo ? 1 : 0;
+}
+
+/* 128-bit wrapping addition; the sum is truncated to the width of `ty`. */
+static CConstInt cint_add(Parser* p, const Type* ty, CConstInt a, CConstInt b) {
+  u64 sum_lo = a.lo + b.lo;
+  /* The low half wrapped exactly when the sum is smaller than an operand. */
+  u64 carry = sum_lo < a.lo ? 1u : 0u;
+  u64 sum_hi = a.hi + b.hi + carry;
+  return cint_make_pair(p, ty, sum_lo, sum_hi);
+}
+
+/* 128-bit wrapping subtraction a - b, truncated to the width of `ty`. */
+static CConstInt cint_sub(Parser* p, const Type* ty, CConstInt a, CConstInt b) {
+  /* A borrow leaves the high half when the low subtraction underflows. */
+  u64 borrow = a.lo < b.lo ? 1u : 0u;
+  u64 diff_lo = a.lo - b.lo;
+  u64 diff_hi = a.hi - b.hi - borrow;
+  return cint_make_pair(p, ty, diff_lo, diff_hi);
+}
+
+/* Shift the 128-bit value left by `sh` bits and truncate to the width of
+ * `ty`. Shift counts of 128 or more produce zero. */
+static CConstInt cint_shl(Parser* p, const Type* ty, CConstInt a, u32 sh) {
+  u64 lo = a.lo;
+  u64 hi = a.hi;
+  if (sh >= 128) return cint_make_u64(p, ty, 0);
+  if (sh >= 64) {
+    /* The whole low half moves into the high half. */
+    hi = lo << (sh - 64u);
+    lo = 0;
+  } else if (sh != 0) {
+    /* sh is 1..63 here, so neither 64-bit shift count reaches 64. */
+    hi = (hi << sh) | (lo >> (64u - sh));
+    lo <<= sh;
+  }
+  return cint_make_pair(p, ty, lo, hi);
+}
+
+/* Logical (zero-filling) right shift of the 128-bit value by `sh` bits,
+ * truncated to the width of `ty`. Shift counts of 128 or more produce zero. */
+static CConstInt cint_shr_u(Parser* p, const Type* ty, CConstInt a, u32 sh) {
+  u64 lo = a.lo;
+  u64 hi = a.hi;
+  if (sh >= 128) return cint_make_u64(p, ty, 0);
+  if (sh >= 64) {
+    /* The whole high half moves into the low half. */
+    lo = hi >> (sh - 64u);
+    hi = 0;
+  } else if (sh != 0) {
+    /* sh is 1..63 here, so neither 64-bit shift count reaches 64. */
+    lo = (lo >> sh) | (hi << (64u - sh));
+    hi >>= sh;
+  }
+  return cint_make_pair(p, ty, lo, hi);
+}
+
+/* Two's-complement negation, computed as 0 - a and truncated to `ty`. */
+static CConstInt cint_neg(Parser* p, const Type* ty, CConstInt a) {
+  CConstInt zero = cint_make_u64(p, ty, 0);
+  u64 borrow = zero.lo < a.lo ? 1u : 0u;
+  return cint_make_pair(p, ty, zero.lo - a.lo, zero.hi - a.hi - borrow);
+}
+
+/* Bitwise complement of both halves, truncated to the width of `ty`. */
+static CConstInt cint_bnot(Parser* p, const Type* ty, CConstInt a) {
+  u64 inv_lo = ~a.lo;
+  u64 inv_hi = ~a.hi;
+  return cint_make_pair(p, ty, inv_lo, inv_hi);
+}
+
+/* Shift-and-add multiplication over all 128 bits of `b`. Every partial sum
+ * and every doubling of the addend is truncated to the width of `ty`. */
+static CConstInt cint_mul(Parser* p, const Type* ty, CConstInt a, CConstInt b) {
+  CConstInt acc = cint_make_u64(p, ty, 0);
+  CConstInt addend = a;
+  u32 bit = 0;
+  while (bit < 128) {
+    u64 lane = bit < 64 ? b.lo : b.hi;
+    if ((lane >> (bit & 63u)) & 1ull) acc = cint_add(p, ty, acc, addend);
+    addend = cint_shl(p, ty, addend, 1);
+    ++bit;
+  }
+  return acc;
+}
+
i64 const_int_as_i64(Parser* p, CConstInt v) {
u32 nb = cint_bits(p, v.type);
- u64 mask = cint_mask_for_bits(nb);
- u64 u = v.bits & mask;
+ u64 u = v.lo;
if (cint_signed(p, v.type) && nb < 64) {
+ u64 mask = (1ull << nb) - 1ull;
u64 sign = 1ull << (nb - 1u);
+ u &= mask;
if (u & sign) u |= ~mask;
}
return (i64)u;
@@ -500,7 +579,7 @@ static CConstInt cint_cast(Parser* p, CConstInt v, const Type* ty) {
if (!dst || !type_is_int(dst)) {
perr(p, "integer constant expression cast requires integer type");
}
- return cint_make(p, dst, v.bits);
+ return cint_make_pair(p, dst, v.lo, v.hi);
}
static u32 cint_rank(const Type* ty) {
@@ -583,16 +662,16 @@ static const Type* cint_common_type(Parser* p, const Type* a, const Type* b) {
}
static CConstInt cint_convert(Parser* p, CConstInt v, const Type* ty) {
- return cint_make(p, ty, v.bits);
+ return cint_make_pair(p, ty, v.lo, v.hi);
}
static int cint_truth(Parser* p, CConstInt v) {
(void)p;
- return v.bits != 0;
+ return cint_nonzero(v);
}
static CConstInt cint_bool(Parser* p, int truth) {
- return cint_make(p, ty_int(p), truth ? 1u : 0u);
+ return cint_make_u64(p, ty_int(p), truth ? 1u : 0u);
}
static CConstInt cexpr_mul(Parser* p, SrcLoc loc) {
@@ -615,17 +694,18 @@ static CConstInt cexpr_mul(Parser* p, SrcLoc loc) {
v = cint_convert(p, v, ct);
r = cint_convert(p, r, ct);
if (op == '*') {
- v = cint_make(p, ct, v.bits * r.bits);
+ v = cint_mul(p, ct, v, r);
} else {
- if (r.bits == 0)
+ if (!cint_nonzero(r))
compiler_panic(p->c, loc, op == '/' ? "division by zero in constant"
: "modulo by zero in constant");
if (cint_signed(p, ct)) {
i64 lv = const_int_as_i64(p, v);
i64 rv = const_int_as_i64(p, r);
- v = cint_make(p, ct, op == '/' ? (u64)(lv / rv) : (u64)(lv % rv));
+ v = cint_make_u64(p, ct, op == '/' ? (u64)(lv / rv) : (u64)(lv % rv));
} else {
- v = cint_make(p, ct, op == '/' ? v.bits / r.bits : v.bits % r.bits);
+ v = cint_make_u64(p, ct,
+ op == '/' ? v.lo / r.lo : v.lo % r.lo);
}
}
}
@@ -648,7 +728,7 @@ static CConstInt cexpr_add(Parser* p, SrcLoc loc) {
ct = cint_common_type(p, v.type, r.type);
v = cint_convert(p, v, ct);
r = cint_convert(p, r, ct);
- v = cint_make(p, ct, sub ? v.bits - r.bits : v.bits + r.bits);
+ v = sub ? cint_sub(p, ct, v, r) : cint_add(p, ct, v, r);
}
return v;
}
@@ -675,11 +755,11 @@ static CConstInt cexpr_shift(Parser* p, SrcLoc loc) {
if (left) {
if (cint_signed(p, vt) && const_int_as_i64(p, v) < 0)
perr(p, "left shift of negative value in constant expression");
- v = cint_make(p, vt, v.bits << (u32)sh);
+ v = cint_shl(p, vt, v, (u32)sh);
} else if (cint_signed(p, vt)) {
- v = cint_make(p, vt, (u64)(const_int_as_i64(p, v) >> (u32)sh));
+ v = cint_make_u64(p, vt, (u64)(const_int_as_i64(p, v) >> (u32)sh));
} else {
- v = cint_make(p, vt, v.bits >> (u32)sh);
+ v = cint_shr_u(p, vt, v, (u32)sh);
}
}
return v;
@@ -716,10 +796,11 @@ static CConstInt cexpr_rel(Parser* p, SrcLoc loc) {
: op == '<' ? lv < rv
: lv > rv;
} else {
- res = op == P_LE ? v.bits <= r.bits
- : op == P_GE ? v.bits >= r.bits
- : op == '<' ? v.bits < r.bits
- : v.bits > r.bits;
+ int cmp = cint_cmp_u(v, r);
+ res = op == P_LE ? cmp <= 0
+ : op == P_GE ? cmp >= 0
+ : op == '<' ? cmp < 0
+ : cmp > 0;
}
v = cint_bool(p, res);
}
@@ -742,7 +823,7 @@ static CConstInt cexpr_eq(Parser* p, SrcLoc loc) {
ct = cint_common_type(p, v.type, r.type);
v = cint_convert(p, v, ct);
r = cint_convert(p, r, ct);
- v = cint_bool(p, ne ? v.bits != r.bits : v.bits == r.bits);
+ v = cint_bool(p, ne ? !cint_eq(v, r) : cint_eq(v, r));
}
return v;
}
@@ -756,7 +837,7 @@ static CConstInt cexpr_band(Parser* p, SrcLoc loc) {
ct = cint_common_type(p, v.type, r.type);
v = cint_convert(p, v, ct);
r = cint_convert(p, r, ct);
- v = cint_make(p, ct, v.bits & r.bits);
+ v = cint_make_pair(p, ct, v.lo & r.lo, v.hi & r.hi);
}
return v;
}
@@ -767,7 +848,7 @@ static CConstInt cexpr_bxor(Parser* p, SrcLoc loc) {
const Type* ct = cint_common_type(p, v.type, r.type);
v = cint_convert(p, v, ct);
r = cint_convert(p, r, ct);
- v = cint_make(p, ct, v.bits ^ r.bits);
+ v = cint_make_pair(p, ct, v.lo ^ r.lo, v.hi ^ r.hi);
}
return v;
}
@@ -781,7 +862,7 @@ static CConstInt cexpr_bor(Parser* p, SrcLoc loc) {
ct = cint_common_type(p, v.type, r.type);
v = cint_convert(p, v, ct);
r = cint_convert(p, r, ct);
- v = cint_make(p, ct, v.bits | r.bits);
+ v = cint_make_pair(p, ct, v.lo | r.lo, v.hi | r.hi);
}
return v;
}
@@ -823,13 +904,13 @@ static CConstInt cexpr_unary(Parser* p, SrcLoc loc) {
CConstInt v = cexpr_unary(p, loc);
const Type* pt = cint_promote_type(p, v.type);
v = cint_convert(p, v, pt);
- return cint_make(p, pt, (u64)(-const_int_as_i64(p, v)));
+ return cint_neg(p, pt, v);
}
if (accept_punct(p, '~')) {
CConstInt v = cexpr_unary(p, loc);
const Type* pt = cint_promote_type(p, v.type);
v = cint_convert(p, v, pt);
- return cint_make(p, pt, ~v.bits);
+ return cint_bnot(p, pt, v);
}
if (accept_punct(p, '!')) return cint_bool(p, !cint_truth(p, cexpr_unary(p, loc)));
if (accept_kw(p, KW_SIZEOF)) {
@@ -841,7 +922,7 @@ static CConstInt cexpr_unary(Parser* p, SrcLoc loc) {
const Type* t = parse_type_name(p);
expect_punct(p, ')', "')' after sizeof type-name");
require_sizeof_type(p, t);
- return cint_make(p, ty_size_t(p), c_abi_sizeof(p->abi, t));
+ return cint_make_u64(p, ty_size_t(p), c_abi_sizeof(p->abi, t));
}
}
}
@@ -852,7 +933,7 @@ static CConstInt cexpr_unary(Parser* p, SrcLoc loc) {
require_sizeof_type(p, ty);
i64 sz = (i64)c_abi_sizeof(p->abi, ty);
cg_drop(p->cg);
- return cint_make(p, ty_size_t(p), (u64)sz);
+ return cint_make_u64(p, ty_size_t(p), (u64)sz);
}
}
if (accept_kw(p, KW_ALIGNOF)) {
@@ -863,7 +944,7 @@ static CConstInt cexpr_unary(Parser* p, SrcLoc loc) {
{
const Type* t = parse_type_name(p);
expect_punct(p, ')', "')' after _Alignof type-name");
- return cint_make(p, ty_size_t(p), c_abi_alignof(p->abi, t));
+ return cint_make_u64(p, ty_size_t(p), c_abi_alignof(p->abi, t));
}
}
}
@@ -872,7 +953,7 @@ static CConstInt cexpr_unary(Parser* p, SrcLoc loc) {
const Type* ty = cg_top_type(p->cg);
i64 al = (i64)c_abi_alignof(p->abi, ty);
cg_drop(p->cg);
- return cint_make(p, ty_size_t(p), (u64)al);
+ return cint_make_u64(p, ty_size_t(p), (u64)al);
}
}
if (accept_punct(p, '(')) {
@@ -888,7 +969,7 @@ static CConstInt cexpr_unary(Parser* p, SrcLoc loc) {
}
fv = parse_float_literal(p, &p->cur);
advance(p);
- return cint_make(p, tu, (u64)(i64)fv);
+ return cint_make_u64(p, tu, (u64)(i64)fv);
}
CConstInt v = cexpr_unary(p, loc);
return cint_cast(p, v, t);
@@ -904,12 +985,12 @@ static CConstInt cexpr_unary(Parser* p, SrcLoc loc) {
i64 v = parse_int_literal(p, &p->cur);
const Type* ty = int_literal_type(p, &p->cur);
advance(p);
- return cint_make(p, ty, (u64)v);
+ return cint_make_u64(p, ty, (u64)v);
}
if (p->cur.kind == TOK_CHR) {
i64 v = decode_char_literal(p, &p->cur);
advance(p);
- return cint_make(p, ty_int(p), (u64)v);
+ return cint_make_u64(p, ty_int(p), (u64)v);
}
if (p->cur.kind == TOK_IDENT) {
Sym name = p->cur.v.ident;
@@ -922,13 +1003,13 @@ static CConstInt cexpr_unary(Parser* p, SrcLoc loc) {
expect_punct(p, ',', "',' in __builtin_offsetof");
(void)offsetof_designator(p, root, &off);
expect_punct(p, ')', "')' after __builtin_offsetof");
- return cint_make(p, ty_size_t(p), off);
+ return cint_make_u64(p, ty_size_t(p), off);
}
{
SymEntry* e = scope_lookup(p, name);
if (e && e->kind == SEK_ENUM_CST) {
advance(p);
- return cint_make(p, e->type ? e->type : ty_int(p), (u64)e->v.enum_value);
+ return cint_make_u64(p, e->type ? e->type : ty_int(p), (u64)e->v.enum_value);
}
}
compiler_panic(p->c, loc, "non-constant identifier in constant expression");
@@ -2552,6 +2633,8 @@ static void parse_shift(Parser* p) {
}
advance(p);
to_rvalue(p);
+ if (bop == BO_SHR_S && !c_abi_type_info(p->abi, cg_top_type(p->cg)).signed_)
+ bop = BO_SHR_U;
parse_add(p);
to_rvalue(p);
if (!type_is_int(cg_top2_type(p->cg)) || !type_is_int(cg_top_type(p->cg))) {
@@ -2944,6 +3027,8 @@ void parse_assign_expr(Parser* p) {
}
advance(p);
const Type* lhs = cg_top_type(p->cg);
+ if (compound == BO_SHR_S && !c_abi_type_info(p->abi, lhs).signed_)
+ compound = BO_SHR_U;
{
if (lhs && (lhs->qual & Q_CONST)) {
perr(p, "assignment to const-qualified object");
@@ -2980,6 +3065,7 @@ void parse_assign_expr(Parser* p) {
case BO_XOR: op = '^'; break;
case BO_SHL: op = '<'; break;
case BO_SHR_S: op = '>'; break;
+ case BO_SHR_U: op = '>'; break;
default: op = 0; break;
}
CSemCheck chk =
diff --git a/lang/c/parse/parse_init.c b/lang/c/parse/parse_init.c
@@ -560,6 +560,98 @@ static void encode_uint_le(u8* dst, u32 size, u64 v) {
}
}
+/* Store the low `size` bytes (at most 16) of the 128-bit value hi:lo at
+ * `dst`, least-significant byte first. */
+static void encode_uint128_le(u8* dst, u32 size, u64 lo, u64 hi) {
+  u32 n = size > 16 ? 16 : size;
+  u32 i = 0;
+  /* Bytes 0..7 come from the low lane. */
+  for (; i < n && i < 8u; ++i, lo >>= 8) dst[i] = (u8)(lo & 0xffu);
+  /* Bytes 8..15 come from the high lane. */
+  for (; i < n; ++i, hi >>= 8) dst[i] = (u8)(hi & 0xffu);
+}
+
+/* Widen an IEEE binary64 value to the binary128 bit layout and store the 16
+ * bytes at `out`, least-significant byte first. Zeros, infinities, NaNs
+ * (always emitted with the quiet bit set), and subnormal doubles are
+ * handled; the 52 fraction bits land in the top of the 112-bit fraction. */
+static void encode_binary128_from_double_le(u8 out[16], double value) {
+  union {
+    double d;
+    u64 u;
+  } src;
+  u64 mant;     /* 52-bit binary64 fraction field */
+  u32 dexp;     /* 11-bit binary64 exponent field */
+  u64 qexp = 0; /* 15-bit binary128 exponent field */
+  u64 hi;
+  u64 lo;
+  src.d = value;
+  mant = src.u & 0x000fffffffffffffull;
+  dexp = (u32)((src.u >> 52) & 0x7ffu);
+  hi = src.u & (1ull << 63); /* the sign bit keeps its position */
+  if (dexp == 0x7ffu) {
+    /* Infinity or NaN: all-ones exponent; a NaN also gets the quiet bit. */
+    qexp = 0x7fffu;
+    if (mant) hi |= 1ull << 47;
+  } else if (dexp != 0) {
+    /* Normal number: rebias the exponent from 1023 to 16383. */
+    qexp = (u64)(u32)((i32)dexp - 1023 + 16383);
+  } else if (mant != 0) {
+    /* Subnormal double: normalize so the leading one becomes implicit. */
+    i32 e = -1022;
+    while ((mant & (1ull << 52)) == 0) {
+      mant <<= 1;
+      --e;
+    }
+    mant &= 0x000fffffffffffffull;
+    qexp = (u64)(u32)(e + 16383);
+  }
+  hi |= qexp << 48;
+  hi |= mant >> 4;          /* top 48 fraction bits sit below the exponent */
+  lo = (mant & 0xfu) << 60; /* low 4 fraction bits spill into the low lane */
+  encode_uint128_le(out, 16, lo, hi);
+}
+
+/* Try to evaluate a static initializer for a floating-point object.
+ * Returns 0 without consuming any token when `ty` is not float, double, or
+ * long double, so the caller can fall back to its integer/address path.
+ * Otherwise parses an optionally signed floating or integer literal, writes
+ * its `size`-byte encoding to `dst`, and returns 1.
+ * The value is evaluated as a host double; a 16-byte long double is widened
+ * from that double. Only a (signed) literal is accepted here, not a general
+ * constant expression. */
+static int try_parse_static_float(Parser* p, u8* dst, u32 size,
+                                  const Type* ty) {
+  const Type* uty = type_unqual(p->pool, ty);
+  double value;
+  int negate = 0;
+  if (!uty ||
+      (uty->kind != TY_FLOAT && uty->kind != TY_DOUBLE &&
+       uty->kind != TY_LDOUBLE)) {
+    return 0;
+  }
+  /* Fold leading unary signs so initializers such as `-1.0` are accepted;
+   * in C the sign is a separate token, not part of the literal. */
+  for (;;) {
+    if (accept_punct(p, '-')) {
+      negate = !negate;
+    } else if (!accept_punct(p, '+')) {
+      break;
+    }
+  }
+  if (p->cur.kind != TOK_FLT && p->cur.kind != TOK_NUM)
+    perr(p, "expected floating constant expression");
+  value = p->cur.kind == TOK_FLT ? parse_float_literal(p, &p->cur)
+                                 : (double)parse_int_literal(p, &p->cur);
+  advance(p);
+  if (negate) value = -value;
+  if (uty->kind == TY_FLOAT && size == 4u) {
+    union {
+      float f;
+      u8 b[4];
+    } u;
+    u.f = (float)value;
+    memcpy(dst, u.b, 4);
+    return 1;
+  }
+  /* Covers double, and long double on targets where it is 8 bytes wide. */
+  if ((uty->kind == TY_DOUBLE || uty->kind == TY_LDOUBLE) && size == 8u) {
+    union {
+      double d;
+      u8 b[8];
+    } u;
+    u.d = value;
+    memcpy(dst, u.b, 8);
+    return 1;
+  }
+  if (uty->kind == TY_LDOUBLE && size == 16u) {
+    encode_binary128_from_double_le(dst, value);
+    return 1;
+  }
+  perr(p, "unsupported static floating initializer type");
+  return 0;
+}
+
/* Encode a string literal at *buf+offset for a char-array sub-object. */
static void parse_static_string_at(Parser* p, u8* buf, u32 buflen, u32 offset,
u32 count) {
@@ -608,14 +700,24 @@ typedef struct CStaticConst {
i64 addend;
} CStaticConst;
-static u64 int_bits_for_type(Parser* p, CConstInt v, const Type* ty) {
+static CConstInt int_bits_for_type(Parser* p, CConstInt v, const Type* ty) {
u32 sz = c_abi_sizeof(p->abi, ty);
- u64 bits = v.bits;
+ v.type = ty;
if (sz < 8u) {
- bits &= (1ull << (sz * 8u)) - 1ull;
+ u32 bits = sz * 8u;
+ v.lo &= bits ? ((1ull << bits) - 1ull) : 0;
+ v.hi = 0;
+ } else if (sz == 8u) {
+ v.hi = 0;
+ } else if (sz < 16u) {
+ u32 hi_bits = sz * 8u - 64u;
+ v.hi &= hi_bits ? ((1ull << hi_bits) - 1ull) : 0;
+ }
+ if (ty && ty->kind == TY_BOOL) {
+ v.lo = (v.lo || v.hi) ? 1u : 0u;
+ v.hi = 0;
}
- if (ty && ty->kind == TY_BOOL) bits = bits ? 1u : 0u;
- return bits;
+ return v;
}
static void check_static_integer_initializer_range(Parser* p, const Type* ty,
@@ -635,7 +737,7 @@ static void check_static_integer_initializer_range(Parser* p, const Type* ty,
}
} else {
u64 maxu = (u64)maxv;
- if (v.bits > maxu) {
+ if (v.hi != 0 || v.lo > maxu) {
perr(p, "initializer value overflows destination type");
}
}
@@ -793,7 +895,7 @@ static void parse_static_bitfield_at(Parser* p, u8* buf, u32 buflen,
ones = width >= 64u ? ~(u64)0 : (((u64)1 << width) - 1u);
mask = ones << lsb;
cur = decode_uint_le(buf + storage_off, storage_size);
- val = (int_bits_for_type(p, parsed.int_value, field_ty) & ones) << lsb;
+ val = (int_bits_for_type(p, parsed.int_value, field_ty).lo & ones) << lsb;
cur = (cur & ~mask) | val;
encode_uint_le(buf + storage_off, storage_size, cur);
}
@@ -915,13 +1017,23 @@ void parse_static_init_at(Parser* p, u8* buf, u32 buflen, u32 offset,
u32 sz = c_abi_sizeof(p->abi, ty);
CStaticConst cv;
if (offset + sz > buflen) perr(p, "initializer overflows object");
+ if (try_parse_static_float(p, buf + offset, sz, ty)) {
+ if (had_brace) {
+ accept_punct(p, ',');
+ expect_punct(p, '}', "'}' after scalar initializer");
+ }
+ return;
+ }
cv = parse_static_const(p, ty, cloc);
if (cv.kind == C_STATIC_CONST_ADDR) {
srl_push(p, offset, sz, cv.target, cv.addend);
} else if (cv.kind == C_STATIC_CONST_NULL_PTR) {
encode_int_le(buf + offset, sz, 0);
} else {
- encode_uint_le(buf + offset, sz, int_bits_for_type(p, cv.int_value, ty));
+ {
+ CConstInt bits = int_bits_for_type(p, cv.int_value, ty);
+ encode_uint128_le(buf + offset, sz, bits.lo, bits.hi);
+ }
}
if (had_brace) {
accept_punct(p, ',');
diff --git a/lang/c/parse/parse_priv.h b/lang/c/parse/parse_priv.h
@@ -419,12 +419,14 @@ void parse_cond_expr(Parser* p);
void parse_unary(Parser* p);
typedef struct CConstInt {
const Type* type;
- u64 bits;
+ u64 lo;
+ u64 hi;
} CConstInt;
CConstInt eval_const_int_typed(Parser* p, SrcLoc loc);
i64 eval_const_int(Parser* p, SrcLoc loc);
i64 const_int_as_i64(Parser* p, CConstInt v);
i64 parse_int_literal(Parser* p, const Tok* t);
+double parse_float_literal(Parser* p, const Tok* t);
i64 decode_char_literal(Parser* p, const Tok* t);
u8* decode_string_literal(Parser* p, const Tok* t, size_t* nlen_out);
void to_rvalue(Parser* p);
diff --git a/lang/c/parse/parse_type.c b/lang/c/parse/parse_type.c
@@ -59,6 +59,9 @@ static const struct {
static SrcLoc tok_loc(const Tok* t) { return t->loc; }
+static void attr_canon_range(const char* s, size_t len, const char** out_p,
+ size_t* out_len);
+
static int accept_kw(Parser* p, CKw k) {
if (is_kw(p, &p->cur, k)) {
advance(p);
@@ -67,6 +70,30 @@ static int accept_kw(Parser* p, CKw k) {
return 0;
}
+/* Returns 1 when the spelling of `sym`, after attr_canon_range() has
+ * selected its canonical sub-range, equals the string `want`; returns 0
+ * otherwise or when the symbol has no spelling.
+ * NOTE(review): attr_canon_range presumably strips surrounding underscores
+ * (e.g. `__TI__` -> `TI`) -- confirm against its definition. */
+static int attr_sym_canon_eq(Parser* p, Sym sym, const char* want) {
+  size_t raw_len = 0;
+  const char* raw = pool_str(p->pool, sym, &raw_len);
+  const char* canon;
+  size_t canon_len;
+  if (!raw) return 0;
+  attr_canon_range(raw, raw_len, &canon, &canon_len);
+  if (canon_len != strlen(want)) return 0;
+  return memcmp(canon, want, canon_len) == 0;
+}
+
+/* Apply a `mode` attribute to `base`. The first ATTR_MODE entry with an
+ * argument whose canonical spelling is "TI" replaces the type with the
+ * 128-bit integer primitive: unsigned when `base` is an unsigned integer
+ * type, signed otherwise. With no such attribute `base` is returned as is. */
+static const Type* attrs_apply_type_mode(Parser* p, const Type* base,
+                                         const Attr* attrs) {
+  const Attr* a = attrs;
+  while (a) {
+    if (a->kind == ATTR_MODE && a->nargs != 0 &&
+        attr_sym_canon_eq(p, a->v.sym, "TI")) {
+      const Type* u = type_unqual(p->pool, base);
+      if (u && type_is_int(u) && c_abi_type_info(p->abi, u).signed_ == 0)
+        return type_prim(p->pool, TY_UINT128);
+      return type_prim(p->pool, TY_INT128);
+    }
+    a = a->next;
+  }
+  return base;
+}
+
static CKw ident_kw(const Parser* p, Sym name) {
return ident_kw_inline(p, name);
}
@@ -627,6 +654,7 @@ int parse_decl_specs(Parser* p, DeclSpecs* out) {
out->type = ty_int(p);
}
}
+ out->type = attrs_apply_type_mode(p, out->type, out->attrs);
if (out->type && out->quals) {
out->type = type_qualified(p->pool, out->type,
(u16)(out->type->qual | out->quals));
@@ -1134,6 +1162,7 @@ const Type* parse_declarator_full(Parser* p, const Type* base,
const Type* parse_declarator_full_ex(Parser* p, const Type* base,
int allow_abstract, Sym* name_out,
SrcLoc* loc_out, Attr** attrs_out) {
+ Attr* local_attrs = NULL;
base = parse_pointer_layer(p, base);
Sym name = 0;
@@ -1215,7 +1244,7 @@ const Type* parse_declarator_full_ex(Parser* p, const Type* base,
if (attrs_out)
parse_attrs_into(p, attrs_out);
else
- parse_and_discard_attributes(p);
+ parse_attrs_into(p, &local_attrs);
}
DeclSuffix suffs[8];
@@ -1227,9 +1256,10 @@ const Type* parse_declarator_full_ex(Parser* p, const Type* base,
if (attrs_out)
parse_attrs_into(p, attrs_out);
else
- parse_and_discard_attributes(p);
+ parse_attrs_into(p, &local_attrs);
}
}
+ base = attrs_apply_type_mode(p, base, attrs_out ? *attrs_out : local_attrs);
if (nsuffs == 8 && (is_punct(&p->cur, '[') || is_punct(&p->cur, '('))) {
perr(p, "too many declarator suffixes (raise the cap if needed)");
}
diff --git a/rt/lib/README.md b/rt/lib/README.md
@@ -22,7 +22,7 @@ hand-written `mem/mem.c` is 0BSD; relicense as desired.
| -------------------------- | ----------------------------------------------------------- | --------------------------------------------------- |
| `int/int.c` | Integer helpers needed on every target | All |
| `int32/int32.c` | 64-bit ops synthesized from 32-bit | ILP32 only |
-| `int64/int64.c` | 128-bit ops via `__int128` | LP64 / LLP64 only |
+| `int64/int64.c` | 128-bit ops implemented on explicit 64-bit lanes | LP64 / LLP64 only |
| `fp/fp.c` | Soft-float `sf` (binary32) + `df` (binary64) + sf↔df + `fp_mode` | FPU-less (RV{32,64}I, ARM softfp, WASM) |
| `fp_tf/fp_tf.c` | Soft-float `tf` (binary128) + sf↔tf + df↔tf + i128↔tf | Targets with binary128 long double (e.g. aarch64 `-mlong-double-128`) |
| `fp_ti/fp_ti.c` | `__int128` ↔ sf/df + sf/df → ti fix | LP64 / LLP64 + soft-float |
diff --git a/rt/lib/fp_tf/fp_tf.c b/rt/lib/fp_tf/fp_tf.c
@@ -78,23 +78,253 @@ COMPILER_RT_ABI fp_t __subtf3(fp_t a, fp_t b) {
// ---- multf3.c ----
#define QUAD_PRECISION
#include "fp_lib.h"
-#include "fp_mul_impl.inc"
-COMPILER_RT_ABI fp_t __multf3(fp_t a, fp_t b) { return __mulXf3__(a, b); }
+typedef struct {  // 256-bit unsigned accumulator used for significand products
+du_int limb[4];  // helpers index limb[bit / 64], so limb[0] is least significant
+} cfree_tf_u256;
+
+// Returns 1 when bit number `bit` of `value` is set, 0 otherwise.
+static int cfree_tf_rep_bit(rep_t value, int bit) {
+  const rep_t shifted = value >> (unsigned)bit;
+  return (shifted & 1) ? 1 : 0;
+}
+
+// Returns bit number `bit` of the 256-bit value; positions outside 0..255
+// read as 0.
+static int cfree_tf_u256_bit(const cfree_tf_u256* value, int bit) {
+  du_int limb;
+  if (bit < 0 || bit > 255) return 0;
+  limb = value->limb[bit >> 6];
+  return (int)((limb >> (unsigned)(bit & 63)) & 1u);
+}
+
+// Returns 1 when any bit strictly below position `bit` is set (a sticky
+// test). `bit` <= 0 yields 0; values above 256 are clamped to 256.
+static int cfree_tf_u256_any_below(const cfree_tf_u256* value, int bit) {
+  int limb = 0;
+  if (bit <= 0) return 0;
+  if (bit > 256) bit = 256;
+  // Whole limbs that lie entirely below the position.
+  for (; bit >= 64; bit -= 64, ++limb) {
+    if (value->limb[limb] != 0) return 1;
+  }
+  if (bit == 0) return 0;
+  // Partial limb: only its low `bit` bits count.
+  return (value->limb[limb] & (((du_int)1 << (unsigned)bit) - 1u)) != 0;
+}
+
+// Adds `addend` into limb `index` and ripples any carry into the higher
+// limbs. A carry out of limb 3 is dropped; indices of 4 or more are ignored.
+static void cfree_tf_u256_add_limb(cfree_tf_u256* value, int index,
+                                   du_int addend) {
+  du_int carry = addend;
+  while (carry != 0 && index < 4) {
+    const du_int before = value->limb[index];
+    const du_int sum = before + carry;
+    value->limb[index] = sum;
+    // Unsigned wrap-around means a carry leaves this limb.
+    carry = sum < before ? 1u : 0u;
+    ++index;
+  }
+}
+
+// Adds `sig << shift` into the 256-bit accumulator. The 128-bit significand
+// is split into two 64-bit halves that straddle up to three limbs.
+static void cfree_tf_u256_add_shifted_sig(cfree_tf_u256* product, rep_t sig,
+                                          int shift) {
+  const int limb = shift / 64;
+  const unsigned bits = (unsigned)(shift % 64);
+  const du_int lo = (du_int)sig;
+  const du_int hi = (du_int)(sig >> 64);
+  du_int piece0 = lo;
+  du_int piece1 = hi;
+  du_int piece2 = 0;
+  if (bits != 0) {
+    // bits is 1..63 here, so no 64-bit shift count reaches 64.
+    piece0 = lo << bits;
+    piece1 = (lo >> (64u - bits)) | (hi << bits);
+    piece2 = hi >> (64u - bits);
+  }
+  cfree_tf_u256_add_limb(product, limb, piece0);
+  cfree_tf_u256_add_limb(product, limb + 1, piece1);
+  // A zero piece is a no-op inside cfree_tf_u256_add_limb.
+  cfree_tf_u256_add_limb(product, limb + 2, piece2);
+}
+
+// Full-width product of two significands by shift-and-add: for every set
+// bit of `b` in positions 0..significandBits, `a` shifted by that position
+// is added to the accumulator.
+static cfree_tf_u256 cfree_tf_sig_product(rep_t a, rep_t b) {
+  cfree_tf_u256 acc = {{0, 0, 0, 0}};
+  int bit = 0;
+  while (bit <= significandBits) {
+    if ((b >> (unsigned)bit) & 1)
+      cfree_tf_u256_add_shifted_sig(&acc, a, bit);
+    ++bit;
+  }
+  return acc;
+}
+
+// Extracts bits shift .. shift+significandBits of `product` and rounds to
+// nearest, ties to even, using the bit just below `shift` as the guard and
+// everything beneath it as sticky. The increment may carry into bit
+// significandBits + 1; the caller has to handle that.
+static rep_t cfree_tf_u256_extract_rounded(const cfree_tf_u256* product,
+                                           int shift) {
+  rep_t kept = 0;
+  // Assemble the kept bits from most to least significant.
+  for (int bit = significandBits; bit >= 0; --bit) {
+    kept <<= 1;
+    if (cfree_tf_u256_bit(product, shift + bit)) kept |= 1;
+  }
+  // Guard bit clear: the discarded part is below one half, round down.
+  if (!cfree_tf_u256_bit(product, shift - 1)) return kept;
+  // Above one half, or exactly one half with an odd kept value: round up.
+  if (cfree_tf_u256_any_below(product, shift - 1) || (kept & 1)) ++kept;
+  return kept;
+}
+
+COMPILER_RT_ABI fp_t __multf3(fp_t a, fp_t b) {  // quad-precision a * b, rounded to nearest with ties to even
+const rep_t aRep = toRep(a);
+const rep_t bRep = toRep(b);
+const rep_t aAbs = aRep & absMask;
+const rep_t bAbs = bRep & absMask;
+const rep_t productSign = (aRep ^ bRep) & signBit;  // result sign is the XOR of the operand signs
+int aExponent = (int)((aAbs >> significandBits) & maxExponent);
+int bExponent = (int)((bAbs >> significandBits) & maxExponent);
+int productExponent;
+int productTop;  // index of the highest set bit of the 256-bit product
+int shift;  // product bit that becomes bit 0 of the result significand
+rep_t aSignificand = aAbs & significandMask;
+rep_t bSignificand = bAbs & significandMask;
+cfree_tf_u256 product;
+rep_t resultSignificand;
+
+if (aAbs > infRep) return fromRep(aRep | quietBit);  // a is NaN: return it quieted
+if (bAbs > infRep) return fromRep(bRep | quietBit);  // b is NaN: return it quieted
+if (aAbs == infRep) {
+if (bAbs) return fromRep(infRep | productSign);  // inf * nonzero = signed inf
+return fromRep(qnanRep);  // inf * 0 is invalid
+}
+if (bAbs == infRep) {
+if (aAbs) return fromRep(infRep | productSign);  // nonzero * inf = signed inf
+return fromRep(qnanRep);  // 0 * inf is invalid
+}
+if (!aAbs || !bAbs) return fromRep(productSign);  // zero times finite = signed zero
+
+if (aExponent == 0)
+aExponent = normalize(&aSignificand);  // subnormal operand; NOTE(review): normalize() presumably shifts the leading one up to implicitBit and returns the adjusted exponent -- confirm in fp_lib.h
+else
+aSignificand |= implicitBit;
+if (bExponent == 0)
+bExponent = normalize(&bSignificand);  // same treatment for a subnormal b
+else
+bSignificand |= implicitBit;
+
+product = cfree_tf_sig_product(aSignificand, bSignificand);
+productTop = cfree_tf_u256_bit(&product, 225) ? 225 : 224;  // assumes significandBits == 112: two 113-bit significands give a top bit at 224 or 225
+productExponent = aExponent + bExponent - exponentBias;
+if (productTop == 225) ++productExponent;  // significand product reached 2.0 or more
+
+if (productExponent >= maxExponent) return fromRep(infRep | productSign);  // overflow to infinity
+
+shift = productTop - significandBits;
+if (productExponent <= 0) {  // subnormal result: discard extra low bits and store exponent field 0
+shift += 1 - productExponent;
+productExponent = 0;
+}
+
+resultSignificand = cfree_tf_u256_extract_rounded(&product, shift);
+if (resultSignificand & ((rep_t)1 << (significandBits + 1))) {  // rounding carried out past the implicit bit
+resultSignificand >>= 1;
+++productExponent;
+}
+if (productExponent == 0 && (resultSignificand & implicitBit))  // a subnormal rounded up to the smallest normal
+productExponent = 1;
+if (productExponent >= maxExponent) return fromRep(infRep | productSign);  // rounding pushed the exponent into overflow
+
+return fromRep(productSign | ((rep_t)productExponent << significandBits) |
+(resultSignificand & significandMask));  // the implicit bit is masked off before packing
+}
// ---- divtf3.c ----
#define QUAD_PRECISION
#include "fp_lib.h"
+#include "fp_mode.h"
+
+COMPILER_RT_ABI fp_t __divtf3(fp_t a, fp_t b) {
+ const rep_t aRep = toRep(a);
+ const rep_t bRep = toRep(b);
+ const rep_t aAbs = aRep & absMask;
+ const rep_t bAbs = bRep & absMask;
+ const rep_t quotientSign = (aRep ^ bRep) & signBit;
+ int aExponent = (int)((aAbs >> significandBits) & maxExponent);
+ int bExponent = (int)((bAbs >> significandBits) & maxExponent);
+ rep_t aSignificand = aAbs & significandMask;
+ rep_t bSignificand = bAbs & significandMask;
+ rep_t quotient = 0;
+ rep_t remainder;
+ int writtenExponent;
+
+ if (aAbs > infRep) return fromRep(aRep | quietBit);
+ if (bAbs > infRep) return fromRep(bRep | quietBit);
+ if (aAbs == infRep) {
+ if (bAbs == infRep) return fromRep(qnanRep);
+ return fromRep(infRep | quotientSign);
+ }
+ if (bAbs == infRep) return fromRep(quotientSign);
+ if (!aAbs) {
+ if (!bAbs) return fromRep(qnanRep);
+ return fromRep(quotientSign);
+ }
+ if (!bAbs) return fromRep(infRep | quotientSign);
+
+ if (aExponent == 0)
+ aExponent = normalize(&aSignificand);
+ else
+ aSignificand |= implicitBit;
+ if (bExponent == 0)
+ bExponent = normalize(&bSignificand);
+ else
+ bSignificand |= implicitBit;
+
+ writtenExponent = aExponent - bExponent + exponentBias;
+ if (aSignificand < bSignificand) {
+ aSignificand <<= 1;
+ writtenExponent -= 1;
+ }
-#define NUMBER_OF_HALF_ITERATIONS 4
-#define NUMBER_OF_FULL_ITERATIONS 1
-
-#include "fp_div_impl.inc"
+ remainder = aSignificand;
+ for (int i = 0; i < significandBits + 4; ++i) {
+ quotient <<= 1;
+ if (remainder >= bSignificand) {
+ quotient |= 1;
+ remainder -= bSignificand;
+ }
+ if (i != significandBits + 3)
+ remainder <<= 1;
+ }
+ if (remainder)
+ quotient |= 1;
+
+ if (writtenExponent >= maxExponent)
+ return fromRep(infRep | quotientSign);
+ if (writtenExponent <= 0) {
+ const int shift = 1 - writtenExponent;
+ if (shift >= typeWidth)
+ return fromRep(quotientSign);
+ if (shift > 0) {
+ const bool sticky = (quotient << (typeWidth - shift)) != 0;
+ quotient = (quotient >> shift) | sticky;
+ }
+ writtenExponent = 0;
+ }
-COMPILER_RT_ABI fp_t __divtf3(fp_t a, fp_t b) { return __divXf3__(a, b); }
+ const int roundGuardSticky = quotient & 0x7;
+ rep_t absResult = (quotient >> 3) & significandMask;
+ absResult |= (rep_t)writtenExponent << significandBits;
+
+ switch (__fe_getround()) {
+ case CRT_FE_TONEAREST:
+ if (roundGuardSticky > 0x4)
+ absResult++;
+ if (roundGuardSticky == 0x4)
+ absResult += absResult & 1;
+ break;
+ case CRT_FE_DOWNWARD:
+ if (quotientSign && roundGuardSticky) absResult++;
+ break;
+ case CRT_FE_UPWARD:
+ if (!quotientSign && roundGuardSticky) absResult++;
+ break;
+ case CRT_FE_TOWARDZERO:
+ break;
+ }
+ if (roundGuardSticky)
+ __fe_raise_inexact();
+ return fromRep(absResult | quotientSign);
+}
-#undef NUMBER_OF_HALF_ITERATIONS
-#undef NUMBER_OF_FULL_ITERATIONS
// ---- comparetf2.c ----
#define QUAD_PRECISION
#include "fp_compare_impl.inc"
@@ -114,32 +344,43 @@ COMPILER_RT_ABI CMP_RESULT __unordtf2(fp_t a, fp_t b) {
#define QUAD_PRECISION
#include "fp_lib.h"
-COMPILER_RT_ABI fp_t __floatsitf(si_int a) {
- const int aWidth = sizeof a * CHAR_BIT;
+static int cfree_clz_u32(su_int x) {
+ int n = 0;
+ for (int bit = 31; bit >= 0; --bit) {
+ if ((x >> (unsigned)bit) & 1u) break;
+ ++n;
+ }
+ return n;
+}
- // Handle zero as a special case to protect clz
- if (a == 0) return fromRep(0);
+static int cfree_clz_u64(du_int x) {
+ int n = 0;
+ for (int bit = 63; bit >= 0; --bit) {
+ if ((x >> (unsigned)bit) & 1u) break;
+ ++n;
+ }
+ return n;
+}
+
+static fp_t cfree_tf_from_u64(du_int mag, rep_t sign, int width) {
+ if (!mag) return fromRep(0);
+ int exponent = (width - 1) -
+ (width == 32 ? cfree_clz_u32((su_int)mag)
+ : cfree_clz_u64(mag));
+ int shift = significandBits - exponent;
+ rep_t result = ((rep_t)mag << shift) ^ implicitBit;
+ result |= (rep_t)(exponent + exponentBias) << significandBits;
+ return fromRep(result | sign);
+}
- // All other cases begin by extracting the sign and absolute value of a
+COMPILER_RT_ABI fp_t __floatsitf(si_int a) {
rep_t sign = 0;
- su_int aAbs = (su_int)a;
+ su_int mag = (su_int)a;
if (a < 0) {
sign = signBit;
- aAbs = -aAbs;
+ mag = (su_int)(0u - mag);
}
-
- // Exponent of (fp_t)a is the width of abs(a).
- const int exponent = (aWidth - 1) - clzsi(aAbs);
- rep_t result;
-
- // Shift a into the significand field and clear the implicit bit.
- const int shift = significandBits - exponent;
- result = (rep_t)aAbs << shift ^ implicitBit;
-
- // Insert the exponent
- result += (rep_t)(exponent + exponentBias) << significandBits;
- // Insert the sign bit and return
- return fromRep(result | sign);
+ return cfree_tf_from_u64((du_int)mag, sign, 32);
}
// ---- floatunsitf.c ----
@@ -147,22 +388,7 @@ COMPILER_RT_ABI fp_t __floatsitf(si_int a) {
#include "fp_lib.h"
COMPILER_RT_ABI fp_t __floatunsitf(su_int a) {
- const int aWidth = sizeof a * CHAR_BIT;
-
- // Handle zero as a special case to protect clz
- if (a == 0) return fromRep(0);
-
- // Exponent of (fp_t)a is the width of abs(a).
- const int exponent = (aWidth - 1) - clzsi(a);
- rep_t result;
-
- // Shift a into the significand field and clear the implicit bit.
- const int shift = significandBits - exponent;
- result = (rep_t)a << shift ^ implicitBit;
-
- // Insert the exponent
- result += (rep_t)(exponent + exponentBias) << significandBits;
- return fromRep(result);
+ return cfree_tf_from_u64((du_int)a, 0, 32);
}
// ---- floatditf.c ----
@@ -170,31 +396,13 @@ COMPILER_RT_ABI fp_t __floatunsitf(su_int a) {
#include "fp_lib.h"
COMPILER_RT_ABI fp_t __floatditf(di_int a) {
- const int aWidth = sizeof a * CHAR_BIT;
-
- // Handle zero as a special case to protect clz
- if (a == 0) return fromRep(0);
-
- // All other cases begin by extracting the sign and absolute value of a
rep_t sign = 0;
- du_int aAbs = (du_int)a;
+ du_int mag = (du_int)a;
if (a < 0) {
sign = signBit;
- aAbs = ~(du_int)a + 1U;
+ mag = (du_int)0 - mag;
}
-
- // Exponent of (fp_t)a is the width of abs(a).
- const int exponent = (aWidth - 1) - __builtin_clzll(aAbs);
- rep_t result;
-
- // Shift a into the significand field, rounding if it is a right-shift
- const int shift = significandBits - exponent;
- result = (rep_t)aAbs << shift ^ implicitBit;
-
- // Insert the exponent
- result += (rep_t)(exponent + exponentBias) << significandBits;
- // Insert the sign bit and return
- return fromRep(result | sign);
+ return cfree_tf_from_u64(mag, sign, 64);
}
// ---- floatunditf.c ----
@@ -202,22 +410,7 @@ COMPILER_RT_ABI fp_t __floatditf(di_int a) {
#include "fp_lib.h"
COMPILER_RT_ABI fp_t __floatunditf(du_int a) {
- const int aWidth = sizeof a * CHAR_BIT;
-
- // Handle zero as a special case to protect clz
- if (a == 0) return fromRep(0);
-
- // Exponent of (fp_t)a is the width of abs(a).
- const int exponent = (aWidth - 1) - __builtin_clzll(a);
- rep_t result;
-
- // Shift a into the significand field and clear the implicit bit.
- const int shift = significandBits - exponent;
- result = (rep_t)a << shift ^ implicitBit;
-
- // Insert the exponent
- result += (rep_t)(exponent + exponentBias) << significandBits;
- return fromRep(result);
+ return cfree_tf_from_u64(a, 0, 64);
}
// ---- floattitf.c ----
diff --git a/rt/lib/int64/int64.c b/rt/lib/int64/int64.c
@@ -80,61 +80,165 @@ static inline du_int udiv128by64to64(du_int u1, du_int u0, du_int v,
return udiv128by64to64default(u1, u0, v, r);
}
+static inline int ut_is_zero(utwords a) {
+ return a.s.low == 0 && a.s.high == 0;
+}
+
+static inline int ut_cmp(utwords a, utwords b) {
+ if (a.s.high != b.s.high) return a.s.high < b.s.high ? -1 : 1;
+ if (a.s.low != b.s.low) return a.s.low < b.s.low ? -1 : 1;
+ return 0;
+}
+
+static inline utwords ut_add(utwords a, utwords b) {
+ utwords r;
+ r.s.low = a.s.low + b.s.low;
+ r.s.high = a.s.high + b.s.high + (r.s.low < a.s.low);
+ return r;
+}
+
+static inline utwords ut_sub(utwords a, utwords b) {
+ utwords r;
+ r.s.low = a.s.low - b.s.low;
+ r.s.high = a.s.high - b.s.high - (a.s.low < b.s.low);
+ return r;
+}
+
+static inline utwords ut_neg(utwords a) {
+ utwords z;
+ z.s.low = 0;
+ z.s.high = 0;
+ return ut_sub(z, a);
+}
+
+static inline utwords ut_shl1(utwords a) {
+ utwords r;
+ r.s.low = a.s.low << 1;
+ r.s.high = (a.s.high << 1) | (a.s.low >> 63);
+ return r;
+}
+
+static inline utwords ut_shr1(utwords a) {
+ utwords r;
+ r.s.low = (a.s.low >> 1) | (a.s.high << 63);
+ r.s.high = a.s.high >> 1;
+ return r;
+}
+
+static inline utwords ut_shl(utwords a, unsigned sh) {
+ utwords r;
+ if (sh >= 128u) {
+ r.s.low = 0;
+ r.s.high = 0;
+ } else if (sh == 0) {
+ r = a;
+ } else if (sh >= 64u) {
+ r.s.low = 0;
+ r.s.high = a.s.low << (sh - 64u);
+ } else {
+ r.s.low = a.s.low << sh;
+ r.s.high = (a.s.high << sh) | (a.s.low >> (64u - sh));
+ }
+ return r;
+}
+
+static inline utwords ut_lshr(utwords a, unsigned sh) {
+ utwords r;
+ if (sh >= 128u) {
+ r.s.low = 0;
+ r.s.high = 0;
+ } else if (sh == 0) {
+ r = a;
+ } else if (sh >= 64u) {
+ r.s.low = a.s.high >> (sh - 64u);
+ r.s.high = 0;
+ } else {
+ r.s.low = (a.s.low >> sh) | (a.s.high << (64u - sh));
+ r.s.high = a.s.high >> sh;
+ }
+ return r;
+}
+
+static inline twords t_ashr(twords a, unsigned sh) {
+ twords r;
+ if (sh >= 128u) {
+ r.s.low = a.s.high < 0 ? ~(du_int)0 : 0;
+ r.s.high = a.s.high < 0 ? (di_int)-1 : 0;
+ } else if (sh == 0) {
+ r = a;
+ } else if (sh >= 64u) {
+ r.s.low = (du_int)(a.s.high >> (sh - 64u));
+ r.s.high = a.s.high < 0 ? (di_int)-1 : 0;
+ } else {
+ r.s.low = ((du_int)a.s.high << (64u - sh)) | (a.s.low >> sh);
+ r.s.high = a.s.high >> sh;
+ }
+ return r;
+}
+
+static inline utwords ut_mul(utwords a, utwords b) {
+ utwords r;
+ const int half_bits = (int)(sizeof(du_int) * CHAR_BIT) / 2;
+ const du_int mask = (du_int)~0 >> half_bits;
+ du_int t;
+ r.s.low = (a.s.low & mask) * (b.s.low & mask);
+ t = r.s.low >> half_bits;
+ r.s.low &= mask;
+ t += (a.s.low >> half_bits) * (b.s.low & mask);
+ r.s.low += (t & mask) << half_bits;
+ r.s.high = t >> half_bits;
+ t = r.s.low >> half_bits;
+ r.s.low &= mask;
+ t += (b.s.low >> half_bits) * (a.s.low & mask);
+ r.s.low += (t & mask) << half_bits;
+ r.s.high += t >> half_bits;
+ r.s.high += (a.s.low >> half_bits) * (b.s.low >> half_bits);
+ r.s.high += a.s.high * b.s.low + a.s.low * b.s.high;
+ return r;
+}
+
+static inline void ut_udivmod(utwords n, utwords d, utwords* q, utwords* rem) {
+ utwords quotient;
+ utwords remainder;
+ quotient.s.low = 0;
+ quotient.s.high = 0;
+ remainder.s.low = 0;
+ remainder.s.high = 0;
+ if (ut_is_zero(d)) {
+ if (q) *q = quotient;
+ if (rem) *rem = n;
+ return;
+ }
+ for (int i = 127; i >= 0; --i) {
+ du_int bit =
+ i < 64 ? ((n.s.low >> (unsigned)i) & 1u)
+ : ((n.s.high >> (unsigned)(i - 64)) & 1u);
+ remainder = ut_shl1(remainder);
+ remainder.s.low |= bit;
+ if (ut_cmp(remainder, d) >= 0) {
+ remainder = ut_sub(remainder, d);
+ if (i < 64)
+ quotient.s.low |= (du_int)1 << (unsigned)i;
+ else
+ quotient.s.high |= (du_int)1 << (unsigned)(i - 64);
+ }
+ }
+ if (q) *q = quotient;
+ if (rem) *rem = remainder;
+}
+
// Effects: if rem != 0, *rem = a % b
// Returns: a / b
COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int* rem) {
- const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT;
utwords dividend;
dividend.all = a;
utwords divisor;
divisor.all = b;
utwords quotient;
utwords remainder;
- if (divisor.all > dividend.all) {
- if (rem) *rem = dividend.all;
- return 0;
- }
- // When the divisor fits in 64 bits, we can use an optimized path.
- if (divisor.s.high == 0) {
- remainder.s.high = 0;
- if (dividend.s.high < divisor.s.low) {
- // The result fits in 64 bits.
- quotient.s.low = udiv128by64to64(dividend.s.high, dividend.s.low,
- divisor.s.low, &remainder.s.low);
- quotient.s.high = 0;
- } else {
- // First, divide with the high part to get the remainder in
- // dividend.s.high. After that dividend.s.high < divisor.s.low.
- quotient.s.high = dividend.s.high / divisor.s.low;
- dividend.s.high = dividend.s.high % divisor.s.low;
- quotient.s.low = udiv128by64to64(dividend.s.high, dividend.s.low,
- divisor.s.low, &remainder.s.low);
- }
- if (rem) *rem = remainder.all;
- return quotient.all;
- }
- // 0 <= shift <= 63.
- si_int shift =
- __builtin_clzll(divisor.s.high) - __builtin_clzll(dividend.s.high);
- divisor.all <<= shift;
- quotient.s.high = 0;
- quotient.s.low = 0;
- for (; shift >= 0; --shift) {
- quotient.s.low <<= 1;
- // Branch free version of.
- // if (dividend.all >= divisor.all)
- // {
- // dividend.all -= divisor.all;
- // carry = 1;
- // }
- const ti_int s =
- (ti_int)(divisor.all - dividend.all - 1) >> (n_utword_bits - 1);
- quotient.s.low |= s & 1;
- dividend.all -= divisor.all & s;
- divisor.all >>= 1;
- }
- if (rem) *rem = dividend.all;
+ ut_udivmod(dividend, divisor, &quotient, &remainder);
+ if (rem) *rem = remainder.all;
return quotient.all;
}
@@ -147,20 +251,11 @@ COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int* rem) {
// Precondition: 0 <= b < bits_in_tword
COMPILER_RT_ABI ti_int __ashlti3(ti_int a, int b) {
- const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT);
- twords input;
- twords result;
+ utwords input;
+ utwords result;
input.all = a;
- if (b & bits_in_dword) /* bits_in_dword <= b < bits_in_tword */ {
- result.s.low = 0;
- result.s.high = input.s.low << (b - bits_in_dword);
- } else /* 0 <= b < bits_in_dword */ {
- if (b == 0) return a;
- result.s.low = input.s.low << b;
- result.s.high =
- ((du_int)input.s.high << b) | (input.s.low >> (bits_in_dword - b));
- }
- return result.all;
+ result = ut_shl(input, (unsigned)b);
+ return (ti_int)result.all;
}
// ---- ashrti3.c ----
@@ -171,20 +266,10 @@ COMPILER_RT_ABI ti_int __ashlti3(ti_int a, int b) {
// Precondition: 0 <= b < bits_in_tword
COMPILER_RT_ABI ti_int __ashrti3(ti_int a, int b) {
- const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT);
twords input;
twords result;
input.all = a;
- if (b & bits_in_dword) /* bits_in_dword <= b < bits_in_tword */ {
- // result.s.high = input.s.high < 0 ? -1 : 0
- result.s.high = input.s.high >> (bits_in_dword - 1);
- result.s.low = input.s.high >> (b - bits_in_dword);
- } else /* 0 <= b < bits_in_dword */ {
- if (b == 0) return a;
- result.s.high = input.s.high >> b;
- result.s.low =
- ((du_int)input.s.high << (bits_in_dword - b)) | (input.s.low >> b);
- }
+ result = t_ashr(input, (unsigned)b);
return result.all;
}
@@ -226,19 +311,11 @@ COMPILER_RT_ABI int __ctzti2(ti_int a) {
// Precondition: 0 <= b < bits_in_tword
COMPILER_RT_ABI ti_int __lshrti3(ti_int a, int b) {
- const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT);
utwords input;
utwords result;
input.all = a;
- if (b & bits_in_dword) /* bits_in_dword <= b < bits_in_tword */ {
- result.s.high = 0;
- result.s.low = input.s.high >> (b - bits_in_dword);
- } else /* 0 <= b < bits_in_dword */ {
- if (b == 0) return a;
- result.s.high = input.s.high >> b;
- result.s.low = (input.s.high << (bits_in_dword - b)) | (input.s.low >> b);
- }
- return result.all;
+ result = ut_lshr(input, (unsigned)b);
+ return (ti_int)result.all;
}
// ---- multi3.c ----
@@ -268,14 +345,13 @@ static ti_int __mulddi3(du_int a, du_int b) {
// Returns: a * b
COMPILER_RT_ABI ti_int __multi3(ti_int a, ti_int b) {
- twords x;
- x.all = a;
- twords y;
- y.all = b;
- twords r;
- r.all = __mulddi3(x.s.low, y.s.low);
- r.s.high += x.s.high * y.s.low + x.s.low * y.s.high;
- return r.all;
+ utwords x;
+ utwords y;
+ utwords r;
+ x.all = (tu_int)a;
+ y.all = (tu_int)b;
+ r = ut_mul(x, y);
+ return (ti_int)r.all;
}
// ---- negti2.c ----
@@ -284,9 +360,111 @@ COMPILER_RT_ABI ti_int __multi3(ti_int a, ti_int b) {
// Returns: -a
COMPILER_RT_ABI ti_int __negti2(ti_int a) {
- // Note: this routine is here for API compatibility; any sane compiler
- // should expand it inline.
- return -(tu_int)a;
+ utwords x;
+ utwords r;
+ x.all = (tu_int)a;
+ r = ut_neg(x);
+ return (ti_int)r.all;
+}
+
+COMPILER_RT_ABI ti_int __cfree_addti3(ti_int a, ti_int b) {
+ utwords x;
+ utwords y;
+ utwords r;
+ x.all = (tu_int)a;
+ y.all = (tu_int)b;
+ r = ut_add(x, y);
+ return (ti_int)r.all;
+}
+
+COMPILER_RT_ABI ti_int __cfree_subti3(ti_int a, ti_int b) {
+ utwords x;
+ utwords y;
+ utwords r;
+ x.all = (tu_int)a;
+ y.all = (tu_int)b;
+ r = ut_sub(x, y);
+ return (ti_int)r.all;
+}
+
+COMPILER_RT_ABI ti_int __cfree_andti3(ti_int a, ti_int b) {
+ utwords x;
+ utwords y;
+ utwords r;
+ x.all = (tu_int)a;
+ y.all = (tu_int)b;
+ r.s.low = x.s.low & y.s.low;
+ r.s.high = x.s.high & y.s.high;
+ return (ti_int)r.all;
+}
+
+COMPILER_RT_ABI ti_int __cfree_orti3(ti_int a, ti_int b) {
+ utwords x;
+ utwords y;
+ utwords r;
+ x.all = (tu_int)a;
+ y.all = (tu_int)b;
+ r.s.low = x.s.low | y.s.low;
+ r.s.high = x.s.high | y.s.high;
+ return (ti_int)r.all;
+}
+
+COMPILER_RT_ABI ti_int __cfree_xorti3(ti_int a, ti_int b) {
+ utwords x;
+ utwords y;
+ utwords r;
+ x.all = (tu_int)a;
+ y.all = (tu_int)b;
+ r.s.low = x.s.low ^ y.s.low;
+ r.s.high = x.s.high ^ y.s.high;
+ return (ti_int)r.all;
+}
+
+COMPILER_RT_ABI ti_int __cfree_notti3(ti_int a) {
+ utwords x;
+ utwords r;
+ x.all = (tu_int)a;
+ r.s.low = ~x.s.low;
+ r.s.high = ~x.s.high;
+ return (ti_int)r.all;
+}
+
+COMPILER_RT_ABI ti_int __cfree_sext64ti(di_int a) {
+ twords r;
+ r.s.low = (du_int)a;
+ r.s.high = a < 0 ? -1 : 0;
+ return r.all;
+}
+
+COMPILER_RT_ABI ti_int __cfree_zext64ti(du_int a) {
+ utwords r;
+ r.s.low = a;
+ r.s.high = 0;
+ return (ti_int)r.all;
+}
+
+COMPILER_RT_ABI si_int __cfree_cmpti2(ti_int a, ti_int b) {
+ twords x;
+ twords y;
+ x.all = a;
+ y.all = b;
+ if (x.s.high < y.s.high) return -1;
+ if (x.s.high > y.s.high) return 1;
+ if (x.s.low < y.s.low) return -1;
+ if (x.s.low > y.s.low) return 1;
+ return 0;
+}
+
+COMPILER_RT_ABI si_int __cfree_ucmpti2(tu_int a, tu_int b) {
+ utwords x;
+ utwords y;
+ x.all = a;
+ y.all = b;
+ if (x.s.high < y.s.high) return -1;
+ if (x.s.high > y.s.high) return 1;
+ if (x.s.low < y.s.low) return -1;
+ if (x.s.low > y.s.low) return 1;
+ return 0;
}
// Callers of __udivmodti4:
@@ -316,16 +494,27 @@ COMPILER_RT_ABI tu_int __umodti3(tu_int a, tu_int b) {
// Returns: a / b, *rem = a % b
COMPILER_RT_ABI ti_int __divmodti4(ti_int a, ti_int b, ti_int* rem) {
- const int bits_in_tword_m1 = (int)(sizeof(ti_int) * CHAR_BIT) - 1;
- ti_int s_a = a >> bits_in_tword_m1; // s_a = a < 0 ? -1 : 0
- ti_int s_b = b >> bits_in_tword_m1; // s_b = b < 0 ? -1 : 0
- a = (tu_int)(a ^ s_a) - s_a; // negate if s_a == -1
- b = (tu_int)(b ^ s_b) - s_b; // negate if s_b == -1
- s_b ^= s_a; // sign of quotient
- tu_int r;
- ti_int q = (__udivmodti4(a, b, &r) ^ s_b) - s_b; // negate if s_b == -1
- *rem = (r ^ s_a) - s_a; // negate if s_a == -1
- return q;
+ twords sa;
+ twords sb;
+ utwords ua;
+ utwords ub;
+ utwords uq;
+ utwords ur;
+ int neg_a;
+ int neg_b;
+ sa.all = a;
+ sb.all = b;
+ neg_a = sa.s.high < 0;
+ neg_b = sb.s.high < 0;
+ ua.all = (tu_int)a;
+ ub.all = (tu_int)b;
+ if (neg_a) ua = ut_neg(ua);
+ if (neg_b) ub = ut_neg(ub);
+ ut_udivmod(ua, ub, &uq, &ur);
+ if (neg_a != neg_b) uq = ut_neg(uq);
+ if (neg_a) ur = ut_neg(ur);
+ if (rem) *rem = (ti_int)ur.all;
+ return (ti_int)uq.all;
}
// ---- divti3.c ----
@@ -333,14 +522,9 @@ COMPILER_RT_ABI ti_int __divmodti4(ti_int a, ti_int b, ti_int* rem) {
// Returns: a / b
-#define fixint_t ti_int
-#define fixuint_t tu_int
-#define INT_DIV_SUFFIX divti3
-#define COMPUTE_UDIV(a, b) __udivmodti4((a), (b), (tu_int*)0)
-#include "int_div_impl.inc"
-
COMPILER_RT_ABI ti_int __divti3(ti_int a, ti_int b) {
- return __divXi3_divti3(a, b);
+ ti_int r;
+ return __divmodti4(a, b, &r);
}
// ---- modti3.c ----
@@ -348,12 +532,8 @@ COMPILER_RT_ABI ti_int __divti3(ti_int a, ti_int b) {
// Returns: a % b
-#define fixint_t ti_int
-#define fixuint_t tu_int
-#define INT_DIV_SUFFIX modti3
-#define ASSIGN_UMOD(res, a, b) __udivmodti4((a), (b), &(res))
-#include "int_div_impl.inc"
-
COMPILER_RT_ABI ti_int __modti3(ti_int a, ti_int b) {
- return __modXi3_modti3(a, b);
+ ti_int r;
+ (void)__divmodti4(a, b, &r);
+ return r;
}
diff --git a/src/abi/abi_aapcs64.c b/src/abi/abi_aapcs64.c
@@ -20,6 +20,23 @@
static void classify_scalar(TargetABI* a, CfreeCgTypeId t, ABIArgInfo* out) {
ABITypeInfo ti = abi_internal_type_info(a, t);
+ if (ti.scalar_kind == ABI_SC_INT && ti.size == 16) {
+ ABIArgPart* parts = arena_array(a->c->tu, ABIArgPart, 2);
+ memset(parts, 0, sizeof(ABIArgPart) * 2);
+ for (u32 i = 0; i < 2; ++i) {
+ parts[i].cls = ABI_CLASS_INT;
+ parts[i].loc = ABI_LOC_REG;
+ parts[i].size = 8;
+ parts[i].align = 8;
+ parts[i].src_offset = i * 8;
+ }
+ out->kind = ABI_ARG_DIRECT;
+ out->flags = ABI_AF_NONE;
+ out->parts = parts;
+ out->nparts = 2;
+ out->indirect_align = 0;
+ return;
+ }
out->kind = ABI_ARG_DIRECT;
out->flags = ABI_AF_NONE;
out->indirect_align = 0;
diff --git a/src/abi/abi_rv64.c b/src/abi/abi_rv64.c
@@ -20,7 +20,8 @@
static void classify_scalar(TargetABI* a, CfreeCgTypeId t, ABIArgInfo* out) {
ABITypeInfo ti = abi_internal_type_info(a, t);
- if (ti.scalar_kind == ABI_SC_FLOAT && ti.size == 16) {
+ if (ti.size == 16 &&
+ (ti.scalar_kind == ABI_SC_INT || ti.scalar_kind == ABI_SC_FLOAT)) {
ABIArgPart* parts = arena_array(a->c->tu, ABIArgPart, 2);
memset(parts, 0, sizeof(ABIArgPart) * 2);
parts[0].cls = ABI_CLASS_INT;
diff --git a/src/api/cg.c b/src/api/cg.c
@@ -1303,6 +1303,17 @@ static int api_is_f128_type(Compiler *c, CfreeCgTypeId ty) {
return cg && cg->kind == CFREE_CG_TYPE_FLOAT && cg->fp.width == 128;
}
+static int api_is_i128_type(Compiler *c, CfreeCgTypeId ty) {
+ const CgType *cg;
+ ty = api_unalias_type(c, ty);
+ cg = cg_type_get(c, ty);
+ return cg && cg->kind == CFREE_CG_TYPE_INT && cg->integer.width == 128;
+}
+
+static int api_is_wide16_scalar_type(Compiler *c, CfreeCgTypeId ty) {
+ return api_is_f128_type(c, ty) || api_is_i128_type(c, ty);
+}
+
static Operand api_op_imm(i64 v, CfreeCgTypeId ty) {
Operand o;
memset(&o, 0, sizeof o);
@@ -2105,7 +2116,7 @@ static void api_release_arg_storage(CfreeCg *g, Operand *storage) {
api_free_reg(g, storage->v.reg, storage->cls);
} else if (storage->kind == OPK_LOCAL && storage->cls < 3) {
CfreeCgTypeId ty = storage->type;
- if (cg_type_is_aggregate(g->c, ty) || api_is_f128_type(g->c, ty))
+ if (cg_type_is_aggregate(g->c, ty) || api_is_wide16_scalar_type(g->c, ty))
return;
api_return_spill_slot(g, storage->v.frame_slot, storage->cls);
} else if (storage->kind == OPK_INDIRECT) {
@@ -3189,7 +3200,8 @@ static void api_encode_binary128_from_double(CfreeCg *g, double value,
double d;
u64 u;
} in;
- unsigned __int128 rep = 0;
+ u64 lo = 0;
+ u64 hi = 0;
u64 frac;
u32 sign;
u32 exp;
@@ -3198,12 +3210,13 @@ static void api_encode_binary128_from_double(CfreeCg *g, double value,
exp = (u32)((in.u >> 52) & 0x7ffu);
frac = in.u & 0x000fffffffffffffull;
if (sign)
- rep |= ((unsigned __int128)1) << 127;
+ hi |= 1ull << 63;
if (exp == 0x7ffu) {
- rep |= ((unsigned __int128)0x7fffu) << 112;
+ hi |= (u64)0x7fffu << 48;
if (frac) {
- rep |= ((unsigned __int128)frac) << (112u - 52u);
- rep |= ((unsigned __int128)1) << 111;
+ lo |= (frac & 0xfu) << 60;
+ hi |= frac >> 4;
+ hi |= 1ull << 47;
}
} else if (exp != 0 || frac != 0) {
i32 e;
@@ -3219,12 +3232,20 @@ static void api_encode_binary128_from_double(CfreeCg *g, double value,
} else {
e = (i32)exp - 1023;
}
- rep |= ((unsigned __int128)(u32)(e + 16383)) << 112;
- rep |= ((unsigned __int128)frac) << (112u - 52u);
+ hi |= (u64)(u32)(e + 16383) << 48;
+ lo |= (frac & 0xfu) << 60;
+ hi |= frac >> 4;
}
for (u32 i = 0; i < 16; ++i) {
- u32 shift = g->c->target.big_endian ? (15u - i) * 8u : i * 8u;
- out[i] = (u8)(rep >> shift);
+ if (g->c->target.big_endian) {
+ u64 lane = i < 8u ? hi : lo;
+ u32 shift = (7u - (i & 7u)) * 8u;
+ out[i] = (u8)(lane >> shift);
+ } else {
+ u64 lane = i < 8u ? lo : hi;
+ u32 shift = (i & 7u) * 8u;
+ out[i] = (u8)(lane >> shift);
+ }
}
}
@@ -3238,8 +3259,8 @@ static ApiSValue api_make_f128_const(CfreeCg *g, double value,
return api_make_lv(api_op_local(slot, ty), ty);
}
-static ApiSValue api_f128_materialize_lvalue(CfreeCg *g, ApiSValue *v,
- CfreeCgTypeId ty) {
+static ApiSValue api_wide16_materialize_lvalue(CfreeCg *g, ApiSValue *v,
+ CfreeCgTypeId ty) {
if (v->op.kind == OPK_LOCAL || v->op.kind == OPK_INDIRECT) {
v->type = ty;
v->op.type = ty;
@@ -3283,7 +3304,7 @@ static ApiSValue api_f128_materialize_lvalue(CfreeCg *g, ApiSValue *v,
return api_make_lv(api_op_local(slot, ty), ty);
}
compiler_panic(g->c, g->cur_loc,
- "CfreeCg: binary128 value is not addressable (kind %u, op %u)",
+ "CfreeCg: 16-byte scalar value is not addressable (kind %u, op %u)",
(unsigned)v->kind, (unsigned)v->op.kind);
return *v;
}
@@ -3339,7 +3360,7 @@ static int api_local_requires_memory(CfreeCg *g, CfreeCgTypeId ty,
CfreeCgLocalAttrs attrs) {
if (api_source_flags_addr_taken(attrs.flags))
return 1;
- if (api_is_f128_type(g->c, ty))
+ if (api_is_wide16_scalar_type(g->c, ty))
return 1;
return !(cg_type_is_int(g->c, ty) || cg_type_is_float(g->c, ty) ||
cg_type_is_ptr(g->c, ty));
@@ -3805,7 +3826,7 @@ void cfree_cg_load(CfreeCg *g, CfreeCgMemAccess access) {
return;
}
api_require_scalar_mem_type(g, "load", ty);
- if (api_is_f128_type(g->c, ty)) {
+ if (api_is_wide16_scalar_type(g->c, ty)) {
v.type = ty;
v.op.type = ty;
api_push(g, v);
@@ -3968,7 +3989,7 @@ void cfree_cg_store(CfreeCg *g, CfreeCgMemAccess access) {
return;
}
api_validate_memory_value(g, "store", ty, api_sv_type(&rv));
- if (api_is_f128_type(g->c, ty)) {
+ if (api_is_wide16_scalar_type(g->c, ty)) {
if (lv.source_local != CFREE_CG_LOCAL_NONE) {
api_local_const_clear(api_local_from_handle(g, lv.source_local));
} else if (lv.op.kind == OPK_INDIRECT || lv.op.kind == OPK_GLOBAL ||
@@ -3986,6 +4007,35 @@ void cfree_cg_store(CfreeCg *g, CfreeCgMemAccess access) {
T->copy_bytes(T, dst_addr, src_addr, agg);
api_free_reg(g, dst_addr.v.reg, RC_INT);
api_free_reg(g, src_addr.v.reg, RC_INT);
+ } else if (rv.op.kind == OPK_IMM) {
+ u8 bytes[16];
+ u64 lo = (u64)rv.op.v.imm;
+ u64 hi = rv.op.v.imm < 0 ? ~(u64)0 : 0;
+ memset(bytes, 0, sizeof bytes);
+ for (u32 i = 0; i < 8; ++i) {
+ u32 lo_idx = g->c->target.big_endian ? 15u - i : i;
+ u32 hi_idx = g->c->target.big_endian ? 7u - i : 8u + i;
+ bytes[lo_idx] = (u8)(lo >> (i * 8u));
+ bytes[hi_idx] = (u8)(hi >> (i * 8u));
+ }
+ if (lv.op.kind == OPK_LOCAL) {
+ api_store_f128_bytes(g, lv.op.v.frame_slot, ty, bytes);
+ } else {
+ FrameSlot slot = api_f128_temp_slot(g, ty);
+ ApiSValue tmp = api_make_lv(api_op_local(slot, ty), ty);
+ CfreeCgTypeId ptr_ty = cg_type_ptr_to(g->c, ty);
+ Operand dst_addr = api_lvalue_addr(g, &lv, ptr_ty);
+ Operand src_addr;
+ AggregateAccess agg;
+ api_store_f128_bytes(g, slot, ty, bytes);
+ src_addr = api_lvalue_addr(g, &tmp, ptr_ty);
+ memset(&agg, 0, sizeof agg);
+ agg.size = 16;
+ agg.align = access.align ? access.align : 16;
+ T->copy_bytes(T, dst_addr, src_addr, agg);
+ api_free_reg(g, dst_addr.v.reg, RC_INT);
+ api_free_reg(g, src_addr.v.reg, RC_INT);
+ }
} else {
src = api_force_reg(g, &rv, ty);
T->store(T, lv.op, src, api_mem_from_access(g, &lv.op, access));
@@ -4113,6 +4163,12 @@ void cfree_cg_rot3(CfreeCg *g) {
* Arithmetic / compare / convert
* ============================================================ */
+static const char *api_i128_binop_helper(BinOp op);
+static int api_i128_cmp_is_unsigned(CmpOp op);
+static void api_cg_cmp(CfreeCg *g, CmpOp cop);
+static void api_f128_call_unary(CfreeCg *g, const char *name,
+ CfreeCgTypeId ret, CfreeCgTypeId param);
+
static void api_cg_binop(CfreeCg *g, BinOp iop, u32 flags) {
ApiSValue b, a;
CGTarget *T;
@@ -4129,6 +4185,22 @@ static void api_cg_binop(CfreeCg *g, BinOp iop, u32 flags) {
a = api_pop(g);
ty = a.type ? a.type : b.type;
+ if (api_is_i128_type(g->c, ty)) {
+ CfreeCgTypeId i128 = builtin_id(CFREE_CG_BUILTIN_I128);
+ CfreeCgTypeId i32 = builtin_id(CFREE_CG_BUILTIN_I32);
+ CfreeCgTypeId ps[2];
+ ApiSValue args[2];
+ const char *name = api_i128_binop_helper(iop);
+ if (!name)
+ compiler_panic(g->c, g->cur_loc, "CfreeCg: i128 binop unsupported");
+ args[0] = a;
+ args[1] = b;
+ ps[0] = i128;
+ ps[1] = (iop == BO_SHL || iop == BO_SHR_U || iop == BO_SHR_S) ? i32 : i128;
+ api_runtime_call_values(g, name, i128, ps, 2, args);
+ return;
+ }
+
if (!flags && api_sv_op_is(&a, OPK_IMM) && api_sv_op_is(&b, OPK_IMM) &&
api_try_fold_int_binop(g, iop, ty, a.op.v.imm, b.op.v.imm, &folded)) {
api_release(g, &a);
@@ -4195,6 +4267,27 @@ static void api_cg_unop(CfreeCg *g, UnOp iop, u32 flags) {
a = api_pop(g);
ty = a.type ? a.type : a.op.type;
+ if (api_is_i128_type(g->c, ty)) {
+ CfreeCgTypeId i128 = builtin_id(CFREE_CG_BUILTIN_I128);
+ CfreeCgTypeId i32 = builtin_id(CFREE_CG_BUILTIN_I32);
+ if (iop == UO_NEG || iop == UO_BNOT) {
+ const char *name = (iop == UO_NEG) ? "__negti2" : "__cfree_notti3";
+ api_push(g, a);
+ api_f128_call_unary(g, name, i128, i128);
+ return;
+ }
+ if (iop == UO_NOT) {
+ CfreeCgTypeId ps[2] = {i128, i128};
+ ApiSValue args[2];
+ args[0] = a;
+ args[1] = api_make_sv(api_op_imm(0, i128), i128);
+ api_runtime_call_values(g, "__cfree_ucmpti2", i32, ps, 2, args);
+ cfree_cg_push_int(g, 0, i32);
+ api_cg_cmp(g, CMP_EQ);
+ return;
+ }
+ }
+
if (!flags && api_sv_op_is(&a, OPK_IMM) &&
api_try_fold_int_unop(g, iop, ty, a.op.v.imm, &folded)) {
api_release(g, &a);
@@ -4242,6 +4335,34 @@ static void api_cg_cmp(CfreeCg *g, CmpOp cop) {
opty = a.type ? a.type : b.type;
i32 = builtin_id(CFREE_CG_BUILTIN_I32);
+ if (api_is_i128_type(g->c, opty)) {
+ CfreeCgTypeId i128 = builtin_id(CFREE_CG_BUILTIN_I128);
+ CfreeCgTypeId ps[2] = {i128, i128};
+ ApiSValue args[2];
+ CmpOp icmp = CMP_EQ;
+ const char *name = api_i128_cmp_is_unsigned(cop) ? "__cfree_ucmpti2"
+ : "__cfree_cmpti2";
+ switch (cop) {
+ case CMP_EQ: icmp = CMP_EQ; break;
+ case CMP_NE: icmp = CMP_NE; break;
+ case CMP_LT_S:
+ case CMP_LT_U: icmp = CMP_LT_S; break;
+ case CMP_LE_S:
+ case CMP_LE_U: icmp = CMP_LE_S; break;
+ case CMP_GT_S:
+ case CMP_GT_U: icmp = CMP_GT_S; break;
+ case CMP_GE_S:
+ case CMP_GE_U: icmp = CMP_GE_S; break;
+ default: icmp = CMP_EQ; break;
+ }
+ args[0] = a;
+ args[1] = b;
+ api_runtime_call_values(g, name, i32, ps, 2, args);
+ cfree_cg_push_int(g, 0, i32);
+ api_cg_cmp(g, icmp);
+ return;
+ }
+
if (api_sv_op_is(&a, OPK_IMM) && api_sv_op_is(&b, OPK_IMM) &&
api_try_fold_int_cmp(g, cop, opty, a.op.v.imm, b.op.v.imm, &folded)) {
api_release(g, &a);
@@ -4294,6 +4415,118 @@ static void api_cg_convert_kind(CfreeCg *g, CfreeCgTypeId dst_type,
api_push(g, v);
return;
}
+ if (api_is_i128_type(g->c, sty) && api_type_is_bool(g->c, dty) &&
+ ck != CV_BITCAST) {
+ CfreeCgTypeId i128 = builtin_id(CFREE_CG_BUILTIN_I128);
+ CfreeCgTypeId i32 = builtin_id(CFREE_CG_BUILTIN_I32);
+ CfreeCgTypeId ps[2] = {i128, i128};
+ ApiSValue args[2];
+ ApiSValue r;
+ args[0] = v;
+ args[1] = api_make_sv(api_op_imm(0, i128), i128);
+ api_runtime_call_values(g, "__cfree_ucmpti2", i32, ps, 2, args);
+ cfree_cg_push_int(g, 0, i32);
+ api_cg_cmp(g, CMP_NE);
+ r = api_pop(g);
+ r.type = dty;
+ r.op.type = dty;
+ api_push(g, r);
+ return;
+ }
+ if (api_is_i128_type(g->c, dty) && !api_is_i128_type(g->c, sty) &&
+ ck != CV_BITCAST) {
+ u32 sz = (u32)abi_cg_sizeof(g->c->abi, sty);
+ CfreeCgTypeId i64_ty = builtin_id(CFREE_CG_BUILTIN_I64);
+ FrameSlot slot = api_f128_temp_slot(g, dty);
+ Operand dst_lv = api_op_local(slot, dty);
+ if (api_sv_op_is(&v, OPK_IMM)) {
+ u8 bytes[16];
+ u64 lo = (u64)v.op.v.imm;
+ u64 hi = 0;
+ if (ck == CV_SEXT && sz <= 8) {
+ u32 bits = sz * 8u;
+ u64 mask = bits >= 64u ? ~(u64)0 : ((1ull << bits) - 1ull);
+ u64 sign = 1ull << (bits - 1u);
+ u64 u = lo & mask;
+ if (u & sign)
+ u |= ~mask;
+ lo = u;
+ hi = (u & (1ull << 63)) ? ~(u64)0 : 0;
+ }
+ memset(bytes, 0, sizeof bytes);
+ for (u32 i = 0; i < 8; ++i) {
+ u32 lo_idx = g->c->target.big_endian ? 15u - i : i;
+ u32 hi_idx = g->c->target.big_endian ? 7u - i : 8u + i;
+ bytes[lo_idx] = (u8)(lo >> (i * 8u));
+ bytes[hi_idx] = (u8)(hi >> (i * 8u));
+ }
+ api_store_f128_bytes(g, slot, dty, bytes);
+ api_release(g, &v);
+ api_push(g, api_make_lv(dst_lv, dty));
+ return;
+ }
+ {
+ CfreeCgTypeId ptr_ty = cg_type_ptr_to(g->c, dty);
+ CfreeCgTypeId src_ty = sty;
+ Operand src = api_force_reg(g, &v, sty);
+ Operand low = src;
+ Operand base;
+ Reg low_tmp = REG_NONE;
+ Reg ar;
+ MemAccess ma;
+ memset(&ma, 0, sizeof ma);
+ ma.type = i64_ty;
+ ma.size = 8;
+ ma.align = 8;
+ if (sz < 8) {
+ low_tmp = api_alloc_reg_or_spill(g, RC_INT, i64_ty);
+ low = api_op_reg(low_tmp, i64_ty);
+ T->convert(T, ck == CV_SEXT ? CV_SEXT : CV_ZEXT, low, src);
+ src_ty = i64_ty;
+ } else {
+ low.type = i64_ty;
+ }
+ ar = api_alloc_reg_or_spill(g, RC_INT, ptr_ty);
+ base = api_op_reg(ar, ptr_ty);
+ T->addr_of(T, base, dst_lv);
+ T->store(T, api_op_indirect(ar, 0, i64_ty), low, ma);
+ if (ck == CV_SEXT) {
+ Reg hr = api_alloc_reg_or_spill(g, RC_INT, i64_ty);
+ Operand high = api_op_reg(hr, i64_ty);
+ T->binop(T, BO_SHR_S, high, low, api_op_imm(63, i64_ty));
+ T->store(T, api_op_indirect(ar, 8, i64_ty), high, ma);
+ api_free_reg(g, hr, RC_INT);
+ } else {
+ T->store(T, api_op_indirect(ar, 8, i64_ty), api_op_imm(0, i64_ty), ma);
+ }
+ if (low_tmp != REG_NONE)
+ api_free_reg(g, low_tmp, RC_INT);
+ (void)src_ty;
+ api_free_reg(g, ar, RC_INT);
+ api_release(g, &v);
+ api_push(g, api_make_lv(dst_lv, dty));
+ }
+ return;
+ }
+ if (api_is_i128_type(g->c, sty) && !api_is_i128_type(g->c, dty) &&
+ ck == CV_TRUNC && abi_cg_sizeof(g->c->abi, dty) <= 8) {
+ Reg rr = api_alloc_reg_or_spill(g, RC_INT, dty);
+ Operand dst = api_op_reg(rr, dty);
+ if (api_is_lvalue_sv(&v) || v.op.kind == OPK_LOCAL ||
+ v.op.kind == OPK_INDIRECT || v.op.kind == OPK_GLOBAL) {
+ ApiSValue lv = v;
+ lv.lvalue = 1;
+ T->load(T, dst, lv.op, api_mem_for_lvalue(g, &lv.op, dty));
+ } else if (v.op.kind == OPK_IMM) {
+ T->load_imm(T, dst, v.op.v.imm);
+ } else {
+ compiler_panic(g->c, g->cur_loc,
+ "CfreeCg: unsupported i128 truncation source");
+ }
+ api_release(g, &v);
+ api_push(g, api_make_sv(dst, dty));
+ return;
+ }
if (ck == CV_BITCAST &&
abi_cg_sizeof(g->c->abi, sty) == abi_cg_sizeof(g->c->abi, dst_type) &&
api_type_class(sty) == api_type_class(dty)) {
@@ -4369,6 +4602,34 @@ void cfree_cg_int_cmp(CfreeCg *g, CfreeCgIntCmpOp op) {
api_cg_cmp(g, api_map_int_cmp(op));
}
+/* Name of the helper routine used for 128-bit integer `op`.
+ * Yields NULL for every op without an entry below, which includes the
+ * floating-point BO_F* operators. */
+static const char *api_i128_binop_helper(BinOp op) {
+  const char *helper = NULL;
+  switch (op) {
+  case BO_IADD:  helper = "__cfree_addti3"; break;
+  case BO_ISUB:  helper = "__cfree_subti3"; break;
+  case BO_IMUL:  helper = "__multi3"; break;
+  case BO_SDIV:  helper = "__divti3"; break;
+  case BO_UDIV:  helper = "__udivti3"; break;
+  case BO_SREM:  helper = "__modti3"; break;
+  case BO_UREM:  helper = "__umodti3"; break;
+  case BO_AND:   helper = "__cfree_andti3"; break;
+  case BO_OR:    helper = "__cfree_orti3"; break;
+  case BO_XOR:   helper = "__cfree_xorti3"; break;
+  case BO_SHL:   helper = "__ashlti3"; break;
+  case BO_SHR_U: helper = "__lshrti3"; break;
+  case BO_SHR_S: helper = "__ashrti3"; break;
+  default:       break;
+  }
+  return helper;
+}
+
+static int api_i128_cmp_is_unsigned(CmpOp op) { /* 1 for CMP_{LT,LE,GT,GE}_U, else 0 */
+  return !(op != CMP_LT_U && op != CMP_LE_U && op != CMP_GT_U && op != CMP_GE_U);
+}
+
static const char *api_f128_binop_helper(CfreeCgFpBinOp op) {
switch (op) {
case CFREE_CG_FP_ADD: return "__addtf3";
@@ -5380,6 +5641,19 @@ static void api_branch_if(CfreeCg *g, ApiSValue *v, int branch_when_true,
api_release(g, v);
return;
}
+ if (api_is_i128_type(g->c, ty)) {
+ CfreeCgTypeId i128 = builtin_id(CFREE_CG_BUILTIN_I128);
+ CfreeCgTypeId i32 = builtin_id(CFREE_CG_BUILTIN_I32);
+ CfreeCgTypeId ps[2] = {i128, i128};
+ ApiSValue args[2];
+ ApiSValue cmp;
+ args[0] = *v;
+ args[1] = api_make_sv(api_op_imm(0, i128), i128);
+ api_runtime_call_values(g, "__cfree_ucmpti2", i32, ps, 2, args);
+ cmp = api_pop(g);
+ api_branch_if(g, &cmp, branch_when_true, label);
+ return;
+ }
{
Operand a = api_force_reg(g, v, ty);
Operand zero = api_op_imm(0, ty);
@@ -6112,8 +6386,8 @@ void cfree_cg_call(CfreeCg *g, uint32_t nargs, CfreeCgTypeId fn_type,
avs[idx].type = aty;
avs[idx].abi = is_vararg ? NULL : &abi->params[idx];
int is_aggregate = cg_type_is_aggregate(g->c, aty);
- if (api_is_f128_type(g->c, aty)) {
- ApiSValue lv = api_f128_materialize_lvalue(g, &arg, aty);
+ if (api_is_wide16_scalar_type(g->c, aty)) {
+ ApiSValue lv = api_wide16_materialize_lvalue(g, &arg, aty);
avs[idx].storage = lv.op;
avs[idx].storage.type = aty;
avs[idx].size = 16;
@@ -6150,14 +6424,14 @@ void cfree_cg_call(CfreeCg *g, uint32_t nargs, CfreeCgTypeId fn_type,
if (has_result) {
int ret_is_aggregate = cg_type_is_aggregate(g->c, ret_ty);
- if (ret_is_aggregate || api_is_f128_type(g->c, ret_ty)) {
+ if (ret_is_aggregate || api_is_wide16_scalar_type(g->c, ret_ty)) {
FrameSlotDesc fsd;
memset(&fsd, 0, sizeof fsd);
fsd.type = ret_ty;
fsd.size = abi_cg_sizeof(g->c->abi, ret_ty);
fsd.align = abi_cg_alignof(g->c->abi, ret_ty);
fsd.kind = FS_LOCAL;
- if (ret_is_aggregate || api_is_f128_type(g->c, ret_ty))
+ if (ret_is_aggregate || api_is_wide16_scalar_type(g->c, ret_ty))
fsd.flags = FSF_ADDR_TAKEN;
FrameSlot ret_slot = T->frame_slot(T, &fsd);
desc.ret.storage = api_op_local(ret_slot, ret_ty);
@@ -6300,8 +6574,8 @@ static void api_call_symbol_common(CfreeCg *g, CfreeCgSym sym, uint32_t nargs,
aty = arg.type;
avs[idx].type = aty;
avs[idx].abi = is_vararg ? NULL : &abi->params[idx];
- if (api_is_f128_type(g->c, aty)) {
- ApiSValue lv = api_f128_materialize_lvalue(g, &arg, aty);
+ if (api_is_wide16_scalar_type(g->c, aty)) {
+ ApiSValue lv = api_wide16_materialize_lvalue(g, &arg, aty);
avs[idx].storage = lv.op;
avs[idx].storage.type = aty;
avs[idx].size = 16;
@@ -6330,7 +6604,8 @@ static void api_call_symbol_common(CfreeCg *g, CfreeCgSym sym, uint32_t nargs,
desc.ret.type = ret_ty;
desc.ret.abi = &abi->ret;
if (has_result) {
- if (cg_type_is_aggregate(g->c, ret_ty) || api_is_f128_type(g->c, ret_ty)) {
+ if (cg_type_is_aggregate(g->c, ret_ty) ||
+ api_is_wide16_scalar_type(g->c, ret_ty)) {
FrameSlotDesc fsd;
FrameSlot ret_slot;
memset(&fsd, 0, sizeof fsd);
@@ -6398,8 +6673,8 @@ void cfree_cg_ret(CfreeCg *g) {
T->ret(T, &av);
return;
}
- if (api_is_f128_type(g->c, rty)) {
- ApiSValue lv = api_f128_materialize_lvalue(g, &v, rty);
+ if (api_is_wide16_scalar_type(g->c, rty)) {
+ ApiSValue lv = api_wide16_materialize_lvalue(g, &v, rty);
av.storage = lv.op;
av.storage.type = rty;
av.size = 16;
diff --git a/src/arch/aa64/ops.c b/src/arch/aa64/ops.c
@@ -100,6 +100,11 @@ static RelocKind ldst_lo12_reloc_for(u32 nbytes) {
static void aa_emit_ldr_fp_any(MCEmitter* mc, u32 sidx, u32 rt, u32 rn,
i32 off) {
+ if (off < -256 || off > 255) {
+ aa64_emit_addr_adjust(mc, AA_TMP0, rn, off);
+ rn = AA_TMP0;
+ off = 0;
+ }
if (sidx == 4)
aa64_emit32(mc, aa64_ldur_q(rt, rn, off));
else
@@ -108,6 +113,11 @@ static void aa_emit_ldr_fp_any(MCEmitter* mc, u32 sidx, u32 rt, u32 rn,
static void aa_emit_str_fp_any(MCEmitter* mc, u32 sidx, u32 rt, u32 rn,
i32 off) {
+ if (off < -256 || off > 255) {
+ aa64_emit_addr_adjust(mc, AA_TMP0, rn, off);
+ rn = AA_TMP0;
+ off = 0;
+ }
if (sidx == 4)
aa64_emit32(mc, aa64_stur_q(rt, rn, off));
else
@@ -1271,7 +1281,7 @@ static void aa_call(CGTarget* t, const CGCallDesc* d) {
u32 sidx = size_idx_for_bytes(p->size);
i32 off = base_off + (i32)p->src_offset;
if (p->cls == ABI_CLASS_INT) {
- aa64_emit32(mc, aa64_stur(sidx, src_reg, base_reg, off));
+ aa64_emit_stur_off(mc, sidx, src_reg, base_reg, off, AA_TMP0);
} else {
aa_emit_str_fp_any(mc, sidx, src_reg, base_reg, off);
}
diff --git a/src/arch/rv64/alloc.c b/src/arch/rv64/alloc.c
@@ -368,8 +368,9 @@ void rv_cmp(CGTarget* t, CmpOp op, Operand dst, Operand a_op,
RImpl* a = impl_of(t);
u32 rd = reg_num(dst);
- if (op == CMP_EQ || op == CMP_NE || op == CMP_LT_F || op == CMP_LE_F ||
- op == CMP_GT_F || op == CMP_GE_F) {
+ if ((a_op.cls == RC_FP || b_op.cls == RC_FP) &&
+ (op == CMP_EQ || op == CMP_NE || op == CMP_LT_F || op == CMP_LE_F ||
+ op == CMP_GT_F || op == CMP_GE_F)) {
/* FP compare in fa,fb → rd. Use FLT/FLE/FEQ depending on op. */
int is_d = type_is_fp_double(a_op.type);
u32 fa = reg_num(a_op);
diff --git a/src/arch/rv64/emit.c b/src/arch/rv64/emit.c
@@ -24,8 +24,8 @@ static u32 rv_planned_prologue_words(const RImpl *a) {
u32 n = RV_PROLOGUE_FRAME_WORDS;
if (a->has_sret) ++n;
if (a->is_variadic) n += 8u;
- n += count_mask_regs(a->planned_cs_int_mask, 18u, 27u);
- n += count_mask_regs(a->planned_cs_fp_mask, 18u, 27u);
+ n += 4u * count_mask_regs(a->planned_cs_int_mask, 18u, 27u);
+ n += 4u * count_mask_regs(a->planned_cs_fp_mask, 18u, 27u);
return n ? n : 1u;
}
@@ -250,6 +250,71 @@ static u32 rv_variadic_first_saved_int(const CGFuncDesc *fd) {
return next_int;
}
+/* Append to words[] (bounded by cap, cursor *wi) code computing rd = base + off.
+ * The >imm12 path builds off in rd first, so rd must differ from base there. */
+static void rv_words_addr_adjust(CGTarget *t, u32 *words, u32 cap, u32 *wi,
+                                 u32 rd, u32 base, i32 off) {
+  if (off == 0 && rd == base)
+    return; /* nothing to emit */
+  if (off >= -2048 && off <= 2047) {
+    if (*wi >= cap) goto overflow;
+    words[(*wi)++] = rv_addi(rd, base, off); /* also covers the off == 0 move */
+    return;
+  }
+  /* Round hi so the sign-extended 12-bit lo brings the sum back to off. */
+  i32 hi = (i32)(((i64)off + 0x800) >> 12);
+  /* Multiply in 64 bits instead of `hi << 12`: hi is negative for negative
+   * offsets, and left-shifting a negative value is undefined behaviour. */
+  i32 lo = (i32)((i64)off - (i64)hi * 4096);
+  if (*wi >= cap) goto overflow;
+  words[(*wi)++] = rv_lui(rd, (u32)hi & 0xfffffu);
+  if (lo) {
+    if (*wi >= cap) goto overflow;
+    words[(*wi)++] = rv_addiw(rd, rd, lo);
+  }
+  if (*wi >= cap) goto overflow;
+  words[(*wi)++] = rv_add(rd, base, rd);
+  return;
+
+overflow:
+  compiler_panic(t->c, impl_of(t)->loc,
+                 "rv64: prologue placeholder too small (cap %u)", cap);
+}
+
+/* Queue an rv_sd of `reg` at s0+off into words[]; far offsets go through t0. */
+static void rv_words_store_int_s0(CGTarget *t, u32 *words, u32 cap, u32 *wi,
+                                  u32 reg, i32 off) {
+  u32 base_reg = RV_S0;
+  i32 disp = off;
+  if (off < -2048 || off > 2047) {
+    rv_words_addr_adjust(t, words, cap, wi, RV_T0, RV_S0, off);
+    base_reg = RV_T0;
+    disp = 0;
+  }
+  if (*wi >= cap)
+    compiler_panic(t->c, impl_of(t)->loc,
+                   "rv64: prologue placeholder too small (cap %u)", cap);
+  else
+    words[(*wi)++] = rv_sd(reg, base_reg, disp);
+}
+
+/* Queue an rv_fsd of `reg` at s0+off into words[]; far offsets go through t0. */
+static void rv_words_store_fp_s0(CGTarget *t, u32 *words, u32 cap, u32 *wi,
+                                 u32 reg, i32 off) {
+  u32 base_reg = RV_S0;
+  i32 disp = off;
+  if (off < -2048 || off > 2047) {
+    rv_words_addr_adjust(t, words, cap, wi, RV_T0, RV_S0, off);
+    base_reg = RV_T0;
+    disp = 0;
+  }
+  if (*wi >= cap)
+    compiler_panic(t->c, impl_of(t)->loc,
+                   "rv64: prologue placeholder too small (cap %u)", cap);
+  else
+    words[(*wi)++] = rv_fsd(reg, base_reg, disp);
+}
+
static u32 rv_build_prologue(CGTarget *t, u32 *words, u32 cap,
const RvFrameLayout *fl, const u32 *int_regs,
u32 n_int_saves, const u32 *fp_regs,
@@ -277,14 +342,26 @@ static u32 rv_build_prologue(CGTarget *t, u32 *words, u32 cap,
words[wi++] = rv_add(RV_SP, RV_SP, RV_T0);
}
- if ((i32)fl->fp_pair_off > 2047 ||
- (i32)(fl->fp_pair_off + 8) > 2047) {
- compiler_panic(t->c, a->loc, "rv64: fp_pair_off out of imm12 range");
+ if ((i32)fl->fp_pair_off <= 2039) {
+ if (wi + 3 > cap) goto overflow;
+ words[wi++] = rv_sd(RV_S0, RV_SP, (i32)fl->fp_pair_off);
+ words[wi++] = rv_sd(RV_RA, RV_SP, (i32)fl->fp_pair_off + 8);
+ words[wi++] = rv_addi(RV_S0, RV_SP, (i32)fl->fp_pair_off);
+ } else {
+ i32 off = (i32)fl->fp_pair_off;
+ i32 hi = (i32)(((i64)off + 0x800) >> 12);
+ i32 lo = off - (hi << 12);
+ if (fl->fp_pair_off > 0x7fffffffu)
+ compiler_panic(t->c, a->loc, "rv64: fp_pair_off too large");
+ if (wi + 6 > cap) goto overflow;
+ words[wi++] = rv_lui(RV_T0, (u32)hi & 0xfffffu);
+ if (lo)
+ words[wi++] = rv_addiw(RV_T0, RV_T0, lo);
+ words[wi++] = rv_add(RV_T0, RV_SP, RV_T0);
+ words[wi++] = rv_sd(RV_S0, RV_T0, 0);
+ words[wi++] = rv_sd(RV_RA, RV_T0, 8);
+ words[wi++] = rv_addi(RV_S0, RV_T0, 0);
}
- if (wi + 3 > cap) goto overflow;
- words[wi++] = rv_sd(RV_S0, RV_SP, (i32)fl->fp_pair_off);
- words[wi++] = rv_sd(RV_RA, RV_SP, (i32)fl->fp_pair_off + 8);
- words[wi++] = rv_addi(RV_S0, RV_SP, (i32)fl->fp_pair_off);
/* If sret, spill incoming a0 into the hidden slot. */
if (a->has_sret && a->sret_ptr_slot != FRAME_SLOT_NONE) {
@@ -304,14 +381,12 @@ static u32 rv_build_prologue(CGTarget *t, u32 *words, u32 cap,
for (u32 i = 0; i < n_int_saves; ++i) {
u32 r = int_regs[i];
i32 off = fl->int_save_base - 8 * (i32)i;
- if (wi >= cap) goto overflow;
- words[wi++] = rv_sd(r, RV_S0, off);
+ rv_words_store_int_s0(t, words, cap, &wi, r, off);
}
for (u32 i = 0; i < n_fp_saves; ++i) {
u32 r = fp_regs[i];
i32 off = fl->fp_save_base - 8 * (i32)i;
- if (wi >= cap) goto overflow;
- words[wi++] = rv_fsd(r, RV_S0, off);
+ rv_words_store_fp_s0(t, words, cap, &wi, r, off);
}
return wi;
@@ -424,23 +499,30 @@ void rv_func_end(CGTarget *t) {
for (i32 i = (i32)n_int_saves - 1; i >= 0; --i) {
u32 r = int_regs[i];
i32 off = fl.int_save_base - 8 * (i32)i;
- rv64_emit32(mc, rv_ld(r, RV_S0, off));
+ if (off >= -2048 && off <= 2047) {
+ rv64_emit32(mc, rv_ld(r, RV_S0, off));
+ } else {
+ rv64_emit_addr_adjust(mc, RV_T0, RV_S0, off);
+ rv64_emit32(mc, rv_ld(r, RV_T0, 0));
+ }
}
for (i32 i = (i32)n_fp_saves - 1; i >= 0; --i) {
u32 r = fp_regs[i];
i32 off = fl.fp_save_base - 8 * (i32)i;
- rv64_emit32(mc, rv_fld(r, RV_S0, off));
+ if (off >= -2048 && off <= 2047) {
+ rv64_emit32(mc, rv_fld(r, RV_S0, off));
+ } else {
+ rv64_emit_addr_adjust(mc, RV_T0, RV_S0, off);
+ rv64_emit32(mc, rv_fld(r, RV_T0, 0));
+ }
}
/* Restore sp from s0 first so alloca-induced offsets don't matter.
* After this, sp == its post-prologue value. */
if (a->has_alloca) {
- if ((i32)fl.fp_pair_off > 2047) {
- compiler_panic(t->c, a->loc, "rv64: fp_pair_off too large for alloca");
- }
- rv64_emit32(mc, rv_addi(RV_SP, RV_S0, -(i32)fl.fp_pair_off));
+ rv64_emit_addr_adjust(mc, RV_SP, RV_S0, -(i32)fl.fp_pair_off);
}
- rv64_emit32(mc, rv_ld(RV_S0, RV_SP, (i32)fl.fp_pair_off));
- rv64_emit32(mc, rv_ld(RV_RA, RV_SP, (i32)fl.fp_pair_off + 8));
+ rv64_emit32(mc, rv_ld(RV_RA, RV_S0, 8));
+ rv64_emit32(mc, rv_ld(RV_S0, RV_S0, 0));
emit_sp_addi(mc, (i64)fl.frame_size);
rv64_emit32(mc, rv_ret_());
diff --git a/src/arch/rv64/internal.h b/src/arch/rv64/internal.h
@@ -11,8 +11,8 @@
#include "core/pool.h"
#include "obj/obj.h"
-#define RV_PROLOGUE_WORDS 35u
-#define RV_PROLOGUE_FRAME_WORDS 6u /* worst-case sp adjust + s0/ra + set s0 */
+#define RV_PROLOGUE_WORDS 128u
+#define RV_PROLOGUE_FRAME_WORDS 10u /* sp adjust + far/near s0/ra save + set s0 */
/* ---- RvSlot / RvScope ---- */
typedef struct RvSlot {
diff --git a/src/arch/rv64/ops.c b/src/arch/rv64/ops.c
@@ -804,6 +804,41 @@ static void rv_store_stack_reg(CGTarget* t, u32 reg, RegClass cls,
rv_store(t, addr, src, ma);
}
+/* `op` displaced by `offset` bytes; a frame slot turns into an s0-relative ref. */
+static Operand rv_offset_mem_operand(CGTarget* t, Operand op, u32 offset) {
+  if (!offset) return op; /* offset 0 keeps the operand exactly as given */
+  if (op.kind == OPK_LOCAL) {
+    RvSlot* s = rv64_slot_get(impl_of(t), op.v.frame_slot);
+    if (!s) compiler_panic(t->c, impl_of(t)->loc, "rv64 offset operand: bad slot");
+    op.kind = OPK_INDIRECT;
+    op.v.ind.base = RV_S0;
+    op.v.ind.ofs = -(i32)s->off; /* slot start relative to s0 */
+  } else if (op.kind != OPK_INDIRECT) /* such kinds used to lose the offset silently */
+    compiler_panic(t->c, impl_of(t)->loc, "rv64 offset operand: unsupported kind");
+  op.v.ind.ofs += (i32)offset;
+  return op;
+}
+
+/* Load the `size`-byte piece located `offset` bytes into `src` into `dst`.
+ * The access is typed like `dst`; its alignment is `size`, or 1 when size is 0.
+ * All remaining MemAccess fields are zero. */
+static void rv_load_abi_part(CGTarget* t, Operand dst, Operand src, u32 offset,
+                             u32 size) {
+  MemAccess part = {.type = dst.type, .size = size, .align = size ? size : 1u};
+  Operand from = rv_offset_mem_operand(t, src, offset);
+  rv_load(t, dst, from, part);
+}
+
+/* Store `src` as the `size`-byte piece located `offset` bytes into `dst`.
+ * The access is typed like `src`; its alignment is `size`, or 1 when size is 0.
+ * All remaining MemAccess fields are zero. */
+static void rv_store_abi_part(CGTarget* t, Operand dst, Operand src,
+                              u32 offset, u32 size) {
+  MemAccess part = {.type = src.type, .size = size, .align = size ? size : 1u};
+  Operand to = rv_offset_mem_operand(t, dst, offset);
+  rv_store(t, to, src, part);
+}
+
static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int,
u32* next_fp, u32* stack_off, int tail) {
RImpl* a = impl_of(t);
@@ -890,18 +925,15 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int,
break;
}
case OPK_LOCAL: {
- RvSlot* s = rv64_slot_get(a, av->storage.v.frame_slot);
- if (!s) compiler_panic(t->c, a->loc, "rv64 call: bad arg slot");
- i32 off = -(i32)s->off + (i32)pt->src_offset;
- rv64_emit32(mc, enc_int_load(sz, 0, dst_reg, RV_S0, off));
+ Operand dst = {.kind = OPK_REG, .cls = RC_INT, .type = av->type};
+ dst.v.reg = dst_reg;
+ rv_load_abi_part(t, dst, av->storage, pt->src_offset, sz);
break;
}
case OPK_INDIRECT: {
- /* cg holds INDIRECT base regs in s2..s11, disjoint from arg
- * regs a0..a7 and the t0 stack-arg scratch. */
- u32 base = av->storage.v.ind.base & 0x1fu;
- i32 off = av->storage.v.ind.ofs + (i32)pt->src_offset;
- rv64_emit32(mc, enc_int_load(sz, 0, dst_reg, base, off));
+ Operand dst = {.kind = OPK_REG, .cls = RC_INT, .type = av->type};
+ dst.v.reg = dst_reg;
+ rv_load_abi_part(t, dst, av->storage, pt->src_offset, sz);
break;
}
default:
@@ -925,18 +957,15 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int,
break;
}
case OPK_LOCAL: {
- RvSlot* s = rv64_slot_get(a, av->storage.v.frame_slot);
- if (!s) compiler_panic(t->c, a->loc, "rv64 call: bad FP arg slot");
- i32 off = -(i32)s->off + (i32)pt->src_offset;
- rv64_emit32(mc, (sz == 8) ? rv_fld(freg, RV_S0, off)
- : rv_flw(freg, RV_S0, off));
+ Operand dst = {.kind = OPK_REG, .cls = RC_FP, .type = av->type};
+ dst.v.reg = freg;
+ rv_load_abi_part(t, dst, av->storage, pt->src_offset, sz);
break;
}
case OPK_INDIRECT: {
- u32 base = av->storage.v.ind.base & 0x1fu;
- i32 off = av->storage.v.ind.ofs + (i32)pt->src_offset;
- rv64_emit32(mc, (sz == 8) ? rv_fld(freg, base, off)
- : rv_flw(freg, base, off));
+ Operand dst = {.kind = OPK_REG, .cls = RC_FP, .type = av->type};
+ dst.v.reg = freg;
+ rv_load_abi_part(t, dst, av->storage, pt->src_offset, sz);
break;
}
default:
@@ -950,15 +979,14 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int,
*stack_off, tail);
break;
case OPK_LOCAL: {
- RvSlot* s = rv64_slot_get(a, av->storage.v.frame_slot);
- if (!s) compiler_panic(t->c, a->loc, "rv64 call: bad FP arg slot");
- i32 off = -(i32)s->off + (i32)pt->src_offset;
+ Operand tmp = {.kind = OPK_REG, .cls = RC_FP, .type = av->type};
+ tmp.v.reg = 0u;
if (sz == 8) {
- rv64_emit32(mc, rv_fld(/*ft0=*/0u, RV_S0, off));
+ rv_load_abi_part(t, tmp, av->storage, pt->src_offset, sz);
rv_store_stack_reg(t, /*ft0=*/0u, RC_FP, av->type, sz,
*stack_off, tail);
} else {
- rv64_emit32(mc, rv_flw(/*ft0=*/0u, RV_S0, off));
+ rv_load_abi_part(t, tmp, av->storage, pt->src_offset, sz);
rv_store_stack_reg(t, /*ft0=*/0u, RC_FP, av->type, sz,
*stack_off, tail);
}
@@ -967,14 +995,14 @@ static void emit_arg_value(CGTarget* t, const CGABIValue* av, u32* next_int,
case OPK_INDIRECT: {
/* Route through ft0 — it is in {ft0..ft7}, caller-saved
* scratch outside the cg fs2..fs11 pool. */
- u32 base = av->storage.v.ind.base & 0x1fu;
- i32 off = av->storage.v.ind.ofs + (i32)pt->src_offset;
+ Operand tmp = {.kind = OPK_REG, .cls = RC_FP, .type = av->type};
+ tmp.v.reg = 0u;
if (sz == 8) {
- rv64_emit32(mc, rv_fld(/*ft0=*/0u, base, off));
+ rv_load_abi_part(t, tmp, av->storage, pt->src_offset, sz);
rv_store_stack_reg(t, /*ft0=*/0u, RC_FP, av->type, sz,
*stack_off, tail);
} else {
- rv64_emit32(mc, rv_flw(/*ft0=*/0u, base, off));
+ rv_load_abi_part(t, tmp, av->storage, pt->src_offset, sz);
rv_store_stack_reg(t, /*ft0=*/0u, RC_FP, av->type, sz,
*stack_off, tail);
}
@@ -1089,19 +1117,28 @@ static void rv_tail_restore_frame(CGTarget* t) {
if (a->omit_frame) return;
for (i32 i = (i32)n_int_saves - 1; i >= 0; --i) {
- rv64_emit32(mc, rv_ld(int_regs[i], RV_S0, fl.int_save_base - 8 * i));
+ i32 off = fl.int_save_base - 8 * i;
+ if (off >= -2048 && off <= 2047) {
+ rv64_emit32(mc, rv_ld(int_regs[i], RV_S0, off));
+ } else {
+ rv64_emit_addr_adjust(mc, RV_T0, RV_S0, off);
+ rv64_emit32(mc, rv_ld(int_regs[i], RV_T0, 0));
+ }
}
for (i32 i = (i32)n_fp_saves - 1; i >= 0; --i) {
- rv64_emit32(mc, rv_fld(fp_regs[i], RV_S0, fl.fp_save_base - 8 * i));
+ i32 off = fl.fp_save_base - 8 * i;
+ if (off >= -2048 && off <= 2047) {
+ rv64_emit32(mc, rv_fld(fp_regs[i], RV_S0, off));
+ } else {
+ rv64_emit_addr_adjust(mc, RV_T0, RV_S0, off);
+ rv64_emit32(mc, rv_fld(fp_regs[i], RV_T0, 0));
+ }
}
if (a->has_alloca) {
- if ((i32)fl.fp_pair_off > 2047) {
- compiler_panic(t->c, a->loc, "rv64 tail call: fp pair offset too large");
- }
- rv64_emit32(mc, rv_addi(RV_SP, RV_S0, -(i32)fl.fp_pair_off));
+ rv64_emit_addr_adjust(mc, RV_SP, RV_S0, -(i32)fl.fp_pair_off);
}
- rv64_emit32(mc, rv_ld(RV_S0, RV_SP, (i32)fl.fp_pair_off));
- rv64_emit32(mc, rv_ld(RV_RA, RV_SP, (i32)fl.fp_pair_off + 8));
+ rv64_emit32(mc, rv_ld(RV_RA, RV_S0, 8));
+ rv64_emit32(mc, rv_ld(RV_S0, RV_S0, 0));
emit_sp_addi(mc, (i64)fl.frame_size);
}
@@ -1208,23 +1245,15 @@ static void rv_call(CGTarget* t, const CGCallDesc* d) {
rv64_emit32(mc, rv_fsgnj(fmt, reg_num(rs), src_reg, src_reg));
}
} else if (rs.kind == OPK_LOCAL || rs.kind == OPK_INDIRECT) {
- u32 base_reg;
- i32 base_off;
- if (rs.kind == OPK_LOCAL) {
- RvSlot* s = rv64_slot_get(a, rs.v.frame_slot);
- if (!s) compiler_panic(t->c, a->loc, "rv64 call: bad ret slot");
- base_reg = RV_S0;
- base_off = -(i32)s->off;
- } else {
- base_reg = rs.v.ind.base & 0x1fu;
- base_off = rs.v.ind.ofs;
- }
- i32 off = base_off + (i32)p->src_offset;
- if (p->cls == ABI_CLASS_INT) {
- rv64_emit32(mc, enc_int_store(p->size, src_reg, base_reg, off));
+ Operand src = {.kind = OPK_REG,
+ .cls = (u8)((p->cls == ABI_CLASS_FP) ? RC_FP : RC_INT),
+ .type = d->ret.type};
+ src.v.reg = src_reg;
+ if (p->cls == ABI_CLASS_INT || p->cls == ABI_CLASS_FP) {
+ rv_store_abi_part(t, rs, src, p->src_offset, p->size);
} else {
- if (p->size == 8) rv64_emit32(mc, rv_fsd(src_reg, base_reg, off));
- else rv64_emit32(mc, rv_fsw(src_reg, base_reg, off));
+ compiler_panic(t->c, a->loc, "rv64 call: ret part cls %d unimpl",
+ (int)p->cls);
}
} else if (rs.kind == OPK_IMM && rs.type == CG_BUILTIN_ID(CFREE_CG_BUILTIN_VOID)) {
/* void return placeholder — nothing to do. */
@@ -1407,28 +1436,19 @@ static void rv_ret(CGTarget* t, const CGABIValue* val) {
rv64_emit_load_imm(mc, sf, RV_A0, val->storage.v.imm);
} else if (val->storage.kind == OPK_LOCAL ||
val->storage.kind == OPK_INDIRECT) {
- u32 base_reg;
- i32 base_off;
- if (val->storage.kind == OPK_LOCAL) {
- RvSlot* s = rv64_slot_get(a, val->storage.v.frame_slot);
- if (!s) compiler_panic(t->c, a->loc, "rv64 ret: bad local slot");
- base_reg = RV_S0;
- base_off = -(i32)s->off;
- } else {
- base_reg = val->storage.v.ind.base & 0x1fu;
- base_off = val->storage.v.ind.ofs;
- }
const ABIArgInfo* ri2 = val->abi;
u32 nir = 0, nfr = 0;
for (u16 i = 0; i < (ri2 ? ri2->nparts : 0); ++i) {
const ABIArgPart* pt = &ri2->parts[i];
- i32 off = base_off + (i32)pt->src_offset;
if (pt->cls == ABI_CLASS_INT) {
- rv64_emit32(mc, enc_int_load(pt->size, 0, RV_A0 + nir++, base_reg, off));
+ Operand dst = {.kind = OPK_REG, .cls = RC_INT, .type = val->type};
+ dst.v.reg = RV_A0 + nir++;
+ rv_load_abi_part(t, dst, val->storage, pt->src_offset, pt->size);
} else if (pt->cls == ABI_CLASS_FP) {
+ Operand dst = {.kind = OPK_REG, .cls = RC_FP, .type = val->type};
u32 freg = 10u + nfr++;
- if (pt->size == 8) rv64_emit32(mc, rv_fld(freg, base_reg, off));
- else rv64_emit32(mc, rv_flw(freg, base_reg, off));
+ dst.v.reg = freg;
+ rv_load_abi_part(t, dst, val->storage, pt->src_offset, pt->size);
} else {
compiler_panic(t->c, a->loc, "rv64 ret: part cls %d unimpl",
(int)pt->cls);
diff --git a/test/parse/cases/6_7_2_12_long_double.skip b/test/parse/cases/6_7_2_12_long_double.skip
@@ -1 +0,0 @@
-long double (binary128) literal/convert needs rt/lib/fp_tf wiring through cg
diff --git a/test/parse/cases/i128_06_shifts_bitwise.c b/test/parse/cases/i128_06_shifts_bitwise.c
@@ -4,8 +4,12 @@ int test_main(void) {
u128 x = (u128)0xf0ULL << 68;
u128 y = x >> 64;
u128 z = (x | ((u128)0x55ULL << 4)) ^ ((u128)0x5ULL << 4);
+ u128 high_truth = ((u128)1 << 112) << 4;
+ _Bool high_bool = high_truth;
if ((unsigned long long)y != 0xf00ULL) return 11;
if ((unsigned long long)z != 0x500ULL) return 12;
if ((unsigned long long)(z >> 64) != 0xf00ULL) return 13;
+ if (!high_truth) return 14;
+ if (!high_bool) return 15;
return 41;
}
diff --git a/test/parse/cases/i128_13_signed_div_mod.c b/test/parse/cases/i128_13_signed_div_mod.c
@@ -0,0 +1,31 @@
+typedef __int128 i128;
+
+int test_main(void) { /* yields 61 when every check passes, else the failing check's id */
+ i128 a = -(((i128)1 << 90) + 123456789);
+ i128 b = ((i128)1 << 30) + 7;
+ i128 q = a / b; /* negative dividend, positive divisor */
+ i128 r = a % b;
+
+ if (q != -(((i128)1 << 60) - (((i128)7 << 30) - 49))) return 11;
+ if (r != -123456446) return 12;
+ if (q * b + r != a) return 13; /* quotient and remainder must recombine to the dividend */
+ if (r >= 0) return 14; /* remainder is expected to be negative, like the dividend */
+
+ b = -(((i128)1 << 33) + 5); /* negative dividend, negative divisor */
+ q = a / b;
+ r = a % b;
+ if (q != (((i128)1 << 57) - ((i128)80 << 20))) return 15;
+ if (r != -542887189) return 16;
+ if (q * b + r != a) return 17;
+ if (r >= 0) return 18;
+
+ a = ((i128)1 << 90) + 123456789; /* positive dividend, negative divisor */
+ q = a / b;
+ r = a % b;
+ if (q != -(((i128)1 << 57) - ((i128)80 << 20))) return 19;
+ if (r != 542887189) return 20;
+ if (q * b + r != a) return 21;
+ if (r <= 0) return 22; /* remainder is expected to be positive, like the dividend */
+
+ return 61;
+}
diff --git a/test/parse/cases/i128_13_signed_div_mod.expected b/test/parse/cases/i128_13_signed_div_mod.expected
@@ -0,0 +1 @@
+61
diff --git a/test/parse/cases/i128_14_arbitrary_mul.c b/test/parse/cases/i128_14_arbitrary_mul.c
@@ -0,0 +1,22 @@
+typedef __int128 i128;
+typedef unsigned __int128 u128;
+
+int test_main(void) { /* yields 73 when every check passes, else the failing check's id */
+ u128 a = ((u128)0x123456789abcdef0ULL << 16) | 0x1357ULL;
+ u128 b = ((u128)0x0fedcba987654321ULL << 12) | 0x246ULL;
+ u128 p = a * b; /* unsigned 128-bit multiply */
+
+ if ((unsigned long long)p != 0x71407aa829ff67caULL) return 11; /* low 64 bits */
+ if ((unsigned long long)(p >> 64) != 0x0ad77d7422601184ULL) return 12; /* high 64 bits */
+
+ i128 x = -(((i128)0x1234567 << 40) + 0x89abcdef);
+ i128 y = ((i128)0x13579 << 28) + 0x2468ace;
+ i128 z = x * y; /* signed multiply: negative times positive */
+ u128 uz = (u128)z; /* unsigned copy, so `>> 64` below is a shift on an unsigned value */
+
+ if (z >= 0) return 13; /* the product must be negative */
+ if ((unsigned long long)uz != 0x324b79b4fd6373aeULL) return 14; /* low 64 bits */
+ if ((unsigned long long)(uz >> 64) != 0xffffe9fe36571cf3ULL) return 15; /* high 64 bits */
+
+ return 73;
+}
diff --git a/test/parse/cases/i128_14_arbitrary_mul.expected b/test/parse/cases/i128_14_arbitrary_mul.expected
@@ -0,0 +1 @@
+73
diff --git a/test/parse/cases/ldbl128_15_arbitrary_mul.c b/test/parse/cases/ldbl128_15_arbitrary_mul.c
@@ -0,0 +1,15 @@
+int test_main(void) { /* yields 71 when every check passes, else the failing check's id */
+ if (__LDBL_MANT_DIG__ != 113) return 0; /* long double lacks a 113-bit mantissa */
+ /* NOTE(review): 0 differs from the expected 71; presumably this case only runs on binary128 targets — confirm. */
+ long double a = 7.0L * 9.0L;
+ long double b = 13.0L * 11.0L;
+ long double c = 1.5L * 2.5L;
+ /* NOTE(review): every operand is a literal, so these products may be folded at compile time — confirm that is intended. */
+ if ((int)a != 63) return 11;
+ if ((int)b != 143) return 12;
+ if ((int)(a + b) != 206) return 13;
+ if ((int)c != 3) return 14; /* 3.75 converts to int 3 */
+ if ((int)((c - 3.0L) * 4.0L) != 3) return 15; /* (3.75 - 3) * 4 == 3 */
+
+ return 71;
+}
diff --git a/test/parse/cases/ldbl128_15_arbitrary_mul.expected b/test/parse/cases/ldbl128_15_arbitrary_mul.expected
@@ -0,0 +1 @@
+71