kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 8874cd3e9e3f4ec33010f3a603e03b831b4ba2d9
parent 310b5ab01ab448779044ddb36f3cdf5248ae18f3
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Thu,  7 May 2026 10:27:18 -0700

compiler_rt

Diffstat:
Mdoc/builtins.md | 123+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/LICENSE-compiler-rt.txt | 311+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/README.md | 167+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/arm/aeabi_dcmp.S | 45+++++++++++++++++++++++++++++++++++++++++++++
Alib/arm/aeabi_drsub.c | 14++++++++++++++
Alib/arm/aeabi_fcmp.S | 45+++++++++++++++++++++++++++++++++++++++++++++
Alib/arm/aeabi_frsub.c | 14++++++++++++++
Alib/arm/aeabi_idivmod.S | 33+++++++++++++++++++++++++++++++++
Alib/arm/aeabi_idivmod_thumb1.S | 28++++++++++++++++++++++++++++
Alib/arm/aeabi_ldivmod.S | 34++++++++++++++++++++++++++++++++++
Alib/arm/aeabi_memcmp.S | 23+++++++++++++++++++++++
Alib/arm/aeabi_memcmp_thumb1.S | 25+++++++++++++++++++++++++
Alib/arm/aeabi_memcpy.S | 23+++++++++++++++++++++++
Alib/arm/aeabi_memcpy_thumb1.S | 25+++++++++++++++++++++++++
Alib/arm/aeabi_memmove.S | 22++++++++++++++++++++++
Alib/arm/aeabi_memmove_thumb1.S | 24++++++++++++++++++++++++
Alib/arm/aeabi_memset.S | 37+++++++++++++++++++++++++++++++++++++
Alib/arm/aeabi_memset_thumb1.S | 42++++++++++++++++++++++++++++++++++++++++++
Alib/arm/aeabi_uidivmod.S | 34++++++++++++++++++++++++++++++++++
Alib/arm/aeabi_uidivmod_thumb1.S | 31+++++++++++++++++++++++++++++++
Alib/arm/aeabi_uldivmod.S | 34++++++++++++++++++++++++++++++++++
Alib/atomic/atomic_common.inc | 238+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/atomic/atomic_freestanding.c | 36++++++++++++++++++++++++++++++++++++
Alib/fp/adddf3.c | 17+++++++++++++++++
Alib/fp/addsf3.c | 17+++++++++++++++++
Alib/fp/comparedf2.c | 52++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/fp/comparesf2.c | 52++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/fp/divdf3.c | 22++++++++++++++++++++++
Alib/fp/divsf3.c | 23+++++++++++++++++++++++
Alib/fp/extendsfdf2.c | 14++++++++++++++
Alib/fp/fixdfdi.c | 21+++++++++++++++++++++
Alib/fp/fixdfsi.c | 16++++++++++++++++
Alib/fp/fixsfdi.c | 21+++++++++++++++++++++
Alib/fp/fixsfsi.c | 16++++++++++++++++
Alib/fp/fixunsdfdi.c | 20++++++++++++++++++++
Alib/fp/fixunsdfsi.c | 15+++++++++++++++
Alib/fp/fixunssfdi.c | 20++++++++++++++++++++
Alib/fp/fixunssfsi.c | 19+++++++++++++++++++
Alib/fp/floatdidf.c | 32++++++++++++++++++++++++++++++++
Alib/fp/floatdisf.c | 27+++++++++++++++++++++++++++
Alib/fp/floatsidf.c | 51+++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/fp/floatsisf.c | 59+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/fp/floatundidf.c | 32++++++++++++++++++++++++++++++++
Alib/fp/floatundisf.c | 27+++++++++++++++++++++++++++
Alib/fp/floatunsidf.c | 40++++++++++++++++++++++++++++++++++++++++
Alib/fp/floatunsisf.c | 50++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/fp/fp_mode.c | 22++++++++++++++++++++++
Alib/fp/muldf3.c | 18++++++++++++++++++
Alib/fp/mulsf3.c | 18++++++++++++++++++
Alib/fp/negdf2.c | 17+++++++++++++++++
Alib/fp/negsf2.c | 17+++++++++++++++++
Alib/fp/subdf3.c | 20++++++++++++++++++++
Alib/fp/subsf3.c | 20++++++++++++++++++++
Alib/fp/truncdfsf2.c | 14++++++++++++++
Alib/fp_tf/addtf3.c | 21+++++++++++++++++++++
Alib/fp_tf/comparetf2.c | 51+++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/fp_tf/divtf3.c | 24++++++++++++++++++++++++
Alib/fp_tf/extenddftf2.c | 17+++++++++++++++++
Alib/fp_tf/extendsftf2.c | 17+++++++++++++++++
Alib/fp_tf/fixtfdi.c | 16++++++++++++++++
Alib/fp_tf/fixtfsi.c | 16++++++++++++++++
Alib/fp_tf/fixtfti.c | 16++++++++++++++++
Alib/fp_tf/fixunstfdi.c | 15+++++++++++++++
Alib/fp_tf/fixunstfsi.c | 15+++++++++++++++
Alib/fp_tf/fixunstfti.c | 15+++++++++++++++
Alib/fp_tf/floatditf.c | 47+++++++++++++++++++++++++++++++++++++++++++++++
Alib/fp_tf/floatsitf.c | 47+++++++++++++++++++++++++++++++++++++++++++++++
Alib/fp_tf/floattitf.c | 33+++++++++++++++++++++++++++++++++
Alib/fp_tf/floatunditf.c | 38++++++++++++++++++++++++++++++++++++++
Alib/fp_tf/floatunsitf.c | 38++++++++++++++++++++++++++++++++++++++
Alib/fp_tf/floatuntitf.c | 33+++++++++++++++++++++++++++++++++
Alib/fp_tf/multf3.c | 20++++++++++++++++++++
Alib/fp_tf/subtf3.c | 22++++++++++++++++++++++
Alib/fp_tf/trunctfdf2.c | 17+++++++++++++++++
Alib/fp_tf/trunctfsf2.c | 17+++++++++++++++++
Alib/fp_ti/fixdfti.c | 19+++++++++++++++++++
Alib/fp_ti/fixsfti.c | 19+++++++++++++++++++
Alib/fp_ti/fixunsdfti.c | 16++++++++++++++++
Alib/fp_ti/fixunssfti.c | 19+++++++++++++++++++
Alib/fp_ti/floattidf.c | 29+++++++++++++++++++++++++++++
Alib/fp_ti/floattisf.c | 28++++++++++++++++++++++++++++
Alib/fp_ti/floatuntidf.c | 29+++++++++++++++++++++++++++++
Alib/fp_ti/floatuntisf.c | 28++++++++++++++++++++++++++++
Alib/impl/fp_add_impl.inc | 172+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/impl/fp_compare_impl.inc | 119+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/impl/fp_div_impl.inc | 419+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/impl/fp_extend.h | 174+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/impl/fp_extend_impl.inc | 108+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/impl/fp_fixint_impl.inc | 40++++++++++++++++++++++++++++++++++++++++
Alib/impl/fp_fixuint_impl.inc | 38++++++++++++++++++++++++++++++++++++++
Alib/impl/fp_mul_impl.inc | 128+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/impl/fp_trunc.h | 158+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/impl/fp_trunc_impl.inc | 155+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/impl/int_div_impl.inc | 95+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/impl/int_to_fp.h | 82+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/impl/int_to_fp_impl.inc | 72++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/include/common/assembly.h | 293+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/include/common/fp_lib.h | 281+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/include/common/fp_mode.h | 29+++++++++++++++++++++++++++++
Alib/include/common/int_math.h | 32++++++++++++++++++++++++++++++++
Alib/include/common/int_util.h | 47+++++++++++++++++++++++++++++++++++++++++++++++
Alib/include/ilp32_le/int_endianness.h | 13+++++++++++++
Alib/include/ilp32_le/int_lib.h | 43+++++++++++++++++++++++++++++++++++++++++++
Alib/include/ilp32_le/int_types.h | 48++++++++++++++++++++++++++++++++++++++++++++++++
Alib/include/llp64_le/int_endianness.h | 13+++++++++++++
Alib/include/llp64_le/int_lib.h | 39+++++++++++++++++++++++++++++++++++++++
Alib/include/llp64_le/int_types.h | 69+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/include/lp64_le/int_endianness.h | 13+++++++++++++
Alib/include/lp64_le/int_lib.h | 41+++++++++++++++++++++++++++++++++++++++++
Alib/include/lp64_le/int_types.h | 74++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/include/lp64_le_ldbl128/tf_supplement.h | 32++++++++++++++++++++++++++++++++
Alib/int/absvdi2.c | 25+++++++++++++++++++++++++
Alib/int/bswapdi2.c | 25+++++++++++++++++++++++++
Alib/int/bswapsi2.c | 20++++++++++++++++++++
Alib/int/clzdi2.c | 25+++++++++++++++++++++++++
Alib/int/clzsi2.c | 48++++++++++++++++++++++++++++++++++++++++++++++++
Alib/int/cmpdi2.c | 34++++++++++++++++++++++++++++++++++
Alib/int/ctzdi2.c | 25+++++++++++++++++++++++++
Alib/int/ctzsi2.c | 53+++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/int/divdi3.c | 22++++++++++++++++++++++
Alib/int/divmoddi4.c | 28++++++++++++++++++++++++++++
Alib/int/ffsdi2.c | 27+++++++++++++++++++++++++++
Alib/int/int_util.c | 20++++++++++++++++++++
Alib/int/moddi3.c | 22++++++++++++++++++++++
Alib/int/negdi2.c | 21+++++++++++++++++++++
Alib/int/paritydi2.c | 25+++++++++++++++++++++++++
Alib/int/paritysi2.c | 23+++++++++++++++++++++++
Alib/int/popcountdi2.c | 32++++++++++++++++++++++++++++++++
Alib/int/popcountsi2.c | 29+++++++++++++++++++++++++++++
Alib/int/ucmpdi2.c | 34++++++++++++++++++++++++++++++++++
Alib/int/udivdi3.c | 23+++++++++++++++++++++++
Alib/int/udivmoddi4.c | 191+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/int/umoddi3.c | 23+++++++++++++++++++++++
Alib/int32/ashldi3.c | 36++++++++++++++++++++++++++++++++++++
Alib/int32/ashrdi3.c | 37+++++++++++++++++++++++++++++++++++++
Alib/int32/lshrdi3.c | 35+++++++++++++++++++++++++++++++++++
Alib/int32/muldi3.c | 48++++++++++++++++++++++++++++++++++++++++++++++++
Alib/int64/ashlti3.c | 37+++++++++++++++++++++++++++++++++++++
Alib/int64/ashrti3.c | 38++++++++++++++++++++++++++++++++++++++
Alib/int64/clzti2.c | 27+++++++++++++++++++++++++++
Alib/int64/ctzti2.c | 27+++++++++++++++++++++++++++
Alib/int64/divmodti4.c | 30++++++++++++++++++++++++++++++
Alib/int64/divti3.c | 24++++++++++++++++++++++++
Alib/int64/lshrti3.c | 36++++++++++++++++++++++++++++++++++++
Alib/int64/modti3.c | 24++++++++++++++++++++++++
Alib/int64/multi3.c | 49+++++++++++++++++++++++++++++++++++++++++++++++++
Alib/int64/negti2.c | 23+++++++++++++++++++++++
Alib/int64/udivmodti4.c | 148+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/int64/udivti3.c | 21+++++++++++++++++++++
Alib/int64/umodti3.c | 23+++++++++++++++++++++++
Alib/mem/mem.c | 49+++++++++++++++++++++++++++++++++++++++++++++++++
Alib/riscv/restore_rv32.S | 73+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/riscv/restore_rv64.S | 82+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/riscv/save_rv32.S | 88+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/riscv/save_rv64.S | 101+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
155 files changed, 7597 insertions(+), 0 deletions(-)

diff --git a/doc/builtins.md b/doc/builtins.md @@ -114,6 +114,129 @@ Operations (signatures match the GCC `__atomic` builtin family): --- +## `libcfree_rt.a` — runtime support library + +The codegen emits calls to symbols the user can't reasonably supply. cfree +ships them all in a single archive: integer/float/atomic helpers *and* the +`mem*` family the codegen lowers struct copies and aggregate inits to. + +Naming follows the libgcc / compiler-rt convention (`{op}{mode}{N}`, where +mode is `qi/hi/si/di/ti/sf/df/tf` for 1/2/4/8/16-byte int and 32/64/128-bit +float). All `mem*` are weak so a user libc wins. + +### Mem intrinsics (always shipped) +- `memcpy`, `memmove`, `memset`, `memcmp` + +### Integer helpers +Always: +- Div/mod 64-bit: `__divdi3`, `__udivdi3`, `__moddi3`, `__umoddi3`, `__divmoddi4`, `__udivmoddi4` +- Count/bits: `__clzsi2`, `__clzdi2`, `__ctzsi2`, `__ctzdi2`, `__ffsdi2`, `__popcountsi2`, `__popcountdi2`, `__paritysi2`, `__paritydi2`, `__bswapsi2`, `__bswapdi2` +- Compare: `__cmpdi2`, `__ucmpdi2` +- Negate/abs: `__negdi2`, `__absvdi2` + +64-bit targets only (128-bit `__int128` support): +- `__divti3`, `__udivti3`, `__modti3`, `__umodti3`, `__divmodti4`, `__udivmodti4` +- `__ashlti3`, `__lshrti3`, `__ashrti3`, `__multi3`, `__negti2`, `__clzti2`, `__ctzti2` + +32-bit targets only (no native 64-bit ops): +- `__muldi3`, `__ashldi3`, `__lshrdi3`, `__ashrdi3` + +### Soft-float (only on FPU-less targets — RV{32,64}I, ARM `-mfloat-abi=soft`, WASM-no-simd) +- Arithmetic `sf`/`df`/`tf`: `__add`, `__sub`, `__mul`, `__div`, `__neg` + → e.g. `__addsf3`, `__divdf3`, `__multf3` +- Int → float: `__float{,un}{si,di,ti}{sf,df,tf}` (e.g. `__floatdisf`, `__floatunsidf`) +- Float → int: `__fix{,uns}{sf,df,tf}{si,di,ti}` (e.g. `__fixdfdi`, `__fixunssfsi`) +- Float → float: `__extendsfdf2`, `__extendsftf2`, `__extenddftf2`, `__truncdfsf2`, `__trunctfsf2`, `__trunctfdf2` +- Compare: `__eq`, `__ne`, `__lt`, `__le`, `__gt`, `__ge`, `__unord` × `sf2`/`df2`/`tf2` + +### Atomic fallbacks (only when target lacks native atomics for that width) +- Generic: `__atomic_load`, `__atomic_store`, `__atomic_exchange`, `__atomic_compare_exchange` +- Sized N ∈ {1,2,4,8,16}: `__atomic_load_N`, `__atomic_store_N`, `__atomic_exchange_N`, `__atomic_compare_exchange_N`, `__atomic_fetch_{add,sub,and,or,xor,nand}_N` + +### Architecture-specific aliases + +**ARM AAPCS / AEABI** (32-bit ARM only — these are aliases the AEABI ABI mandates): +- Int div/mod: `__aeabi_idiv`, `__aeabi_uidiv`, `__aeabi_idivmod`, `__aeabi_uidivmod`, `__aeabi_ldivmod`, `__aeabi_uldivmod` +- 64-bit shift/mul: `__aeabi_llsl`, `__aeabi_llsr`, `__aeabi_lasr`, `__aeabi_lmul` +- Soft-float arith: `__aeabi_{f,d}{add,sub,mul,div,neg}`, `__aeabi_{f,d}rsub` +- Soft-float convert: `__aeabi_f2iz`, `__aeabi_f2uiz`, `__aeabi_f2lz`, `__aeabi_f2ulz`, `__aeabi_d2iz`, `__aeabi_d2uiz`, `__aeabi_d2lz`, `__aeabi_d2ulz`, `__aeabi_i2f`, `__aeabi_ui2f`, `__aeabi_l2f`, `__aeabi_ul2f`, `__aeabi_i2d`, `__aeabi_ui2d`, `__aeabi_l2d`, `__aeabi_ul2d`, `__aeabi_f2d`, `__aeabi_d2f` +- Soft-float compare: `__aeabi_fcmp{eq,lt,le,gt,ge,un}`, `__aeabi_dcmp{eq,lt,le,gt,ge,un}` +- Mem variants (size-specialized): `__aeabi_memcpy`, `__aeabi_memcpy{4,8}`, `__aeabi_memmove`, `__aeabi_memmove{4,8}`, `__aeabi_memset`, `__aeabi_memset{4,8}`, `__aeabi_memclr`, `__aeabi_memclr{4,8}` + +**RISC-V** (only with `-msave-restore`, used by RV32E/embedded code-size builds): +- `__riscv_save_{0..12}`, `__riscv_restore_{0..12}` + +**x86 / x86_64**: no architecture-specific aliases; uses the generic libgcc names above. + +**WASM**: uses generic names; `memcpy`/`memset`/`memmove` may lower to `memory.copy` / `memory.fill` instructions instead of calls. + +--- + +## Target-identification macros + +cfree predefines a small, stable set of macros so headers and user code +can branch on architecture, OS, object format, and ABI without parsing +target triples. Compatible-by-design with the GCC/Clang names — code +written against `__x86_64__` / `__BYTE_ORDER__` / `__LP64__` works +unchanged. + +### Compiler identification +- `__cfree__` — defined to `1` +- `__cfree_major__`, `__cfree_minor__`, `__cfree_patchlevel__` +- `__STDC__ == 1`, `__STDC_VERSION__ == 201112L` +- `__STDC_HOSTED__ == 0` (cfree is freestanding-only) +- `__STDC_NO_COMPLEX__`, `__STDC_NO_THREADS__`, `__STDC_NO_VLA__` defined +- `__STDC_NO_ATOMICS__` *not* defined (cfree implements `<stdatomic.h>`) + +### Architecture (exactly one defined) +- `__i386__` — 32-bit x86 +- `__x86_64__` (and `__amd64__`) — 64-bit x86 +- `__arm__` — 32-bit ARM +- `__aarch64__` — 64-bit ARM +- `__riscv` — RISC-V (any width); paired with `__riscv_xlen` ∈ {32, 64} +- `__wasm__` — WebAssembly; paired with `__wasm32__` or `__wasm64__` + +### Pointer width / data model +- `__SIZEOF_POINTER__`, `__SIZEOF_LONG__`, `__SIZEOF_SIZE_T__`, `__SIZEOF_PTRDIFF_T__`, `__SIZEOF_WCHAR_T__`, `__SIZEOF_INT__`, `__SIZEOF_LONG_LONG__`, `__SIZEOF_FLOAT__`, `__SIZEOF_DOUBLE__`, `__SIZEOF_LONG_DOUBLE__` +- One of: `__LP64__` / `_LP64` (Unix 64), `__ILP32__` (32-bit), or neither (LLP64 — Win64) + +### Endianness +- `__BYTE_ORDER__` set to one of `__ORDER_LITTLE_ENDIAN__` / `__ORDER_BIG_ENDIAN__` +- `__ORDER_LITTLE_ENDIAN__ == 1234`, `__ORDER_BIG_ENDIAN__ == 4321` (values match GCC) + +### OS / platform (zero or one defined; freestanding bare-metal defines none) +- `__linux__` — Linux ABI +- `__APPLE__` and `__MACH__` — Darwin / macOS +- `_WIN32` (always on Windows), plus `_WIN64` on 64-bit Windows + +### Object format (exactly one defined per output) +- `__ELF__` — ELF (Linux, *BSD, bare-metal Unix-ish) +- `__MACH__` — Mach-O (Darwin) +- `_WIN32` doubles as the PE/COFF marker (matches MSVC/MinGW convention) + +### ARM-specific (defined only when `__arm__` or `__aarch64__`) +- `__ARM_ARCH` — integer arch version (7, 8, …); plus profile-specific `__ARM_ARCH_{7A,7R,7M,8A,…}__` +- `__ARM_EABI__` — defined on AAPCS/AEABI targets (always, for cfree's ARM32) +- `__ARM_PCS` (base PCS) or `__ARM_PCS_VFP` (hard-float PCS) +- `__ARM_FP` — bitmask of supported FP widths (0x4=fp32, 0x8=fp64); undefined on soft-float +- `__SOFTFP__` — defined ⇔ `-mfloat-abi=soft` (no FPU instructions, soft-float ABI) +- `__ARM_NEON` — defined ⇔ NEON SIMD available + +### RISC-V-specific (defined only when `__riscv`) +- `__riscv_xlen` ∈ {32, 64} +- `__riscv_flen` ∈ {0, 32, 64} — widest hardware FP register (0 ⇒ soft-float) +- Extension flags (defined ⇔ extension is on): `__riscv_mul`, `__riscv_div`, `__riscv_atomic`, `__riscv_compressed`, `__riscv_fdiv`, `__riscv_fsqrt` +- ABI: `__riscv_float_abi_soft`, `__riscv_float_abi_single`, `__riscv_float_abi_double` (exactly one) + +### x86-specific (defined only when `__i386__` or `__x86_64__`) +- Feature flags follow GCC names, defined ⇔ enabled at the chosen `-march`: `__SSE__`, `__SSE2__`, `__SSE3__`, `__SSSE3__`, `__SSE4_1__`, `__SSE4_2__`, `__AVX__`, `__AVX2__`, `__BMI__`, `__BMI2__`, `__POPCNT__`, `__FMA__` + +### WASM-specific (defined only when `__wasm__`) +- `__wasm_simd128__` — defined ⇔ SIMD proposal enabled +- `__wasm_bulk_memory__` — defined ⇔ `memory.copy`/`memory.fill` available (gates the lowering noted under mem intrinsics) + +--- + ## Discovery To enumerate what a compiler predefines for the current target: diff --git a/lib/LICENSE-compiler-rt.txt b/lib/LICENSE-compiler-rt.txt @@ -0,0 +1,311 @@ +============================================================================== +The LLVM Project is under the Apache License v2.0 with LLVM Exceptions: +============================================================================== + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +---- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. + +============================================================================== +Software from third parties included in the LLVM Project: +============================================================================== +The LLVM Project contains third party software which is under different license +terms. All such code will be identified clearly using at least one of two +mechanisms: +1) It will be in a separate directory tree with its own `LICENSE.txt` or + `LICENSE` file at the top containing the specific license and restrictions + which apply to that software, or +2) It will contain specific license and restriction terms at the top of every + file. + +============================================================================== +Legacy LLVM License (https://llvm.org/docs/DeveloperPolicy.html#legacy): +============================================================================== + +The compiler_rt library is dual licensed under both the University of Illinois +"BSD-Like" license and the MIT license. As a user of this code you may choose +to use it under either license. As a contributor, you agree to allow your code +to be used under both. + +Full text of the relevant licenses is included below. + +============================================================================== + +University of Illinois/NCSA +Open Source License + +Copyright (c) 2009-2019 by the contributors listed in CREDITS.TXT + +All rights reserved. + +Developed by: + + LLVM Team + + University of Illinois at Urbana-Champaign + + http://llvm.org + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal with +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimers. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimers in the + documentation and/or other materials provided with the distribution. + + * Neither the names of the LLVM Team, University of Illinois at + Urbana-Champaign, nor the names of its contributors may be used to + endorse or promote products derived from this Software without specific + prior written permission. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE +SOFTWARE. + +============================================================================== + +Copyright (c) 2009-2015 by the contributors listed in CREDITS.TXT + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/lib/README.md b/lib/README.md @@ -0,0 +1,167 @@ +# lib/ — `libcfree_rt.a` source + +Per-op runtime helpers for cfree, copied from +[compiler-rt 18.1.8](https://github.com/llvm/llvm-project/releases/tag/llvmorg-18.1.8) +(`lib/builtins/`) and stripped to remove ifdefs that select between targets. +The build system picks files by directory; each `.c` / `.S` file compiles to +exactly one target without preprocessor branching. + +License: Apache-2.0 WITH LLVM-exception (see `LICENSE-compiler-rt.txt`). The +hand-written `mem/mem.c` is 0BSD; relicense as desired. + +## Layout + +### Source dirs (each `.c` / `.S` becomes one object in `libcfree_rt.a`) + +| Directory | Purpose | Built on | +| -------------- | --------------------------------------------------------- | ----------------------------------------- | +| `int/` | Integer helpers needed on every target | All | +| `int32/` | 64-bit ops synthesized from 32-bit | ILP32 only | +| `int64/` | 128-bit ops via `__int128` | LP64 / LLP64 only | +| `fp/` | Soft-float `sf` (binary32) and `df` (binary64) | FPU-less targets (RV{32,64}I, ARM softfp, WASM) | +| `fp_tf/` | Soft-float `tf` (binary128) | Targets with binary128 long double (e.g. aarch64 `-mlong-double-128`) | +| `fp_ti/` | `__int128` ↔ float | LP64 / LLP64 + soft-float | +| `arm/` | ARM AAPCS / AEABI aliases & wrappers | 32-bit ARM | +| `riscv/` | `__riscv_save_*` / `__riscv_restore_*` (`-msave-restore`) | RISC-V (per-xlen file) | +| `mem/` | `memcpy` / `memmove` / `memset` / `memcmp` (weak) | All; user libc overrides | +| `atomic/` | `__atomic_*` fallback shim | All | + +### Build-time include dirs (consumed by the source dirs above; nothing here lands in `libcfree_rt.a`) + +| Directory | Consumed by | +| -------------------------- | ------------------------------------------------------------------------------------------ | +| `impl/` | `int/divdi3.c` etc. (via `int_div_impl.inc`); every `fp*/`, `fp_tf/`, `fp_ti/` file | +| `include/common/` | All `.c` (transitively, via the per-target `int_lib.h`); `arm/*.S` includes `assembly.h` | +| `include/lp64_le/` | LP64 builds; selected via `-Iinclude/lp64_le` | +| `include/llp64_le/` | LLP64 (Win64) builds | +| `include/ilp32_le/` | ILP32 builds | +| `include/lp64_le_ldbl128/` | Extra `-include tf_supplement.h` when compiling `fp_tf/` on binary128-long-double targets | +| `atomic/atomic_common.inc` | `atomic/atomic_freestanding.c` | + +## How the build picks files + +```text +target tuple ⟶ compile + +x86_64-linux / aarch64-linux / aarch64-darwin / rv64 + ⟶ int/* int64/* fp/* + atomic/atomic_freestanding.c + mem/mem.c + -Iinclude/lp64_le + -DHAS_INT128=1 + +x86_64-windows ⟶ int/* int64/* fp/* atomic/* mem/* + -Iinclude/llp64_le + -DHAS_INT128=1 + +i386-* / arm32-* / rv32 / wasm32 ⟶ int/* int32/* fp/* atomic/* mem/* + -Iinclude/ilp32_le + -DHAS_INT128=0 + +aarch64-* with binary128 long double ⟶ above + fp_tf/* fp_ti/* + -include include/lp64_le_ldbl128/tf_supplement.h + +rv32 with -msave-restore ⟶ above + riscv/save_rv32.S riscv/restore_rv32.S +rv64 with -msave-restore ⟶ above + riscv/save_rv64.S riscv/restore_rv64.S +arm32 ARMv7+/Thumb2 (AEABI) ⟶ above + arm/aeabi_*.{S,c} (excluding *_thumb1.S) +arm32 ARMv6-M Thumb1 (Cortex-M0/M0+/M1) ⟶ above + arm/aeabi_*_thumb1.S + + the AEABI files with no Thumb1 variant + (aeabi_ldivmod.S, aeabi_uldivmod.S, + aeabi_dcmp.S, aeabi_fcmp.S, + aeabi_drsub.c, aeabi_frsub.c) +``` + +`-Iinclude/common` is always added. + +## Endianness + +All headers assume `__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__`. cfree's +supported targets (x86, ARM-LE, RISC-V, WASM) are LE in practice. Big-endian +support would need a parallel `*_be/` header set; not provided. + +## Files with surviving preprocessor logic + +The "no ifdefs" rule is applied to every `.c` and `.S` source — none branch on +target. Some headers and shared `.inc` files retain preprocessor logic that is +*not* target dispatch: + +- `impl/*.inc`, `impl/*.h`, `include/common/fp_lib.h` — parameterized via + `SINGLE_PRECISION` / `DOUBLE_PRECISION` / `QUAD_PRECISION` and the + `SRC_*` / `DST_*` selectors. Each `.c` file `#define`s exactly one before + the include; this is the metaprogramming model compiler-rt uses for fp ops. +- `include/common/assembly.h` — abstracts assembler syntax (ELF vs Mach-O vs + COFF symbol decoration, ARM/Thumb mode markers, etc.). Heavily ifdef'd by + design; consumed only by `arm/*.S`. +- `atomic/atomic_common.inc` — keys the 16-byte cases off `HAS_INT128`, set + by the build (`-DHAS_INT128=1` on 64-bit, `-DHAS_INT128=0` on 32-bit). + +## Notes per directory + +### `mem/mem.c` +Hand-written portable C (not from compiler-rt). All four functions are weak so +a user libc, or a tuned arch-specific replacement, wins at link time. The +existing `arm/aeabi_mem*.S` files forward to these symbols. + +### `atomic/` +`atomic_freestanding.c` defines a pointer-sized `_Atomic(uintptr_t)` spinlock +as the lock primitive (no OS dependency) then `#include`s `atomic_common.inc`, +which contains the dispatch logic and all `__atomic_*_N` expansions. The +shim calls the GCC `__atomic_*` builtin family (the one cfree documents in +`doc/builtins.md`); upstream's Clang-only `__c11_atomic_*` calls were +translated. Public symbols are exported via `#pragma redefine_extname` from +`_c`-suffixed names so they don't collide with the clang builtins of the +same name. + +### `arm/` +AEABI aliases for div/mod, soft-float compares, and the `aeabi_mem{cpy,move,set,clr}` +size-specialized wrappers. Six files have a `*_thumb1.S` companion for +ARMv6-M (Cortex-M0/M0+/M1), where the ISA can't tail-call (`b memcpy`) or +fold the `subs/muls` form into one instruction: +`aeabi_idivmod`, `aeabi_uidivmod`, `aeabi_memcpy`, `aeabi_memmove`, +`aeabi_memset`, `aeabi_memcmp`. The build picks one variant per symbol — +the Thumb1 file for ARMv6-M, the base file for ARMv7+/Thumb2. The remaining +AEABI files (`aeabi_ldivmod`, `aeabi_uldivmod`, `aeabi_{d,f}cmp`, +`aeabi_{d,f}rsub`) are ISA-agnostic and used as-is on both. + +### `riscv/` +`save.S` / `restore.S` upstream are one file each, gated on `__riscv_xlen`. +Split into `save_rv32.S` / `save_rv64.S` and `restore_rv32.S` / `restore_rv64.S`. +The embedded ABI variants (`__riscv_32e` / `__riscv_64e`) were not carried over. + +### `fp_tf/` +Compile only on targets where `long double` is IEEE binary128 (typically +`aarch64` with `-mlong-double-128`). The build must `-include` +`lp64_le_ldbl128/tf_supplement.h` so `tf_float`, `CRT_HAS_TF_MODE`, etc. are +defined before `fp_lib.h` is processed. + +### Compare files +`fp/comparesf2.c`, `fp/comparedf2.c`, `fp_tf/comparetf2.c` define every variant +(`__eqXf2`, `__ltXf2`, `__neXf2`, `__gtXf2`) as a separate function rather than +using `COMPILER_RT_ALIAS`. Replaces an object-format-conditional macro with a +handful of one-line wrappers. + +### `int/int_util.c` +Replaced upstream's hosted/kernel/Apple/Win32 abort cascade with a single +freestanding `__compilerrt_abort_impl` that calls `__builtin_trap()`. + +## Things this lib does NOT cover + +These are documented in `doc/builtins.md` but not provided here: + +- `__riscv_save_*` for `__riscv_32e` / `__riscv_64e` (rare embedded ABIs). +- Big-endian targets. +- 80-bit `xf` (x86 long double) soft-float — cfree's spec doesn't list `xf` + ops in the runtime contract; x86 always has the FPU for long double. +- Half-precision (`hf`) conversions — not in the cfree contract. + +## Updating from a newer compiler-rt + +When pulling in a new release: +1. Diff `lib/builtins/*.c` against the corresponding files here. Per-op files + typically need only the `__ARM_EABI__` / `__MINGW32__` / `__SOFTFP__` blocks + re-stripped. +2. Re-strip `int_util.c` to the freestanding-only abort path. +3. Re-split any new `riscv/` or `arm/` files that combine modes. +4. Diff `int_lib.h` / `int_types.h` / `fp_lib.h` against the per-target copies + under `include/`; update for any new feature gates that aren't legitimately + target-orthogonal. diff --git a/lib/arm/aeabi_dcmp.S b/lib/arm/aeabi_dcmp.S @@ -0,0 +1,45 @@ +//===-- aeabi_dcmp.S - EABI dcmp* implementation ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "assembly.h" + +// int __aeabi_dcmp{eq,lt,le,ge,gt}(double a, double b) { +// int result = __{eq,lt,le,ge,gt}df2(a, b); +// if (result {==,<,<=,>=,>} 0) { +// return 1; +// } else { +// return 0; +// } +// } + +# define CONVERT_DCMP_ARGS_TO_DF2_ARGS + +#define DEFINE_AEABI_DCMP(cond) \ + .syntax unified SEPARATOR \ + .p2align 2 SEPARATOR \ +DEFINE_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond) \ + push { r4, lr } SEPARATOR \ + CONVERT_DCMP_ARGS_TO_DF2_ARGS SEPARATOR \ + bl SYMBOL_NAME(__ ## cond ## df2) SEPARATOR \ + cmp r0, #0 SEPARATOR \ + b ## cond 1f SEPARATOR \ + movs r0, #0 SEPARATOR \ + pop { r4, pc } SEPARATOR \ +1: SEPARATOR \ + movs r0, #1 SEPARATOR \ + pop { r4, pc } SEPARATOR \ +END_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond) + +DEFINE_AEABI_DCMP(eq) +DEFINE_AEABI_DCMP(lt) +DEFINE_AEABI_DCMP(le) +DEFINE_AEABI_DCMP(ge) +DEFINE_AEABI_DCMP(gt) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/lib/arm/aeabi_drsub.c b/lib/arm/aeabi_drsub.c @@ -0,0 +1,14 @@ +//===-- lib/arm/aeabi_drsub.c - Double-precision subtraction --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#define DOUBLE_PRECISION +#include "fp_lib.h" + +AEABI_RTABI fp_t __aeabi_dsub(fp_t, fp_t); + +AEABI_RTABI fp_t __aeabi_drsub(fp_t a, fp_t b) { return __aeabi_dsub(b, a); } diff --git a/lib/arm/aeabi_fcmp.S b/lib/arm/aeabi_fcmp.S @@ -0,0 +1,45 @@ +//===-- aeabi_fcmp.S - EABI fcmp* implementation ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "assembly.h" + +// int __aeabi_fcmp{eq,lt,le,ge,gt}(float a, float b) { +// int result = __{eq,lt,le,ge,gt}sf2(a, b); +// if (result {==,<,<=,>=,>} 0) { +// return 1; +// } else { +// return 0; +// } +// } + +# define CONVERT_FCMP_ARGS_TO_SF2_ARGS + +#define DEFINE_AEABI_FCMP(cond) \ + .syntax unified SEPARATOR \ + .p2align 2 SEPARATOR \ +DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond) \ + push { r4, lr } SEPARATOR \ + CONVERT_FCMP_ARGS_TO_SF2_ARGS SEPARATOR \ + bl SYMBOL_NAME(__ ## cond ## sf2) SEPARATOR \ + cmp r0, #0 SEPARATOR \ + b ## cond 1f SEPARATOR \ + movs r0, #0 SEPARATOR \ + pop { r4, pc } SEPARATOR \ +1: SEPARATOR \ + movs r0, #1 SEPARATOR \ + pop { r4, pc } SEPARATOR \ +END_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond) + +DEFINE_AEABI_FCMP(eq) +DEFINE_AEABI_FCMP(lt) +DEFINE_AEABI_FCMP(le) +DEFINE_AEABI_FCMP(ge) +DEFINE_AEABI_FCMP(gt) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/lib/arm/aeabi_frsub.c b/lib/arm/aeabi_frsub.c @@ -0,0 +1,14 @@ +//===-- lib/arm/aeabi_frsub.c - Single-precision subtraction --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#define SINGLE_PRECISION +#include "fp_lib.h" + +AEABI_RTABI fp_t __aeabi_fsub(fp_t, fp_t); + +AEABI_RTABI fp_t __aeabi_frsub(fp_t a, fp_t b) { return __aeabi_fsub(b, a); } diff --git a/lib/arm/aeabi_idivmod.S b/lib/arm/aeabi_idivmod.S @@ -0,0 +1,33 @@ +//===-- aeabi_idivmod.S - EABI idivmod implementation ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "assembly.h" + +// struct { int quot, int rem} __aeabi_idivmod(int numerator, int denominator) { +// int rem, quot; +// quot = __divmodsi4(numerator, denominator, &rem); +// return {quot, rem}; +// } + + + .syntax unified + .text + DEFINE_CODE_STATE + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_idivmod) + push { lr } + sub sp, sp, #4 + mov r2, sp + bl SYMBOL_NAME(__divmodsi4) + ldr r1, [sp] + add sp, sp, #4 + pop { pc } +END_COMPILERRT_FUNCTION(__aeabi_idivmod) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/lib/arm/aeabi_idivmod_thumb1.S b/lib/arm/aeabi_idivmod_thumb1.S @@ -0,0 +1,28 @@ +//===-- aeabi_idivmod_thumb1.S - Thumb1 (Cortex-M0/M1) variant -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Thumb1 has no muls-with-3-operands and limited register-set instructions, +// so __aeabi_idivmod is implemented as: quot = __divsi3(num, denom); +// rem = num - quot * denom. +// Used on ARMv6-M (Cortex-M0/M0+/M1). +//===----------------------------------------------------------------------===// + +#include "assembly.h" + + .syntax unified + .text + DEFINE_CODE_STATE + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_idivmod) + push {r0, r1, lr} + bl SYMBOL_NAME(__divsi3) + pop {r1, r2, r3} // now r0 = quot, r1 = num, r2 = denom + muls r2, r0, r2 // r2 = quot * denom + subs r1, r1, r2 + JMP (r3) +END_COMPILERRT_FUNCTION(__aeabi_idivmod) + +NO_EXEC_STACK_DIRECTIVE diff --git a/lib/arm/aeabi_ldivmod.S b/lib/arm/aeabi_ldivmod.S @@ -0,0 +1,34 @@ +//===-- aeabi_ldivmod.S - EABI ldivmod implementation ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "assembly.h" + +// struct { int64_t quot, int64_t rem} +// __aeabi_ldivmod(int64_t numerator, int64_t denominator) { +// int64_t rem, quot; +// quot = __divmoddi4(numerator, denominator, &rem); +// return {quot, rem}; +// } + + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_ldivmod) + push {r6, lr} + sub sp, sp, #16 + add r6, sp, #8 + str r6, [sp] + bl SYMBOL_NAME(__divmoddi4) + ldr r2, [sp, #8] + ldr r3, [sp, #12] + add sp, sp, #16 + pop {r6, pc} +END_COMPILERRT_FUNCTION(__aeabi_ldivmod) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/lib/arm/aeabi_memcmp.S b/lib/arm/aeabi_memcmp.S @@ -0,0 +1,23 @@ +//===-- aeabi_memcmp.S - EABI memcmp implementation -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "assembly.h" + +// void __aeabi_memcmp(void *dest, void *src, size_t n) { memcmp(dest, src, n); } + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_memcmp) + b memcmp +END_COMPILERRT_FUNCTION(__aeabi_memcmp) + +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcmp4, __aeabi_memcmp) +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcmp8, __aeabi_memcmp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/lib/arm/aeabi_memcmp_thumb1.S b/lib/arm/aeabi_memcmp_thumb1.S @@ -0,0 +1,25 @@ +//===-- aeabi_memcmp_thumb1.S - Thumb1 (Cortex-M0/M1) variant ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Used on ARMv6-M (Cortex-M0/M0+/M1). +//===----------------------------------------------------------------------===// + +#include "assembly.h" + +// void __aeabi_memcmp(void *dest, void *src, size_t n) { memcmp(dest, src, n); } + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_memcmp) + push {r7, lr} + bl memcmp + pop {r7, pc} +END_COMPILERRT_FUNCTION(__aeabi_memcmp) + +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcmp4, __aeabi_memcmp) +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcmp8, __aeabi_memcmp) + +NO_EXEC_STACK_DIRECTIVE diff --git a/lib/arm/aeabi_memcpy.S b/lib/arm/aeabi_memcpy.S @@ -0,0 +1,23 @@ +//===-- aeabi_memcpy.S - EABI memcpy implementation -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "assembly.h" + +// void __aeabi_memcpy(void *dest, void *src, size_t n) { memcpy(dest, src, n); } + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_memcpy) + b memcpy +END_COMPILERRT_FUNCTION(__aeabi_memcpy) + +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcpy4, __aeabi_memcpy) +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcpy8, __aeabi_memcpy) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/lib/arm/aeabi_memcpy_thumb1.S b/lib/arm/aeabi_memcpy_thumb1.S @@ -0,0 +1,25 @@ +//===-- aeabi_memcpy_thumb1.S - Thumb1 (Cortex-M0/M1) variant ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Used on ARMv6-M (Cortex-M0/M0+/M1). +//===----------------------------------------------------------------------===// + +#include "assembly.h" + +// void __aeabi_memcpy(void *dest, void *src, size_t n) { memcpy(dest, src, n); } + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_memcpy) + push {r7, lr} + bl memcpy + pop {r7, pc} +END_COMPILERRT_FUNCTION(__aeabi_memcpy) + +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcpy4, __aeabi_memcpy) +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcpy8, __aeabi_memcpy) + +NO_EXEC_STACK_DIRECTIVE diff --git a/lib/arm/aeabi_memmove.S b/lib/arm/aeabi_memmove.S @@ -0,0 +1,22 @@ +//===-- aeabi_memmove.S - EABI memmove implementation --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// + +#include "assembly.h" + +// void __aeabi_memmove(void *dest, void *src, size_t n) { memmove(dest, src, n); } + + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_memmove) + b memmove +END_COMPILERRT_FUNCTION(__aeabi_memmove) + +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memmove4, __aeabi_memmove) +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memmove8, __aeabi_memmove) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/lib/arm/aeabi_memmove_thumb1.S b/lib/arm/aeabi_memmove_thumb1.S @@ -0,0 +1,24 @@ +//===-- aeabi_memmove_thumb1.S - Thumb1 (Cortex-M0/M1) variant -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Used on ARMv6-M (Cortex-M0/M0+/M1). +//===---------------------------------------------------------------------===// + +#include "assembly.h" + +// void __aeabi_memmove(void *dest, void *src, size_t n) { memmove(dest, src, n); } + + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_memmove) + push {r7, lr} + bl memmove + pop {r7, pc} +END_COMPILERRT_FUNCTION(__aeabi_memmove) + +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memmove4, __aeabi_memmove) +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memmove8, __aeabi_memmove) + +NO_EXEC_STACK_DIRECTIVE diff --git a/lib/arm/aeabi_memset.S b/lib/arm/aeabi_memset.S @@ -0,0 +1,37 @@ +//===-- aeabi_memset.S - EABI memset implementation -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "assembly.h" + +// void __aeabi_memset(void *dest, size_t n, int c) { memset(dest, c, n); } +// void __aeabi_memclr(void *dest, size_t n) { __aeabi_memset(dest, n, 0); } + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_memset) + mov r3, r1 + mov r1, r2 + mov r2, r3 + b memset +END_COMPILERRT_FUNCTION(__aeabi_memset) + +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memset4, __aeabi_memset) +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memset8, __aeabi_memset) + + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_memclr) + mov r2, r1 + movs r1, #0 + b memset +END_COMPILERRT_FUNCTION(__aeabi_memclr) + +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memclr4, __aeabi_memclr) +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memclr8, __aeabi_memclr) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/lib/arm/aeabi_memset_thumb1.S b/lib/arm/aeabi_memset_thumb1.S @@ -0,0 +1,42 @@ +//===-- aeabi_memset_thumb1.S - Thumb1 (Cortex-M0/M1) variant ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Thumb1 lacks an unconditional `b <reg>` to a far symbol, so we bl/pop +// instead of tail-calling memset. Used on ARMv6-M (Cortex-M0/M0+/M1). +//===----------------------------------------------------------------------===// + +#include "assembly.h" + +// void __aeabi_memset(void *dest, size_t n, int c) { memset(dest, c, n); } +// void __aeabi_memclr(void *dest, size_t n) { __aeabi_memset(dest, n, 0); } + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_memset) + mov r3, r1 + mov r1, r2 + mov r2, r3 + push {r7, lr} + bl memset + pop {r7, pc} +END_COMPILERRT_FUNCTION(__aeabi_memset) + +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memset4, __aeabi_memset) +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memset8, __aeabi_memset) + + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_memclr) + mov r2, r1 + movs r1, #0 + push {r7, lr} + bl memset + pop {r7, pc} +END_COMPILERRT_FUNCTION(__aeabi_memclr) + +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memclr4, __aeabi_memclr) +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memclr8, __aeabi_memclr) + +NO_EXEC_STACK_DIRECTIVE diff --git a/lib/arm/aeabi_uidivmod.S b/lib/arm/aeabi_uidivmod.S @@ -0,0 +1,34 @@ +//===-- aeabi_uidivmod.S - EABI uidivmod implementation -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "assembly.h" + +// struct { unsigned quot, unsigned rem} +// __aeabi_uidivmod(unsigned numerator, unsigned denominator) { +// unsigned rem, quot; +// quot = __udivmodsi4(numerator, denominator, &rem); +// return {quot, rem}; +// } + + + .syntax unified + .text + DEFINE_CODE_STATE + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_uidivmod) + push { lr } + sub sp, sp, #4 + mov r2, sp + bl SYMBOL_NAME(__udivmodsi4) + ldr r1, [sp] + add sp, sp, #4 + pop { pc } +END_COMPILERRT_FUNCTION(__aeabi_uidivmod) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/lib/arm/aeabi_uidivmod_thumb1.S b/lib/arm/aeabi_uidivmod_thumb1.S @@ -0,0 +1,31 @@ +//===-- aeabi_uidivmod_thumb1.S - Thumb1 (Cortex-M0/M1) variant ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Used on ARMv6-M (Cortex-M0/M0+/M1). +//===----------------------------------------------------------------------===// + +#include "assembly.h" + + .syntax unified + .text + DEFINE_CODE_STATE + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_uidivmod) + cmp r0, r1 + bcc LOCAL_LABEL(case_denom_larger) + push {r0, r1, lr} + bl SYMBOL_NAME(__aeabi_uidiv) + pop {r1, r2, r3} + muls r2, r0, r2 // r2 = quot * denom + subs r1, r1, r2 + JMP (r3) +LOCAL_LABEL(case_denom_larger): + movs r1, r0 + movs r0, #0 + JMP (lr) +END_COMPILERRT_FUNCTION(__aeabi_uidivmod) + +NO_EXEC_STACK_DIRECTIVE diff --git a/lib/arm/aeabi_uldivmod.S b/lib/arm/aeabi_uldivmod.S @@ -0,0 +1,34 @@ +//===-- aeabi_uldivmod.S - EABI uldivmod implementation -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "assembly.h" + +// struct { uint64_t quot, uint64_t rem} +// __aeabi_uldivmod(uint64_t numerator, uint64_t denominator) { +// uint64_t rem, quot; +// quot = __udivmoddi4(numerator, denominator, &rem); +// return {quot, rem}; +// } + + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_uldivmod) + push {r6, lr} + sub sp, sp, #16 + add r6, sp, #8 + str r6, [sp] + bl SYMBOL_NAME(__udivmoddi4) + ldr r2, [sp, #8] + ldr r3, [sp, #12] + add sp, sp, #16 + pop {r6, pc} +END_COMPILERRT_FUNCTION(__aeabi_uldivmod) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/lib/atomic/atomic_common.inc b/lib/atomic/atomic_common.inc @@ -0,0 +1,238 @@ +//===-- atomic_common.inc - shared body for atomic_*.c ------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Includer must define BEFORE this include: +// typedef ... Lock; +// static inline void lock(Lock *l); +// static inline void unlock(Lock *l); +// static Lock locks[SPINLOCK_COUNT]; +// +// Uses GCC-style __atomic_* builtins (the family cfree provides) rather than +// Clang-specific __c11_atomic_*. +// +// The five generic entry points (load/store/exchange/compare_exchange/ +// is_lock_free) collide with clang builtins of the same name. We define them +// under _c-suffixed symbols and rename them at link time via the standard +// `#pragma redefine_extname` trick. +//===----------------------------------------------------------------------===// + +#pragma redefine_extname __atomic_load_c __atomic_load +#pragma redefine_extname __atomic_store_c __atomic_store +#pragma redefine_extname __atomic_exchange_c __atomic_exchange +#pragma redefine_extname __atomic_compare_exchange_c __atomic_compare_exchange +#pragma redefine_extname __atomic_is_lock_free_c __atomic_is_lock_free + +static __inline Lock *lock_for_pointer(void *ptr) { + intptr_t hash = (intptr_t)ptr; + hash >>= 4; + intptr_t low = hash & SPINLOCK_MASK; + hash >>= 16; + hash ^= low; + return locks + (hash & SPINLOCK_MASK); +} + +#define ATOMIC_ALWAYS_LOCK_FREE_OR_ALIGNED_LOCK_FREE(size, p) \ + (__atomic_always_lock_free(size, p) || \ + (__atomic_always_lock_free(size, 0) && ((uintptr_t)p % size) == 0)) +#define IS_LOCK_FREE_1(p) ATOMIC_ALWAYS_LOCK_FREE_OR_ALIGNED_LOCK_FREE(1, p) +#define IS_LOCK_FREE_2(p) ATOMIC_ALWAYS_LOCK_FREE_OR_ALIGNED_LOCK_FREE(2, p) +#define IS_LOCK_FREE_4(p) ATOMIC_ALWAYS_LOCK_FREE_OR_ALIGNED_LOCK_FREE(4, p) +#define IS_LOCK_FREE_8(p) ATOMIC_ALWAYS_LOCK_FREE_OR_ALIGNED_LOCK_FREE(8, p) +#define IS_LOCK_FREE_16(p) ATOMIC_ALWAYS_LOCK_FREE_OR_ALIGNED_LOCK_FREE(16, p) + +#define TRY_LOCK_FREE_CASE(n, type, ptr) \ + case n: \ + if (IS_LOCK_FREE_##n(ptr)) { LOCK_FREE_ACTION(type); } \ + break; + +#if HAS_INT128 +#define TRY_LOCK_FREE_CASE_16(p) TRY_LOCK_FREE_CASE(16, __uint128_t, p) +#define OPTIMISED_CASES \ + OPTIMISED_CASE(1, IS_LOCK_FREE_1, uint8_t) \ + OPTIMISED_CASE(2, IS_LOCK_FREE_2, uint16_t) \ + OPTIMISED_CASE(4, IS_LOCK_FREE_4, uint32_t) \ + OPTIMISED_CASE(8, IS_LOCK_FREE_8, uint64_t) \ + OPTIMISED_CASE(16, IS_LOCK_FREE_16, __uint128_t) +#else +#define TRY_LOCK_FREE_CASE_16(p) /* no __uint128_t */ +#define OPTIMISED_CASES \ + OPTIMISED_CASE(1, IS_LOCK_FREE_1, uint8_t) \ + OPTIMISED_CASE(2, IS_LOCK_FREE_2, uint16_t) \ + OPTIMISED_CASE(4, IS_LOCK_FREE_4, uint32_t) \ + OPTIMISED_CASE(8, IS_LOCK_FREE_8, uint64_t) +#endif + +#define LOCK_FREE_CASES(ptr) \ + do { \ + switch (size) { \ + TRY_LOCK_FREE_CASE(1, uint8_t, ptr) \ + TRY_LOCK_FREE_CASE(2, uint16_t, ptr) \ + TRY_LOCK_FREE_CASE(4, uint32_t, ptr) \ + TRY_LOCK_FREE_CASE(8, uint64_t, ptr) \ + TRY_LOCK_FREE_CASE_16(ptr) \ + default: break; \ + } \ + } while (0) + +bool __atomic_is_lock_free_c(size_t size, void *ptr) { +#define LOCK_FREE_ACTION(type) return true; + LOCK_FREE_CASES(ptr); +#undef LOCK_FREE_ACTION + return false; +} + +void __atomic_load_c(int size, void *src, void *dest, int model) { +#define LOCK_FREE_ACTION(type) \ + *((type *)dest) = __atomic_load_n((type *)src, model); \ + return; + LOCK_FREE_CASES(src); +#undef LOCK_FREE_ACTION + Lock *l = lock_for_pointer(src); + lock(l); + __builtin_memcpy(dest, src, size); + unlock(l); +} + +void __atomic_store_c(int size, void *dest, void *src, int model) { +#define LOCK_FREE_ACTION(type) \ + __atomic_store_n((type *)dest, *(type *)src, model); \ + return; + LOCK_FREE_CASES(dest); +#undef LOCK_FREE_ACTION + Lock *l = lock_for_pointer(dest); + lock(l); + __builtin_memcpy(dest, src, size); + unlock(l); +} + +int __atomic_compare_exchange_c(int size, void *ptr, void *expected, + void *desired, int success, int failure) { +#define LOCK_FREE_ACTION(type) \ + return __atomic_compare_exchange_n((type *)ptr, (type *)expected, \ + *(type *)desired, 0, success, failure); + LOCK_FREE_CASES(ptr); +#undef LOCK_FREE_ACTION + Lock *l = lock_for_pointer(ptr); + lock(l); + if (__builtin_memcmp(ptr, expected, size) == 0) { + __builtin_memcpy(ptr, desired, size); + unlock(l); + return 1; + } + __builtin_memcpy(expected, ptr, size); + unlock(l); + return 0; +} + +void __atomic_exchange_c(int size, void *ptr, void *val, void *old, int model) { +#define LOCK_FREE_ACTION(type) \ + *(type *)old = __atomic_exchange_n((type *)ptr, *(type *)val, model); \ + return; + LOCK_FREE_CASES(ptr); +#undef LOCK_FREE_ACTION + Lock *l = lock_for_pointer(ptr); + lock(l); + __builtin_memcpy(old, ptr, size); + __builtin_memcpy(ptr, val, size); + unlock(l); +} + +#define OPTIMISED_CASE(n, lockfree, type) \ + type __atomic_load_##n(type *src, int model) { \ + if (lockfree(src)) return __atomic_load_n(src, model); \ + Lock *l = lock_for_pointer(src); \ + lock(l); \ + type val = *src; \ + unlock(l); \ + return val; \ + } +OPTIMISED_CASES +#undef OPTIMISED_CASE + +#define OPTIMISED_CASE(n, lockfree, type) \ + void __atomic_store_##n(type *dest, type val, int model) { \ + if (lockfree(dest)) { __atomic_store_n(dest, val, model); return; } \ + Lock *l = lock_for_pointer(dest); \ + lock(l); \ + *dest = val; \ + unlock(l); \ + } +OPTIMISED_CASES +#undef OPTIMISED_CASE + +#define OPTIMISED_CASE(n, lockfree, type) \ + type __atomic_exchange_##n(type *dest, type val, int model) { \ + if (lockfree(dest)) return __atomic_exchange_n(dest, val, model); \ + Lock *l = lock_for_pointer(dest); \ + lock(l); \ + type tmp = *dest; \ + *dest = val; \ + unlock(l); \ + return tmp; \ + } +OPTIMISED_CASES +#undef OPTIMISED_CASE + +#define OPTIMISED_CASE(n, lockfree, type) \ + bool __atomic_compare_exchange_##n(type *ptr, type *expected, type desired, \ + int success, int failure) { \ + if (lockfree(ptr)) \ + return __atomic_compare_exchange_n(ptr, expected, desired, 0, success, \ + failure); \ + Lock *l = lock_for_pointer(ptr); \ + lock(l); \ + if (*ptr == *expected) { \ + *ptr = desired; \ + unlock(l); \ + return true; \ + } \ + *expected = *ptr; \ + unlock(l); \ + return false; \ + } +OPTIMISED_CASES +#undef OPTIMISED_CASE + +#define ATOMIC_RMW(n, lockfree, type, opname, op) \ + type __atomic_fetch_##opname##_##n(type *ptr, type val, int model) { \ + if (lockfree(ptr)) return __atomic_fetch_##opname(ptr, val, model); \ + Lock *l = lock_for_pointer(ptr); \ + lock(l); \ + type tmp = *ptr; \ + *ptr = tmp op val; \ + unlock(l); \ + return tmp; \ + } + +#define ATOMIC_RMW_NAND(n, lockfree, type) \ + type __atomic_fetch_nand_##n(type *ptr, type val, int model) { \ + if (lockfree(ptr)) return __atomic_fetch_nand(ptr, val, model); \ + Lock *l = lock_for_pointer(ptr); \ + lock(l); \ + type tmp = *ptr; \ + *ptr = ~(tmp & val); \ + unlock(l); \ + return tmp; \ + } + +#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, add, +) +OPTIMISED_CASES +#undef OPTIMISED_CASE +#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, sub, -) +OPTIMISED_CASES +#undef OPTIMISED_CASE +#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, and, &) +OPTIMISED_CASES +#undef OPTIMISED_CASE +#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, or, |) +OPTIMISED_CASES +#undef OPTIMISED_CASE +#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, xor, ^) +OPTIMISED_CASES +#undef OPTIMISED_CASE +#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW_NAND(n, lockfree, type) +OPTIMISED_CASES +#undef OPTIMISED_CASE diff --git a/lib/atomic/atomic_freestanding.c b/lib/atomic/atomic_freestanding.c @@ -0,0 +1,36 @@ +//===-- atomic_freestanding.c - lock fallbacks via C11 atomic spinlock ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Atomic fallback shim using a pointer-sized C11 atomic spinlock as the +// lock primitive. Has no OS or libc dependency. +//===----------------------------------------------------------------------===// + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +#define SPINLOCK_COUNT (1 << 10) +static const long SPINLOCK_MASK = SPINLOCK_COUNT - 1; + +// HAS_INT128 is defined by the build system: -DHAS_INT128=1 on 64-bit targets, +// -DHAS_INT128=0 on 32-bit. atomic_common.inc keys 16-byte cases off it. + +typedef _Atomic(uintptr_t) Lock; + +static inline void unlock(Lock *l) { + __atomic_store_n((uintptr_t *)l, 0, __ATOMIC_RELEASE); +} + +static inline void lock(Lock *l) { + uintptr_t old = 0; + while (!__atomic_compare_exchange_n((uintptr_t *)l, &old, 1, 1, + __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) + old = 0; +} + +static Lock locks[SPINLOCK_COUNT]; + +#include "atomic_common.inc" diff --git a/lib/fp/adddf3.c b/lib/fp/adddf3.c @@ -0,0 +1,17 @@ +//===-- lib/adddf3.c - Double-precision addition ------------------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements double-precision soft-float addition. +// +//===----------------------------------------------------------------------===// + +#define DOUBLE_PRECISION +#include "fp_add_impl.inc" + +COMPILER_RT_ABI double __adddf3(double a, double b) { return __addXf3__(a, b); } + diff --git a/lib/fp/addsf3.c b/lib/fp/addsf3.c @@ -0,0 +1,17 @@ +//===-- lib/addsf3.c - Single-precision addition ------------------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements single-precision soft-float addition. +// +//===----------------------------------------------------------------------===// + +#define SINGLE_PRECISION +#include "fp_add_impl.inc" + +COMPILER_RT_ABI float __addsf3(float a, float b) { return __addXf3__(a, b); } + diff --git a/lib/fp/comparedf2.c b/lib/fp/comparedf2.c @@ -0,0 +1,52 @@ +//===-- lib/comparedf2.c - Double-precision comparisons -----------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// // This file implements the following soft-float comparison routines: +// +// __eqdf2 __gedf2 __unorddf2 +// __ledf2 __gtdf2 +// __ltdf2 +// __nedf2 +// +// The semantics of the routines grouped in each column are identical, so there +// is a single implementation for each, and wrappers to provide the other names. +// +// The main routines behave as follows: +// +// __ledf2(a,b) returns -1 if a < b +// 0 if a == b +// 1 if a > b +// 1 if either a or b is NaN +// +// __gedf2(a,b) returns -1 if a < b +// 0 if a == b +// 1 if a > b +// -1 if either a or b is NaN +// +// __unorddf2(a,b) returns 0 if both a and b are numbers +// 1 if either a or b is NaN +// +// Note that __ledf2( ) and __gedf2( ) are identical except in their handling of +// NaN values. +// +//===----------------------------------------------------------------------===// + +#define DOUBLE_PRECISION +#include "fp_lib.h" + +#include "fp_compare_impl.inc" + +COMPILER_RT_ABI CMP_RESULT __ledf2(fp_t a, fp_t b) { return __leXf2__(a, b); } +COMPILER_RT_ABI CMP_RESULT __eqdf2(fp_t a, fp_t b) { return __leXf2__(a, b); } +COMPILER_RT_ABI CMP_RESULT __ltdf2(fp_t a, fp_t b) { return __leXf2__(a, b); } +COMPILER_RT_ABI CMP_RESULT __nedf2(fp_t a, fp_t b) { return __leXf2__(a, b); } +COMPILER_RT_ABI CMP_RESULT __gedf2(fp_t a, fp_t b) { return __geXf2__(a, b); } +COMPILER_RT_ABI CMP_RESULT __gtdf2(fp_t a, fp_t b) { return __geXf2__(a, b); } +COMPILER_RT_ABI CMP_RESULT __unorddf2(fp_t a, fp_t b) { return __unordXf2__(a, b); } + + diff --git a/lib/fp/comparesf2.c b/lib/fp/comparesf2.c @@ -0,0 +1,52 @@ +//===-- lib/comparesf2.c - Single-precision comparisons -----------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the following soft-fp_t comparison routines: +// +// __eqsf2 __gesf2 __unordsf2 +// __lesf2 __gtsf2 +// __ltsf2 +// __nesf2 +// +// The semantics of the routines grouped in each column are identical, so there +// is a single implementation for each, and wrappers to provide the other names. +// +// The main routines behave as follows: +// +// __lesf2(a,b) returns -1 if a < b +// 0 if a == b +// 1 if a > b +// 1 if either a or b is NaN +// +// __gesf2(a,b) returns -1 if a < b +// 0 if a == b +// 1 if a > b +// -1 if either a or b is NaN +// +// __unordsf2(a,b) returns 0 if both a and b are numbers +// 1 if either a or b is NaN +// +// Note that __lesf2( ) and __gesf2( ) are identical except in their handling of +// NaN values. +// +//===----------------------------------------------------------------------===// + +#define SINGLE_PRECISION +#include "fp_lib.h" + +#include "fp_compare_impl.inc" + +COMPILER_RT_ABI CMP_RESULT __lesf2(fp_t a, fp_t b) { return __leXf2__(a, b); } +COMPILER_RT_ABI CMP_RESULT __eqsf2(fp_t a, fp_t b) { return __leXf2__(a, b); } +COMPILER_RT_ABI CMP_RESULT __ltsf2(fp_t a, fp_t b) { return __leXf2__(a, b); } +COMPILER_RT_ABI CMP_RESULT __nesf2(fp_t a, fp_t b) { return __leXf2__(a, b); } +COMPILER_RT_ABI CMP_RESULT __gesf2(fp_t a, fp_t b) { return __geXf2__(a, b); } +COMPILER_RT_ABI CMP_RESULT __gtsf2(fp_t a, fp_t b) { return __geXf2__(a, b); } +COMPILER_RT_ABI CMP_RESULT __unordsf2(fp_t a, fp_t b){ return __unordXf2__(a, b); } + + diff --git a/lib/fp/divdf3.c b/lib/fp/divdf3.c @@ -0,0 +1,22 @@ +//===-- lib/divdf3.c - Double-precision division ------------------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements double-precision soft-float division +// with the IEEE-754 default rounding (to nearest, ties to even). +// +//===----------------------------------------------------------------------===// + +#define DOUBLE_PRECISION + +#define NUMBER_OF_HALF_ITERATIONS 3 +#define NUMBER_OF_FULL_ITERATIONS 1 + +#include "fp_div_impl.inc" + +COMPILER_RT_ABI fp_t __divdf3(fp_t a, fp_t b) { return __divXf3__(a, b); } + diff --git a/lib/fp/divsf3.c b/lib/fp/divsf3.c @@ -0,0 +1,23 @@ +//===-- lib/divsf3.c - Single-precision division ------------------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements single-precision soft-float division +// with the IEEE-754 default rounding (to nearest, ties to even). +// +//===----------------------------------------------------------------------===// + +#define SINGLE_PRECISION + +#define NUMBER_OF_HALF_ITERATIONS 0 +#define NUMBER_OF_FULL_ITERATIONS 3 +#define USE_NATIVE_FULL_ITERATIONS + +#include "fp_div_impl.inc" + +COMPILER_RT_ABI fp_t __divsf3(fp_t a, fp_t b) { return __divXf3__(a, b); } + diff --git a/lib/fp/extendsfdf2.c b/lib/fp/extendsfdf2.c @@ -0,0 +1,14 @@ +//===-- lib/extendsfdf2.c - single -> double conversion -----------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#define SRC_SINGLE +#define DST_DOUBLE +#include "fp_extend_impl.inc" + +COMPILER_RT_ABI double __extendsfdf2(float a) { return __extendXfYf2__(a); } + diff --git a/lib/fp/fixdfdi.c b/lib/fp/fixdfdi.c @@ -0,0 +1,21 @@ +//===-- fixdfdi.c - Implement __fixdfdi -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#define DOUBLE_PRECISION +#include "fp_lib.h" + +// Support for systems that don't have hardware floating-point; there are no +// flags to set, and we don't want to code-gen to an unknown soft-float +// implementation. + +typedef di_int fixint_t; +typedef du_int fixuint_t; +#include "fp_fixint_impl.inc" + +COMPILER_RT_ABI di_int __fixdfdi(fp_t a) { return __fixint(a); } + diff --git a/lib/fp/fixdfsi.c b/lib/fp/fixdfsi.c @@ -0,0 +1,16 @@ +//===-- fixdfsi.c - Implement __fixdfsi -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#define DOUBLE_PRECISION +#include "fp_lib.h" +typedef si_int fixint_t; +typedef su_int fixuint_t; +#include "fp_fixint_impl.inc" + +COMPILER_RT_ABI si_int __fixdfsi(fp_t a) { return __fixint(a); } + diff --git a/lib/fp/fixsfdi.c b/lib/fp/fixsfdi.c @@ -0,0 +1,21 @@ +//===-- fixsfdi.c - Implement __fixsfdi -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#define SINGLE_PRECISION +#include "fp_lib.h" + +// Support for systems that don't have hardware floating-point; there are no +// flags to set, and we don't want to code-gen to an unknown soft-float +// implementation. + +typedef di_int fixint_t; +typedef du_int fixuint_t; +#include "fp_fixint_impl.inc" + +COMPILER_RT_ABI di_int __fixsfdi(fp_t a) { return __fixint(a); } + diff --git a/lib/fp/fixsfsi.c b/lib/fp/fixsfsi.c @@ -0,0 +1,16 @@ +//===-- fixsfsi.c - Implement __fixsfsi -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#define SINGLE_PRECISION +#include "fp_lib.h" +typedef si_int fixint_t; +typedef su_int fixuint_t; +#include "fp_fixint_impl.inc" + +COMPILER_RT_ABI si_int __fixsfsi(fp_t a) { return __fixint(a); } + diff --git a/lib/fp/fixunsdfdi.c b/lib/fp/fixunsdfdi.c @@ -0,0 +1,20 @@ +//===-- fixunsdfdi.c - Implement __fixunsdfdi -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#define DOUBLE_PRECISION +#include "fp_lib.h" + +// Support for systems that don't have hardware floating-point; there are no +// flags to set, and we don't want to code-gen to an unknown soft-float +// implementation. + +typedef du_int fixuint_t; +#include "fp_fixuint_impl.inc" + +COMPILER_RT_ABI du_int __fixunsdfdi(fp_t a) { return __fixuint(a); } + diff --git a/lib/fp/fixunsdfsi.c b/lib/fp/fixunsdfsi.c @@ -0,0 +1,15 @@ +//===-- fixunsdfsi.c - Implement __fixunsdfsi -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#define DOUBLE_PRECISION +#include "fp_lib.h" +typedef su_int fixuint_t; +#include "fp_fixuint_impl.inc" + +COMPILER_RT_ABI su_int __fixunsdfsi(fp_t a) { return __fixuint(a); } + diff --git a/lib/fp/fixunssfdi.c b/lib/fp/fixunssfdi.c @@ -0,0 +1,20 @@ +//===-- fixunssfdi.c - Implement __fixunssfdi -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#define SINGLE_PRECISION +#include "fp_lib.h" + +// Support for systems that don't have hardware floating-point; there are no +// flags to set, and we don't want to code-gen to an unknown soft-float +// implementation. + +typedef du_int fixuint_t; +#include "fp_fixuint_impl.inc" + +COMPILER_RT_ABI du_int __fixunssfdi(fp_t a) { return __fixuint(a); } + diff --git a/lib/fp/fixunssfsi.c b/lib/fp/fixunssfsi.c @@ -0,0 +1,19 @@ +//===-- fixunssfsi.c - Implement __fixunssfsi -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __fixunssfsi for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#define SINGLE_PRECISION +#include "fp_lib.h" +typedef su_int fixuint_t; +#include "fp_fixuint_impl.inc" + +COMPILER_RT_ABI su_int __fixunssfsi(fp_t a) { return __fixuint(a); } + diff --git a/lib/fp/floatdidf.c b/lib/fp/floatdidf.c @@ -0,0 +1,32 @@ +//===-- floatdidf.c - Implement __floatdidf -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __floatdidf for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +// Returns: convert a to a double, rounding toward even. + +// Assumption: double is a IEEE 64 bit floating point type +// di_int is a 64 bit integral type + +// seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm +// mmmm + +// Support for systems that don't have hardware floating-point; there are no +// flags to set, and we don't want to code-gen to an unknown soft-float +// implementation. + +#define SRC_I64 +#define DST_DOUBLE +#include "int_to_fp_impl.inc" + +COMPILER_RT_ABI double __floatdidf(di_int a) { return __floatXiYf__(a); } + diff --git a/lib/fp/floatdisf.c b/lib/fp/floatdisf.c @@ -0,0 +1,27 @@ +//===-- floatdisf.c - Implement __floatdisf -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __floatdisf for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +// Returns: convert a to a float, rounding toward even. + +// Assumption: float is a IEEE 32 bit floating point type +// di_int is a 64 bit integral type + +// seee eeee emmm mmmm mmmm mmmm mmmm mmmm + +#include "int_lib.h" + +#define SRC_I64 +#define DST_SINGLE +#include "int_to_fp_impl.inc" + +COMPILER_RT_ABI float __floatdisf(di_int a) { return __floatXiYf__(a); } + diff --git a/lib/fp/floatsidf.c b/lib/fp/floatsidf.c @@ -0,0 +1,51 @@ +//===-- lib/floatsidf.c - integer -> double-precision conversion --*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements integer to double-precision conversion for the +// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even +// mode. +// +//===----------------------------------------------------------------------===// + +#define DOUBLE_PRECISION +#include "fp_lib.h" + +#include "int_lib.h" + +COMPILER_RT_ABI fp_t __floatsidf(si_int a) { + + const int aWidth = sizeof a * CHAR_BIT; + + // Handle zero as a special case to protect clz + if (a == 0) + return fromRep(0); + + // All other cases begin by extracting the sign and absolute value of a + rep_t sign = 0; + su_int aAbs = (su_int)a; + if (a < 0) { + sign = signBit; + aAbs = -aAbs; + } + + // Exponent of (fp_t)a is the width of abs(a). + const int exponent = (aWidth - 1) - clzsi(aAbs); + rep_t result; + + // Shift a into the significand field and clear the implicit bit. Extra + // cast to unsigned int is necessary to get the correct behavior for + // the input INT_MIN. + const int shift = significandBits - exponent; + result = (rep_t)aAbs << shift ^ implicitBit; + + // Insert the exponent + result += (rep_t)(exponent + exponentBias) << significandBits; + // Insert the sign bit and return + return fromRep(result | sign); +} + diff --git a/lib/fp/floatsisf.c b/lib/fp/floatsisf.c @@ -0,0 +1,59 @@ +//===-- lib/floatsisf.c - integer -> single-precision conversion --*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements integer to single-precision conversion for the +// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even +// mode. +// +//===----------------------------------------------------------------------===// + +#define SINGLE_PRECISION +#include "fp_lib.h" + +#include "int_lib.h" + +COMPILER_RT_ABI fp_t __floatsisf(si_int a) { + + const int aWidth = sizeof a * CHAR_BIT; + + // Handle zero as a special case to protect clz + if (a == 0) + return fromRep(0); + + // All other cases begin by extracting the sign and absolute value of a + rep_t sign = 0; + su_int aAbs = (su_int)a; + if (a < 0) { + sign = signBit; + aAbs = -aAbs; + } + + // Exponent of (fp_t)a is the width of abs(a). + const int exponent = (aWidth - 1) - clzsi(aAbs); + rep_t result; + + // Shift a into the significand field, rounding if it is a right-shift + if (exponent <= significandBits) { + const int shift = significandBits - exponent; + result = (rep_t)aAbs << shift ^ implicitBit; + } else { + const int shift = exponent - significandBits; + result = (rep_t)aAbs >> shift ^ implicitBit; + rep_t round = (rep_t)aAbs << (typeWidth - shift); + if (round > signBit) + result++; + if (round == signBit) + result += result & 1; + } + + // Insert the exponent + result += (rep_t)(exponent + exponentBias) << significandBits; + // Insert the sign bit and return + return fromRep(result | sign); +} + diff --git a/lib/fp/floatundidf.c b/lib/fp/floatundidf.c @@ -0,0 +1,32 @@ +//===-- floatundidf.c - Implement __floatundidf ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __floatundidf for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +// Returns: convert a to a double, rounding toward even. + +// Assumption: double is a IEEE 64 bit floating point type +// du_int is a 64 bit integral type + +// seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm +// mmmm + +#include "int_lib.h" + +// Support for systems that don't have hardware floating-point; there are no +// flags to set, and we don't want to code-gen to an unknown soft-float +// implementation. + +#define SRC_U64 +#define DST_DOUBLE +#include "int_to_fp_impl.inc" + +COMPILER_RT_ABI double __floatundidf(du_int a) { return __floatXiYf__(a); } + diff --git a/lib/fp/floatundisf.c b/lib/fp/floatundisf.c @@ -0,0 +1,27 @@ +//===-- floatundisf.c - Implement __floatundisf ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __floatundisf for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +// Returns: convert a to a float, rounding toward even. + +// Assumption: float is a IEEE 32 bit floating point type +// du_int is a 64 bit integral type + +// seee eeee emmm mmmm mmmm mmmm mmmm mmmm + +#include "int_lib.h" + +#define SRC_U64 +#define DST_SINGLE +#include "int_to_fp_impl.inc" + +COMPILER_RT_ABI float __floatundisf(du_int a) { return __floatXiYf__(a); } + diff --git a/lib/fp/floatunsidf.c b/lib/fp/floatunsidf.c @@ -0,0 +1,40 @@ +//===-- lib/floatunsidf.c - uint -> double-precision conversion ---*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements unsigned integer to double-precision conversion for the +// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even +// mode. +// +//===----------------------------------------------------------------------===// + +#define DOUBLE_PRECISION +#include "fp_lib.h" + +#include "int_lib.h" + +COMPILER_RT_ABI fp_t __floatunsidf(su_int a) { + + const int aWidth = sizeof a * CHAR_BIT; + + // Handle zero as a special case to protect clz + if (a == 0) + return fromRep(0); + + // Exponent of (fp_t)a is the width of abs(a). + const int exponent = (aWidth - 1) - clzsi(a); + rep_t result; + + // Shift a into the significand field and clear the implicit bit. + const int shift = significandBits - exponent; + result = (rep_t)a << shift ^ implicitBit; + + // Insert the exponent + result += (rep_t)(exponent + exponentBias) << significandBits; + return fromRep(result); +} + diff --git a/lib/fp/floatunsisf.c b/lib/fp/floatunsisf.c @@ -0,0 +1,50 @@ +//===-- lib/floatunsisf.c - uint -> single-precision conversion ---*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements unsigned integer to single-precision conversion for the +// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even +// mode. +// +//===----------------------------------------------------------------------===// + +#define SINGLE_PRECISION +#include "fp_lib.h" + +#include "int_lib.h" + +COMPILER_RT_ABI fp_t __floatunsisf(su_int a) { + + const int aWidth = sizeof a * CHAR_BIT; + + // Handle zero as a special case to protect clz + if (a == 0) + return fromRep(0); + + // Exponent of (fp_t)a is the width of abs(a). + const int exponent = (aWidth - 1) - clzsi(a); + rep_t result; + + // Shift a into the significand field, rounding if it is a right-shift + if (exponent <= significandBits) { + const int shift = significandBits - exponent; + result = (rep_t)a << shift ^ implicitBit; + } else { + const int shift = exponent - significandBits; + result = (rep_t)a >> shift ^ implicitBit; + rep_t round = (rep_t)a << (typeWidth - shift); + if (round > signBit) + result++; + if (round == signBit) + result += result & 1; + } + + // Insert the exponent + result += (rep_t)(exponent + exponentBias) << significandBits; + return fromRep(result); +} + diff --git a/lib/fp/fp_mode.c b/lib/fp/fp_mode.c @@ -0,0 +1,22 @@ +//===----- lib/fp_mode.c - Floaing-point environment mode utilities --C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides a default implementation of fp_mode.h for architectures +// that does not support or does not have an implementation of floating point +// environment mode. +// +//===----------------------------------------------------------------------===// + +#include "fp_mode.h" + +// IEEE-754 default rounding (to nearest, ties to even). +CRT_FE_ROUND_MODE __fe_getround(void) { return CRT_FE_TONEAREST; } + +int __fe_raise_inexact(void) { + return 0; +} diff --git a/lib/fp/muldf3.c b/lib/fp/muldf3.c @@ -0,0 +1,18 @@ +//===-- lib/muldf3.c - Double-precision multiplication ------------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements double-precision soft-float multiplication +// with the IEEE-754 default rounding (to nearest, ties to even). +// +//===----------------------------------------------------------------------===// + +#define DOUBLE_PRECISION +#include "fp_mul_impl.inc" + +COMPILER_RT_ABI fp_t __muldf3(fp_t a, fp_t b) { return __mulXf3__(a, b); } + diff --git a/lib/fp/mulsf3.c b/lib/fp/mulsf3.c @@ -0,0 +1,18 @@ +//===-- lib/mulsf3.c - Single-precision multiplication ------------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements single-precision soft-float multiplication +// with the IEEE-754 default rounding (to nearest, ties to even). +// +//===----------------------------------------------------------------------===// + +#define SINGLE_PRECISION +#include "fp_mul_impl.inc" + +COMPILER_RT_ABI fp_t __mulsf3(fp_t a, fp_t b) { return __mulXf3__(a, b); } + diff --git a/lib/fp/negdf2.c b/lib/fp/negdf2.c @@ -0,0 +1,17 @@ +//===-- lib/negdf2.c - double-precision negation ------------------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements double-precision soft-float negation. +// +//===----------------------------------------------------------------------===// + +#define DOUBLE_PRECISION +#include "fp_lib.h" + +COMPILER_RT_ABI fp_t __negdf2(fp_t a) { return fromRep(toRep(a) ^ signBit); } + diff --git a/lib/fp/negsf2.c b/lib/fp/negsf2.c @@ -0,0 +1,17 @@ +//===-- lib/negsf2.c - single-precision negation ------------------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements single-precision soft-float negation. +// +//===----------------------------------------------------------------------===// + +#define SINGLE_PRECISION +#include "fp_lib.h" + +COMPILER_RT_ABI fp_t __negsf2(fp_t a) { return fromRep(toRep(a) ^ signBit); } + diff --git a/lib/fp/subdf3.c b/lib/fp/subdf3.c @@ -0,0 +1,20 @@ +//===-- lib/adddf3.c - Double-precision subtraction ---------------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements double-precision soft-float subtraction. +// +//===----------------------------------------------------------------------===// + +#define DOUBLE_PRECISION +#include "fp_lib.h" + +// Subtraction; flip the sign bit of b and add. +COMPILER_RT_ABI fp_t __subdf3(fp_t a, fp_t b) { + return __adddf3(a, fromRep(toRep(b) ^ signBit)); +} + diff --git a/lib/fp/subsf3.c b/lib/fp/subsf3.c @@ -0,0 +1,20 @@ +//===-- lib/subsf3.c - Single-precision subtraction ---------------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements single-precision soft-float subtraction. +// +//===----------------------------------------------------------------------===// + +#define SINGLE_PRECISION +#include "fp_lib.h" + +// Subtraction; flip the sign bit of b and add. +COMPILER_RT_ABI fp_t __subsf3(fp_t a, fp_t b) { + return __addsf3(a, fromRep(toRep(b) ^ signBit)); +} + diff --git a/lib/fp/truncdfsf2.c b/lib/fp/truncdfsf2.c @@ -0,0 +1,14 @@ +//===-- lib/truncdfsf2.c - double -> single conversion ------------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#define SRC_DOUBLE +#define DST_SINGLE +#include "fp_trunc_impl.inc" + +COMPILER_RT_ABI float __truncdfsf2(double a) { return __truncXfYf2__(a); } + diff --git a/lib/fp_tf/addtf3.c b/lib/fp_tf/addtf3.c @@ -0,0 +1,21 @@ +//===-- lib/addtf3.c - Quad-precision addition --------------------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements quad-precision soft-float addition. +// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +#include "fp_add_impl.inc" + +COMPILER_RT_ABI fp_t __addtf3(fp_t a, fp_t b) { + return __addXf3__(a, b); +} + diff --git a/lib/fp_tf/comparetf2.c b/lib/fp_tf/comparetf2.c @@ -0,0 +1,51 @@ +//===-- lib/comparetf2.c - Quad-precision comparisons -------------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// // This file implements the following soft-float comparison routines: +// +// __eqtf2 __getf2 __unordtf2 +// __letf2 __gttf2 +// __lttf2 +// __netf2 +// +// The semantics of the routines grouped in each column are identical, so there +// is a single implementation for each, and wrappers to provide the other names. +// +// The main routines behave as follows: +// +// __letf2(a,b) returns -1 if a < b +// 0 if a == b +// 1 if a > b +// 1 if either a or b is NaN +// +// __getf2(a,b) returns -1 if a < b +// 0 if a == b +// 1 if a > b +// -1 if either a or b is NaN +// +// __unordtf2(a,b) returns 0 if both a and b are numbers +// 1 if either a or b is NaN +// +// Note that __letf2( ) and __getf2( ) are identical except in their handling of +// NaN values. +// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +#include "fp_compare_impl.inc" + +COMPILER_RT_ABI CMP_RESULT __letf2(fp_t a, fp_t b) { return __leXf2__(a, b); } +COMPILER_RT_ABI CMP_RESULT __eqtf2(fp_t a, fp_t b) { return __leXf2__(a, b); } +COMPILER_RT_ABI CMP_RESULT __lttf2(fp_t a, fp_t b) { return __leXf2__(a, b); } +COMPILER_RT_ABI CMP_RESULT __netf2(fp_t a, fp_t b) { return __leXf2__(a, b); } +COMPILER_RT_ABI CMP_RESULT __getf2(fp_t a, fp_t b) { return __geXf2__(a, b); } +COMPILER_RT_ABI CMP_RESULT __gttf2(fp_t a, fp_t b) { return __geXf2__(a, b); } +COMPILER_RT_ABI CMP_RESULT __unordtf2(fp_t a, fp_t b) { return __unordXf2__(a, b); } + diff --git a/lib/fp_tf/divtf3.c b/lib/fp_tf/divtf3.c @@ -0,0 +1,24 @@ +//===-- lib/divtf3.c - Quad-precision division --------------------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements quad-precision soft-float division +// with the IEEE-754 default rounding (to nearest, ties to even). +// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + + +#define NUMBER_OF_HALF_ITERATIONS 4 +#define NUMBER_OF_FULL_ITERATIONS 1 + +#include "fp_div_impl.inc" + +COMPILER_RT_ABI fp_t __divtf3(fp_t a, fp_t b) { return __divXf3__(a, b); } + diff --git a/lib/fp_tf/extenddftf2.c b/lib/fp_tf/extenddftf2.c @@ -0,0 +1,17 @@ +//===-- lib/extenddftf2.c - double -> quad conversion -------------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +#define SRC_DOUBLE +#define DST_QUAD +#include "fp_extend_impl.inc" + +COMPILER_RT_ABI dst_t __extenddftf2(src_t a) { return __extendXfYf2__(a); } + diff --git a/lib/fp_tf/extendsftf2.c b/lib/fp_tf/extendsftf2.c @@ -0,0 +1,17 @@ +//===-- lib/extendsftf2.c - single -> quad conversion -------------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +#define SRC_SINGLE +#define DST_QUAD +#include "fp_extend_impl.inc" + +COMPILER_RT_ABI dst_t __extendsftf2(src_t a) { return __extendXfYf2__(a); } + diff --git a/lib/fp_tf/fixtfdi.c b/lib/fp_tf/fixtfdi.c @@ -0,0 +1,16 @@ +//===-- fixtfdi.c - Implement __fixtfdi -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +typedef di_int fixint_t; +typedef du_int fixuint_t; +#include "fp_fixint_impl.inc" + +COMPILER_RT_ABI di_int __fixtfdi(fp_t a) { return __fixint(a); } diff --git a/lib/fp_tf/fixtfsi.c b/lib/fp_tf/fixtfsi.c @@ -0,0 +1,16 @@ +//===-- fixtfsi.c - Implement __fixtfsi -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +typedef si_int fixint_t; +typedef su_int fixuint_t; +#include "fp_fixint_impl.inc" + +COMPILER_RT_ABI si_int __fixtfsi(fp_t a) { return __fixint(a); } diff --git a/lib/fp_tf/fixtfti.c b/lib/fp_tf/fixtfti.c @@ -0,0 +1,16 @@ +//===-- fixtfti.c - Implement __fixtfti -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +typedef ti_int fixint_t; +typedef tu_int fixuint_t; +#include "fp_fixint_impl.inc" + +COMPILER_RT_ABI ti_int __fixtfti(fp_t a) { return __fixint(a); } diff --git a/lib/fp_tf/fixunstfdi.c b/lib/fp_tf/fixunstfdi.c @@ -0,0 +1,15 @@ +//===-- fixunstfdi.c - Implement __fixunstfdi -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +typedef du_int fixuint_t; +#include "fp_fixuint_impl.inc" + +COMPILER_RT_ABI du_int __fixunstfdi(fp_t a) { return __fixuint(a); } diff --git a/lib/fp_tf/fixunstfsi.c b/lib/fp_tf/fixunstfsi.c @@ -0,0 +1,15 @@ +//===-- fixunstfsi.c - Implement __fixunstfsi -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +typedef su_int fixuint_t; +#include "fp_fixuint_impl.inc" + +COMPILER_RT_ABI su_int __fixunstfsi(fp_t a) { return __fixuint(a); } diff --git a/lib/fp_tf/fixunstfti.c b/lib/fp_tf/fixunstfti.c @@ -0,0 +1,15 @@ +//===-- fixunstfsi.c - Implement __fixunstfsi -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +typedef tu_int fixuint_t; +#include "fp_fixuint_impl.inc" + +COMPILER_RT_ABI tu_int __fixunstfti(fp_t a) { return __fixuint(a); } diff --git a/lib/fp_tf/floatditf.c b/lib/fp_tf/floatditf.c @@ -0,0 +1,47 @@ +//===-- lib/floatditf.c - integer -> quad-precision conversion ----*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements di_int to quad-precision conversion for the +// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even +// mode. +// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +COMPILER_RT_ABI fp_t __floatditf(di_int a) { + + const int aWidth = sizeof a * CHAR_BIT; + + // Handle zero as a special case to protect clz + if (a == 0) + return fromRep(0); + + // All other cases begin by extracting the sign and absolute value of a + rep_t sign = 0; + du_int aAbs = (du_int)a; + if (a < 0) { + sign = signBit; + aAbs = ~(du_int)a + 1U; + } + + // Exponent of (fp_t)a is the width of abs(a). + const int exponent = (aWidth - 1) - __builtin_clzll(aAbs); + rep_t result; + + // Shift a into the significand field, rounding if it is a right-shift + const int shift = significandBits - exponent; + result = (rep_t)aAbs << shift ^ implicitBit; + + // Insert the exponent + result += (rep_t)(exponent + exponentBias) << significandBits; + // Insert the sign bit and return + return fromRep(result | sign); +} + diff --git a/lib/fp_tf/floatsitf.c b/lib/fp_tf/floatsitf.c @@ -0,0 +1,47 @@ +//===-- lib/floatsitf.c - integer -> quad-precision conversion ----*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements integer to quad-precision conversion for the +// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even +// mode. +// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +COMPILER_RT_ABI fp_t __floatsitf(si_int a) { + + const int aWidth = sizeof a * CHAR_BIT; + + // Handle zero as a special case to protect clz + if (a == 0) + return fromRep(0); + + // All other cases begin by extracting the sign and absolute value of a + rep_t sign = 0; + su_int aAbs = (su_int)a; + if (a < 0) { + sign = signBit; + aAbs = -aAbs; + } + + // Exponent of (fp_t)a is the width of abs(a). + const int exponent = (aWidth - 1) - clzsi(aAbs); + rep_t result; + + // Shift a into the significand field and clear the implicit bit. + const int shift = significandBits - exponent; + result = (rep_t)aAbs << shift ^ implicitBit; + + // Insert the exponent + result += (rep_t)(exponent + exponentBias) << significandBits; + // Insert the sign bit and return + return fromRep(result | sign); +} + diff --git a/lib/fp_tf/floattitf.c b/lib/fp_tf/floattitf.c @@ -0,0 +1,33 @@ +//===-- lib/floattitf.c - int128 -> quad-precision conversion -----*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements ti_int to quad-precision conversion for the +// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even +// mode. +// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" +#include "int_lib.h" + +#define SRC_I128 +#define DST_QUAD +#include "int_to_fp_impl.inc" + +// Returns: convert a ti_int to a fp_t, rounding toward even. + +// Assumption: fp_t is a IEEE 128 bit floating point type +// ti_int is a 128 bit integral type + +// seee eeee eeee eeee mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm +// mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm +// mmmm mmmm mmmm + +COMPILER_RT_ABI fp_t __floattitf(ti_int a) { return __floatXiYf__(a); } + diff --git a/lib/fp_tf/floatunditf.c b/lib/fp_tf/floatunditf.c @@ -0,0 +1,38 @@ +//===-- lib/floatunditf.c - uint -> quad-precision conversion -----*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements du_int to quad-precision conversion for the +// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even +// mode. +// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +COMPILER_RT_ABI fp_t __floatunditf(du_int a) { + + const int aWidth = sizeof a * CHAR_BIT; + + // Handle zero as a special case to protect clz + if (a == 0) + return fromRep(0); + + // Exponent of (fp_t)a is the width of abs(a). + const int exponent = (aWidth - 1) - __builtin_clzll(a); + rep_t result; + + // Shift a into the significand field and clear the implicit bit. + const int shift = significandBits - exponent; + result = (rep_t)a << shift ^ implicitBit; + + // Insert the exponent + result += (rep_t)(exponent + exponentBias) << significandBits; + return fromRep(result); +} + diff --git a/lib/fp_tf/floatunsitf.c b/lib/fp_tf/floatunsitf.c @@ -0,0 +1,38 @@ +//===-- lib/floatunsitf.c - uint -> quad-precision conversion -----*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements unsigned integer to quad-precision conversion for the +// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even +// mode. +// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +COMPILER_RT_ABI fp_t __floatunsitf(su_int a) { + + const int aWidth = sizeof a * CHAR_BIT; + + // Handle zero as a special case to protect clz + if (a == 0) + return fromRep(0); + + // Exponent of (fp_t)a is the width of abs(a). + const int exponent = (aWidth - 1) - clzsi(a); + rep_t result; + + // Shift a into the significand field and clear the implicit bit. + const int shift = significandBits - exponent; + result = (rep_t)a << shift ^ implicitBit; + + // Insert the exponent + result += (rep_t)(exponent + exponentBias) << significandBits; + return fromRep(result); +} + diff --git a/lib/fp_tf/floatuntitf.c b/lib/fp_tf/floatuntitf.c @@ -0,0 +1,33 @@ +//===-- lib/floatuntitf.c - uint128 -> quad-precision conversion --*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements tu_int to quad-precision conversion for the +// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even +// mode. +// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" +#include "int_lib.h" + +#define SRC_U128 +#define DST_QUAD +#include "int_to_fp_impl.inc" + +// Returns: convert a tu_int to a fp_t, rounding toward even. + +// Assumption: fp_t is a IEEE 128 bit floating point type +// tu_int is a 128 bit integral type + +// seee eeee eeee eeee mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm +// mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm +// mmmm mmmm mmmm + +COMPILER_RT_ABI fp_t __floatuntitf(tu_int a) { return __floatXiYf__(a); } + diff --git a/lib/fp_tf/multf3.c b/lib/fp_tf/multf3.c @@ -0,0 +1,20 @@ +//===-- lib/multf3.c - Quad-precision multiplication --------------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements quad-precision soft-float multiplication +// with the IEEE-754 default rounding (to nearest, ties to even). +// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +#include "fp_mul_impl.inc" + +COMPILER_RT_ABI fp_t __multf3(fp_t a, fp_t b) { return __mulXf3__(a, b); } + diff --git a/lib/fp_tf/subtf3.c b/lib/fp_tf/subtf3.c @@ -0,0 +1,22 @@ +//===-- lib/subtf3.c - Quad-precision subtraction -----------------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements quad-precision soft-float subtraction. +// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +COMPILER_RT_ABI fp_t __addtf3(fp_t a, fp_t b); + +// Subtraction; flip the sign bit of b and add. +COMPILER_RT_ABI fp_t __subtf3(fp_t a, fp_t b) { + return __addtf3(a, fromRep(toRep(b) ^ signBit)); +} + diff --git a/lib/fp_tf/trunctfdf2.c b/lib/fp_tf/trunctfdf2.c @@ -0,0 +1,17 @@ +//===-- lib/truncdfsf2.c - quad -> double conversion --------------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +#define SRC_QUAD +#define DST_DOUBLE +#include "fp_trunc_impl.inc" + +COMPILER_RT_ABI dst_t __trunctfdf2(src_t a) { return __truncXfYf2__(a); } + diff --git a/lib/fp_tf/trunctfsf2.c b/lib/fp_tf/trunctfsf2.c @@ -0,0 +1,17 @@ +//===-- lib/trunctfsf2.c - quad -> single conversion --------------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +#define SRC_QUAD +#define DST_SINGLE +#include "fp_trunc_impl.inc" + +COMPILER_RT_ABI dst_t __trunctfsf2(src_t a) { return __truncXfYf2__(a); } + diff --git a/lib/fp_ti/fixdfti.c b/lib/fp_ti/fixdfti.c @@ -0,0 +1,19 @@ +//===-- fixdfti.c - Implement __fixdfti -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +#define DOUBLE_PRECISION +#include "fp_lib.h" + +typedef ti_int fixint_t; +typedef tu_int fixuint_t; +#include "fp_fixint_impl.inc" + +COMPILER_RT_ABI ti_int __fixdfti(fp_t a) { return __fixint(a); } + diff --git a/lib/fp_ti/fixsfti.c b/lib/fp_ti/fixsfti.c @@ -0,0 +1,19 @@ +//===-- fixsfti.c - Implement __fixsfti -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +#define SINGLE_PRECISION +#include "fp_lib.h" + +typedef ti_int fixint_t; +typedef tu_int fixuint_t; +#include "fp_fixint_impl.inc" + +COMPILER_RT_ABI ti_int __fixsfti(fp_t a) { return __fixint(a); } + diff --git a/lib/fp_ti/fixunsdfti.c b/lib/fp_ti/fixunsdfti.c @@ -0,0 +1,16 @@ +//===-- fixunsdfti.c - Implement __fixunsdfti -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +#define DOUBLE_PRECISION +#include "fp_lib.h" +typedef tu_int fixuint_t; +#include "fp_fixuint_impl.inc" + +COMPILER_RT_ABI tu_int __fixunsdfti(fp_t a) { return __fixuint(a); } diff --git a/lib/fp_ti/fixunssfti.c b/lib/fp_ti/fixunssfti.c @@ -0,0 +1,19 @@ +//===-- fixunssfti.c - Implement __fixunssfti -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __fixunssfti for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#define SINGLE_PRECISION +#include "fp_lib.h" + +typedef tu_int fixuint_t; +#include "fp_fixuint_impl.inc" + +COMPILER_RT_ABI tu_int __fixunssfti(fp_t a) { return __fixuint(a); } diff --git a/lib/fp_ti/floattidf.c b/lib/fp_ti/floattidf.c @@ -0,0 +1,29 @@ +//===-- floattidf.c - Implement __floattidf -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __floattidf for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + + +#define SRC_I128 +#define DST_DOUBLE +#include "int_to_fp_impl.inc" + +// Returns: convert a to a double, rounding toward even. + +// Assumption: double is a IEEE 64 bit floating point type +// ti_int is a 128 bit integral type + +// seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm +// mmmm + +COMPILER_RT_ABI double __floattidf(ti_int a) { return __floatXiYf__(a); } + diff --git a/lib/fp_ti/floattisf.c b/lib/fp_ti/floattisf.c @@ -0,0 +1,28 @@ +//===-- floattisf.c - Implement __floattisf -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __floattisf for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + + +#define SRC_I128 +#define DST_SINGLE +#include "int_to_fp_impl.inc" + +// Returns: convert a to a float, rounding toward even. + +// Assumption: float is a IEEE 32 bit floating point type +// ti_int is a 128 bit integral type + +// seee eeee emmm mmmm mmmm mmmm mmmm mmmm + +COMPILER_RT_ABI float __floattisf(ti_int a) { return __floatXiYf__(a); } + diff --git a/lib/fp_ti/floatuntidf.c b/lib/fp_ti/floatuntidf.c @@ -0,0 +1,29 @@ +//===-- floatuntidf.c - Implement __floatuntidf ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __floatuntidf for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + + +#define SRC_U128 +#define DST_DOUBLE +#include "int_to_fp_impl.inc" + +// Returns: convert a to a double, rounding toward even. + +// Assumption: double is a IEEE 64 bit floating point type +// tu_int is a 128 bit integral type + +// seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm +// mmmm + +COMPILER_RT_ABI double __floatuntidf(tu_int a) { return __floatXiYf__(a); } + diff --git a/lib/fp_ti/floatuntisf.c b/lib/fp_ti/floatuntisf.c @@ -0,0 +1,28 @@ +//===-- floatuntisf.c - Implement __floatuntisf ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __floatuntisf for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + + +#define SRC_U128 +#define DST_SINGLE +#include "int_to_fp_impl.inc" + +// Returns: convert a to a float, rounding toward even. + +// Assumption: float is a IEEE 32 bit floating point type +// tu_int is a 128 bit integral type + +// seee eeee emmm mmmm mmmm mmmm mmmm mmmm + +COMPILER_RT_ABI float __floatuntisf(tu_int a) { return __floatXiYf__(a); } + diff --git a/lib/impl/fp_add_impl.inc b/lib/impl/fp_add_impl.inc @@ -0,0 +1,172 @@ +//===----- lib/fp_add_impl.inc - floaing point addition -----------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements soft-float addition with the IEEE-754 default rounding +// (to nearest, ties to even). +// +//===----------------------------------------------------------------------===// + +#include "fp_lib.h" +#include "fp_mode.h" + +static __inline fp_t __addXf3__(fp_t a, fp_t b) { + rep_t aRep = toRep(a); + rep_t bRep = toRep(b); + const rep_t aAbs = aRep & absMask; + const rep_t bAbs = bRep & absMask; + + // Detect if a or b is zero, infinity, or NaN. + if (aAbs - REP_C(1) >= infRep - REP_C(1) || + bAbs - REP_C(1) >= infRep - REP_C(1)) { + // NaN + anything = qNaN + if (aAbs > infRep) + return fromRep(toRep(a) | quietBit); + // anything + NaN = qNaN + if (bAbs > infRep) + return fromRep(toRep(b) | quietBit); + + if (aAbs == infRep) { + // +/-infinity + -/+infinity = qNaN + if ((toRep(a) ^ toRep(b)) == signBit) + return fromRep(qnanRep); + // +/-infinity + anything remaining = +/- infinity + else + return a; + } + + // anything remaining + +/-infinity = +/-infinity + if (bAbs == infRep) + return b; + + // zero + anything = anything + if (!aAbs) { + // We need to get the sign right for zero + zero. + if (!bAbs) + return fromRep(toRep(a) & toRep(b)); + else + return b; + } + + // anything + zero = anything + if (!bAbs) + return a; + } + + // Swap a and b if necessary so that a has the larger absolute value. + if (bAbs > aAbs) { + const rep_t temp = aRep; + aRep = bRep; + bRep = temp; + } + + // Extract the exponent and significand from the (possibly swapped) a and b. + int aExponent = aRep >> significandBits & maxExponent; + int bExponent = bRep >> significandBits & maxExponent; + rep_t aSignificand = aRep & significandMask; + rep_t bSignificand = bRep & significandMask; + + // Normalize any denormals, and adjust the exponent accordingly. + if (aExponent == 0) + aExponent = normalize(&aSignificand); + if (bExponent == 0) + bExponent = normalize(&bSignificand); + + // The sign of the result is the sign of the larger operand, a. If they + // have opposite signs, we are performing a subtraction. Otherwise, we + // perform addition. + const rep_t resultSign = aRep & signBit; + const bool subtraction = (aRep ^ bRep) & signBit; + + // Shift the significands to give us round, guard and sticky, and set the + // implicit significand bit. If we fell through from the denormal path it + // was already set by normalize( ), but setting it twice won't hurt + // anything. + aSignificand = (aSignificand | implicitBit) << 3; + bSignificand = (bSignificand | implicitBit) << 3; + + // Shift the significand of b by the difference in exponents, with a sticky + // bottom bit to get rounding correct. + const unsigned int align = aExponent - bExponent; + if (align) { + if (align < typeWidth) { + const bool sticky = (bSignificand << (typeWidth - align)) != 0; + bSignificand = bSignificand >> align | sticky; + } else { + bSignificand = 1; // Set the sticky bit. b is known to be non-zero. + } + } + if (subtraction) { + aSignificand -= bSignificand; + // If a == -b, return +zero. + if (aSignificand == 0) + return fromRep(0); + + // If partial cancellation occured, we need to left-shift the result + // and adjust the exponent. + if (aSignificand < implicitBit << 3) { + const int shift = rep_clz(aSignificand) - rep_clz(implicitBit << 3); + aSignificand <<= shift; + aExponent -= shift; + } + } else /* addition */ { + aSignificand += bSignificand; + + // If the addition carried up, we need to right-shift the result and + // adjust the exponent. + if (aSignificand & implicitBit << 4) { + const bool sticky = aSignificand & 1; + aSignificand = aSignificand >> 1 | sticky; + aExponent += 1; + } + } + + // If we have overflowed the type, return +/- infinity. + if (aExponent >= maxExponent) + return fromRep(infRep | resultSign); + + if (aExponent <= 0) { + // The result is denormal before rounding. The exponent is zero and we + // need to shift the significand. + const int shift = 1 - aExponent; + const bool sticky = (aSignificand << (typeWidth - shift)) != 0; + aSignificand = aSignificand >> shift | sticky; + aExponent = 0; + } + + // Low three bits are round, guard, and sticky. + const int roundGuardSticky = aSignificand & 0x7; + + // Shift the significand into place, and mask off the implicit bit. + rep_t result = aSignificand >> 3 & significandMask; + + // Insert the exponent and sign. + result |= (rep_t)aExponent << significandBits; + result |= resultSign; + + // Perform the final rounding. The result may overflow to infinity, but + // that is the correct result in that case. + switch (__fe_getround()) { + case CRT_FE_TONEAREST: + if (roundGuardSticky > 0x4) + result++; + if (roundGuardSticky == 0x4) + result += result & 1; + break; + case CRT_FE_DOWNWARD: + if (resultSign && roundGuardSticky) result++; + break; + case CRT_FE_UPWARD: + if (!resultSign && roundGuardSticky) result++; + break; + case CRT_FE_TOWARDZERO: + break; + } + if (roundGuardSticky) + __fe_raise_inexact(); + return fromRep(result); +} diff --git a/lib/impl/fp_compare_impl.inc b/lib/impl/fp_compare_impl.inc @@ -0,0 +1,119 @@ +//===-- lib/fp_compare_impl.inc - Floating-point comparison -------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "fp_lib.h" + +// GCC uses long (at least for x86_64) as the return type of the comparison +// functions. We need to ensure that the return value is sign-extended in the +// same way as GCC expects (since otherwise GCC-generated __builtin_isinf +// returns true for finite 128-bit floating-point numbers). +#ifdef __aarch64__ +// AArch64 GCC overrides libgcc_cmp_return to use int instead of long. +typedef int CMP_RESULT; +#elif __SIZEOF_POINTER__ == 8 && __SIZEOF_LONG__ == 4 +// LLP64 ABIs use long long instead of long. +typedef long long CMP_RESULT; +#elif __AVR__ +// AVR uses a single byte for the return value. +typedef char CMP_RESULT; +#else +// Otherwise the comparison functions return long. +typedef long CMP_RESULT; +#endif + +#if !defined(__clang__) && defined(__GNUC__) +// GCC uses a special __libgcc_cmp_return__ mode to define the return type, so +// check that we are ABI-compatible when compiling the builtins with GCC. +typedef int GCC_CMP_RESULT __attribute__((__mode__(__libgcc_cmp_return__))); +_Static_assert(sizeof(GCC_CMP_RESULT) == sizeof(CMP_RESULT), + "SOFTFP ABI not compatible with GCC"); +#endif + +enum { + LE_LESS = -1, + LE_EQUAL = 0, + LE_GREATER = 1, + LE_UNORDERED = 1, +}; + +static inline CMP_RESULT __leXf2__(fp_t a, fp_t b) { + const srep_t aInt = toRep(a); + const srep_t bInt = toRep(b); + const rep_t aAbs = aInt & absMask; + const rep_t bAbs = bInt & absMask; + + // If either a or b is NaN, they are unordered. + if (aAbs > infRep || bAbs > infRep) + return LE_UNORDERED; + + // If a and b are both zeros, they are equal. + if ((aAbs | bAbs) == 0) + return LE_EQUAL; + + // If at least one of a and b is positive, we get the same result comparing + // a and b as signed integers as we would with a floating-point compare. + if ((aInt & bInt) >= 0) { + if (aInt < bInt) + return LE_LESS; + else if (aInt == bInt) + return LE_EQUAL; + else + return LE_GREATER; + } else { + // Otherwise, both are negative, so we need to flip the sense of the + // comparison to get the correct result. (This assumes a twos- or ones- + // complement integer representation; if integers are represented in a + // sign-magnitude representation, then this flip is incorrect). + if (aInt > bInt) + return LE_LESS; + else if (aInt == bInt) + return LE_EQUAL; + else + return LE_GREATER; + } +} + +enum { + GE_LESS = -1, + GE_EQUAL = 0, + GE_GREATER = 1, + GE_UNORDERED = -1 // Note: different from LE_UNORDERED +}; + +static inline CMP_RESULT __geXf2__(fp_t a, fp_t b) { + const srep_t aInt = toRep(a); + const srep_t bInt = toRep(b); + const rep_t aAbs = aInt & absMask; + const rep_t bAbs = bInt & absMask; + + if (aAbs > infRep || bAbs > infRep) + return GE_UNORDERED; + if ((aAbs | bAbs) == 0) + return GE_EQUAL; + if ((aInt & bInt) >= 0) { + if (aInt < bInt) + return GE_LESS; + else if (aInt == bInt) + return GE_EQUAL; + else + return GE_GREATER; + } else { + if (aInt > bInt) + return GE_LESS; + else if (aInt == bInt) + return GE_EQUAL; + else + return GE_GREATER; + } +} + +static inline CMP_RESULT __unordXf2__(fp_t a, fp_t b) { + const rep_t aAbs = toRep(a) & absMask; + const rep_t bAbs = toRep(b) & absMask; + return aAbs > infRep || bAbs > infRep; +} diff --git a/lib/impl/fp_div_impl.inc b/lib/impl/fp_div_impl.inc @@ -0,0 +1,419 @@ +//===-- fp_div_impl.inc - Floating point division -----------------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements soft-float division with the IEEE-754 default +// rounding (to nearest, ties to even). +// +//===----------------------------------------------------------------------===// + +#include "fp_lib.h" + +// The __divXf3__ function implements Newton-Raphson floating point division. +// It uses 3 iterations for float32, 4 for float64 and 5 for float128, +// respectively. Due to number of significant bits being roughly doubled +// every iteration, the two modes are supported: N full-width iterations (as +// it is done for float32 by default) and (N-1) half-width iteration plus one +// final full-width iteration. It is expected that half-width integer +// operations (w.r.t rep_t size) can be performed faster for some hardware but +// they require error estimations to be computed separately due to larger +// computational errors caused by truncating intermediate results. + +// Half the bit-size of rep_t +#define HW (typeWidth / 2) +// rep_t-sized bitmask with lower half of bits set to ones +#define loMask (REP_C(-1) >> HW) + +#if NUMBER_OF_FULL_ITERATIONS < 1 +#error At least one full iteration is required +#endif + +static __inline fp_t __divXf3__(fp_t a, fp_t b) { + + const unsigned int aExponent = toRep(a) >> significandBits & maxExponent; + const unsigned int bExponent = toRep(b) >> significandBits & maxExponent; + const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit; + + rep_t aSignificand = toRep(a) & significandMask; + rep_t bSignificand = toRep(b) & significandMask; + int scale = 0; + + // Detect if a or b is zero, denormal, infinity, or NaN. + if (aExponent - 1U >= maxExponent - 1U || + bExponent - 1U >= maxExponent - 1U) { + + const rep_t aAbs = toRep(a) & absMask; + const rep_t bAbs = toRep(b) & absMask; + + // NaN / anything = qNaN + if (aAbs > infRep) + return fromRep(toRep(a) | quietBit); + // anything / NaN = qNaN + if (bAbs > infRep) + return fromRep(toRep(b) | quietBit); + + if (aAbs == infRep) { + // infinity / infinity = NaN + if (bAbs == infRep) + return fromRep(qnanRep); + // infinity / anything else = +/- infinity + else + return fromRep(aAbs | quotientSign); + } + + // anything else / infinity = +/- 0 + if (bAbs == infRep) + return fromRep(quotientSign); + + if (!aAbs) { + // zero / zero = NaN + if (!bAbs) + return fromRep(qnanRep); + // zero / anything else = +/- zero + else + return fromRep(quotientSign); + } + // anything else / zero = +/- infinity + if (!bAbs) + return fromRep(infRep | quotientSign); + + // One or both of a or b is denormal. The other (if applicable) is a + // normal number. Renormalize one or both of a and b, and set scale to + // include the necessary exponent adjustment. + if (aAbs < implicitBit) + scale += normalize(&aSignificand); + if (bAbs < implicitBit) + scale -= normalize(&bSignificand); + } + + // Set the implicit significand bit. If we fell through from the + // denormal path it was already set by normalize( ), but setting it twice + // won't hurt anything. + aSignificand |= implicitBit; + bSignificand |= implicitBit; + + int writtenExponent = (aExponent - bExponent + scale) + exponentBias; + + const rep_t b_UQ1 = bSignificand << (typeWidth - significandBits - 1); + + // Align the significand of b as a UQ1.(n-1) fixed-point number in the range + // [1.0, 2.0) and get a UQ0.n approximate reciprocal using a small minimax + // polynomial approximation: x0 = 3/4 + 1/sqrt(2) - b/2. + // The max error for this approximation is achieved at endpoints, so + // abs(x0(b) - 1/b) <= abs(x0(1) - 1/1) = 3/4 - 1/sqrt(2) = 0.04289..., + // which is about 4.5 bits. + // The initial approximation is between x0(1.0) = 0.9571... and x0(2.0) = 0.4571... + + // Then, refine the reciprocal estimate using a quadratically converging + // Newton-Raphson iteration: + // x_{n+1} = x_n * (2 - x_n * b) + // + // Let b be the original divisor considered "in infinite precision" and + // obtained from IEEE754 representation of function argument (with the + // implicit bit set). Corresponds to rep_t-sized b_UQ1 represented in + // UQ1.(W-1). + // + // Let b_hw be an infinitely precise number obtained from the highest (HW-1) + // bits of divisor significand (with the implicit bit set). Corresponds to + // half_rep_t-sized b_UQ1_hw represented in UQ1.(HW-1) that is a **truncated** + // version of b_UQ1. + // + // Let e_n := x_n - 1/b_hw + // E_n := x_n - 1/b + // abs(E_n) <= abs(e_n) + (1/b_hw - 1/b) + // = abs(e_n) + (b - b_hw) / (b*b_hw) + // <= abs(e_n) + 2 * 2^-HW + + // rep_t-sized iterations may be slower than the corresponding half-width + // variant depending on the handware and whether single/double/quad precision + // is selected. + // NB: Using half-width iterations increases computation errors due to + // rounding, so error estimations have to be computed taking the selected + // mode into account! +#if NUMBER_OF_HALF_ITERATIONS > 0 + // Starting with (n-1) half-width iterations + const half_rep_t b_UQ1_hw = bSignificand >> (significandBits + 1 - HW); + + // C is (3/4 + 1/sqrt(2)) - 1 truncated to W0 fractional bits as UQ0.HW + // with W0 being either 16 or 32 and W0 <= HW. + // That is, C is the aforementioned 3/4 + 1/sqrt(2) constant (from which + // b/2 is subtracted to obtain x0) wrapped to [0, 1) range. +#if defined(SINGLE_PRECISION) + // Use 16-bit initial estimation in case we are using half-width iterations + // for float32 division. This is expected to be useful for some 16-bit + // targets. Not used by default as it requires performing more work during + // rounding and would hardly help on regular 32- or 64-bit targets. + const half_rep_t C_hw = HALF_REP_C(0x7504); +#else + // HW is at least 32. Shifting into the highest bits if needed. + const half_rep_t C_hw = HALF_REP_C(0x7504F333) << (HW - 32); +#endif + + // b >= 1, thus an upper bound for 3/4 + 1/sqrt(2) - b/2 is about 0.9572, + // so x0 fits to UQ0.HW without wrapping. + half_rep_t x_UQ0_hw = C_hw - (b_UQ1_hw /* exact b_hw/2 as UQ0.HW */); + // An e_0 error is comprised of errors due to + // * x0 being an inherently imprecise first approximation of 1/b_hw + // * C_hw being some (irrational) number **truncated** to W0 bits + // Please note that e_0 is calculated against the infinitely precise + // reciprocal of b_hw (that is, **truncated** version of b). + // + // e_0 <= 3/4 - 1/sqrt(2) + 2^-W0 + + // By construction, 1 <= b < 2 + // f(x) = x * (2 - b*x) = 2*x - b*x^2 + // f'(x) = 2 * (1 - b*x) + // + // On the [0, 1] interval, f(0) = 0, + // then it increses until f(1/b) = 1 / b, maximum on (0, 1), + // then it decreses to f(1) = 2 - b + // + // Let g(x) = x - f(x) = b*x^2 - x. + // On (0, 1/b), g(x) < 0 <=> f(x) > x + // On (1/b, 1], g(x) > 0 <=> f(x) < x + // + // For half-width iterations, b_hw is used instead of b. + REPEAT_N_TIMES(NUMBER_OF_HALF_ITERATIONS, { + // corr_UQ1_hw can be **larger** than 2 - b_hw*x by at most 1*Ulp + // of corr_UQ1_hw. + // "0.0 - (...)" is equivalent to "2.0 - (...)" in UQ1.(HW-1). + // On the other hand, corr_UQ1_hw should not overflow from 2.0 to 0.0 provided + // no overflow occurred earlier: ((rep_t)x_UQ0_hw * b_UQ1_hw >> HW) is + // expected to be strictly positive because b_UQ1_hw has its highest bit set + // and x_UQ0_hw should be rather large (it converges to 1/2 < 1/b_hw <= 1). + half_rep_t corr_UQ1_hw = 0 - ((rep_t)x_UQ0_hw * b_UQ1_hw >> HW); + + // Now, we should multiply UQ0.HW and UQ1.(HW-1) numbers, naturally + // obtaining an UQ1.(HW-1) number and proving its highest bit could be + // considered to be 0 to be able to represent it in UQ0.HW. + // From the above analysis of f(x), if corr_UQ1_hw would be represented + // without any intermediate loss of precision (that is, in twice_rep_t) + // x_UQ0_hw could be at most [1.]000... if b_hw is exactly 1.0 and strictly + // less otherwise. On the other hand, to obtain [1.]000..., one have to pass + // 1/b_hw == 1.0 to f(x), so this cannot occur at all without overflow (due + // to 1.0 being not representable as UQ0.HW). + // The fact corr_UQ1_hw was virtually round up (due to result of + // multiplication being **first** truncated, then negated - to improve + // error estimations) can increase x_UQ0_hw by up to 2*Ulp of x_UQ0_hw. + x_UQ0_hw = (rep_t)x_UQ0_hw * corr_UQ1_hw >> (HW - 1); + // Now, either no overflow occurred or x_UQ0_hw is 0 or 1 in its half_rep_t + // representation. In the latter case, x_UQ0_hw will be either 0 or 1 after + // any number of iterations, so just subtract 2 from the reciprocal + // approximation after last iteration. + + // In infinite precision, with 0 <= eps1, eps2 <= U = 2^-HW: + // corr_UQ1_hw = 2 - (1/b_hw + e_n) * b_hw + 2*eps1 + // = 1 - e_n * b_hw + 2*eps1 + // x_UQ0_hw = (1/b_hw + e_n) * (1 - e_n*b_hw + 2*eps1) - eps2 + // = 1/b_hw - e_n + 2*eps1/b_hw + e_n - e_n^2*b_hw + 2*e_n*eps1 - eps2 + // = 1/b_hw + 2*eps1/b_hw - e_n^2*b_hw + 2*e_n*eps1 - eps2 + // e_{n+1} = -e_n^2*b_hw + 2*eps1/b_hw + 2*e_n*eps1 - eps2 + // = 2*e_n*eps1 - (e_n^2*b_hw + eps2) + 2*eps1/b_hw + // \------ >0 -------/ \-- >0 ---/ + // abs(e_{n+1}) <= 2*abs(e_n)*U + max(2*e_n^2 + U, 2 * U) + }) + // For initial half-width iterations, U = 2^-HW + // Let abs(e_n) <= u_n * U, + // then abs(e_{n+1}) <= 2 * u_n * U^2 + max(2 * u_n^2 * U^2 + U, 2 * U) + // u_{n+1} <= 2 * u_n * U + max(2 * u_n^2 * U + 1, 2) + + // Account for possible overflow (see above). For an overflow to occur for the + // first time, for "ideal" corr_UQ1_hw (that is, without intermediate + // truncation), the result of x_UQ0_hw * corr_UQ1_hw should be either maximum + // value representable in UQ0.HW or less by 1. This means that 1/b_hw have to + // be not below that value (see g(x) above), so it is safe to decrement just + // once after the final iteration. On the other hand, an effective value of + // divisor changes after this point (from b_hw to b), so adjust here. + x_UQ0_hw -= 1U; + rep_t x_UQ0 = (rep_t)x_UQ0_hw << HW; + x_UQ0 -= 1U; + +#else + // C is (3/4 + 1/sqrt(2)) - 1 truncated to 32 fractional bits as UQ0.n + const rep_t C = REP_C(0x7504F333) << (typeWidth - 32); + rep_t x_UQ0 = C - b_UQ1; + // E_0 <= 3/4 - 1/sqrt(2) + 2 * 2^-32 +#endif + + // Error estimations for full-precision iterations are calculated just + // as above, but with U := 2^-W and taking extra decrementing into account. + // We need at least one such iteration. + +#ifdef USE_NATIVE_FULL_ITERATIONS + REPEAT_N_TIMES(NUMBER_OF_FULL_ITERATIONS, { + rep_t corr_UQ1 = 0 - ((twice_rep_t)x_UQ0 * b_UQ1 >> typeWidth); + x_UQ0 = (twice_rep_t)x_UQ0 * corr_UQ1 >> (typeWidth - 1); + }) +#else +#if NUMBER_OF_FULL_ITERATIONS != 1 +#error Only a single emulated full iteration is supported +#endif +#if !(NUMBER_OF_HALF_ITERATIONS > 0) + // Cannot normally reach here: only one full-width iteration is requested and + // the total number of iterations should be at least 3 even for float32. +#error Check NUMBER_OF_HALF_ITERATIONS, NUMBER_OF_FULL_ITERATIONS and USE_NATIVE_FULL_ITERATIONS. +#endif + // Simulating operations on a twice_rep_t to perform a single final full-width + // iteration. Using ad-hoc multiplication implementations to take advantage + // of particular structure of operands. + rep_t blo = b_UQ1 & loMask; + // x_UQ0 = x_UQ0_hw * 2^HW - 1 + // x_UQ0 * b_UQ1 = (x_UQ0_hw * 2^HW) * (b_UQ1_hw * 2^HW + blo) - b_UQ1 + // + // <--- higher half ---><--- lower half ---> + // [x_UQ0_hw * b_UQ1_hw] + // + [ x_UQ0_hw * blo ] + // - [ b_UQ1 ] + // = [ result ][.... discarded ...] + rep_t corr_UQ1 = 0U - ( (rep_t)x_UQ0_hw * b_UQ1_hw + + ((rep_t)x_UQ0_hw * blo >> HW) + - REP_C(1)); // account for *possible* carry + rep_t lo_corr = corr_UQ1 & loMask; + rep_t hi_corr = corr_UQ1 >> HW; + // x_UQ0 * corr_UQ1 = (x_UQ0_hw * 2^HW) * (hi_corr * 2^HW + lo_corr) - corr_UQ1 + x_UQ0 = ((rep_t)x_UQ0_hw * hi_corr << 1) + + ((rep_t)x_UQ0_hw * lo_corr >> (HW - 1)) + - REP_C(2); // 1 to account for the highest bit of corr_UQ1 can be 1 + // 1 to account for possible carry + // Just like the case of half-width iterations but with possibility + // of overflowing by one extra Ulp of x_UQ0. + x_UQ0 -= 1U; + // ... and then traditional fixup by 2 should work + + // On error estimation: + // abs(E_{N-1}) <= (u_{N-1} + 2 /* due to conversion e_n -> E_n */) * 2^-HW + // + (2^-HW + 2^-W)) + // abs(E_{N-1}) <= (u_{N-1} + 3.01) * 2^-HW + + // Then like for the half-width iterations: + // With 0 <= eps1, eps2 < 2^-W + // E_N = 4 * E_{N-1} * eps1 - (E_{N-1}^2 * b + 4 * eps2) + 4 * eps1 / b + // abs(E_N) <= 2^-W * [ 4 * abs(E_{N-1}) + max(2 * abs(E_{N-1})^2 * 2^W + 4, 8)) ] + // abs(E_N) <= 2^-W * [ 4 * (u_{N-1} + 3.01) * 2^-HW + max(4 + 2 * (u_{N-1} + 3.01)^2, 8) ] +#endif + + // Finally, account for possible overflow, as explained above. + x_UQ0 -= 2U; + + // u_n for different precisions (with N-1 half-width iterations): + // W0 is the precision of C + // u_0 = (3/4 - 1/sqrt(2) + 2^-W0) * 2^HW + + // Estimated with bc: + // define half1(un) { return 2.0 * (un + un^2) / 2.0^hw + 1.0; } + // define half2(un) { return 2.0 * un / 2.0^hw + 2.0; } + // define full1(un) { return 4.0 * (un + 3.01) / 2.0^hw + 2.0 * (un + 3.01)^2 + 4.0; } + // define full2(un) { return 4.0 * (un + 3.01) / 2.0^hw + 8.0; } + + // | f32 (0 + 3) | f32 (2 + 1) | f64 (3 + 1) | f128 (4 + 1) + // u_0 | < 184224974 | < 2812.1 | < 184224974 | < 791240234244348797 + // u_1 | < 15804007 | < 242.7 | < 15804007 | < 67877681371350440 + // u_2 | < 116308 | < 2.81 | < 116308 | < 499533100252317 + // u_3 | < 7.31 | | < 7.31 | < 27054456580 + // u_4 | | | | < 80.4 + // Final (U_N) | same as u_3 | < 72 | < 218 | < 13920 + + // Add 2 to U_N due to final decrement. + +#if defined(SINGLE_PRECISION) && NUMBER_OF_HALF_ITERATIONS == 2 && NUMBER_OF_FULL_ITERATIONS == 1 +#define RECIPROCAL_PRECISION REP_C(74) +#elif defined(SINGLE_PRECISION) && NUMBER_OF_HALF_ITERATIONS == 0 && NUMBER_OF_FULL_ITERATIONS == 3 +#define RECIPROCAL_PRECISION REP_C(10) +#elif defined(DOUBLE_PRECISION) && NUMBER_OF_HALF_ITERATIONS == 3 && NUMBER_OF_FULL_ITERATIONS == 1 +#define RECIPROCAL_PRECISION REP_C(220) +#elif defined(QUAD_PRECISION) && NUMBER_OF_HALF_ITERATIONS == 4 && NUMBER_OF_FULL_ITERATIONS == 1 +#define RECIPROCAL_PRECISION REP_C(13922) +#else +#error Invalid number of iterations +#endif + + // Suppose 1/b - P * 2^-W < x < 1/b + P * 2^-W + x_UQ0 -= RECIPROCAL_PRECISION; + // Now 1/b - (2*P) * 2^-W < x < 1/b + // FIXME Is x_UQ0 still >= 0.5? + + rep_t quotient_UQ1, dummy; + wideMultiply(x_UQ0, aSignificand << 1, &quotient_UQ1, &dummy); + // Now, a/b - 4*P * 2^-W < q < a/b for q=<quotient_UQ1:dummy> in UQ1.(SB+1+W). + + // quotient_UQ1 is in [0.5, 2.0) as UQ1.(SB+1), + // adjust it to be in [1.0, 2.0) as UQ1.SB. + rep_t residualLo; + if (quotient_UQ1 < (implicitBit << 1)) { + // Highest bit is 0, so just reinterpret quotient_UQ1 as UQ1.SB, + // effectively doubling its value as well as its error estimation. + residualLo = (aSignificand << (significandBits + 1)) - quotient_UQ1 * bSignificand; + writtenExponent -= 1; + aSignificand <<= 1; + } else { + // Highest bit is 1 (the UQ1.(SB+1) value is in [1, 2)), convert it + // to UQ1.SB by right shifting by 1. Least significant bit is omitted. + quotient_UQ1 >>= 1; + residualLo = (aSignificand << significandBits) - quotient_UQ1 * bSignificand; + } + // NB: residualLo is calculated above for the normal result case. + // It is re-computed on denormal path that is expected to be not so + // performance-sensitive. + + // Now, q cannot be greater than a/b and can differ by at most 8*P * 2^-W + 2^-SB + // Each NextAfter() increments the floating point value by at least 2^-SB + // (more, if exponent was incremented). + // Different cases (<---> is of 2^-SB length, * = a/b that is shown as a midpoint): + // q + // | | * | | | | | + // <---> 2^t + // | | | | | * | | + // q + // To require at most one NextAfter(), an error should be less than 1.5 * 2^-SB. + // (8*P) * 2^-W + 2^-SB < 1.5 * 2^-SB + // (8*P) * 2^-W < 0.5 * 2^-SB + // P < 2^(W-4-SB) + // Generally, for at most R NextAfter() to be enough, + // P < (2*R - 1) * 2^(W-4-SB) + // For f32 (0+3): 10 < 32 (OK) + // For f32 (2+1): 32 < 74 < 32 * 3, so two NextAfter() are required + // For f64: 220 < 256 (OK) + // For f128: 4096 * 3 < 13922 < 4096 * 5 (three NextAfter() are required) + + // If we have overflowed the exponent, return infinity + if (writtenExponent >= maxExponent) + return fromRep(infRep | quotientSign); + + // Now, quotient_UQ1_SB <= the correctly-rounded result + // and may need taking NextAfter() up to 3 times (see error estimates above) + // r = a - b * q + rep_t absResult; + if (writtenExponent > 0) { + // Clear the implicit bit + absResult = quotient_UQ1 & significandMask; + // Insert the exponent + absResult |= (rep_t)writtenExponent << significandBits; + residualLo <<= 1; + } else { + // Prevent shift amount from being negative + if (significandBits + writtenExponent < 0) + return fromRep(quotientSign); + + absResult = quotient_UQ1 >> (-writtenExponent + 1); + + // multiplied by two to prevent shift amount to be negative + residualLo = (aSignificand << (significandBits + writtenExponent)) - (absResult * bSignificand << 1); + } + + // Round + residualLo += absResult & 1; // tie to even + // The above line conditionally turns the below LT comparison into LTE + absResult += residualLo > bSignificand; +#if defined(QUAD_PRECISION) || (defined(SINGLE_PRECISION) && NUMBER_OF_HALF_ITERATIONS > 0) + // Do not round Infinity to NaN + absResult += absResult < infRep && residualLo > (2 + 1) * bSignificand; +#endif +#if defined(QUAD_PRECISION) + absResult += absResult < infRep && residualLo > (4 + 1) * bSignificand; +#endif + return fromRep(absResult | quotientSign); +} diff --git a/lib/impl/fp_extend.h b/lib/impl/fp_extend.h @@ -0,0 +1,174 @@ +//===-lib/fp_extend.h - low precision -> high precision conversion -*- C +//-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Set source and destination setting +// +//===----------------------------------------------------------------------===// + +#ifndef FP_EXTEND_HEADER +#define FP_EXTEND_HEADER + +#include "int_lib.h" + +#if defined SRC_SINGLE +typedef float src_t; +typedef uint32_t src_rep_t; +#define SRC_REP_C UINT32_C +static const int srcBits = sizeof(src_t) * CHAR_BIT; +static const int srcSigFracBits = 23; +// -1 accounts for the sign bit. +// srcBits - srcSigFracBits - 1 +static const int srcExpBits = 8; +#define src_rep_t_clz clzsi + +#elif defined SRC_DOUBLE +typedef double src_t; +typedef uint64_t src_rep_t; +#define SRC_REP_C UINT64_C +static const int srcBits = sizeof(src_t) * CHAR_BIT; +static const int srcSigFracBits = 52; +// -1 accounts for the sign bit. +// srcBits - srcSigFracBits - 1 +static const int srcExpBits = 11; + +static inline int src_rep_t_clz_impl(src_rep_t a) { +#if defined __LP64__ + return __builtin_clzl(a); +#else + if (a & REP_C(0xffffffff00000000)) + return clzsi(a >> 32); + else + return 32 + clzsi(a & REP_C(0xffffffff)); +#endif +} +#define src_rep_t_clz src_rep_t_clz_impl + +#elif defined SRC_80 +typedef xf_float src_t; +typedef __uint128_t src_rep_t; +#define SRC_REP_C (__uint128_t) +// sign bit, exponent and significand occupy the lower 80 bits. +static const int srcBits = 80; +static const int srcSigFracBits = 63; +// -1 accounts for the sign bit. +// -1 accounts for the explicitly stored integer bit. +// srcBits - srcSigFracBits - 1 - 1 +static const int srcExpBits = 15; + +#elif defined SRC_HALF +#ifdef COMPILER_RT_HAS_FLOAT16 +typedef _Float16 src_t; +#else +typedef uint16_t src_t; +#endif +typedef uint16_t src_rep_t; +#define SRC_REP_C UINT16_C +static const int srcBits = sizeof(src_t) * CHAR_BIT; +static const int srcSigFracBits = 10; +// -1 accounts for the sign bit. +// srcBits - srcSigFracBits - 1 +static const int srcExpBits = 5; + +static inline int src_rep_t_clz_impl(src_rep_t a) { + return __builtin_clz(a) - 16; +} + +#define src_rep_t_clz src_rep_t_clz_impl + +#else +#error Source should be half, single, or double precision! +#endif // end source precision + +#if defined DST_SINGLE +typedef float dst_t; +typedef uint32_t dst_rep_t; +#define DST_REP_C UINT32_C +static const int dstBits = sizeof(dst_t) * CHAR_BIT; +static const int dstSigFracBits = 23; +// -1 accounts for the sign bit. +// dstBits - dstSigFracBits - 1 +static const int dstExpBits = 8; + +#elif defined DST_DOUBLE +typedef double dst_t; +typedef uint64_t dst_rep_t; +#define DST_REP_C UINT64_C +static const int dstBits = sizeof(dst_t) * CHAR_BIT; +static const int dstSigFracBits = 52; +// -1 accounts for the sign bit. +// dstBits - dstSigFracBits - 1 +static const int dstExpBits = 11; + +#elif defined DST_QUAD +typedef tf_float dst_t; +typedef __uint128_t dst_rep_t; +#define DST_REP_C (__uint128_t) +static const int dstBits = sizeof(dst_t) * CHAR_BIT; +static const int dstSigFracBits = 112; +// -1 accounts for the sign bit. +// dstBits - dstSigFracBits - 1 +static const int dstExpBits = 15; + +#else +#error Destination should be single, double, or quad precision! +#endif // end destination precision + +// End of specialization parameters. + +// TODO: These helper routines should be placed into fp_lib.h +// Currently they depend on macros/constants defined above. + +static inline src_rep_t extract_sign_from_src(src_rep_t x) { + const src_rep_t srcSignMask = SRC_REP_C(1) << (srcBits - 1); + return (x & srcSignMask) >> (srcBits - 1); +} + +static inline src_rep_t extract_exp_from_src(src_rep_t x) { + const int srcSigBits = srcBits - 1 - srcExpBits; + const src_rep_t srcExpMask = ((SRC_REP_C(1) << srcExpBits) - 1) << srcSigBits; + return (x & srcExpMask) >> srcSigBits; +} + +static inline src_rep_t extract_sig_frac_from_src(src_rep_t x) { + const src_rep_t srcSigFracMask = (SRC_REP_C(1) << srcSigFracBits) - 1; + return x & srcSigFracMask; +} + +#ifdef src_rep_t_clz +static inline int clz_in_sig_frac(src_rep_t sigFrac) { + const int skip = 1 + srcExpBits; + return src_rep_t_clz(sigFrac) - skip; +} +#endif + +static inline dst_rep_t construct_dst_rep(dst_rep_t sign, dst_rep_t exp, dst_rep_t sigFrac) { + return (sign << (dstBits - 1)) | (exp << (dstBits - 1 - dstExpBits)) | sigFrac; +} + +// Two helper routines for conversion to and from the representation of +// floating-point data as integer values follow. + +static inline src_rep_t srcToRep(src_t x) { + const union { + src_t f; + src_rep_t i; + } rep = {.f = x}; + return rep.i; +} + +static inline dst_t dstFromRep(dst_rep_t x) { + const union { + dst_t f; + dst_rep_t i; + } rep = {.i = x}; + return rep.f; +} +// End helper routines. Conversion implementation follows. + +#endif // FP_EXTEND_HEADER diff --git a/lib/impl/fp_extend_impl.inc b/lib/impl/fp_extend_impl.inc @@ -0,0 +1,108 @@ +//=-lib/fp_extend_impl.inc - low precision -> high precision conversion -*-- -// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a fairly generic conversion from a narrower to a wider +// IEEE-754 floating-point type. The constants and types defined following the +// includes below parameterize the conversion. +// +// It does not support types that don't use the usual IEEE-754 interchange +// formats; specifically, some work would be needed to adapt it to +// (for example) the Intel 80-bit format or PowerPC double-double format. +// +// Note please, however, that this implementation is only intended to support +// *widening* operations; if you need to convert to a *narrower* floating-point +// type (e.g. double -> float), then this routine will not do what you want it +// to. +// +// It also requires that integer types at least as large as both formats +// are available on the target platform; this may pose a problem when trying +// to add support for quad on some 32-bit systems, for example. You also may +// run into trouble finding an appropriate CLZ function for wide source types; +// you will likely need to roll your own on some platforms. +// +// Finally, the following assumptions are made: +// +// 1. Floating-point types and integer types have the same endianness on the +// target platform. +// +// 2. Quiet NaNs, if supported, are indicated by the leading bit of the +// significand field being set. +// +//===----------------------------------------------------------------------===// + +#include "fp_extend.h" + +// The source type may use a usual IEEE-754 interchange format or Intel 80-bit +// format. In particular, for the source type srcSigFracBits may be not equal to +// srcSigBits. The destination type is assumed to be one of IEEE-754 standard +// types. +static __inline dst_t __extendXfYf2__(src_t a) { + // Various constants whose values follow from the type parameters. + // Any reasonable optimizer will fold and propagate all of these. + const int srcInfExp = (1 << srcExpBits) - 1; + const int srcExpBias = srcInfExp >> 1; + + const int dstInfExp = (1 << dstExpBits) - 1; + const int dstExpBias = dstInfExp >> 1; + + // Break a into a sign and representation of the absolute value. + const src_rep_t aRep = srcToRep(a); + const src_rep_t srcSign = extract_sign_from_src(aRep); + const src_rep_t srcExp = extract_exp_from_src(aRep); + const src_rep_t srcSigFrac = extract_sig_frac_from_src(aRep); + + dst_rep_t dstSign = srcSign; + dst_rep_t dstExp; + dst_rep_t dstSigFrac; + + if (srcExp >= 1 && srcExp < (src_rep_t)srcInfExp) { + // a is a normal number. + dstExp = (dst_rep_t)srcExp + (dst_rep_t)(dstExpBias - srcExpBias); + dstSigFrac = (dst_rep_t)srcSigFrac << (dstSigFracBits - srcSigFracBits); + } + + else if (srcExp == srcInfExp) { + // a is NaN or infinity. + dstExp = dstInfExp; + dstSigFrac = (dst_rep_t)srcSigFrac << (dstSigFracBits - srcSigFracBits); + } + + else if (srcSigFrac) { + // a is denormal. + if (srcExpBits == dstExpBits) { + // The exponent fields are identical and this is a denormal number, so all + // the non-significand bits are zero. In particular, this branch is always + // taken when we extend a denormal F80 to F128. + dstExp = 0; + dstSigFrac = ((dst_rep_t)srcSigFrac) << (dstSigFracBits - srcSigFracBits); + } else { +#ifndef src_rep_t_clz + // If src_rep_t_clz is not defined this branch must be unreachable. + __builtin_unreachable(); +#else + // Renormalize the significand and clear the leading bit. + // For F80 -> F128 this codepath is unused. + const int scale = clz_in_sig_frac(srcSigFrac) + 1; + dstExp = dstExpBias - srcExpBias - scale + 1; + dstSigFrac = (dst_rep_t)srcSigFrac + << (dstSigFracBits - srcSigFracBits + scale); + const dst_rep_t dstMinNormal = DST_REP_C(1) << (dstBits - 1 - dstExpBits); + dstSigFrac ^= dstMinNormal; +#endif + } + } + + else { + // a is zero. + dstExp = 0; + dstSigFrac = 0; + } + + const dst_rep_t result = construct_dst_rep(dstSign, dstExp, dstSigFrac); + return dstFromRep(result); +} diff --git a/lib/impl/fp_fixint_impl.inc b/lib/impl/fp_fixint_impl.inc @@ -0,0 +1,40 @@ +//===-- lib/fixdfsi.c - Double-precision -> integer conversion ----*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements float to integer conversion for the +// compiler-rt library. +// +//===----------------------------------------------------------------------===// + +#include "fp_lib.h" + +static __inline fixint_t __fixint(fp_t a) { + const fixint_t fixint_max = (fixint_t)((~(fixuint_t)0) / 2); + const fixint_t fixint_min = -fixint_max - 1; + // Break a into sign, exponent, significand parts. + const rep_t aRep = toRep(a); + const rep_t aAbs = aRep & absMask; + const fixint_t sign = aRep & signBit ? -1 : 1; + const int exponent = (aAbs >> significandBits) - exponentBias; + const rep_t significand = (aAbs & significandMask) | implicitBit; + + // If exponent is negative, the result is zero. + if (exponent < 0) + return 0; + + // If the value is too large for the integer type, saturate. + if ((unsigned)exponent >= sizeof(fixint_t) * CHAR_BIT) + return sign == 1 ? fixint_max : fixint_min; + + // If 0 <= exponent < significandBits, right shift to get the result. + // Otherwise, shift left. + if (exponent < significandBits) + return sign * (significand >> (significandBits - exponent)); + else + return sign * ((fixuint_t)significand << (exponent - significandBits)); +} diff --git a/lib/impl/fp_fixuint_impl.inc b/lib/impl/fp_fixuint_impl.inc @@ -0,0 +1,38 @@ +//===-- lib/fixdfsi.c - Double-precision -> integer conversion ----*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements float to unsigned integer conversion for the +// compiler-rt library. +// +//===----------------------------------------------------------------------===// + +#include "fp_lib.h" + +static __inline fixuint_t __fixuint(fp_t a) { + // Break a into sign, exponent, significand parts. + const rep_t aRep = toRep(a); + const rep_t aAbs = aRep & absMask; + const int sign = aRep & signBit ? -1 : 1; + const int exponent = (aAbs >> significandBits) - exponentBias; + const rep_t significand = (aAbs & significandMask) | implicitBit; + + // If either the value or the exponent is negative, the result is zero. + if (sign == -1 || exponent < 0) + return 0; + + // If the value is too large for the integer type, saturate. + if ((unsigned)exponent >= sizeof(fixuint_t) * CHAR_BIT) + return ~(fixuint_t)0; + + // If 0 <= exponent < significandBits, right shift to get the result. + // Otherwise, shift left. + if (exponent < significandBits) + return significand >> (significandBits - exponent); + else + return (fixuint_t)significand << (exponent - significandBits); +} diff --git a/lib/impl/fp_mul_impl.inc b/lib/impl/fp_mul_impl.inc @@ -0,0 +1,128 @@ +//===---- lib/fp_mul_impl.inc - floating point multiplication -----*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements soft-float multiplication with the IEEE-754 default +// rounding (to nearest, ties to even). +// +//===----------------------------------------------------------------------===// + +#include "fp_lib.h" + +static __inline fp_t __mulXf3__(fp_t a, fp_t b) { + const unsigned int aExponent = toRep(a) >> significandBits & maxExponent; + const unsigned int bExponent = toRep(b) >> significandBits & maxExponent; + const rep_t productSign = (toRep(a) ^ toRep(b)) & signBit; + + rep_t aSignificand = toRep(a) & significandMask; + rep_t bSignificand = toRep(b) & significandMask; + int scale = 0; + + // Detect if a or b is zero, denormal, infinity, or NaN. + if (aExponent - 1U >= maxExponent - 1U || + bExponent - 1U >= maxExponent - 1U) { + + const rep_t aAbs = toRep(a) & absMask; + const rep_t bAbs = toRep(b) & absMask; + + // NaN * anything = qNaN + if (aAbs > infRep) + return fromRep(toRep(a) | quietBit); + // anything * NaN = qNaN + if (bAbs > infRep) + return fromRep(toRep(b) | quietBit); + + if (aAbs == infRep) { + // infinity * non-zero = +/- infinity + if (bAbs) + return fromRep(aAbs | productSign); + // infinity * zero = NaN + else + return fromRep(qnanRep); + } + + if (bAbs == infRep) { + // non-zero * infinity = +/- infinity + if (aAbs) + return fromRep(bAbs | productSign); + // zero * infinity = NaN + else + return fromRep(qnanRep); + } + + // zero * anything = +/- zero + if (!aAbs) + return fromRep(productSign); + // anything * zero = +/- zero + if (!bAbs) + return fromRep(productSign); + + // One or both of a or b is denormal. The other (if applicable) is a + // normal number. Renormalize one or both of a and b, and set scale to + // include the necessary exponent adjustment. + if (aAbs < implicitBit) + scale += normalize(&aSignificand); + if (bAbs < implicitBit) + scale += normalize(&bSignificand); + } + + // Set the implicit significand bit. If we fell through from the + // denormal path it was already set by normalize( ), but setting it twice + // won't hurt anything. + aSignificand |= implicitBit; + bSignificand |= implicitBit; + + // Perform a basic multiplication on the significands. One of them must be + // shifted beforehand to be aligned with the exponent. + rep_t productHi, productLo; + wideMultiply(aSignificand, bSignificand << exponentBits, &productHi, + &productLo); + + int productExponent = aExponent + bExponent - exponentBias + scale; + + // Normalize the significand and adjust the exponent if needed. + if (productHi & implicitBit) + productExponent++; + else + wideLeftShift(&productHi, &productLo, 1); + + // If we have overflowed the type, return +/- infinity. + if (productExponent >= maxExponent) + return fromRep(infRep | productSign); + + if (productExponent <= 0) { + // The result is denormal before rounding. + // + // If the result is so small that it just underflows to zero, return + // zero with the appropriate sign. Mathematically, there is no need to + // handle this case separately, but we make it a special case to + // simplify the shift logic. + const unsigned int shift = REP_C(1) - (unsigned int)productExponent; + if (shift >= typeWidth) + return fromRep(productSign); + + // Otherwise, shift the significand of the result so that the round + // bit is the high bit of productLo. + wideRightShiftWithSticky(&productHi, &productLo, shift); + } else { + // The result is normal before rounding. Insert the exponent. + productHi &= significandMask; + productHi |= (rep_t)productExponent << significandBits; + } + + // Insert the sign of the result. + productHi |= productSign; + + // Perform the final rounding. The final result may overflow to infinity, + // or underflow to zero, but those are the correct results in those cases. + // We use the default IEEE-754 round-to-nearest, ties-to-even rounding mode. + if (productLo > signBit) + productHi++; + if (productLo == signBit) + productHi += productHi & 1; + return fromRep(productHi); +} diff --git a/lib/impl/fp_trunc.h b/lib/impl/fp_trunc.h @@ -0,0 +1,158 @@ +//=== lib/fp_trunc.h - high precision -> low precision conversion *- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Set source and destination precision setting +// +//===----------------------------------------------------------------------===// + +#ifndef FP_TRUNC_HEADER +#define FP_TRUNC_HEADER + +#include "int_lib.h" + +#if defined SRC_SINGLE +typedef float src_t; +typedef uint32_t src_rep_t; +#define SRC_REP_C UINT32_C +static const int srcBits = sizeof(src_t) * CHAR_BIT; +static const int srcSigFracBits = 23; +// -1 accounts for the sign bit. +// srcBits - srcSigFracBits - 1 +static const int srcExpBits = 8; + +#elif defined SRC_DOUBLE +typedef double src_t; +typedef uint64_t src_rep_t; +#define SRC_REP_C UINT64_C +static const int srcBits = sizeof(src_t) * CHAR_BIT; +static const int srcSigFracBits = 52; +// -1 accounts for the sign bit. +// srcBits - srcSigFracBits - 1 +static const int srcExpBits = 11; + +#elif defined SRC_QUAD +typedef tf_float src_t; +typedef __uint128_t src_rep_t; +#define SRC_REP_C (__uint128_t) +static const int srcBits = sizeof(src_t) * CHAR_BIT; +static const int srcSigFracBits = 112; +// -1 accounts for the sign bit. +// srcBits - srcSigFracBits - 1 +static const int srcExpBits = 15; + +#else +#error Source should be double precision or quad precision! +#endif // end source precision + +#if defined DST_DOUBLE +typedef double dst_t; +typedef uint64_t dst_rep_t; +#define DST_REP_C UINT64_C +static const int dstBits = sizeof(dst_t) * CHAR_BIT; +static const int dstSigFracBits = 52; +// -1 accounts for the sign bit. +// dstBits - dstSigFracBits - 1 +static const int dstExpBits = 11; + +#elif defined DST_80 +typedef xf_float dst_t; +typedef __uint128_t dst_rep_t; +#define DST_REP_C (__uint128_t) +static const int dstBits = 80; +static const int dstSigFracBits = 63; +// -1 accounts for the sign bit. +// -1 accounts for the explicitly stored integer bit. +// dstBits - dstSigFracBits - 1 - 1 +static const int dstExpBits = 15; + +#elif defined DST_SINGLE +typedef float dst_t; +typedef uint32_t dst_rep_t; +#define DST_REP_C UINT32_C +static const int dstBits = sizeof(dst_t) * CHAR_BIT; +static const int dstSigFracBits = 23; +// -1 accounts for the sign bit. +// dstBits - dstSigFracBits - 1 +static const int dstExpBits = 8; + +#elif defined DST_HALF +#ifdef COMPILER_RT_HAS_FLOAT16 +typedef _Float16 dst_t; +#else +typedef uint16_t dst_t; +#endif +typedef uint16_t dst_rep_t; +#define DST_REP_C UINT16_C +static const int dstBits = sizeof(dst_t) * CHAR_BIT; +static const int dstSigFracBits = 10; +// -1 accounts for the sign bit. +// dstBits - dstSigFracBits - 1 +static const int dstExpBits = 5; + +#elif defined DST_BFLOAT +typedef __bf16 dst_t; +typedef uint16_t dst_rep_t; +#define DST_REP_C UINT16_C +static const int dstBits = sizeof(dst_t) * CHAR_BIT; +static const int dstSigFracBits = 7; +// -1 accounts for the sign bit. +// dstBits - dstSigFracBits - 1 +static const int dstExpBits = 8; + +#else +#error Destination should be single precision or double precision! +#endif // end destination precision + +// TODO: These helper routines should be placed into fp_lib.h +// Currently they depend on macros/constants defined above. + +static inline src_rep_t extract_sign_from_src(src_rep_t x) { + const src_rep_t srcSignMask = SRC_REP_C(1) << (srcBits - 1); + return (x & srcSignMask) >> (srcBits - 1); +} + +static inline src_rep_t extract_exp_from_src(src_rep_t x) { + const int srcSigBits = srcBits - 1 - srcExpBits; + const src_rep_t srcExpMask = ((SRC_REP_C(1) << srcExpBits) - 1) << srcSigBits; + return (x & srcExpMask) >> srcSigBits; +} + +static inline src_rep_t extract_sig_frac_from_src(src_rep_t x) { + const src_rep_t srcSigFracMask = (SRC_REP_C(1) << srcSigFracBits) - 1; + return x & srcSigFracMask; +} + +static inline dst_rep_t construct_dst_rep(dst_rep_t sign, dst_rep_t exp, dst_rep_t sigFrac) { + dst_rep_t result = (sign << (dstBits - 1)) | (exp << (dstBits - 1 - dstExpBits)) | sigFrac; + // Set the explicit integer bit in F80 if present. + if (dstBits == 80 && exp) { + result |= (DST_REP_C(1) << dstSigFracBits); + } + return result; +} + +// End of specialization parameters. Two helper routines for conversion to and +// from the representation of floating-point data as integer values follow. + +static inline src_rep_t srcToRep(src_t x) { + const union { + src_t f; + src_rep_t i; + } rep = {.f = x}; + return rep.i; +} + +static inline dst_t dstFromRep(dst_rep_t x) { + const union { + dst_t f; + dst_rep_t i; + } rep = {.i = x}; + return rep.f; +} + +#endif // FP_TRUNC_HEADER diff --git a/lib/impl/fp_trunc_impl.inc b/lib/impl/fp_trunc_impl.inc @@ -0,0 +1,155 @@ +//= lib/fp_trunc_impl.inc - high precision -> low precision conversion *-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a fairly generic conversion from a wider to a narrower +// IEEE-754 floating-point type in the default (round to nearest, ties to even) +// rounding mode. The constants and types defined following the includes below +// parameterize the conversion. +// +// This routine can be trivially adapted to support conversions to +// half-precision or from quad-precision. It does not support types that don't +// use the usual IEEE-754 interchange formats; specifically, some work would be +// needed to adapt it to (for example) the Intel 80-bit format or PowerPC +// double-double format. +// +// Note please, however, that this implementation is only intended to support +// *narrowing* operations; if you need to convert to a *wider* floating-point +// type (e.g. float -> double), then this routine will not do what you want it +// to. +// +// It also requires that integer types at least as large as both formats +// are available on the target platform; this may pose a problem when trying +// to add support for quad on some 32-bit systems, for example. +// +// Finally, the following assumptions are made: +// +// 1. Floating-point types and integer types have the same endianness on the +// target platform. +// +// 2. Quiet NaNs, if supported, are indicated by the leading bit of the +// significand field being set. +// +//===----------------------------------------------------------------------===// + +#include "fp_trunc.h" + +// The destination type may use a usual IEEE-754 interchange format or Intel +// 80-bit format. In particular, for the destination type dstSigFracBits may be +// not equal to dstSigBits. The source type is assumed to be one of IEEE-754 +// standard types. +static __inline dst_t __truncXfYf2__(src_t a) { + // Various constants whose values follow from the type parameters. + // Any reasonable optimizer will fold and propagate all of these. + const int srcInfExp = (1 << srcExpBits) - 1; + const int srcExpBias = srcInfExp >> 1; + + const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigFracBits; + const src_rep_t roundMask = + (SRC_REP_C(1) << (srcSigFracBits - dstSigFracBits)) - 1; + const src_rep_t halfway = SRC_REP_C(1) + << (srcSigFracBits - dstSigFracBits - 1); + const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigFracBits - 1); + const src_rep_t srcNaNCode = srcQNaN - 1; + + const int dstInfExp = (1 << dstExpBits) - 1; + const int dstExpBias = dstInfExp >> 1; + const int overflowExponent = srcExpBias + dstInfExp - dstExpBias; + + const dst_rep_t dstQNaN = DST_REP_C(1) << (dstSigFracBits - 1); + const dst_rep_t dstNaNCode = dstQNaN - 1; + + const src_rep_t aRep = srcToRep(a); + const src_rep_t srcSign = extract_sign_from_src(aRep); + const src_rep_t srcExp = extract_exp_from_src(aRep); + const src_rep_t srcSigFrac = extract_sig_frac_from_src(aRep); + + dst_rep_t dstSign = srcSign; + dst_rep_t dstExp; + dst_rep_t dstSigFrac; + + // Same size exponents and a's significand tail is 0. + // The significand can be truncated and the exponent can be copied over. + const int sigFracTailBits = srcSigFracBits - dstSigFracBits; + if (srcExpBits == dstExpBits && + ((aRep >> sigFracTailBits) << sigFracTailBits) == aRep) { + dstExp = srcExp; + dstSigFrac = (dst_rep_t)(srcSigFrac >> sigFracTailBits); + return dstFromRep(construct_dst_rep(dstSign, dstExp, dstSigFrac)); + } + + const int dstExpCandidate = ((int)srcExp - srcExpBias) + dstExpBias; + if (dstExpCandidate >= 1 && dstExpCandidate < dstInfExp) { + // The exponent of a is within the range of normal numbers in the + // destination format. We can convert by simply right-shifting with + // rounding and adjusting the exponent. + dstExp = dstExpCandidate; + dstSigFrac = (dst_rep_t)(srcSigFrac >> sigFracTailBits); + + const src_rep_t roundBits = srcSigFrac & roundMask; + // Round to nearest. + if (roundBits > halfway) + dstSigFrac++; + // Tie to even. + else if (roundBits == halfway) + dstSigFrac += dstSigFrac & 1; + + // Rounding has changed the exponent. + if (dstSigFrac >= (DST_REP_C(1) << dstSigFracBits)) { + dstExp += 1; + dstSigFrac ^= (DST_REP_C(1) << dstSigFracBits); + } + } else if (srcExp == srcInfExp && srcSigFrac) { + // a is NaN. + // Conjure the result by beginning with infinity, setting the qNaN + // bit and inserting the (truncated) trailing NaN field. + dstExp = dstInfExp; + dstSigFrac = dstQNaN; + dstSigFrac |= ((srcSigFrac & srcNaNCode) >> sigFracTailBits) & dstNaNCode; + } else if ((int)srcExp >= overflowExponent) { + dstExp = dstInfExp; + dstSigFrac = 0; + } else { + // a underflows on conversion to the destination type or is an exact + // zero. The result may be a denormal or zero. Extract the exponent + // to get the shift amount for the denormalization. + src_rep_t significand = srcSigFrac; + int shift = srcExpBias - dstExpBias - srcExp; + + if (srcExp) { + // Set the implicit integer bit if the source is a normal number. + significand |= srcMinNormal; + shift += 1; + } + + // Right shift by the denormalization amount with sticky. + if (shift > srcSigFracBits) { + dstExp = 0; + dstSigFrac = 0; + } else { + dstExp = 0; + const bool sticky = shift && ((significand << (srcBits - shift)) != 0); + src_rep_t denormalizedSignificand = significand >> shift | sticky; + dstSigFrac = denormalizedSignificand >> sigFracTailBits; + const src_rep_t roundBits = denormalizedSignificand & roundMask; + // Round to nearest + if (roundBits > halfway) + dstSigFrac++; + // Ties to even + else if (roundBits == halfway) + dstSigFrac += dstSigFrac & 1; + + // Rounding has changed the exponent. + if (dstSigFrac >= (DST_REP_C(1) << dstSigFracBits)) { + dstExp += 1; + dstSigFrac ^= (DST_REP_C(1) << dstSigFracBits); + } + } + } + + return dstFromRep(construct_dst_rep(dstSign, dstExp, dstSigFrac)); +} diff --git a/lib/impl/int_div_impl.inc b/lib/impl/int_div_impl.inc @@ -0,0 +1,95 @@ +//===-- int_div_impl.inc - Integer division ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Helpers used by __udivsi3, __umodsi3, __udivdi3, and __umodsi3. +// +//===----------------------------------------------------------------------===// + +#define clz(a) (sizeof(a) == sizeof(unsigned long long) ? __builtin_clzll(a) : clzsi(a)) + +// Adapted from Figure 3-40 of The PowerPC Compiler Writer's Guide +static __inline fixuint_t __udivXi3(fixuint_t n, fixuint_t d) { + const unsigned N = sizeof(fixuint_t) * CHAR_BIT; + // d == 0 cases are unspecified. + unsigned sr = (d ? clz(d) : N) - (n ? clz(n) : N); + // 0 <= sr <= N - 1 or sr is very large. + if (sr > N - 1) // n < d + return 0; + if (sr == N - 1) // d == 1 + return n; + ++sr; + // 1 <= sr <= N - 1. Shifts do not trigger UB. + fixuint_t r = n >> sr; + n <<= N - sr; + fixuint_t carry = 0; + for (; sr > 0; --sr) { + r = (r << 1) | (n >> (N - 1)); + n = (n << 1) | carry; + // Branch-less version of: + // carry = 0; + // if (r >= d) r -= d, carry = 1; + const fixint_t s = (fixint_t)(d - r - 1) >> (N - 1); + carry = s & 1; + r -= d & s; + } + n = (n << 1) | carry; + return n; +} + +// Mostly identical to __udivXi3 but the return values are different. +static __inline fixuint_t __umodXi3(fixuint_t n, fixuint_t d) { + const unsigned N = sizeof(fixuint_t) * CHAR_BIT; + // d == 0 cases are unspecified. + unsigned sr = (d ? clz(d) : N) - (n ? clz(n) : N); + // 0 <= sr <= N - 1 or sr is very large. + if (sr > N - 1) // n < d + return n; + if (sr == N - 1) // d == 1 + return 0; + ++sr; + // 1 <= sr <= N - 1. Shifts do not trigger UB. + fixuint_t r = n >> sr; + n <<= N - sr; + fixuint_t carry = 0; + for (; sr > 0; --sr) { + r = (r << 1) | (n >> (N - 1)); + n = (n << 1) | carry; + // Branch-less version of: + // carry = 0; + // if (r >= d) r -= d, carry = 1; + const fixint_t s = (fixint_t)(d - r - 1) >> (N - 1); + carry = s & 1; + r -= d & s; + } + return r; +} + +#ifdef COMPUTE_UDIV +static __inline fixint_t __divXi3(fixint_t a, fixint_t b) { + const int N = (int)(sizeof(fixint_t) * CHAR_BIT) - 1; + fixint_t s_a = a >> N; // s_a = a < 0 ? -1 : 0 + fixint_t s_b = b >> N; // s_b = b < 0 ? -1 : 0 + fixuint_t a_u = (fixuint_t)(a ^ s_a) + (-s_a); // negate if s_a == -1 + fixuint_t b_u = (fixuint_t)(b ^ s_b) + (-s_b); // negate if s_b == -1 + s_a ^= s_b; // sign of quotient + return (COMPUTE_UDIV(a_u, b_u) ^ s_a) + (-s_a); // negate if s_a == -1 +} +#endif // COMPUTE_UDIV + +#ifdef ASSIGN_UMOD +static __inline fixint_t __modXi3(fixint_t a, fixint_t b) { + const int N = (int)(sizeof(fixint_t) * CHAR_BIT) - 1; + fixint_t s = b >> N; // s = b < 0 ? -1 : 0 + fixuint_t b_u = (fixuint_t)(b ^ s) + (-s); // negate if s == -1 + s = a >> N; // s = a < 0 ? -1 : 0 + fixuint_t a_u = (fixuint_t)(a ^ s) + (-s); // negate if s == -1 + fixuint_t res; + ASSIGN_UMOD(res, a_u, b_u); + return (res ^ s) + (-s); // negate if s == -1 +} +#endif // ASSIGN_UMOD diff --git a/lib/impl/int_to_fp.h b/lib/impl/int_to_fp.h @@ -0,0 +1,82 @@ +//===-- int_to_fp.h - integer to floating point conversion ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Set source and destination defines in order to use a correctly +// parameterised floatXiYf implementation. +// +//===----------------------------------------------------------------------===// + +#ifndef INT_TO_FP_H +#define INT_TO_FP_H + +#include "int_lib.h" + +#if defined SRC_I64 +typedef int64_t src_t; +typedef uint64_t usrc_t; +static __inline int clzSrcT(usrc_t x) { return __builtin_clzll(x); } + +#elif defined SRC_U64 +typedef uint64_t src_t; +typedef uint64_t usrc_t; +static __inline int clzSrcT(usrc_t x) { return __builtin_clzll(x); } + +#elif defined SRC_I128 +typedef __int128_t src_t; +typedef __uint128_t usrc_t; +static __inline int clzSrcT(usrc_t x) { return __clzti2(x); } + +#elif defined SRC_U128 +typedef __uint128_t src_t; +typedef __uint128_t usrc_t; +static __inline int clzSrcT(usrc_t x) { return __clzti2(x); } + +#else +#error Source should be a handled integer type. +#endif + +#if defined DST_SINGLE +typedef float dst_t; +typedef uint32_t dst_rep_t; +#define DST_REP_C UINT32_C + +enum { + dstSigBits = 23, +}; + +#elif defined DST_DOUBLE +typedef double dst_t; +typedef uint64_t dst_rep_t; +#define DST_REP_C UINT64_C + +enum { + dstSigBits = 52, +}; + +#elif defined DST_QUAD +typedef tf_float dst_t; +typedef __uint128_t dst_rep_t; +#define DST_REP_C (__uint128_t) + +enum { + dstSigBits = 112, +}; + +#else +#error Destination should be a handled floating point type +#endif + +static __inline dst_t dstFromRep(dst_rep_t x) { + const union { + dst_t f; + dst_rep_t i; + } rep = {.i = x}; + return rep.f; +} + +#endif // INT_TO_FP_H diff --git a/lib/impl/int_to_fp_impl.inc b/lib/impl/int_to_fp_impl.inc @@ -0,0 +1,72 @@ +//===-- int_to_fp_impl.inc - integer to floating point conversion ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Thsi file implements a generic conversion from an integer type to an +// IEEE-754 floating point type, allowing a common implementation to be hsared +// without copy and paste. +// +//===----------------------------------------------------------------------===// + +#include "int_to_fp.h" + +static __inline dst_t __floatXiYf__(src_t a) { + if (a == 0) + return 0.0; + + enum { + dstMantDig = dstSigBits + 1, + srcBits = sizeof(src_t) * CHAR_BIT, + srcIsSigned = ((src_t)-1) < 0, + }; + + const src_t s = srcIsSigned ? a >> (srcBits - 1) : 0; + + a = (usrc_t)(a ^ s) - s; + int sd = srcBits - clzSrcT(a); // number of significant digits + int e = sd - 1; // exponent + if (sd > dstMantDig) { + // start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + // finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + // 12345678901234567890123456 + // 1 = msb 1 bit + // P = bit dstMantDig-1 bits to the right of 1 + // Q = bit dstMantDig bits to the right of 1 + // R = "or" of all bits to the right of Q + if (sd == dstMantDig + 1) { + a <<= 1; + } else if (sd == dstMantDig + 2) { + // Do nothing. + } else { + a = ((usrc_t)a >> (sd - (dstMantDig + 2))) | + ((a & ((usrc_t)(-1) >> ((srcBits + dstMantDig + 2) - sd))) != 0); + } + // finish: + a |= (a & 4) != 0; // Or P into R + ++a; // round - this step may add a significant bit + a >>= 2; // dump Q and R + // a is now rounded to dstMantDig or dstMantDig+1 bits + if (a & ((usrc_t)1 << dstMantDig)) { + a >>= 1; + ++e; + } + // a is now rounded to dstMantDig bits + } else { + a <<= (dstMantDig - sd); + // a is now rounded to dstMantDig bits + } + const int dstBits = sizeof(dst_t) * CHAR_BIT; + const dst_rep_t dstSignMask = DST_REP_C(1) << (dstBits - 1); + const int dstExpBits = dstBits - dstSigBits - 1; + const int dstExpBias = (1 << (dstExpBits - 1)) - 1; + const dst_rep_t dstSignificandMask = (DST_REP_C(1) << dstSigBits) - 1; + // Combine sign, exponent, and mantissa. + const dst_rep_t result = ((dst_rep_t)s & dstSignMask) | + ((dst_rep_t)(e + dstExpBias) << dstSigBits) | + ((dst_rep_t)(a) & dstSignificandMask); + return dstFromRep(result); +} diff --git a/lib/include/common/assembly.h b/lib/include/common/assembly.h @@ -0,0 +1,293 @@ +//===-- assembly.h - compiler-rt assembler support macros -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines macros for use in compiler-rt assembler source. +// This file is not part of the interface of this library. +// +//===----------------------------------------------------------------------===// + +#ifndef COMPILERRT_ASSEMBLY_H +#define COMPILERRT_ASSEMBLY_H + +#if defined(__linux__) && defined(__CET__) +#if __has_include(<cet.h>) +#include <cet.h> +#endif +#endif + +#if defined(__APPLE__) && defined(__aarch64__) +#define SEPARATOR %% +#else +#define SEPARATOR ; +#endif + +#if defined(__APPLE__) +#define HIDDEN(name) .private_extern name +#define LOCAL_LABEL(name) L_##name +// tell linker it can break up file at label boundaries +#define FILE_LEVEL_DIRECTIVE .subsections_via_symbols +#define SYMBOL_IS_FUNC(name) +#define CONST_SECTION .const + +#define NO_EXEC_STACK_DIRECTIVE + +#elif defined(__ELF__) + +#define HIDDEN(name) .hidden name +#define LOCAL_LABEL(name) .L_##name +#define FILE_LEVEL_DIRECTIVE +#if defined(__arm__) || defined(__aarch64__) +#define SYMBOL_IS_FUNC(name) .type name,%function +#else +#define SYMBOL_IS_FUNC(name) .type name,@function +#endif +#define CONST_SECTION .section .rodata + +#if defined(__GNU__) || defined(__FreeBSD__) || defined(__Fuchsia__) || \ + defined(__linux__) +#define NO_EXEC_STACK_DIRECTIVE .section .note.GNU-stack,"",%progbits +#else +#define NO_EXEC_STACK_DIRECTIVE +#endif + +#else // !__APPLE__ && !__ELF__ + +#define HIDDEN(name) +#define LOCAL_LABEL(name) .L ## name +#define FILE_LEVEL_DIRECTIVE +#define SYMBOL_IS_FUNC(name) \ + .def name SEPARATOR \ + .scl 2 SEPARATOR \ + .type 32 SEPARATOR \ + .endef +#define CONST_SECTION .section .rdata,"rd" + +#define NO_EXEC_STACK_DIRECTIVE + +#endif + +#if defined(__arm__) || defined(__aarch64__) +#define FUNC_ALIGN \ + .text SEPARATOR \ + .balign 16 SEPARATOR +#else +#define FUNC_ALIGN +#endif + +// BTI and PAC gnu property note +#define NT_GNU_PROPERTY_TYPE_0 5 +#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000 +#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI 1 +#define GNU_PROPERTY_AARCH64_FEATURE_1_PAC 2 + +#if defined(__ARM_FEATURE_BTI_DEFAULT) +#define BTI_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_BTI +#else +#define BTI_FLAG 0 +#endif + +#if __ARM_FEATURE_PAC_DEFAULT & 3 +#define PAC_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_PAC +#else +#define PAC_FLAG 0 +#endif + +#define GNU_PROPERTY(type, value) \ + .pushsection .note.gnu.property, "a" SEPARATOR \ + .p2align 3 SEPARATOR \ + .word 4 SEPARATOR \ + .word 16 SEPARATOR \ + .word NT_GNU_PROPERTY_TYPE_0 SEPARATOR \ + .asciz "GNU" SEPARATOR \ + .word type SEPARATOR \ + .word 4 SEPARATOR \ + .word value SEPARATOR \ + .word 0 SEPARATOR \ + .popsection + +#if BTI_FLAG != 0 +#define BTI_C hint #34 +#define BTI_J hint #36 +#else +#define BTI_C +#define BTI_J +#endif + +#if (BTI_FLAG | PAC_FLAG) != 0 +#define GNU_PROPERTY_BTI_PAC \ + GNU_PROPERTY(GNU_PROPERTY_AARCH64_FEATURE_1_AND, BTI_FLAG | PAC_FLAG) +#else +#define GNU_PROPERTY_BTI_PAC +#endif + +#if defined(__clang__) || defined(__GCC_HAVE_DWARF2_CFI_ASM) +#define CFI_START .cfi_startproc +#define CFI_END .cfi_endproc +#else +#define CFI_START +#define CFI_END +#endif + +#if defined(__arm__) + +// Determine actual [ARM][THUMB[1][2]] ISA using compiler predefined macros: +// - for '-mthumb -march=armv6' compiler defines '__thumb__' +// - for '-mthumb -march=armv7' compiler defines '__thumb__' and '__thumb2__' +#if defined(__thumb2__) || defined(__thumb__) +#define DEFINE_CODE_STATE .thumb SEPARATOR +#define DECLARE_FUNC_ENCODING .thumb_func SEPARATOR +#if defined(__thumb2__) +#define USE_THUMB_2 +#define IT(cond) it cond +#define ITT(cond) itt cond +#define ITE(cond) ite cond +#else +#define USE_THUMB_1 +#define IT(cond) +#define ITT(cond) +#define ITE(cond) +#endif // defined(__thumb__2) +#else // !defined(__thumb2__) && !defined(__thumb__) +#define DEFINE_CODE_STATE .arm SEPARATOR +#define DECLARE_FUNC_ENCODING +#define IT(cond) +#define ITT(cond) +#define ITE(cond) +#endif + +#if defined(USE_THUMB_1) && defined(USE_THUMB_2) +#error "USE_THUMB_1 and USE_THUMB_2 can't be defined together." +#endif + +#if defined(__ARM_ARCH_4T__) || __ARM_ARCH >= 5 +#define ARM_HAS_BX +#endif +#if !defined(__ARM_FEATURE_CLZ) && !defined(USE_THUMB_1) && \ + (__ARM_ARCH >= 6 || (__ARM_ARCH == 5 && !defined(__ARM_ARCH_5__))) +#define __ARM_FEATURE_CLZ +#endif + +#ifdef ARM_HAS_BX +#define JMP(r) bx r +#define JMPc(r, c) bx##c r +#else +#define JMP(r) mov pc, r +#define JMPc(r, c) mov##c pc, r +#endif + +// pop {pc} can't switch Thumb mode on ARMv4T +#if __ARM_ARCH >= 5 +#define POP_PC() pop {pc} +#else +#define POP_PC() \ + pop {ip}; \ + JMP(ip) +#endif + +#if defined(USE_THUMB_2) +#define WIDE(op) op.w +#else +#define WIDE(op) op +#endif +#else // !defined(__arm) +#define DECLARE_FUNC_ENCODING +#define DEFINE_CODE_STATE +#endif + +#define GLUE2_(a, b) a##b +#define GLUE(a, b) GLUE2_(a, b) +#define GLUE2(a, b) GLUE2_(a, b) +#define GLUE3_(a, b, c) a##b##c +#define GLUE3(a, b, c) GLUE3_(a, b, c) +#define GLUE4_(a, b, c, d) a##b##c##d +#define GLUE4(a, b, c, d) GLUE4_(a, b, c, d) + +#define SYMBOL_NAME(name) GLUE(__USER_LABEL_PREFIX__, name) + +#ifdef VISIBILITY_HIDDEN +#define DECLARE_SYMBOL_VISIBILITY(name) \ + HIDDEN(SYMBOL_NAME(name)) SEPARATOR +#define DECLARE_SYMBOL_VISIBILITY_UNMANGLED(name) \ + HIDDEN(name) SEPARATOR +#else +#define DECLARE_SYMBOL_VISIBILITY(name) +#define DECLARE_SYMBOL_VISIBILITY_UNMANGLED(name) +#endif + +#define DEFINE_COMPILERRT_FUNCTION(name) \ + DEFINE_CODE_STATE \ + FILE_LEVEL_DIRECTIVE SEPARATOR \ + .globl SYMBOL_NAME(name) SEPARATOR \ + SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \ + DECLARE_SYMBOL_VISIBILITY(name) \ + DECLARE_FUNC_ENCODING \ + SYMBOL_NAME(name): + +#define DEFINE_COMPILERRT_THUMB_FUNCTION(name) \ + DEFINE_CODE_STATE \ + FILE_LEVEL_DIRECTIVE SEPARATOR \ + .globl SYMBOL_NAME(name) SEPARATOR \ + SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \ + DECLARE_SYMBOL_VISIBILITY(name) SEPARATOR \ + .thumb_func SEPARATOR \ + SYMBOL_NAME(name): + +#define DEFINE_COMPILERRT_PRIVATE_FUNCTION(name) \ + DEFINE_CODE_STATE \ + FILE_LEVEL_DIRECTIVE SEPARATOR \ + .globl SYMBOL_NAME(name) SEPARATOR \ + SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \ + HIDDEN(SYMBOL_NAME(name)) SEPARATOR \ + DECLARE_FUNC_ENCODING \ + SYMBOL_NAME(name): + +#define DEFINE_COMPILERRT_PRIVATE_FUNCTION_UNMANGLED(name) \ + DEFINE_CODE_STATE \ + .globl name SEPARATOR \ + SYMBOL_IS_FUNC(name) SEPARATOR \ + HIDDEN(name) SEPARATOR \ + DECLARE_FUNC_ENCODING \ + name: + +#define DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(name) \ + DEFINE_CODE_STATE \ + FUNC_ALIGN \ + .globl name SEPARATOR \ + SYMBOL_IS_FUNC(name) SEPARATOR \ + DECLARE_SYMBOL_VISIBILITY_UNMANGLED(name) SEPARATOR \ + DECLARE_FUNC_ENCODING \ + name: \ + SEPARATOR CFI_START \ + SEPARATOR BTI_C + +#define DEFINE_COMPILERRT_FUNCTION_ALIAS(name, target) \ + .globl SYMBOL_NAME(name) SEPARATOR \ + SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \ + DECLARE_SYMBOL_VISIBILITY(name) SEPARATOR \ + .set SYMBOL_NAME(name), SYMBOL_NAME(target) SEPARATOR + +#if defined(__ARM_EABI__) +#define DEFINE_AEABI_FUNCTION_ALIAS(aeabi_name, name) \ + DEFINE_COMPILERRT_FUNCTION_ALIAS(aeabi_name, name) +#else +#define DEFINE_AEABI_FUNCTION_ALIAS(aeabi_name, name) +#endif + +#ifdef __ELF__ +#define END_COMPILERRT_FUNCTION(name) \ + .size SYMBOL_NAME(name), . - SYMBOL_NAME(name) +#define END_COMPILERRT_OUTLINE_FUNCTION(name) \ + CFI_END SEPARATOR \ + .size SYMBOL_NAME(name), . - SYMBOL_NAME(name) +#else +#define END_COMPILERRT_FUNCTION(name) +#define END_COMPILERRT_OUTLINE_FUNCTION(name) \ + CFI_END +#endif + +#endif // COMPILERRT_ASSEMBLY_H diff --git a/lib/include/common/fp_lib.h b/lib/include/common/fp_lib.h @@ -0,0 +1,281 @@ +//===-- fp_lib.h - Floating-point utilities ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// cfree-tailored: little-endian, GCC/clang-compatible builtins, IEEE 754 +// binary32/binary64. binary128 (QUAD_PRECISION) requires tf_supplement.h to +// have been pre-included so that CRT_HAS_TF_MODE / CRT_HAS_IEEE_TF and +// tf_float are defined. +// +// Selected by the includer via #define SINGLE_PRECISION | DOUBLE_PRECISION +// | QUAD_PRECISION before #include "fp_lib.h". +//===----------------------------------------------------------------------===// + +#ifndef FP_LIB_HEADER +#define FP_LIB_HEADER + +#include "int_lib.h" +#include "int_math.h" +#include "int_types.h" +#include <limits.h> +#include <stdbool.h> +#include <stdint.h> + +#if defined SINGLE_PRECISION + +typedef uint16_t half_rep_t; +typedef uint32_t rep_t; +typedef uint64_t twice_rep_t; +typedef int32_t srep_t; +typedef float fp_t; +#define HALF_REP_C UINT16_C +#define REP_C UINT32_C +#define significandBits 23 + +static __inline int rep_clz(rep_t a) { return clzsi(a); } + +static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) { + const uint64_t product = (uint64_t)a * b; + *hi = product >> 32; + *lo = product; +} +COMPILER_RT_ABI fp_t __addsf3(fp_t a, fp_t b); + +#elif defined DOUBLE_PRECISION + +typedef uint32_t half_rep_t; +typedef uint64_t rep_t; +typedef int64_t srep_t; +typedef double fp_t; +#define HALF_REP_C UINT32_C +#define REP_C UINT64_C +#define significandBits 52 + +static __inline int rep_clz(rep_t a) { return __builtin_clzll(a); } + +#define loWord(a) (a & 0xffffffffU) +#define hiWord(a) (a >> 32) + +static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) { + const uint64_t plolo = loWord(a) * loWord(b); + const uint64_t plohi = loWord(a) * hiWord(b); + const uint64_t philo = hiWord(a) * loWord(b); + const uint64_t phihi = hiWord(a) * hiWord(b); + const uint64_t r0 = loWord(plolo); + const uint64_t r1 = hiWord(plolo) + loWord(plohi) + loWord(philo); + *lo = r0 + (r1 << 32); + *hi = hiWord(plohi) + hiWord(philo) + hiWord(r1) + phihi; +} +#undef loWord +#undef hiWord + +COMPILER_RT_ABI fp_t __adddf3(fp_t a, fp_t b); + +#elif defined QUAD_PRECISION +// Requires tf_supplement.h to be pre-included so CRT_HAS_TF_MODE and +// CRT_HAS_IEEE_TF are defined and tf_float is typedef'd. +typedef uint64_t half_rep_t; +typedef __uint128_t rep_t; +typedef __int128_t srep_t; +typedef tf_float fp_t; +#define HALF_REP_C UINT64_C +#define REP_C (__uint128_t) +#define significandBits 112 +#define TF_MANT_DIG (significandBits + 1) + +static __inline int rep_clz(rep_t a) { + const union { + __uint128_t ll; + struct { uint64_t low, high; } s; + } uu = {.ll = a}; + uint64_t word, add; + if (uu.s.high) { word = uu.s.high; add = 0; } + else { word = uu.s.low; add = 64; } + return __builtin_clzll(word) + add; +} + +#define Word_LoMask UINT64_C(0x00000000ffffffff) +#define Word_HiMask UINT64_C(0xffffffff00000000) +#define Word_FullMask UINT64_C(0xffffffffffffffff) +#define Word_1(a) (uint64_t)((a >> 96) & Word_LoMask) +#define Word_2(a) (uint64_t)((a >> 64) & Word_LoMask) +#define Word_3(a) (uint64_t)((a >> 32) & Word_LoMask) +#define Word_4(a) (uint64_t)(a & Word_LoMask) + +static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) { + const uint64_t product11 = Word_1(a) * Word_1(b); + const uint64_t product12 = Word_1(a) * Word_2(b); + const uint64_t product13 = Word_1(a) * Word_3(b); + const uint64_t product14 = Word_1(a) * Word_4(b); + const uint64_t product21 = Word_2(a) * Word_1(b); + const uint64_t product22 = Word_2(a) * Word_2(b); + const uint64_t product23 = Word_2(a) * Word_3(b); + const uint64_t product24 = Word_2(a) * Word_4(b); + const uint64_t product31 = Word_3(a) * Word_1(b); + const uint64_t product32 = Word_3(a) * Word_2(b); + const uint64_t product33 = Word_3(a) * Word_3(b); + const uint64_t product34 = Word_3(a) * Word_4(b); + const uint64_t product41 = Word_4(a) * Word_1(b); + const uint64_t product42 = Word_4(a) * Word_2(b); + const uint64_t product43 = Word_4(a) * Word_3(b); + const uint64_t product44 = Word_4(a) * Word_4(b); + + const __uint128_t sum0 = (__uint128_t)product44; + const __uint128_t sum1 = (__uint128_t)product34 + (__uint128_t)product43; + const __uint128_t sum2 = (__uint128_t)product24 + (__uint128_t)product33 + + (__uint128_t)product42; + const __uint128_t sum3 = (__uint128_t)product14 + (__uint128_t)product23 + + (__uint128_t)product32 + (__uint128_t)product41; + const __uint128_t sum4 = (__uint128_t)product13 + (__uint128_t)product22 + + (__uint128_t)product31; + const __uint128_t sum5 = (__uint128_t)product12 + (__uint128_t)product21; + const __uint128_t sum6 = (__uint128_t)product11; + + const __uint128_t r0 = (sum0 & Word_FullMask) + ((sum1 & Word_LoMask) << 32); + const __uint128_t r1 = (sum0 >> 64) + ((sum1 >> 32) & Word_FullMask) + + (sum2 & Word_FullMask) + ((sum3 << 32) & Word_HiMask); + *lo = r0 + (r1 << 64); + *hi = (r1 >> 64) + (sum1 >> 96) + (sum2 >> 64) + (sum3 >> 32) + sum4 + + (sum5 << 32) + (sum6 << 64); +} +#undef Word_1 +#undef Word_2 +#undef Word_3 +#undef Word_4 +#undef Word_HiMask +#undef Word_LoMask +#undef Word_FullMask +#else +#error SINGLE_PRECISION, DOUBLE_PRECISION or QUAD_PRECISION must be defined. +#endif + +#define typeWidth (sizeof(rep_t) * CHAR_BIT) + +static __inline rep_t toRep(fp_t x) { + const union { fp_t f; rep_t i; } rep = {.f = x}; + return rep.i; +} + +static __inline fp_t fromRep(rep_t x) { + const union { fp_t f; rep_t i; } rep = {.i = x}; + return rep.f; +} + +#define exponentBits (typeWidth - significandBits - 1) +#define maxExponent ((1 << exponentBits) - 1) +#define exponentBias (maxExponent >> 1) + +#define implicitBit (REP_C(1) << significandBits) +#define significandMask (implicitBit - 1U) +#define signBit (REP_C(1) << (significandBits + exponentBits)) +#define absMask (signBit - 1U) +#define exponentMask (absMask ^ significandMask) +#define oneRep ((rep_t)exponentBias << significandBits) +#define infRep exponentMask +#define quietBit (implicitBit >> 1) +#define qnanRep (exponentMask | quietBit) + +static __inline int normalize(rep_t *significand) { + const int shift = rep_clz(*significand) - rep_clz(implicitBit); + *significand <<= shift; + return 1 - shift; +} + +static __inline void wideLeftShift(rep_t *hi, rep_t *lo, int count) { + *hi = *hi << count | *lo >> (typeWidth - count); + *lo = *lo << count; +} + +static __inline void wideRightShiftWithSticky(rep_t *hi, rep_t *lo, + unsigned int count) { + if (count < typeWidth) { + const bool sticky = (*lo << (typeWidth - count)) != 0; + *lo = *hi << (typeWidth - count) | *lo >> count | sticky; + *hi = *hi >> count; + } else if (count < 2 * typeWidth) { + const bool sticky = *hi << (2 * typeWidth - count) | *lo; + *lo = *hi >> (count - typeWidth) | sticky; + *hi = 0; + } else { + const bool sticky = *hi | *lo; + *lo = sticky; + *hi = 0; + } +} + +static __inline fp_t __compiler_rt_logbX(fp_t x) { + rep_t rep = toRep(x); + int exp = (rep & exponentMask) >> significandBits; + + if (exp == maxExponent) { + if (((rep & signBit) == 0) || (x != x)) return x; + else return -x; + } else if (x == 0.0) { + return fromRep(infRep | signBit); + } + + if (exp != 0) { + return exp - exponentBias; + } else { + rep &= absMask; + const int shift = 1 - normalize(&rep); + exp = (rep & exponentMask) >> significandBits; + return exp - exponentBias - shift; + } +} + +static __inline fp_t __compiler_rt_scalbnX(fp_t x, int y) { + const rep_t rep = toRep(x); + int exp = (rep & exponentMask) >> significandBits; + + if (x == 0.0 || exp == maxExponent) return x; + + rep_t sig = rep & significandMask; + if (exp == 0) { + exp += normalize(&sig); + sig &= ~implicitBit; + } + + if (__builtin_sadd_overflow(exp, y, &exp)) { + exp = (y >= 0) ? INT_MAX : INT_MIN; + } + + const rep_t sign = rep & signBit; + if (exp >= maxExponent) { + return fromRep(sign | ((rep_t)(maxExponent - 1) << significandBits)) * 2.0f; + } else if (exp <= 0) { + fp_t tmp = fromRep(sign | (REP_C(1) << significandBits) | sig); + exp += exponentBias - 1; + if (exp < 1) exp = 1; + tmp *= fromRep((rep_t)exp << significandBits); + return tmp; + } else + return fromRep(sign | ((rep_t)exp << significandBits) | sig); +} + +static __inline fp_t __compiler_rt_fmaxX(fp_t x, fp_t y) { + return (crt_isnan(x) || x < y) ? y : x; +} + +#if defined(SINGLE_PRECISION) +static __inline fp_t __compiler_rt_logbf(fp_t x) { return __compiler_rt_logbX(x); } +static __inline fp_t __compiler_rt_scalbnf(fp_t x, int y) { return __compiler_rt_scalbnX(x, y); } +static __inline fp_t __compiler_rt_fmaxf(fp_t x, fp_t y) { return __compiler_rt_fmaxX(x, y); } +#elif defined(DOUBLE_PRECISION) +static __inline fp_t __compiler_rt_logb(fp_t x) { return __compiler_rt_logbX(x); } +static __inline fp_t __compiler_rt_scalbn(fp_t x, int y) { return __compiler_rt_scalbnX(x, y); } +static __inline fp_t __compiler_rt_fmax(fp_t x, fp_t y) { return __compiler_rt_fmaxX(x, y); } +#elif defined(QUAD_PRECISION) +static __inline tf_float __compiler_rt_logbtf(tf_float x) { return __compiler_rt_logbX(x); } +static __inline tf_float __compiler_rt_scalbntf(tf_float x, int y){ return __compiler_rt_scalbnX(x, y); } +static __inline tf_float __compiler_rt_fmaxtf(tf_float x, tf_float y){ return __compiler_rt_fmaxX(x, y); } +#define __compiler_rt_logbl __compiler_rt_logbtf +#define __compiler_rt_scalbnl __compiler_rt_scalbntf +#define __compiler_rt_fmaxl __compiler_rt_fmaxtf +#define crt_fabstf crt_fabsf128 +#define crt_copysigntf crt_copysignf128 +#endif + +#endif // FP_LIB_HEADER diff --git a/lib/include/common/fp_mode.h b/lib/include/common/fp_mode.h @@ -0,0 +1,29 @@ +//===----- lib/fp_mode.h - Floaing-point environment mode utilities --C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is not part of the interface of this library. +// +// This file defines an interface for accessing hardware floating point +// environment mode. +// +//===----------------------------------------------------------------------===// + +#ifndef FP_MODE_H +#define FP_MODE_H + +typedef enum { + CRT_FE_TONEAREST, + CRT_FE_DOWNWARD, + CRT_FE_UPWARD, + CRT_FE_TOWARDZERO +} CRT_FE_ROUND_MODE; + +CRT_FE_ROUND_MODE __fe_getround(void); +int __fe_raise_inexact(void); + +#endif // FP_MODE_H diff --git a/lib/include/common/int_math.h b/lib/include/common/int_math.h @@ -0,0 +1,32 @@ +//===-- int_math.h - internal math inlines --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// cfree-tailored: assumes a GCC/clang-compatible compiler with the standard +// __builtin_* family. MSVC paths and GCC <7 fallbacks have been dropped. +//===----------------------------------------------------------------------===// + +#ifndef INT_MATH_H +#define INT_MATH_H + +#define CRT_INFINITY __builtin_huge_valf() + +#define crt_isfinite(x) __builtin_isfinite((x)) +#define crt_isinf(x) __builtin_isinf((x)) +#define crt_isnan(x) __builtin_isnan((x)) + +#define crt_copysign(x, y) __builtin_copysign((x), (y)) +#define crt_copysignf(x, y) __builtin_copysignf((x), (y)) +#define crt_copysignl(x, y) __builtin_copysignl((x), (y)) + +#define crt_fabs(x) __builtin_fabs((x)) +#define crt_fabsf(x) __builtin_fabsf((x)) +#define crt_fabsl(x) __builtin_fabsl((x)) + +#define crt_fmaxl(x, y) __builtin_fmaxl((x), (y)) +#define crt_logbl(x) __builtin_logbl((x)) +#define crt_scalbnl(x, y) __builtin_scalbnl((x), (y)) + +#endif // INT_MATH_H diff --git a/lib/include/common/int_util.h b/lib/include/common/int_util.h @@ -0,0 +1,47 @@ +//===-- int_util.h - internal utility functions ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is not part of the interface of this library. +// +// This file defines non-inline utilities which are available for use in the +// library. The function definitions themselves are all contained in int_util.c +// which will always be compiled into any compiler-rt library. +// +//===----------------------------------------------------------------------===// + +#ifndef INT_UTIL_H +#define INT_UTIL_H + +/// \brief Trigger a program abort (or panic for kernel code). +#define compilerrt_abort() __compilerrt_abort_impl(__FILE__, __LINE__, __func__) + +NORETURN void __compilerrt_abort_impl(const char *file, int line, + const char *function); + +#define COMPILE_TIME_ASSERT(expr) COMPILE_TIME_ASSERT1(expr, __COUNTER__) +#define COMPILE_TIME_ASSERT1(expr, cnt) COMPILE_TIME_ASSERT2(expr, cnt) +#define COMPILE_TIME_ASSERT2(expr, cnt) \ + typedef char ct_assert_##cnt[(expr) ? 1 : -1] UNUSED + +// Force unrolling the code specified to be repeated N times. +#define REPEAT_0_TIMES(code_to_repeat) /* do nothing */ +#define REPEAT_1_TIMES(code_to_repeat) code_to_repeat +#define REPEAT_2_TIMES(code_to_repeat) \ + REPEAT_1_TIMES(code_to_repeat) \ + code_to_repeat +#define REPEAT_3_TIMES(code_to_repeat) \ + REPEAT_2_TIMES(code_to_repeat) \ + code_to_repeat +#define REPEAT_4_TIMES(code_to_repeat) \ + REPEAT_3_TIMES(code_to_repeat) \ + code_to_repeat + +#define REPEAT_N_TIMES_(N, code_to_repeat) REPEAT_##N##_TIMES(code_to_repeat) +#define REPEAT_N_TIMES(N, code_to_repeat) REPEAT_N_TIMES_(N, code_to_repeat) + +#endif // INT_UTIL_H diff --git a/lib/include/ilp32_le/int_endianness.h b/lib/include/ilp32_le/int_endianness.h @@ -0,0 +1,13 @@ +//===-- int_endianness.h - LP64 little-endian ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +//===----------------------------------------------------------------------===// +#ifndef INT_ENDIANNESS_H +#define INT_ENDIANNESS_H + +#define _YUGA_LITTLE_ENDIAN 1 +#define _YUGA_BIG_ENDIAN 0 + +#endif diff --git a/lib/include/ilp32_le/int_lib.h b/lib/include/ilp32_le/int_lib.h @@ -0,0 +1,43 @@ +//===-- int_lib.h - ILP32 little-endian ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// cfree-tailored: ILP32 (long == 32, pointer == 32), little-endian, freestanding. +// No 128-bit integer support (no ti_int / __int128). +// Targets: Linux/Darwin x86, ARM32, RV32, WASM32. +//===----------------------------------------------------------------------===// + +#ifndef INT_LIB_H +#define INT_LIB_H + +#define COMPILER_RT_ABI + +// ARM AEABI runtime helpers explicitly use the AAPCS PCS regardless of the +// translation unit default. cfree's ARM target is always AAPCS, so this is +// effectively a no-op, but the symbol must exist for the arm/ source files. +#define AEABI_RTABI __attribute__((__pcs__("aapcs"))) + +#define ALWAYS_INLINE __attribute__((always_inline)) +#define NOINLINE __attribute__((noinline)) +#define NORETURN __attribute__((noreturn)) +#define UNUSED __attribute__((unused)) + +#include <float.h> +#include <limits.h> +#include <stdbool.h> +#include <stdint.h> + +#include "int_types.h" +#include "int_util.h" + +COMPILER_RT_ABI int __paritysi2(si_int a); +COMPILER_RT_ABI int __paritydi2(di_int a); +COMPILER_RT_ABI di_int __divdi3(di_int a, di_int b); +COMPILER_RT_ABI si_int __divsi3(si_int a, si_int b); +COMPILER_RT_ABI su_int __udivsi3(su_int n, su_int d); +COMPILER_RT_ABI su_int __udivmodsi4(su_int a, su_int b, su_int *rem); +COMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int *rem); + +#endif // INT_LIB_H diff --git a/lib/include/ilp32_le/int_types.h b/lib/include/ilp32_le/int_types.h @@ -0,0 +1,48 @@ +//===-- int_types.h - ILP32 little-endian --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// cfree-tailored: ILP32 little-endian. No 128-bit integer support. +//===----------------------------------------------------------------------===// + +#ifndef INT_TYPES_H +#define INT_TYPES_H + +#include "int_endianness.h" + +#ifdef si_int +#undef si_int +#endif +typedef int32_t si_int; +typedef uint32_t su_int; +#define clzsi __builtin_clz +#define ctzsi __builtin_ctz + +typedef int64_t di_int; +typedef uint64_t du_int; + +typedef union { + di_int all; + struct { su_int low; si_int high; } s; +} dwords; + +typedef union { + du_int all; + struct { su_int low; su_int high; } s; +} udwords; + +#define CRT_HAS_FLOATING_POINT 1 + +typedef union { su_int u; float f; } float_bits; +typedef union { udwords u; double f; } double_bits; +typedef struct { udwords low; udwords high; } uqwords; + +typedef float _Complex Fcomplex; +typedef double _Complex Dcomplex; +typedef long double _Complex Lcomplex; +#define COMPLEX_REAL(x) __real__(x) +#define COMPLEX_IMAGINARY(x) __imag__(x) + +#endif // INT_TYPES_H diff --git a/lib/include/llp64_le/int_endianness.h b/lib/include/llp64_le/int_endianness.h @@ -0,0 +1,13 @@ +//===-- int_endianness.h - LP64 little-endian ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +//===----------------------------------------------------------------------===// +#ifndef INT_ENDIANNESS_H +#define INT_ENDIANNESS_H + +#define _YUGA_LITTLE_ENDIAN 1 +#define _YUGA_BIG_ENDIAN 0 + +#endif diff --git a/lib/include/llp64_le/int_lib.h b/lib/include/llp64_le/int_lib.h @@ -0,0 +1,39 @@ +//===-- int_lib.h - LLP64 little-endian ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// cfree-tailored: LLP64 (long == 32, pointer == 64), little-endian, freestanding. +// Targets: Win64 x86_64. +//===----------------------------------------------------------------------===// + +#ifndef INT_LIB_H +#define INT_LIB_H + +#define COMPILER_RT_ABI + +#define ALWAYS_INLINE __attribute__((always_inline)) +#define NOINLINE __attribute__((noinline)) +#define NORETURN __attribute__((noreturn)) +#define UNUSED __attribute__((unused)) + +#include <float.h> +#include <limits.h> +#include <stdbool.h> +#include <stdint.h> + +#include "int_types.h" +#include "int_util.h" + +COMPILER_RT_ABI int __paritysi2(si_int a); +COMPILER_RT_ABI int __paritydi2(di_int a); +COMPILER_RT_ABI di_int __divdi3(di_int a, di_int b); +COMPILER_RT_ABI si_int __divsi3(si_int a, si_int b); +COMPILER_RT_ABI su_int __udivsi3(su_int n, su_int d); +COMPILER_RT_ABI su_int __udivmodsi4(su_int a, su_int b, su_int *rem); +COMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int *rem); +COMPILER_RT_ABI int __clzti2(ti_int a); +COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int *rem); + +#endif // INT_LIB_H diff --git a/lib/include/llp64_le/int_types.h b/lib/include/llp64_le/int_types.h @@ -0,0 +1,69 @@ +//===-- int_types.h - LLP64 little-endian --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// cfree-tailored: LLP64 little-endian. Has 128-bit integer (__int128) support. +//===----------------------------------------------------------------------===// + +#ifndef INT_TYPES_H +#define INT_TYPES_H + +#include "int_endianness.h" + +#ifdef si_int +#undef si_int +#endif +typedef int32_t si_int; +typedef uint32_t su_int; +#define clzsi __builtin_clz +#define ctzsi __builtin_ctz + +typedef int64_t di_int; +typedef uint64_t du_int; + +typedef union { + di_int all; + struct { su_int low; si_int high; } s; +} dwords; + +typedef union { + du_int all; + struct { su_int low; su_int high; } s; +} udwords; + +#define CRT_HAS_128BIT +typedef int ti_int __attribute__((mode(TI))); +typedef unsigned tu_int __attribute__((mode(TI))); + +typedef union { + ti_int all; + struct { du_int low; di_int high; } s; +} twords; + +typedef union { + tu_int all; + struct { du_int low; du_int high; } s; +} utwords; + +static __inline ti_int make_ti(di_int h, di_int l) { + twords r; r.s.high = h; r.s.low = l; return r.all; +} +static __inline tu_int make_tu(du_int h, du_int l) { + utwords r; r.s.high = h; r.s.low = l; return r.all; +} + +#define CRT_HAS_FLOATING_POINT 1 + +typedef union { su_int u; float f; } float_bits; +typedef union { udwords u; double f; } double_bits; +typedef struct { udwords low; udwords high; } uqwords; + +typedef float _Complex Fcomplex; +typedef double _Complex Dcomplex; +typedef long double _Complex Lcomplex; +#define COMPLEX_REAL(x) __real__(x) +#define COMPLEX_IMAGINARY(x) __imag__(x) + +#endif // INT_TYPES_H diff --git a/lib/include/lp64_le/int_endianness.h b/lib/include/lp64_le/int_endianness.h @@ -0,0 +1,13 @@ +//===-- int_endianness.h - LP64 little-endian ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +//===----------------------------------------------------------------------===// +#ifndef INT_ENDIANNESS_H +#define INT_ENDIANNESS_H + +#define _YUGA_LITTLE_ENDIAN 1 +#define _YUGA_BIG_ENDIAN 0 + +#endif diff --git a/lib/include/lp64_le/int_lib.h b/lib/include/lp64_le/int_lib.h @@ -0,0 +1,41 @@ +//===-- int_lib.h - LP64 little-endian -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// cfree-tailored: LP64 (long == 64, pointer == 64), little-endian, freestanding. +// Targets: Linux/Darwin x86_64, Linux/Darwin aarch64, RV64. +//===----------------------------------------------------------------------===// + +#ifndef INT_LIB_H +#define INT_LIB_H + +// AEABI ABI is handled via lib/arm/ assembly files; non-ARM targets use the +// platform's default C ABI for these helpers. +#define COMPILER_RT_ABI + +#define ALWAYS_INLINE __attribute__((always_inline)) +#define NOINLINE __attribute__((noinline)) +#define NORETURN __attribute__((noreturn)) +#define UNUSED __attribute__((unused)) + +#include <float.h> +#include <limits.h> +#include <stdbool.h> +#include <stdint.h> + +#include "int_types.h" +#include "int_util.h" + +COMPILER_RT_ABI int __paritysi2(si_int a); +COMPILER_RT_ABI int __paritydi2(di_int a); +COMPILER_RT_ABI di_int __divdi3(di_int a, di_int b); +COMPILER_RT_ABI si_int __divsi3(si_int a, si_int b); +COMPILER_RT_ABI su_int __udivsi3(su_int n, su_int d); +COMPILER_RT_ABI su_int __udivmodsi4(su_int a, su_int b, su_int *rem); +COMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int *rem); +COMPILER_RT_ABI int __clzti2(ti_int a); +COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int *rem); + +#endif // INT_LIB_H diff --git a/lib/include/lp64_le/int_types.h b/lib/include/lp64_le/int_types.h @@ -0,0 +1,74 @@ +//===-- int_types.h - LP64 little-endian ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// cfree-tailored: LP64 little-endian. Has 128-bit integer (__int128) support. +//===----------------------------------------------------------------------===// + +#ifndef INT_TYPES_H +#define INT_TYPES_H + +#include "int_endianness.h" + +#ifdef si_int +#undef si_int +#endif +typedef int32_t si_int; +typedef uint32_t su_int; +#define clzsi __builtin_clz +#define ctzsi __builtin_ctz + +typedef int64_t di_int; +typedef uint64_t du_int; + +typedef union { + di_int all; + struct { su_int low; si_int high; } s; +} dwords; + +typedef union { + du_int all; + struct { su_int low; su_int high; } s; +} udwords; + +#define CRT_HAS_128BIT +typedef int ti_int __attribute__((mode(TI))); +typedef unsigned tu_int __attribute__((mode(TI))); + +typedef union { + ti_int all; + struct { du_int low; di_int high; } s; +} twords; + +typedef union { + tu_int all; + struct { du_int low; du_int high; } s; +} utwords; + +static __inline ti_int make_ti(di_int h, di_int l) { + twords r; r.s.high = h; r.s.low = l; return r.all; +} +static __inline tu_int make_tu(du_int h, du_int l) { + utwords r; r.s.high = h; r.s.low = l; return r.all; +} + +#define CRT_HAS_FLOATING_POINT 1 + +typedef union { su_int u; float f; } float_bits; +typedef union { udwords u; double f; } double_bits; + +typedef struct { udwords low; udwords high; } uqwords; + +// IEEE binary128 (tf_float / CRT_HAS_TF_MODE) is supplied via +// include/lp64_le_ldbl128/tf_supplement.h when compiling lib/fp_tf/. Not +// available in this base header to keep it free of feature ifdefs. + +typedef float _Complex Fcomplex; +typedef double _Complex Dcomplex; +typedef long double _Complex Lcomplex; +#define COMPLEX_REAL(x) __real__(x) +#define COMPLEX_IMAGINARY(x) __imag__(x) + +#endif // INT_TYPES_H diff --git a/lib/include/lp64_le_ldbl128/tf_supplement.h b/lib/include/lp64_le_ldbl128/tf_supplement.h @@ -0,0 +1,32 @@ +//===-- tf_supplement.h - binary128 long double support ------------------===// +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Include via -include when compiling lib/fp_tf/ on a target where +// long double is IEEE binary128 (e.g. aarch64-linux with -mlong-double-128). +//===----------------------------------------------------------------------===// +#ifndef TF_SUPPLEMENT_H +#define TF_SUPPLEMENT_H + +// Pre-included before the translation unit, so the int_types.h pull happens +// before stdint.h has been brought in by int_lib.h. Bring it in directly. +#include <stdint.h> + +#include "int_types.h" + +typedef long double tf_float; +#define CRT_LDBL_128BIT +#define CRT_HAS_F128 +#define CRT_HAS_IEEE_TF +#define CRT_LDBL_IEEE_F128 +#define TF_C(x) x##L +#define CRT_HAS_TF_MODE + +typedef union { uqwords u; tf_float f; } tf_bits; + +typedef Lcomplex Qcomplex; +#define CRT_HAS_NATIVE_COMPLEX_F128 +#define COMPLEXTF_REAL(x) __real__(x) +#define COMPLEXTF_IMAGINARY(x) __imag__(x) + +#endif diff --git a/lib/int/absvdi2.c b/lib/int/absvdi2.c @@ -0,0 +1,25 @@ +//===-- absvdi2.c - Implement __absvdi2 -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __absvdi2 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +// Returns: absolute value + +// Effects: aborts if abs(x) < 0 + +COMPILER_RT_ABI di_int __absvdi2(di_int a) { + const int N = (int)(sizeof(di_int) * CHAR_BIT); + if (a == ((di_int)((du_int)1 << (N - 1)))) + compilerrt_abort(); + const di_int t = a >> (N - 1); + return (a ^ t) - t; +} diff --git a/lib/int/bswapdi2.c b/lib/int/bswapdi2.c @@ -0,0 +1,25 @@ +//===-- bswapdi2.c - Implement __bswapdi2 ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __bswapdi2 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +COMPILER_RT_ABI uint64_t __bswapdi2(uint64_t u) { + return ( + (((u)&0xff00000000000000ULL) >> 56) | + (((u)&0x00ff000000000000ULL) >> 40) | + (((u)&0x0000ff0000000000ULL) >> 24) | + (((u)&0x000000ff00000000ULL) >> 8) | + (((u)&0x00000000ff000000ULL) << 8) | + (((u)&0x0000000000ff0000ULL) << 24) | + (((u)&0x000000000000ff00ULL) << 40) | + (((u)&0x00000000000000ffULL) << 56)); +} diff --git a/lib/int/bswapsi2.c b/lib/int/bswapsi2.c @@ -0,0 +1,20 @@ +//===-- bswapsi2.c - Implement __bswapsi2 ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __bswapsi2 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +COMPILER_RT_ABI uint32_t __bswapsi2(uint32_t u) { + return ((((u)&0xff000000) >> 24) | + (((u)&0x00ff0000) >> 8) | + (((u)&0x0000ff00) << 8) | + (((u)&0x000000ff) << 24)); +} diff --git a/lib/int/clzdi2.c b/lib/int/clzdi2.c @@ -0,0 +1,25 @@ +//===-- clzdi2.c - Implement __clzdi2 -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __clzdi2 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +// Returns: the number of leading 0-bits + +// Precondition: a != 0 + +COMPILER_RT_ABI int __clzdi2(di_int a) { + dwords x; + x.all = a; + const si_int f = -(x.s.high == 0); + return clzsi((x.s.high & ~f) | (x.s.low & f)) + + (f & ((si_int)(sizeof(si_int) * CHAR_BIT))); +} diff --git a/lib/int/clzsi2.c b/lib/int/clzsi2.c @@ -0,0 +1,48 @@ +//===-- clzsi2.c - Implement __clzsi2 -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __clzsi2 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +// Returns: the number of leading 0-bits + +// Precondition: a != 0 + +COMPILER_RT_ABI int __clzsi2(si_int a) { + su_int x = (su_int)a; + si_int t = ((x & 0xFFFF0000) == 0) << 4; // if (x is small) t = 16 else 0 + x >>= 16 - t; // x = [0 - 0xFFFF] + su_int r = t; // r = [0, 16] + // return r + clz(x) + t = ((x & 0xFF00) == 0) << 3; + x >>= 8 - t; // x = [0 - 0xFF] + r += t; // r = [0, 8, 16, 24] + // return r + clz(x) + t = ((x & 0xF0) == 0) << 2; + x >>= 4 - t; // x = [0 - 0xF] + r += t; // r = [0, 4, 8, 12, 16, 20, 24, 28] + // return r + clz(x) + t = ((x & 0xC) == 0) << 1; + x >>= 2 - t; // x = [0 - 3] + r += t; // r = [0 - 30] and is even + // return r + clz(x) + // switch (x) + // { + // case 0: + // return r + 2; + // case 1: + // return r + 1; + // case 2: + // case 3: + // return r; + // } + return r + ((2 - x) & -((x & 2) == 0)); +} diff --git a/lib/int/cmpdi2.c b/lib/int/cmpdi2.c @@ -0,0 +1,34 @@ +//===-- cmpdi2.c - Implement __cmpdi2 -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __cmpdi2 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +// Returns: if (a < b) returns 0 +// if (a == b) returns 1 +// if (a > b) returns 2 + +COMPILER_RT_ABI si_int __cmpdi2(di_int a, di_int b) { + dwords x; + x.all = a; + dwords y; + y.all = b; + if (x.s.high < y.s.high) + return 0; + if (x.s.high > y.s.high) + return 2; + if (x.s.low < y.s.low) + return 0; + if (x.s.low > y.s.low) + return 2; + return 1; +} + diff --git a/lib/int/ctzdi2.c b/lib/int/ctzdi2.c @@ -0,0 +1,25 @@ +//===-- ctzdi2.c - Implement __ctzdi2 -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __ctzdi2 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +// Returns: the number of trailing 0-bits + +// Precondition: a != 0 + +COMPILER_RT_ABI int __ctzdi2(di_int a) { + dwords x; + x.all = a; + const si_int f = -(x.s.low == 0); + return ctzsi((x.s.high & f) | (x.s.low & ~f)) + + (f & ((si_int)(sizeof(si_int) * CHAR_BIT))); +} diff --git a/lib/int/ctzsi2.c b/lib/int/ctzsi2.c @@ -0,0 +1,53 @@ +//===-- ctzsi2.c - Implement __ctzsi2 -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __ctzsi2 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +// Returns: the number of trailing 0-bits + +// Precondition: a != 0 + +COMPILER_RT_ABI int __ctzsi2(si_int a) { + su_int x = (su_int)a; + si_int t = ((x & 0x0000FFFF) == 0) + << 4; // if (x has no small bits) t = 16 else 0 + x >>= t; // x = [0 - 0xFFFF] + higher garbage bits + su_int r = t; // r = [0, 16] + // return r + ctz(x) + t = ((x & 0x00FF) == 0) << 3; + x >>= t; // x = [0 - 0xFF] + higher garbage bits + r += t; // r = [0, 8, 16, 24] + // return r + ctz(x) + t = ((x & 0x0F) == 0) << 2; + x >>= t; // x = [0 - 0xF] + higher garbage bits + r += t; // r = [0, 4, 8, 12, 16, 20, 24, 28] + // return r + ctz(x) + t = ((x & 0x3) == 0) << 1; + x >>= t; + x &= 3; // x = [0 - 3] + r += t; // r = [0 - 30] and is even + // return r + ctz(x) + + // The branch-less return statement below is equivalent + // to the following switch statement: + // switch (x) + // { + // case 0: + // return r + 2; + // case 2: + // return r + 1; + // case 1: + // case 3: + // return r; + // } + return r + ((2 - (x >> 1)) & -((x & 1) == 0)); +} diff --git a/lib/int/divdi3.c b/lib/int/divdi3.c @@ -0,0 +1,22 @@ +//===-- divdi3.c - Implement __divdi3 -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __divdi3 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +// Returns: a / b + +#define fixint_t di_int +#define fixuint_t du_int +#define COMPUTE_UDIV(a, b) __udivmoddi4((a), (b), (du_int *)0) +#include "int_div_impl.inc" + +COMPILER_RT_ABI di_int __divdi3(di_int a, di_int b) { return __divXi3(a, b); } diff --git a/lib/int/divmoddi4.c b/lib/int/divmoddi4.c @@ -0,0 +1,28 @@ +//===-- divmoddi4.c - Implement __divmoddi4 -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __divmoddi4 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +// Returns: a / b, *rem = a % b + +COMPILER_RT_ABI di_int __divmoddi4(di_int a, di_int b, di_int *rem) { + const int bits_in_dword_m1 = (int)(sizeof(di_int) * CHAR_BIT) - 1; + di_int s_a = a >> bits_in_dword_m1; // s_a = a < 0 ? -1 : 0 + di_int s_b = b >> bits_in_dword_m1; // s_b = b < 0 ? -1 : 0 + a = (du_int)(a ^ s_a) - s_a; // negate if s_a == -1 + b = (du_int)(b ^ s_b) - s_b; // negate if s_b == -1 + s_b ^= s_a; // sign of quotient + du_int r; + di_int q = (__udivmoddi4(a, b, &r) ^ s_b) - s_b; // negate if s_b == -1 + *rem = (r ^ s_a) - s_a; // negate if s_a == -1 + return q; +} diff --git a/lib/int/ffsdi2.c b/lib/int/ffsdi2.c @@ -0,0 +1,27 @@ +//===-- ffsdi2.c - Implement __ffsdi2 -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __ffsdi2 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +// Returns: the index of the least significant 1-bit in a, or +// the value zero if a is zero. The least significant bit is index one. + +COMPILER_RT_ABI int __ffsdi2(di_int a) { + dwords x; + x.all = a; + if (x.s.low == 0) { + if (x.s.high == 0) + return 0; + return ctzsi(x.s.high) + (1 + sizeof(si_int) * CHAR_BIT); + } + return ctzsi(x.s.low) + 1; +} diff --git a/lib/int/int_util.c b/lib/int/int_util.c @@ -0,0 +1,20 @@ +//===-- int_util.c - Internal utilities for compiler-rt ------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// cfree-tailored: freestanding only (cfree predefines __STDC_HOSTED__ == 0). +// Aborts via __builtin_trap() with no libc dependency. +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +__attribute__((weak)) +__attribute__((visibility("hidden"))) +void __compilerrt_abort_impl(const char *file, int line, const char *function) { + (void)file; + (void)line; + (void)function; + __builtin_trap(); +} diff --git a/lib/int/moddi3.c b/lib/int/moddi3.c @@ -0,0 +1,22 @@ +//===-- moddi3.c - Implement __moddi3 -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __moddi3 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +// Returns: a % b + +#define fixint_t di_int +#define fixuint_t du_int +#define ASSIGN_UMOD(res, a, b) __udivmoddi4((a), (b), &(res)) +#include "int_div_impl.inc" + +COMPILER_RT_ABI di_int __moddi3(di_int a, di_int b) { return __modXi3(a, b); } diff --git a/lib/int/negdi2.c b/lib/int/negdi2.c @@ -0,0 +1,21 @@ +//===-- negdi2.c - Implement __negdi2 -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __negdi2 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +// Returns: -a + +COMPILER_RT_ABI di_int __negdi2(di_int a) { + // Note: this routine is here for API compatibility; any sane compiler + // should expand it inline. + return -(du_int)a; +} diff --git a/lib/int/paritydi2.c b/lib/int/paritydi2.c @@ -0,0 +1,25 @@ +//===-- paritydi2.c - Implement __paritydi2 -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __paritydi2 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +// Returns: 1 if number of bits is odd else returns 0 + +COMPILER_RT_ABI int __paritydi2(di_int a) { + dwords x; + x.all = a; + su_int x2 = x.s.high ^ x.s.low; + x2 ^= x2 >> 16; + x2 ^= x2 >> 8; + x2 ^= x2 >> 4; + return (0x6996 >> (x2 & 0xF)) & 1; +} diff --git a/lib/int/paritysi2.c b/lib/int/paritysi2.c @@ -0,0 +1,23 @@ +//===-- paritysi2.c - Implement __paritysi2 -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __paritysi2 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +// Returns: 1 if number of bits is odd else returns 0 + +COMPILER_RT_ABI int __paritysi2(si_int a) { + su_int x = (su_int)a; + x ^= x >> 16; + x ^= x >> 8; + x ^= x >> 4; + return (0x6996 >> (x & 0xF)) & 1; +} diff --git a/lib/int/popcountdi2.c b/lib/int/popcountdi2.c @@ -0,0 +1,32 @@ +//===-- popcountdi2.c - Implement __popcountdi2 ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __popcountdi2 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +// Returns: count of 1 bits + +COMPILER_RT_ABI int __popcountdi2(di_int a) { + du_int x2 = (du_int)a; + x2 = x2 - ((x2 >> 1) & 0x5555555555555555uLL); + // Every 2 bits holds the sum of every pair of bits (32) + x2 = ((x2 >> 2) & 0x3333333333333333uLL) + (x2 & 0x3333333333333333uLL); + // Every 4 bits holds the sum of every 4-set of bits (3 significant bits) (16) + x2 = (x2 + (x2 >> 4)) & 0x0F0F0F0F0F0F0F0FuLL; + // Every 8 bits holds the sum of every 8-set of bits (4 significant bits) (8) + su_int x = (su_int)(x2 + (x2 >> 32)); + // The lower 32 bits hold four 16 bit sums (5 significant bits). + // Upper 32 bits are garbage + x = x + (x >> 16); + // The lower 16 bits hold two 32 bit sums (6 significant bits). + // Upper 16 bits are garbage + return (x + (x >> 8)) & 0x0000007F; // (7 significant bits) +} diff --git a/lib/int/popcountsi2.c b/lib/int/popcountsi2.c @@ -0,0 +1,29 @@ +//===-- popcountsi2.c - Implement __popcountsi2 ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __popcountsi2 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +// Returns: count of 1 bits + +COMPILER_RT_ABI int __popcountsi2(si_int a) { + su_int x = (su_int)a; + x = x - ((x >> 1) & 0x55555555); + // Every 2 bits holds the sum of every pair of bits + x = ((x >> 2) & 0x33333333) + (x & 0x33333333); + // Every 4 bits holds the sum of every 4-set of bits (3 significant bits) + x = (x + (x >> 4)) & 0x0F0F0F0F; + // Every 8 bits holds the sum of every 8-set of bits (4 significant bits) + x = (x + (x >> 16)); + // The lower 16 bits hold two 8 bit sums (5 significant bits). + // Upper 16 bits are garbage + return (x + (x >> 8)) & 0x0000003F; // (6 significant bits) +} diff --git a/lib/int/ucmpdi2.c b/lib/int/ucmpdi2.c @@ -0,0 +1,34 @@ +//===-- ucmpdi2.c - Implement __ucmpdi2 -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __ucmpdi2 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +// Returns: if (a < b) returns 0 +// if (a == b) returns 1 +// if (a > b) returns 2 + +COMPILER_RT_ABI si_int __ucmpdi2(du_int a, du_int b) { + udwords x; + x.all = a; + udwords y; + y.all = b; + if (x.s.high < y.s.high) + return 0; + if (x.s.high > y.s.high) + return 2; + if (x.s.low < y.s.low) + return 0; + if (x.s.low > y.s.low) + return 2; + return 1; +} + diff --git a/lib/int/udivdi3.c b/lib/int/udivdi3.c @@ -0,0 +1,23 @@ +//===-- udivdi3.c - Implement __udivdi3 -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __udivdi3 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +typedef du_int fixuint_t; +typedef di_int fixint_t; +#include "int_div_impl.inc" + +// Returns: a / b + +COMPILER_RT_ABI du_int __udivdi3(du_int a, du_int b) { + return __udivXi3(a, b); +} diff --git a/lib/int/udivmoddi4.c b/lib/int/udivmoddi4.c @@ -0,0 +1,191 @@ +//===-- udivmoddi4.c - Implement __udivmoddi4 -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __udivmoddi4 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +// Effects: if rem != 0, *rem = a % b +// Returns: a / b + +// Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide + + +COMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int *rem) { + const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT; + const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT; + udwords n; + n.all = a; + udwords d; + d.all = b; + udwords q; + udwords r; + unsigned sr; + // special cases, X is unknown, K != 0 + if (n.s.high == 0) { + if (d.s.high == 0) { + // 0 X + // --- + // 0 X + if (rem) + *rem = n.s.low % d.s.low; + return n.s.low / d.s.low; + } + // 0 X + // --- + // K X + if (rem) + *rem = n.s.low; + return 0; + } + // n.s.high != 0 + if (d.s.low == 0) { + if (d.s.high == 0) { + // K X + // --- + // 0 0 + if (rem) + *rem = n.s.high % d.s.low; + return n.s.high / d.s.low; + } + // d.s.high != 0 + if (n.s.low == 0) { + // K 0 + // --- + // K 0 + if (rem) { + r.s.high = n.s.high % d.s.high; + r.s.low = 0; + *rem = r.all; + } + return n.s.high / d.s.high; + } + // K K + // --- + // K 0 + if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */ { + if (rem) { + r.s.low = n.s.low; + r.s.high = n.s.high & (d.s.high - 1); + *rem = r.all; + } + return n.s.high >> ctzsi(d.s.high); + } + // K K + // --- + // K 0 + sr = clzsi(d.s.high) - clzsi(n.s.high); + // 0 <= sr <= n_uword_bits - 2 or sr large + if (sr > n_uword_bits - 2) { + if (rem) + *rem = n.all; + return 0; + } + ++sr; + // 1 <= sr <= n_uword_bits - 1 + // q.all = n.all << (n_udword_bits - sr); + q.s.low = 0; + q.s.high = n.s.low << (n_uword_bits - sr); + // r.all = n.all >> sr; + r.s.high = n.s.high >> sr; + r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); + } else /* d.s.low != 0 */ { + if (d.s.high == 0) { + // K X + // --- + // 0 K + if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */ { + if (rem) + *rem = n.s.low & (d.s.low - 1); + if (d.s.low == 1) + return n.all; + sr = ctzsi(d.s.low); + q.s.high = n.s.high >> sr; + q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); + return q.all; + } + // K X + // --- + // 0 K + sr = 1 + n_uword_bits + clzsi(d.s.low) - clzsi(n.s.high); + // 2 <= sr <= n_udword_bits - 1 + // q.all = n.all << (n_udword_bits - sr); + // r.all = n.all >> sr; + if (sr == n_uword_bits) { + q.s.low = 0; + q.s.high = n.s.low; + r.s.high = 0; + r.s.low = n.s.high; + } else if (sr < n_uword_bits) /* 2 <= sr <= n_uword_bits - 1 */ { + q.s.low = 0; + q.s.high = n.s.low << (n_uword_bits - sr); + r.s.high = n.s.high >> sr; + r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); + } else /* n_uword_bits + 1 <= sr <= n_udword_bits - 1 */ { + q.s.low = n.s.low << (n_udword_bits - sr); + q.s.high = (n.s.high << (n_udword_bits - sr)) | + (n.s.low >> (sr - n_uword_bits)); + r.s.high = 0; + r.s.low = n.s.high >> (sr - n_uword_bits); + } + } else { + // K X + // --- + // K K + sr = clzsi(d.s.high) - clzsi(n.s.high); + // 0 <= sr <= n_uword_bits - 1 or sr large + if (sr > n_uword_bits - 1) { + if (rem) + *rem = n.all; + return 0; + } + ++sr; + // 1 <= sr <= n_uword_bits + // q.all = n.all << (n_udword_bits - sr); + q.s.low = 0; + if (sr == n_uword_bits) { + q.s.high = n.s.low; + r.s.high = 0; + r.s.low = n.s.high; + } else { + q.s.high = n.s.low << (n_uword_bits - sr); + r.s.high = n.s.high >> sr; + r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); + } + } + } + // Not a special case + // q and r are initialized with: + // q.all = n.all << (n_udword_bits - sr); + // r.all = n.all >> sr; + // 1 <= sr <= n_udword_bits - 1 + su_int carry = 0; + for (; sr > 0; --sr) { + // r:q = ((r:q) << 1) | carry + r.s.high = (r.s.high << 1) | (r.s.low >> (n_uword_bits - 1)); + r.s.low = (r.s.low << 1) | (q.s.high >> (n_uword_bits - 1)); + q.s.high = (q.s.high << 1) | (q.s.low >> (n_uword_bits - 1)); + q.s.low = (q.s.low << 1) | carry; + // carry = 0; + // if (r.all >= d.all) + // { + // r.all -= d.all; + // carry = 1; + // } + const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1); + carry = s & 1; + r.all -= d.all & s; + } + q.all = (q.all << 1) | carry; + if (rem) + *rem = r.all; + return q.all; +} + diff --git a/lib/int/umoddi3.c b/lib/int/umoddi3.c @@ -0,0 +1,23 @@ +//===-- umoddi3.c - Implement __umoddi3 -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __umoddi3 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +typedef du_int fixuint_t; +typedef di_int fixint_t; +#include "int_div_impl.inc" + +// Returns: a % b + +COMPILER_RT_ABI du_int __umoddi3(du_int a, du_int b) { + return __umodXi3(a, b); +} diff --git a/lib/int32/ashldi3.c b/lib/int32/ashldi3.c @@ -0,0 +1,36 @@ +// ====-- ashldi3.c - Implement __ashldi3 ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __ashldi3 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +// Returns: a << b + +// Precondition: 0 <= b < bits_in_dword + +COMPILER_RT_ABI di_int __ashldi3(di_int a, int b) { + const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT); + dwords input; + dwords result; + input.all = a; + if (b & bits_in_word) /* bits_in_word <= b < bits_in_dword */ { + result.s.low = 0; + result.s.high = input.s.low << (b - bits_in_word); + } else /* 0 <= b < bits_in_word */ { + if (b == 0) + return a; + result.s.low = input.s.low << b; + result.s.high = + ((su_int)input.s.high << b) | (input.s.low >> (bits_in_word - b)); + } + return result.all; +} + diff --git a/lib/int32/ashrdi3.c b/lib/int32/ashrdi3.c @@ -0,0 +1,37 @@ +//===-- ashrdi3.c - Implement __ashrdi3 -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __ashrdi3 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +// Returns: arithmetic a >> b + +// Precondition: 0 <= b < bits_in_dword + +COMPILER_RT_ABI di_int __ashrdi3(di_int a, int b) { + const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT); + dwords input; + dwords result; + input.all = a; + if (b & bits_in_word) /* bits_in_word <= b < bits_in_dword */ { + // result.s.high = input.s.high < 0 ? -1 : 0 + result.s.high = input.s.high >> (bits_in_word - 1); + result.s.low = input.s.high >> (b - bits_in_word); + } else /* 0 <= b < bits_in_word */ { + if (b == 0) + return a; + result.s.high = input.s.high >> b; + result.s.low = + ((su_int)input.s.high << (bits_in_word - b)) | (input.s.low >> b); + } + return result.all; +} + diff --git a/lib/int32/lshrdi3.c b/lib/int32/lshrdi3.c @@ -0,0 +1,35 @@ +//===-- lshrdi3.c - Implement __lshrdi3 -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __lshrdi3 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +// Returns: logical a >> b + +// Precondition: 0 <= b < bits_in_dword + +COMPILER_RT_ABI di_int __lshrdi3(di_int a, int b) { + const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT); + udwords input; + udwords result; + input.all = a; + if (b & bits_in_word) /* bits_in_word <= b < bits_in_dword */ { + result.s.high = 0; + result.s.low = input.s.high >> (b - bits_in_word); + } else /* 0 <= b < bits_in_word */ { + if (b == 0) + return a; + result.s.high = input.s.high >> b; + result.s.low = (input.s.high << (bits_in_word - b)) | (input.s.low >> b); + } + return result.all; +} + diff --git a/lib/int32/muldi3.c b/lib/int32/muldi3.c @@ -0,0 +1,48 @@ +//===-- muldi3.c - Implement __muldi3 -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __muldi3 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +// Returns: a * b + +static di_int __muldsi3(su_int a, su_int b) { + dwords r; + const int bits_in_word_2 = (int)(sizeof(si_int) * CHAR_BIT) / 2; + const su_int lower_mask = (su_int)~0 >> bits_in_word_2; + r.s.low = (a & lower_mask) * (b & lower_mask); + su_int t = r.s.low >> bits_in_word_2; + r.s.low &= lower_mask; + t += (a >> bits_in_word_2) * (b & lower_mask); + r.s.low += (t & lower_mask) << bits_in_word_2; + r.s.high = t >> bits_in_word_2; + t = r.s.low >> bits_in_word_2; + r.s.low &= lower_mask; + t += (b >> bits_in_word_2) * (a & lower_mask); + r.s.low += (t & lower_mask) << bits_in_word_2; + r.s.high += t >> bits_in_word_2; + r.s.high += (a >> bits_in_word_2) * (b >> bits_in_word_2); + return r.all; +} + +// Returns: a * b + +COMPILER_RT_ABI di_int __muldi3(di_int a, di_int b) { + dwords x; + x.all = a; + dwords y; + y.all = b; + dwords r; + r.all = __muldsi3(x.s.low, y.s.low); + r.s.high += x.s.high * y.s.low + x.s.low * y.s.high; + return r.all; +} + diff --git a/lib/int64/ashlti3.c b/lib/int64/ashlti3.c @@ -0,0 +1,37 @@ +//===-- ashlti3.c - Implement __ashlti3 -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __ashlti3 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + + +// Returns: a << b + +// Precondition: 0 <= b < bits_in_tword + +COMPILER_RT_ABI ti_int __ashlti3(ti_int a, int b) { + const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT); + twords input; + twords result; + input.all = a; + if (b & bits_in_dword) /* bits_in_dword <= b < bits_in_tword */ { + result.s.low = 0; + result.s.high = input.s.low << (b - bits_in_dword); + } else /* 0 <= b < bits_in_dword */ { + if (b == 0) + return a; + result.s.low = input.s.low << b; + result.s.high = + ((du_int)input.s.high << b) | (input.s.low >> (bits_in_dword - b)); + } + return result.all; +} + diff --git a/lib/int64/ashrti3.c b/lib/int64/ashrti3.c @@ -0,0 +1,38 @@ +//===-- ashrti3.c - Implement __ashrti3 -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __ashrti3 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + + +// Returns: arithmetic a >> b + +// Precondition: 0 <= b < bits_in_tword + +COMPILER_RT_ABI ti_int __ashrti3(ti_int a, int b) { + const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT); + twords input; + twords result; + input.all = a; + if (b & bits_in_dword) /* bits_in_dword <= b < bits_in_tword */ { + // result.s.high = input.s.high < 0 ? -1 : 0 + result.s.high = input.s.high >> (bits_in_dword - 1); + result.s.low = input.s.high >> (b - bits_in_dword); + } else /* 0 <= b < bits_in_dword */ { + if (b == 0) + return a; + result.s.high = input.s.high >> b; + result.s.low = + ((du_int)input.s.high << (bits_in_dword - b)) | (input.s.low >> b); + } + return result.all; +} + diff --git a/lib/int64/clzti2.c b/lib/int64/clzti2.c @@ -0,0 +1,27 @@ +//===-- clzti2.c - Implement __clzti2 -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __clzti2 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + + +// Returns: the number of leading 0-bits + +// Precondition: a != 0 + +COMPILER_RT_ABI int __clzti2(ti_int a) { + twords x; + x.all = a; + const di_int f = -(x.s.high == 0); + return __builtin_clzll((x.s.high & ~f) | (x.s.low & f)) + + ((si_int)f & ((si_int)(sizeof(di_int) * CHAR_BIT))); +} + diff --git a/lib/int64/ctzti2.c b/lib/int64/ctzti2.c @@ -0,0 +1,27 @@ +//===-- ctzti2.c - Implement __ctzti2 -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __ctzti2 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + + +// Returns: the number of trailing 0-bits + +// Precondition: a != 0 + +COMPILER_RT_ABI int __ctzti2(ti_int a) { + twords x; + x.all = a; + const di_int f = -(x.s.low == 0); + return __builtin_ctzll((x.s.high & f) | (x.s.low & ~f)) + + ((si_int)f & ((si_int)(sizeof(di_int) * CHAR_BIT))); +} + diff --git a/lib/int64/divmodti4.c b/lib/int64/divmodti4.c @@ -0,0 +1,30 @@ +//===-- divmodti4.c - Implement __divmodti4 -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __divmodti4 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + + +// Returns: a / b, *rem = a % b + +COMPILER_RT_ABI ti_int __divmodti4(ti_int a, ti_int b, ti_int *rem) { + const int bits_in_tword_m1 = (int)(sizeof(ti_int) * CHAR_BIT) - 1; + ti_int s_a = a >> bits_in_tword_m1; // s_a = a < 0 ? -1 : 0 + ti_int s_b = b >> bits_in_tword_m1; // s_b = b < 0 ? -1 : 0 + a = (tu_int)(a ^ s_a) - s_a; // negate if s_a == -1 + b = (tu_int)(b ^ s_b) - s_b; // negate if s_b == -1 + s_b ^= s_a; // sign of quotient + tu_int r; + ti_int q = (__udivmodti4(a, b, &r) ^ s_b) - s_b; // negate if s_b == -1 + *rem = (r ^ s_a) - s_a; // negate if s_a == -1 + return q; +} + diff --git a/lib/int64/divti3.c b/lib/int64/divti3.c @@ -0,0 +1,24 @@ +//===-- divti3.c - Implement __divti3 -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __divti3 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + + +// Returns: a / b + +#define fixint_t ti_int +#define fixuint_t tu_int +#define COMPUTE_UDIV(a, b) __udivmodti4((a), (b), (tu_int *)0) +#include "int_div_impl.inc" + +COMPILER_RT_ABI ti_int __divti3(ti_int a, ti_int b) { return __divXi3(a, b); } + diff --git a/lib/int64/lshrti3.c b/lib/int64/lshrti3.c @@ -0,0 +1,36 @@ +//===-- lshrti3.c - Implement __lshrti3 -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __lshrti3 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + + +// Returns: logical a >> b + +// Precondition: 0 <= b < bits_in_tword + +COMPILER_RT_ABI ti_int __lshrti3(ti_int a, int b) { + const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT); + utwords input; + utwords result; + input.all = a; + if (b & bits_in_dword) /* bits_in_dword <= b < bits_in_tword */ { + result.s.high = 0; + result.s.low = input.s.high >> (b - bits_in_dword); + } else /* 0 <= b < bits_in_dword */ { + if (b == 0) + return a; + result.s.high = input.s.high >> b; + result.s.low = (input.s.high << (bits_in_dword - b)) | (input.s.low >> b); + } + return result.all; +} + diff --git a/lib/int64/modti3.c b/lib/int64/modti3.c @@ -0,0 +1,24 @@ +//===-- modti3.c - Implement __modti3 -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __modti3 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + + +// Returns: a % b + +#define fixint_t ti_int +#define fixuint_t tu_int +#define ASSIGN_UMOD(res, a, b) __udivmodti4((a), (b), &(res)) +#include "int_div_impl.inc" + +COMPILER_RT_ABI ti_int __modti3(ti_int a, ti_int b) { return __modXi3(a, b); } + diff --git a/lib/int64/multi3.c b/lib/int64/multi3.c @@ -0,0 +1,49 @@ +//===-- multi3.c - Implement __multi3 -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __multi3 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + + +// Returns: a * b + +static ti_int __mulddi3(du_int a, du_int b) { + twords r; + const int bits_in_dword_2 = (int)(sizeof(di_int) * CHAR_BIT) / 2; + const du_int lower_mask = (du_int)~0 >> bits_in_dword_2; + r.s.low = (a & lower_mask) * (b & lower_mask); + du_int t = r.s.low >> bits_in_dword_2; + r.s.low &= lower_mask; + t += (a >> bits_in_dword_2) * (b & lower_mask); + r.s.low += (t & lower_mask) << bits_in_dword_2; + r.s.high = t >> bits_in_dword_2; + t = r.s.low >> bits_in_dword_2; + r.s.low &= lower_mask; + t += (b >> bits_in_dword_2) * (a & lower_mask); + r.s.low += (t & lower_mask) << bits_in_dword_2; + r.s.high += t >> bits_in_dword_2; + r.s.high += (a >> bits_in_dword_2) * (b >> bits_in_dword_2); + return r.all; +} + +// Returns: a * b + +COMPILER_RT_ABI ti_int __multi3(ti_int a, ti_int b) { + twords x; + x.all = a; + twords y; + y.all = b; + twords r; + r.all = __mulddi3(x.s.low, y.s.low); + r.s.high += x.s.high * y.s.low + x.s.low * y.s.high; + return r.all; +} + diff --git a/lib/int64/negti2.c b/lib/int64/negti2.c @@ -0,0 +1,23 @@ +//===-- negti2.c - Implement __negti2 -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __negti2 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + + +// Returns: -a + +COMPILER_RT_ABI ti_int __negti2(ti_int a) { + // Note: this routine is here for API compatibility; any sane compiler + // should expand it inline. + return -(tu_int)a; +} + diff --git a/lib/int64/udivmodti4.c b/lib/int64/udivmodti4.c @@ -0,0 +1,148 @@ +//===-- udivmodti4.c - Implement __udivmodti4 -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __udivmodti4 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + + +// Returns the 128 bit division result by 64 bit. Result must fit in 64 bits. +// Remainder stored in r. +// Taken and adjusted from libdivide libdivide_128_div_64_to_64 division +// fallback. For a correctness proof see the reference for this algorithm +// in Knuth, Volume 2, section 4.3.1, Algorithm D. +UNUSED +static inline du_int udiv128by64to64default(du_int u1, du_int u0, du_int v, + du_int *r) { + const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT; + const du_int b = (1ULL << (n_udword_bits / 2)); // Number base (32 bits) + du_int un1, un0; // Norm. dividend LSD's + du_int vn1, vn0; // Norm. divisor digits + du_int q1, q0; // Quotient digits + du_int un64, un21, un10; // Dividend digit pairs + du_int rhat; // A remainder + si_int s; // Shift amount for normalization + + s = __builtin_clzll(v); + if (s > 0) { + // Normalize the divisor. + v = v << s; + un64 = (u1 << s) | (u0 >> (n_udword_bits - s)); + un10 = u0 << s; // Shift dividend left + } else { + // Avoid undefined behavior of (u0 >> 64). + un64 = u1; + un10 = u0; + } + + // Break divisor up into two 32-bit digits. + vn1 = v >> (n_udword_bits / 2); + vn0 = v & 0xFFFFFFFF; + + // Break right half of dividend into two digits. + un1 = un10 >> (n_udword_bits / 2); + un0 = un10 & 0xFFFFFFFF; + + // Compute the first quotient digit, q1. + q1 = un64 / vn1; + rhat = un64 - q1 * vn1; + + // q1 has at most error 2. No more than 2 iterations. + while (q1 >= b || q1 * vn0 > b * rhat + un1) { + q1 = q1 - 1; + rhat = rhat + vn1; + if (rhat >= b) + break; + } + + un21 = un64 * b + un1 - q1 * v; + + // Compute the second quotient digit. + q0 = un21 / vn1; + rhat = un21 - q0 * vn1; + + // q0 has at most error 2. No more than 2 iterations. + while (q0 >= b || q0 * vn0 > b * rhat + un0) { + q0 = q0 - 1; + rhat = rhat + vn1; + if (rhat >= b) + break; + } + + *r = (un21 * b + un0 - q0 * v) >> s; + return q1 * b + q0; +} + +static inline du_int udiv128by64to64(du_int u1, du_int u0, du_int v, + du_int *r) { + return udiv128by64to64default(u1, u0, v, r); +} + +// Effects: if rem != 0, *rem = a % b +// Returns: a / b + +COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int *rem) { + const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT; + utwords dividend; + dividend.all = a; + utwords divisor; + divisor.all = b; + utwords quotient; + utwords remainder; + if (divisor.all > dividend.all) { + if (rem) + *rem = dividend.all; + return 0; + } + // When the divisor fits in 64 bits, we can use an optimized path. + if (divisor.s.high == 0) { + remainder.s.high = 0; + if (dividend.s.high < divisor.s.low) { + // The result fits in 64 bits. + quotient.s.low = udiv128by64to64(dividend.s.high, dividend.s.low, + divisor.s.low, &remainder.s.low); + quotient.s.high = 0; + } else { + // First, divide with the high part to get the remainder in dividend.s.high. + // After that dividend.s.high < divisor.s.low. + quotient.s.high = dividend.s.high / divisor.s.low; + dividend.s.high = dividend.s.high % divisor.s.low; + quotient.s.low = udiv128by64to64(dividend.s.high, dividend.s.low, + divisor.s.low, &remainder.s.low); + } + if (rem) + *rem = remainder.all; + return quotient.all; + } + // 0 <= shift <= 63. + si_int shift = + __builtin_clzll(divisor.s.high) - __builtin_clzll(dividend.s.high); + divisor.all <<= shift; + quotient.s.high = 0; + quotient.s.low = 0; + for (; shift >= 0; --shift) { + quotient.s.low <<= 1; + // Branch free version of. + // if (dividend.all >= divisor.all) + // { + // dividend.all -= divisor.all; + // carry = 1; + // } + const ti_int s = + (ti_int)(divisor.all - dividend.all - 1) >> (n_utword_bits - 1); + quotient.s.low |= s & 1; + dividend.all -= divisor.all & s; + divisor.all >>= 1; + } + if (rem) + *rem = dividend.all; + return quotient.all; +} + diff --git a/lib/int64/udivti3.c b/lib/int64/udivti3.c @@ -0,0 +1,21 @@ +//===-- udivti3.c - Implement __udivti3 -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __udivti3 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + + +// Returns: a / b + +COMPILER_RT_ABI tu_int __udivti3(tu_int a, tu_int b) { + return __udivmodti4(a, b, 0); +} + diff --git a/lib/int64/umodti3.c b/lib/int64/umodti3.c @@ -0,0 +1,23 @@ +//===-- umodti3.c - Implement __umodti3 -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __umodti3 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + + +// Returns: a % b + +COMPILER_RT_ABI tu_int __umodti3(tu_int a, tu_int b) { + tu_int r; + __udivmodti4(a, b, &r); + return r; +} + diff --git a/lib/mem/mem.c b/lib/mem/mem.c @@ -0,0 +1,49 @@ +//===-- mem.c - cfree freestanding mem* primitives -----------------------===// +// +// SPDX-License-Identifier: 0BSD +// +// Portable C implementations of memcpy, memmove, memset, memcmp. +// All are weak so a user libc (or a tuned arch-specific version) wins. +// Targets without __builtin_trap can replace it; cfree always provides it. +//===----------------------------------------------------------------------===// + +#include <stddef.h> + +__attribute__((weak)) +void *memcpy(void *restrict dst, const void *restrict src, size_t n) { + unsigned char *d = (unsigned char *)dst; + const unsigned char *s = (const unsigned char *)src; + for (size_t i = 0; i < n; i++) d[i] = s[i]; + return dst; +} + +__attribute__((weak)) +void *memmove(void *dst, const void *src, size_t n) { + unsigned char *d = (unsigned char *)dst; + const unsigned char *s = (const unsigned char *)src; + if (d == s || n == 0) return dst; + if (d < s) { + for (size_t i = 0; i < n; i++) d[i] = s[i]; + } else { + for (size_t i = n; i-- > 0;) d[i] = s[i]; + } + return dst; +} + +__attribute__((weak)) +void *memset(void *dst, int c, size_t n) { + unsigned char *d = (unsigned char *)dst; + unsigned char v = (unsigned char)c; + for (size_t i = 0; i < n; i++) d[i] = v; + return dst; +} + +__attribute__((weak)) +int memcmp(const void *a, const void *b, size_t n) { + const unsigned char *p = (const unsigned char *)a; + const unsigned char *q = (const unsigned char *)b; + for (size_t i = 0; i < n; i++) { + if (p[i] != q[i]) return (int)p[i] - (int)q[i]; + } + return 0; +} diff --git a/lib/riscv/restore_rv32.S b/lib/riscv/restore_rv32.S @@ -0,0 +1,73 @@ +//===-- restore_rv32.S - restore up to 12 callee-save registers (RV32) ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + + .text + + .globl __riscv_restore_12 + .type __riscv_restore_12,@function +__riscv_restore_12: + lw s11, 12(sp) + addi sp, sp, 16 + // fallthrough into __riscv_restore_11/10/9/8 + + .globl __riscv_restore_11 + .type __riscv_restore_11,@function + .globl __riscv_restore_10 + .type __riscv_restore_10,@function + .globl __riscv_restore_9 + .type __riscv_restore_9,@function + .globl __riscv_restore_8 + .type __riscv_restore_8,@function +__riscv_restore_11: +__riscv_restore_10: +__riscv_restore_9: +__riscv_restore_8: + lw s10, 0(sp) + lw s9, 4(sp) + lw s8, 8(sp) + lw s7, 12(sp) + addi sp, sp, 16 + // fallthrough into __riscv_restore_7/6/5/4 + + .globl __riscv_restore_7 + .type __riscv_restore_7,@function + .globl __riscv_restore_6 + .type __riscv_restore_6,@function + .globl __riscv_restore_5 + .type __riscv_restore_5,@function + .globl __riscv_restore_4 + .type __riscv_restore_4,@function +__riscv_restore_7: +__riscv_restore_6: +__riscv_restore_5: +__riscv_restore_4: + lw s6, 0(sp) + lw s5, 4(sp) + lw s4, 8(sp) + lw s3, 12(sp) + addi sp, sp, 16 + // fallthrough into __riscv_restore_3/2/1/0 + + .globl __riscv_restore_3 + .type __riscv_restore_3,@function + .globl __riscv_restore_2 + .type __riscv_restore_2,@function + .globl __riscv_restore_1 + .type __riscv_restore_1,@function + .globl __riscv_restore_0 + .type __riscv_restore_0,@function +__riscv_restore_3: +__riscv_restore_2: +__riscv_restore_1: +__riscv_restore_0: + lw s2, 0(sp) + lw s1, 4(sp) + lw s0, 8(sp) + lw ra, 12(sp) + addi sp, sp, 16 + ret diff --git a/lib/riscv/restore_rv64.S b/lib/riscv/restore_rv64.S @@ -0,0 +1,82 @@ +//===-- restore_rv64.S - restore up to 12 callee-save registers (RV64) ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + + .text + + .globl __riscv_restore_12 + .type __riscv_restore_12,@function +__riscv_restore_12: + ld s11, 8(sp) + addi sp, sp, 16 + // fallthrough into __riscv_restore_11/10 + + .globl __riscv_restore_11 + .type __riscv_restore_11,@function + .globl __riscv_restore_10 + .type __riscv_restore_10,@function +__riscv_restore_11: +__riscv_restore_10: + ld s10, 0(sp) + ld s9, 8(sp) + addi sp, sp, 16 + // fallthrough into __riscv_restore_9/8 + + .globl __riscv_restore_9 + .type __riscv_restore_9,@function + .globl __riscv_restore_8 + .type __riscv_restore_8,@function +__riscv_restore_9: +__riscv_restore_8: + ld s8, 0(sp) + ld s7, 8(sp) + addi sp, sp, 16 + // fallthrough into __riscv_restore_7/6 + + .globl __riscv_restore_7 + .type __riscv_restore_7,@function + .globl __riscv_restore_6 + .type __riscv_restore_6,@function +__riscv_restore_7: +__riscv_restore_6: + ld s6, 0(sp) + ld s5, 8(sp) + addi sp, sp, 16 + // fallthrough into __riscv_restore_5/4 + + .globl __riscv_restore_5 + .type __riscv_restore_5,@function + .globl __riscv_restore_4 + .type __riscv_restore_4,@function +__riscv_restore_5: +__riscv_restore_4: + ld s4, 0(sp) + ld s3, 8(sp) + addi sp, sp, 16 + // fallthrough into __riscv_restore_3/2 + + .globl __riscv_restore_3 + .type __riscv_restore_3,@function + .globl __riscv_restore_2 + .type __riscv_restore_2,@function +__riscv_restore_3: +__riscv_restore_2: + ld s2, 0(sp) + ld s1, 8(sp) + addi sp, sp, 16 + // fallthrough into __riscv_restore_1/0 + + .globl __riscv_restore_1 + .type __riscv_restore_1,@function + .globl __riscv_restore_0 + .type __riscv_restore_0,@function +__riscv_restore_1: +__riscv_restore_0: + ld s0, 0(sp) + ld ra, 8(sp) + addi sp, sp, 16 + ret diff --git a/lib/riscv/save_rv32.S b/lib/riscv/save_rv32.S @@ -0,0 +1,88 @@ +//===-- save_rv32.S - save up to 12 callee-saved registers (RV32) --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Multiple entry points depending on number of registers to save. +// Entry points are grouped in 4s for rv32 to maintain 16-byte stack alignment. +// +//===----------------------------------------------------------------------===// + + .text + + .globl __riscv_save_12 + .type __riscv_save_12,@function +__riscv_save_12: + addi sp, sp, -64 + mv t1, zero + sw s11, 12(sp) + j .Lriscv_save_11_8 + + .globl __riscv_save_11 + .type __riscv_save_11,@function + .globl __riscv_save_10 + .type __riscv_save_10,@function + .globl __riscv_save_9 + .type __riscv_save_9,@function + .globl __riscv_save_8 + .type __riscv_save_8,@function +__riscv_save_11: +__riscv_save_10: +__riscv_save_9: +__riscv_save_8: + addi sp, sp, -64 + li t1, 16 +.Lriscv_save_11_8: + sw s10, 16(sp) + sw s9, 20(sp) + sw s8, 24(sp) + sw s7, 28(sp) + j .Lriscv_save_7_4 + + .globl __riscv_save_7 + .type __riscv_save_7,@function + .globl __riscv_save_6 + .type __riscv_save_6,@function + .globl __riscv_save_5 + .type __riscv_save_5,@function + .globl __riscv_save_4 + .type __riscv_save_4,@function +__riscv_save_7: +__riscv_save_6: +__riscv_save_5: +__riscv_save_4: + addi sp, sp, -64 + li t1, 32 +.Lriscv_save_7_4: + sw s6, 32(sp) + sw s5, 36(sp) + sw s4, 40(sp) + sw s3, 44(sp) + sw s2, 48(sp) + sw s1, 52(sp) + sw s0, 56(sp) + sw ra, 60(sp) + add sp, sp, t1 + jr t0 + + .globl __riscv_save_3 + .type __riscv_save_3,@function + .globl __riscv_save_2 + .type __riscv_save_2,@function + .globl __riscv_save_1 + .type __riscv_save_1,@function + .globl __riscv_save_0 + .type __riscv_save_0,@function +__riscv_save_3: +__riscv_save_2: +__riscv_save_1: +__riscv_save_0: + addi sp, sp, -16 + sw s2, 0(sp) + sw s1, 4(sp) + sw s0, 8(sp) + sw ra, 12(sp) + jr t0 diff --git a/lib/riscv/save_rv64.S b/lib/riscv/save_rv64.S @@ -0,0 +1,101 @@ +//===-- save_rv64.S - save up to 12 callee-saved registers (RV64) --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Multiple entry points depending on number of registers to save. +// Entry points are grouped in 2s for rv64 to maintain 16-byte stack alignment. +// +//===----------------------------------------------------------------------===// + + .text + + .globl __riscv_save_12 + .type __riscv_save_12,@function +__riscv_save_12: + addi sp, sp, -112 + mv t1, zero + sd s11, 8(sp) + j .Lriscv_save_11_10 + + .globl __riscv_save_11 + .type __riscv_save_11,@function + .globl __riscv_save_10 + .type __riscv_save_10,@function +__riscv_save_11: +__riscv_save_10: + addi sp, sp, -112 + li t1, 16 +.Lriscv_save_11_10: + sd s10, 16(sp) + sd s9, 24(sp) + j .Lriscv_save_9_8 + + .globl __riscv_save_9 + .type __riscv_save_9,@function + .globl __riscv_save_8 + .type __riscv_save_8,@function +__riscv_save_9: +__riscv_save_8: + addi sp, sp, -112 + li t1, 32 +.Lriscv_save_9_8: + sd s8, 32(sp) + sd s7, 40(sp) + j .Lriscv_save_7_6 + + .globl __riscv_save_7 + .type __riscv_save_7,@function + .globl __riscv_save_6 + .type __riscv_save_6,@function +__riscv_save_7: +__riscv_save_6: + addi sp, sp, -112 + li t1, 48 +.Lriscv_save_7_6: + sd s6, 48(sp) + sd s5, 56(sp) + j .Lriscv_save_5_4 + + .globl __riscv_save_5 + .type __riscv_save_5,@function + .globl __riscv_save_4 + .type __riscv_save_4,@function +__riscv_save_5: +__riscv_save_4: + addi sp, sp, -112 + li t1, 64 +.Lriscv_save_5_4: + sd s4, 64(sp) + sd s3, 72(sp) + j .Lriscv_save_3_2 + + .globl __riscv_save_3 + .type __riscv_save_3,@function + .globl __riscv_save_2 + .type __riscv_save_2,@function +__riscv_save_3: +__riscv_save_2: + addi sp, sp, -112 + li t1, 80 +.Lriscv_save_3_2: + sd s2, 80(sp) + sd s1, 88(sp) + sd s0, 96(sp) + sd ra, 104(sp) + add sp, sp, t1 + jr t0 + + .globl __riscv_save_1 + .type __riscv_save_1,@function + .globl __riscv_save_0 + .type __riscv_save_0,@function +__riscv_save_1: +__riscv_save_0: + addi sp, sp, -16 + sd s0, 0(sp) + sd ra, 8(sp) + jr t0