commit 8874cd3e9e3f4ec33010f3a603e03b831b4ba2d9
parent 310b5ab01ab448779044ddb36f3cdf5248ae18f3
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Thu, 7 May 2026 10:27:18 -0700
compiler_rt
Diffstat:
155 files changed, 7597 insertions(+), 0 deletions(-)
diff --git a/doc/builtins.md b/doc/builtins.md
@@ -114,6 +114,129 @@ Operations (signatures match the GCC `__atomic` builtin family):
---
+## `libcfree_rt.a` — runtime support library
+
+The codegen emits calls to symbols the user can't reasonably supply. cfree
+ships them all in a single archive: integer/float/atomic helpers *and* the
+`mem*` family the codegen lowers struct copies and aggregate inits to.
+
+Naming follows the libgcc / compiler-rt convention (`{op}{mode}{N}`, where
+mode is `qi/hi/si/di/ti/sf/df/tf` for 1/2/4/8/16-byte int and 32/64/128-bit
+float). All `mem*` are weak so a user libc wins.
+
+### Mem intrinsics (always shipped)
+- `memcpy`, `memmove`, `memset`, `memcmp`
+
+### Integer helpers
+Always:
+- Div/mod 64-bit: `__divdi3`, `__udivdi3`, `__moddi3`, `__umoddi3`, `__divmoddi4`, `__udivmoddi4`
+- Count/bits: `__clzsi2`, `__clzdi2`, `__ctzsi2`, `__ctzdi2`, `__ffsdi2`, `__popcountsi2`, `__popcountdi2`, `__paritysi2`, `__paritydi2`, `__bswapsi2`, `__bswapdi2`
+- Compare: `__cmpdi2`, `__ucmpdi2`
+- Negate/abs: `__negdi2`, `__absvdi2`
+
+64-bit targets only (128-bit `__int128` support):
+- `__divti3`, `__udivti3`, `__modti3`, `__umodti3`, `__divmodti4`, `__udivmodti4`
+- `__ashlti3`, `__lshrti3`, `__ashrti3`, `__multi3`, `__negti2`, `__clzti2`, `__ctzti2`
+
+32-bit targets only (no native 64-bit ops):
+- `__muldi3`, `__ashldi3`, `__lshrdi3`, `__ashrdi3`
+
+### Soft-float (only on FPU-less targets — RV{32,64}I, ARM `-mfloat-abi=soft`, WASM-no-simd)
+- Arithmetic `sf`/`df`/`tf`: `__add`, `__sub`, `__mul`, `__div`, `__neg`
+ → e.g. `__addsf3`, `__divdf3`, `__multf3`
+- Int → float: `__float{,un}{si,di,ti}{sf,df,tf}` (e.g. `__floatdisf`, `__floatunsidf`)
+- Float → int: `__fix{,uns}{sf,df,tf}{si,di,ti}` (e.g. `__fixdfdi`, `__fixunssfsi`)
+- Float → float: `__extendsfdf2`, `__extendsftf2`, `__extenddftf2`, `__truncdfsf2`, `__trunctfsf2`, `__trunctfdf2`
+- Compare: `__eq`, `__ne`, `__lt`, `__le`, `__gt`, `__ge`, `__unord` × `sf2`/`df2`/`tf2`
+
+### Atomic fallbacks (only when target lacks native atomics for that width)
+- Generic: `__atomic_load`, `__atomic_store`, `__atomic_exchange`, `__atomic_compare_exchange`
+- Sized N ∈ {1,2,4,8,16}: `__atomic_load_N`, `__atomic_store_N`, `__atomic_exchange_N`, `__atomic_compare_exchange_N`, `__atomic_fetch_{add,sub,and,or,xor,nand}_N`
+
+### Architecture-specific aliases
+
+**ARM AAPCS / AEABI** (32-bit ARM only — these are aliases the AEABI ABI mandates):
+- Int div/mod: `__aeabi_idiv`, `__aeabi_uidiv`, `__aeabi_idivmod`, `__aeabi_uidivmod`, `__aeabi_ldivmod`, `__aeabi_uldivmod`
+- 64-bit shift/mul: `__aeabi_llsl`, `__aeabi_llsr`, `__aeabi_lasr`, `__aeabi_lmul`
+- Soft-float arith: `__aeabi_{f,d}{add,sub,mul,div,neg}`, `__aeabi_{f,d}rsub`
+- Soft-float convert: `__aeabi_f2iz`, `__aeabi_f2uiz`, `__aeabi_f2lz`, `__aeabi_f2ulz`, `__aeabi_d2iz`, `__aeabi_d2uiz`, `__aeabi_d2lz`, `__aeabi_d2ulz`, `__aeabi_i2f`, `__aeabi_ui2f`, `__aeabi_l2f`, `__aeabi_ul2f`, `__aeabi_i2d`, `__aeabi_ui2d`, `__aeabi_l2d`, `__aeabi_ul2d`, `__aeabi_f2d`, `__aeabi_d2f`
+- Soft-float compare: `__aeabi_fcmp{eq,lt,le,gt,ge,un}`, `__aeabi_dcmp{eq,lt,le,gt,ge,un}`
+- Mem variants (size-specialized): `__aeabi_memcpy`, `__aeabi_memcpy{4,8}`, `__aeabi_memmove`, `__aeabi_memmove{4,8}`, `__aeabi_memset`, `__aeabi_memset{4,8}`, `__aeabi_memclr`, `__aeabi_memclr{4,8}`
+
+**RISC-V** (only with `-msave-restore`, used by RV32E/embedded code-size builds):
+- `__riscv_save_{0..12}`, `__riscv_restore_{0..12}`
+
+**x86 / x86_64**: no architecture-specific aliases; uses the generic libgcc names above.
+
+**WASM**: uses generic names; `memcpy`/`memset`/`memmove` may lower to `memory.copy` / `memory.fill` instructions instead of calls.
+
+---
+
+## Target-identification macros
+
+cfree predefines a small, stable set of macros so headers and user code
+can branch on architecture, OS, object format, and ABI without parsing
+target triples. Compatible-by-design with the GCC/Clang names — code
+written against `__x86_64__` / `__BYTE_ORDER__` / `__LP64__` works
+unchanged.
+
+### Compiler identification
+- `__cfree__` — defined to `1`
+- `__cfree_major__`, `__cfree_minor__`, `__cfree_patchlevel__`
+- `__STDC__ == 1`, `__STDC_VERSION__ == 201112L`
+- `__STDC_HOSTED__ == 0` (cfree is freestanding-only)
+- `__STDC_NO_COMPLEX__`, `__STDC_NO_THREADS__`, `__STDC_NO_VLA__` defined
+- `__STDC_NO_ATOMICS__` *not* defined (cfree implements `<stdatomic.h>`)
+
+### Architecture (exactly one defined)
+- `__i386__` — 32-bit x86
+- `__x86_64__` (and `__amd64__`) — 64-bit x86
+- `__arm__` — 32-bit ARM
+- `__aarch64__` — 64-bit ARM
+- `__riscv` — RISC-V (any width); paired with `__riscv_xlen` ∈ {32, 64}
+- `__wasm__` — WebAssembly; paired with `__wasm32__` or `__wasm64__`
+
+### Pointer width / data model
+- `__SIZEOF_POINTER__`, `__SIZEOF_LONG__`, `__SIZEOF_SIZE_T__`, `__SIZEOF_PTRDIFF_T__`, `__SIZEOF_WCHAR_T__`, `__SIZEOF_INT__`, `__SIZEOF_LONG_LONG__`, `__SIZEOF_FLOAT__`, `__SIZEOF_DOUBLE__`, `__SIZEOF_LONG_DOUBLE__`
+- One of: `__LP64__` / `_LP64` (Unix 64), `__ILP32__` (32-bit), or neither (LLP64 — Win64)
+
+### Endianness
+- `__BYTE_ORDER__` set to one of `__ORDER_LITTLE_ENDIAN__` / `__ORDER_BIG_ENDIAN__`
+- `__ORDER_LITTLE_ENDIAN__ == 1234`, `__ORDER_BIG_ENDIAN__ == 4321` (values match GCC)
+
+### OS / platform (zero or one defined; freestanding bare-metal defines none)
+- `__linux__` — Linux ABI
+- `__APPLE__` and `__MACH__` — Darwin / macOS
+- `_WIN32` (always on Windows), plus `_WIN64` on 64-bit Windows
+
+### Object format (exactly one defined per output)
+- `__ELF__` — ELF (Linux, *BSD, bare-metal Unix-ish)
+- `__MACH__` — Mach-O (Darwin)
+- `_WIN32` doubles as the PE/COFF marker (matches MSVC/MinGW convention)
+
+### ARM-specific (defined only when `__arm__` or `__aarch64__`)
+- `__ARM_ARCH` — integer arch version (7, 8, …); plus profile-specific `__ARM_ARCH_{7A,7R,7M,8A,…}__`
+- `__ARM_EABI__` — defined on AAPCS/AEABI targets (always, for cfree's ARM32)
+- `__ARM_PCS` (base PCS) or `__ARM_PCS_VFP` (hard-float PCS)
+- `__ARM_FP` — bitmask of supported FP widths (0x4=fp32, 0x8=fp64); undefined on soft-float
+- `__SOFTFP__` — defined ⇔ `-mfloat-abi=soft` (no FPU instructions, soft-float ABI)
+- `__ARM_NEON` — defined ⇔ NEON SIMD available
+
+### RISC-V-specific (defined only when `__riscv`)
+- `__riscv_xlen` ∈ {32, 64}
+- `__riscv_flen` ∈ {0, 32, 64} — widest hardware FP register (0 ⇒ soft-float)
+- Extension flags (defined ⇔ extension is on): `__riscv_mul`, `__riscv_div`, `__riscv_atomic`, `__riscv_compressed`, `__riscv_fdiv`, `__riscv_fsqrt`
+- ABI: `__riscv_float_abi_soft`, `__riscv_float_abi_single`, `__riscv_float_abi_double` (exactly one)
+
+### x86-specific (defined only when `__i386__` or `__x86_64__`)
+- Feature flags follow GCC names, defined ⇔ enabled at the chosen `-march`: `__SSE__`, `__SSE2__`, `__SSE3__`, `__SSSE3__`, `__SSE4_1__`, `__SSE4_2__`, `__AVX__`, `__AVX2__`, `__BMI__`, `__BMI2__`, `__POPCNT__`, `__FMA__`
+
+### WASM-specific (defined only when `__wasm__`)
+- `__wasm_simd128__` — defined ⇔ SIMD proposal enabled
+- `__wasm_bulk_memory__` — defined ⇔ `memory.copy`/`memory.fill` available (gates the lowering noted under mem intrinsics)
+
+---
+
## Discovery
To enumerate what a compiler predefines for the current target:
diff --git a/lib/LICENSE-compiler-rt.txt b/lib/LICENSE-compiler-rt.txt
@@ -0,0 +1,311 @@
+==============================================================================
+The LLVM Project is under the Apache License v2.0 with LLVM Exceptions:
+==============================================================================
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+
+---- LLVM Exceptions to the Apache 2.0 License ----
+
+As an exception, if, as a result of your compiling your source code, portions
+of this Software are embedded into an Object form of such source code, you
+may redistribute such embedded portions in such Object form without complying
+with the conditions of Sections 4(a), 4(b) and 4(d) of the License.
+
+In addition, if you combine or link compiled forms of this Software with
+software that is licensed under the GPLv2 ("Combined Software") and if a
+court of competent jurisdiction determines that the patent provision (Section
+3), the indemnity provision (Section 9) or other Section of the License
+conflicts with the conditions of the GPLv2, you may retroactively and
+prospectively choose to deem waived or otherwise exclude such Section(s) of
+the License, but only in their entirety and only with respect to the Combined
+Software.
+
+==============================================================================
+Software from third parties included in the LLVM Project:
+==============================================================================
+The LLVM Project contains third party software which is under different license
+terms. All such code will be identified clearly using at least one of two
+mechanisms:
+1) It will be in a separate directory tree with its own `LICENSE.txt` or
+ `LICENSE` file at the top containing the specific license and restrictions
+ which apply to that software, or
+2) It will contain specific license and restriction terms at the top of every
+ file.
+
+==============================================================================
+Legacy LLVM License (https://llvm.org/docs/DeveloperPolicy.html#legacy):
+==============================================================================
+
+The compiler_rt library is dual licensed under both the University of Illinois
+"BSD-Like" license and the MIT license. As a user of this code you may choose
+to use it under either license. As a contributor, you agree to allow your code
+to be used under both.
+
+Full text of the relevant licenses is included below.
+
+==============================================================================
+
+University of Illinois/NCSA
+Open Source License
+
+Copyright (c) 2009-2019 by the contributors listed in CREDITS.TXT
+
+All rights reserved.
+
+Developed by:
+
+ LLVM Team
+
+ University of Illinois at Urbana-Champaign
+
+ http://llvm.org
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal with
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimers.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimers in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the names of the LLVM Team, University of Illinois at
+ Urbana-Champaign, nor the names of its contributors may be used to
+ endorse or promote products derived from this Software without specific
+ prior written permission.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
+SOFTWARE.
+
+==============================================================================
+
+Copyright (c) 2009-2015 by the contributors listed in CREDITS.TXT
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/lib/README.md b/lib/README.md
@@ -0,0 +1,167 @@
+# lib/ — `libcfree_rt.a` source
+
+Per-op runtime helpers for cfree, copied from
+[compiler-rt 18.1.8](https://github.com/llvm/llvm-project/releases/tag/llvmorg-18.1.8)
+(`lib/builtins/`) and stripped to remove ifdefs that select between targets.
+The build system picks files by directory; each `.c` / `.S` file compiles to
+exactly one target without preprocessor branching.
+
+License: Apache-2.0 WITH LLVM-exception (see `LICENSE-compiler-rt.txt`). The
+hand-written `mem/mem.c` is 0BSD; relicense as desired.
+
+## Layout
+
+### Source dirs (each `.c` / `.S` becomes one object in `libcfree_rt.a`)
+
+| Directory | Purpose | Built on |
+| -------------- | --------------------------------------------------------- | ----------------------------------------- |
+| `int/` | Integer helpers needed on every target | All |
+| `int32/` | 64-bit ops synthesized from 32-bit | ILP32 only |
+| `int64/` | 128-bit ops via `__int128` | LP64 / LLP64 only |
+| `fp/` | Soft-float `sf` (binary32) and `df` (binary64) | FPU-less targets (RV{32,64}I, ARM softfp, WASM) |
+| `fp_tf/` | Soft-float `tf` (binary128) | Targets with binary128 long double (e.g. aarch64 `-mlong-double-128`) |
+| `fp_ti/` | `__int128` ↔ float | LP64 / LLP64 + soft-float |
+| `arm/` | ARM AAPCS / AEABI aliases & wrappers | 32-bit ARM |
+| `riscv/` | `__riscv_save_*` / `__riscv_restore_*` (`-msave-restore`) | RISC-V (per-xlen file) |
+| `mem/` | `memcpy` / `memmove` / `memset` / `memcmp` (weak) | All; user libc overrides |
+| `atomic/` | `__atomic_*` fallback shim | All |
+
+### Build-time include dirs (consumed by the source dirs above; nothing here lands in `libcfree_rt.a`)
+
+| Directory | Consumed by |
+| -------------------------- | ------------------------------------------------------------------------------------------ |
+| `impl/` | `int/divdi3.c` etc. (via `int_div_impl.inc`); every `fp*/`, `fp_tf/`, `fp_ti/` file |
+| `include/common/` | All `.c` (transitively, via the per-target `int_lib.h`); `arm/*.S` includes `assembly.h` |
+| `include/lp64_le/` | LP64 builds; selected via `-Iinclude/lp64_le` |
+| `include/llp64_le/` | LLP64 (Win64) builds |
+| `include/ilp32_le/` | ILP32 builds |
+| `include/lp64_le_ldbl128/` | Extra `-include tf_supplement.h` when compiling `fp_tf/` on binary128-long-double targets |
+| `atomic/atomic_common.inc` | `atomic/atomic_freestanding.c` |
+
+## How the build picks files
+
+```text
+target tuple ⟶ compile
+
+x86_64-linux / aarch64-linux / aarch64-darwin / rv64
+ ⟶ int/* int64/* fp/*
+ atomic/atomic_freestanding.c
+ mem/mem.c
+ -Iinclude/lp64_le
+ -DHAS_INT128=1
+
+x86_64-windows ⟶ int/* int64/* fp/* atomic/* mem/*
+ -Iinclude/llp64_le
+ -DHAS_INT128=1
+
+i386-* / arm32-* / rv32 / wasm32 ⟶ int/* int32/* fp/* atomic/* mem/*
+ -Iinclude/ilp32_le
+ -DHAS_INT128=0
+
+aarch64-* with binary128 long double ⟶ above + fp_tf/* fp_ti/*
+ -include include/lp64_le_ldbl128/tf_supplement.h
+
+rv32 with -msave-restore ⟶ above + riscv/save_rv32.S riscv/restore_rv32.S
+rv64 with -msave-restore ⟶ above + riscv/save_rv64.S riscv/restore_rv64.S
+arm32 ARMv7+/Thumb2 (AEABI) ⟶ above + arm/aeabi_*.{S,c} (excluding *_thumb1.S)
+arm32 ARMv6-M Thumb1 (Cortex-M0/M0+/M1) ⟶ above + arm/aeabi_*_thumb1.S
+ + the AEABI files with no Thumb1 variant
+ (aeabi_ldivmod.S, aeabi_uldivmod.S,
+ aeabi_dcmp.S, aeabi_fcmp.S,
+ aeabi_drsub.c, aeabi_frsub.c)
+```
+
+`-Iinclude/common` is always added.
+
+## Endianness
+
+All headers assume `__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__`. cfree's
+supported targets (x86, ARM-LE, RISC-V, WASM) are LE in practice. Big-endian
+support would need a parallel `*_be/` header set; not provided.
+
+## Files with surviving preprocessor logic
+
+The "no ifdefs" rule is applied to every `.c` and `.S` source — none branch on
+target. Some headers and shared `.inc` files retain preprocessor logic that is
+*not* target dispatch:
+
+- `impl/*.inc`, `impl/*.h`, `include/common/fp_lib.h` — parameterized via
+ `SINGLE_PRECISION` / `DOUBLE_PRECISION` / `QUAD_PRECISION` and the
+ `SRC_*` / `DST_*` selectors. Each `.c` file `#define`s exactly one before
+ the include; this is the metaprogramming model compiler-rt uses for fp ops.
+- `include/common/assembly.h` — abstracts assembler syntax (ELF vs Mach-O vs
+ COFF symbol decoration, ARM/Thumb mode markers, etc.). Heavily ifdef'd by
+ design; consumed only by `arm/*.S`.
+- `atomic/atomic_common.inc` — keys the 16-byte cases off `HAS_INT128`, set
+ by the build (`-DHAS_INT128=1` on 64-bit, `-DHAS_INT128=0` on 32-bit).
+
+## Notes per directory
+
+### `mem/mem.c`
+Hand-written portable C (not from compiler-rt). All four functions are weak so
+a user libc, or a tuned arch-specific replacement, wins at link time. The
+existing `arm/aeabi_mem*.S` files forward to these symbols.
+
+### `atomic/`
+`atomic_freestanding.c` defines a pointer-sized `_Atomic(uintptr_t)` spinlock
+as the lock primitive (no OS dependency) then `#include`s `atomic_common.inc`,
+which contains the dispatch logic and all `__atomic_*_N` expansions. The
+shim calls the GCC `__atomic_*` builtin family (the one cfree documents in
+`doc/builtins.md`); upstream's Clang-only `__c11_atomic_*` calls were
+translated. Public symbols are exported via `#pragma redefine_extname` from
+`_c`-suffixed names so they don't collide with the clang builtins of the
+same name.
+
+### `arm/`
+AEABI aliases for div/mod, soft-float compares, and the `aeabi_mem{cpy,move,set,clr}`
+size-specialized wrappers. Six files have a `*_thumb1.S` companion for
+ARMv6-M (Cortex-M0/M0+/M1), where the ISA can't tail-call (`b memcpy`) or
+fold the `subs/muls` form into one instruction:
+`aeabi_idivmod`, `aeabi_uidivmod`, `aeabi_memcpy`, `aeabi_memmove`,
+`aeabi_memset`, `aeabi_memcmp`. The build picks one variant per symbol —
+the Thumb1 file for ARMv6-M, the base file for ARMv7+/Thumb2. The remaining
+AEABI files (`aeabi_ldivmod`, `aeabi_uldivmod`, `aeabi_{d,f}cmp`,
+`aeabi_{d,f}rsub`) are ISA-agnostic and used as-is on both.
+
+### `riscv/`
+`save.S` / `restore.S` upstream are one file each, gated on `__riscv_xlen`.
+Split into `save_rv32.S` / `save_rv64.S` and `restore_rv32.S` / `restore_rv64.S`.
+The embedded ABI variants (`__riscv_32e` / `__riscv_64e`) were not carried over.
+
+### `fp_tf/`
+Compile only on targets where `long double` is IEEE binary128 (typically
+`aarch64` with `-mlong-double-128`). The build must `-include`
+`lp64_le_ldbl128/tf_supplement.h` so `tf_float`, `CRT_HAS_TF_MODE`, etc. are
+defined before `fp_lib.h` is processed.
+
+### Compare files
+`fp/comparesf2.c`, `fp/comparedf2.c`, `fp_tf/comparetf2.c` define every variant
+(`__eqXf2`, `__ltXf2`, `__neXf2`, `__gtXf2`) as a separate function rather than
+using `COMPILER_RT_ALIAS`. Replaces an object-format-conditional macro with a
+handful of one-line wrappers.
+
+### `int/int_util.c`
+Replaced upstream's hosted/kernel/Apple/Win32 abort cascade with a single
+freestanding `__compilerrt_abort_impl` that calls `__builtin_trap()`.
+
+## Things this lib does NOT cover
+
+These are documented in `doc/builtins.md` but not provided here:
+
+- `__riscv_save_*` for `__riscv_32e` / `__riscv_64e` (rare embedded ABIs).
+- Big-endian targets.
+- 80-bit `xf` (x86 long double) soft-float — cfree's spec doesn't list `xf`
+ ops in the runtime contract; x86 always has the FPU for long double.
+- Half-precision (`hf`) conversions — not in the cfree contract.
+
+## Updating from a newer compiler-rt
+
+When pulling in a new release:
+1. Diff `lib/builtins/*.c` against the corresponding files here. Per-op files
+ typically need only the `__ARM_EABI__` / `__MINGW32__` / `__SOFTFP__` blocks
+ re-stripped.
+2. Re-strip `int_util.c` to the freestanding-only abort path.
+3. Re-split any new `riscv/` or `arm/` files that combine modes.
+4. Diff `int_lib.h` / `int_types.h` / `fp_lib.h` against the per-target copies
+ under `include/`; update for any new feature gates that aren't legitimately
+ target-orthogonal.
diff --git a/lib/arm/aeabi_dcmp.S b/lib/arm/aeabi_dcmp.S
@@ -0,0 +1,45 @@
+//===-- aeabi_dcmp.S - EABI dcmp* implementation ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "assembly.h"
+
+// int __aeabi_dcmp{eq,lt,le,ge,gt}(double a, double b) {
+// int result = __{eq,lt,le,ge,gt}df2(a, b);
+// if (result {==,<,<=,>=,>} 0) {
+// return 1;
+// } else {
+// return 0;
+// }
+// }
+
+# define CONVERT_DCMP_ARGS_TO_DF2_ARGS
+
+#define DEFINE_AEABI_DCMP(cond) \
+ .syntax unified SEPARATOR \
+ .p2align 2 SEPARATOR \
+DEFINE_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond) \
+ push { r4, lr } SEPARATOR \
+ CONVERT_DCMP_ARGS_TO_DF2_ARGS SEPARATOR \
+ bl SYMBOL_NAME(__ ## cond ## df2) SEPARATOR \
+ cmp r0, #0 SEPARATOR \
+ b ## cond 1f SEPARATOR \
+ movs r0, #0 SEPARATOR \
+ pop { r4, pc } SEPARATOR \
+1: SEPARATOR \
+ movs r0, #1 SEPARATOR \
+ pop { r4, pc } SEPARATOR \
+END_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond)
+
+DEFINE_AEABI_DCMP(eq)
+DEFINE_AEABI_DCMP(lt)
+DEFINE_AEABI_DCMP(le)
+DEFINE_AEABI_DCMP(ge)
+DEFINE_AEABI_DCMP(gt)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/arm/aeabi_drsub.c b/lib/arm/aeabi_drsub.c
@@ -0,0 +1,14 @@
+//===-- lib/arm/aeabi_drsub.c - Double-precision subtraction --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+
+AEABI_RTABI fp_t __aeabi_dsub(fp_t, fp_t);
+
+AEABI_RTABI fp_t __aeabi_drsub(fp_t a, fp_t b) { return __aeabi_dsub(b, a); }
diff --git a/lib/arm/aeabi_fcmp.S b/lib/arm/aeabi_fcmp.S
@@ -0,0 +1,45 @@
+//===-- aeabi_fcmp.S - EABI fcmp* implementation ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "assembly.h"
+
+// int __aeabi_fcmp{eq,lt,le,ge,gt}(float a, float b) {
+// int result = __{eq,lt,le,ge,gt}sf2(a, b);
+// if (result {==,<,<=,>=,>} 0) {
+// return 1;
+// } else {
+// return 0;
+// }
+// }
+
+# define CONVERT_FCMP_ARGS_TO_SF2_ARGS
+
+#define DEFINE_AEABI_FCMP(cond) \
+ .syntax unified SEPARATOR \
+ .p2align 2 SEPARATOR \
+DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond) \
+ push { r4, lr } SEPARATOR \
+ CONVERT_FCMP_ARGS_TO_SF2_ARGS SEPARATOR \
+ bl SYMBOL_NAME(__ ## cond ## sf2) SEPARATOR \
+ cmp r0, #0 SEPARATOR \
+ b ## cond 1f SEPARATOR \
+ movs r0, #0 SEPARATOR \
+ pop { r4, pc } SEPARATOR \
+1: SEPARATOR \
+ movs r0, #1 SEPARATOR \
+ pop { r4, pc } SEPARATOR \
+END_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond)
+
+DEFINE_AEABI_FCMP(eq)
+DEFINE_AEABI_FCMP(lt)
+DEFINE_AEABI_FCMP(le)
+DEFINE_AEABI_FCMP(ge)
+DEFINE_AEABI_FCMP(gt)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/arm/aeabi_frsub.c b/lib/arm/aeabi_frsub.c
@@ -0,0 +1,14 @@
+//===-- lib/arm/aeabi_frsub.c - Single-precision subtraction --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+AEABI_RTABI fp_t __aeabi_fsub(fp_t, fp_t);
+
+AEABI_RTABI fp_t __aeabi_frsub(fp_t a, fp_t b) { return __aeabi_fsub(b, a); }
diff --git a/lib/arm/aeabi_idivmod.S b/lib/arm/aeabi_idivmod.S
@@ -0,0 +1,33 @@
+//===-- aeabi_idivmod.S - EABI idivmod implementation ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "assembly.h"
+
+// struct { int quot, int rem} __aeabi_idivmod(int numerator, int denominator) {
+// int rem, quot;
+// quot = __divmodsi4(numerator, denominator, &rem);
+// return {quot, rem};
+// }
+
+
+ .syntax unified
+ .text
+ DEFINE_CODE_STATE
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_idivmod)
+ push { lr }
+ sub sp, sp, #4
+ mov r2, sp
+ bl SYMBOL_NAME(__divmodsi4)
+ ldr r1, [sp]
+ add sp, sp, #4
+ pop { pc }
+END_COMPILERRT_FUNCTION(__aeabi_idivmod)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/arm/aeabi_idivmod_thumb1.S b/lib/arm/aeabi_idivmod_thumb1.S
@@ -0,0 +1,28 @@
+//===-- aeabi_idivmod_thumb1.S - Thumb1 (Cortex-M0/M1) variant -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Thumb1 has no muls-with-3-operands and limited register-set instructions,
+// so __aeabi_idivmod is implemented as: quot = __divsi3(num, denom);
+// rem = num - quot * denom.
+// Used on ARMv6-M (Cortex-M0/M0+/M1).
+//===----------------------------------------------------------------------===//
+
+#include "assembly.h"
+
+ .syntax unified
+ .text
+ DEFINE_CODE_STATE
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_idivmod)
+ push {r0, r1, lr}
+ bl SYMBOL_NAME(__divsi3)
+ pop {r1, r2, r3} // now r0 = quot, r1 = num, r2 = denom
+ muls r2, r0, r2 // r2 = quot * denom
+ subs r1, r1, r2
+ JMP (r3)
+END_COMPILERRT_FUNCTION(__aeabi_idivmod)
+
+NO_EXEC_STACK_DIRECTIVE
diff --git a/lib/arm/aeabi_ldivmod.S b/lib/arm/aeabi_ldivmod.S
@@ -0,0 +1,34 @@
+//===-- aeabi_ldivmod.S - EABI ldivmod implementation ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "assembly.h"
+
+// struct { int64_t quot, int64_t rem}
+// __aeabi_ldivmod(int64_t numerator, int64_t denominator) {
+// int64_t rem, quot;
+// quot = __divmoddi4(numerator, denominator, &rem);
+// return {quot, rem};
+// }
+
+
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_ldivmod)
+ push {r6, lr}
+ sub sp, sp, #16
+ add r6, sp, #8
+ str r6, [sp]
+ bl SYMBOL_NAME(__divmoddi4)
+ ldr r2, [sp, #8]
+ ldr r3, [sp, #12]
+ add sp, sp, #16
+ pop {r6, pc}
+END_COMPILERRT_FUNCTION(__aeabi_ldivmod)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/arm/aeabi_memcmp.S b/lib/arm/aeabi_memcmp.S
@@ -0,0 +1,23 @@
+//===-- aeabi_memcmp.S - EABI memcmp implementation -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "assembly.h"
+
+// void __aeabi_memcmp(void *dest, void *src, size_t n) { memcmp(dest, src, n); }
+
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_memcmp)
+ b memcmp
+END_COMPILERRT_FUNCTION(__aeabi_memcmp)
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcmp4, __aeabi_memcmp)
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcmp8, __aeabi_memcmp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/arm/aeabi_memcmp_thumb1.S b/lib/arm/aeabi_memcmp_thumb1.S
@@ -0,0 +1,25 @@
+//===-- aeabi_memcmp_thumb1.S - Thumb1 (Cortex-M0/M1) variant ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Used on ARMv6-M (Cortex-M0/M0+/M1).
+//===----------------------------------------------------------------------===//
+
+#include "assembly.h"
+
+// void __aeabi_memcmp(void *dest, void *src, size_t n) { memcmp(dest, src, n); }
+
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_memcmp)
+ push {r7, lr}
+ bl memcmp
+ pop {r7, pc}
+END_COMPILERRT_FUNCTION(__aeabi_memcmp)
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcmp4, __aeabi_memcmp)
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcmp8, __aeabi_memcmp)
+
+NO_EXEC_STACK_DIRECTIVE
diff --git a/lib/arm/aeabi_memcpy.S b/lib/arm/aeabi_memcpy.S
@@ -0,0 +1,23 @@
+//===-- aeabi_memcpy.S - EABI memcpy implementation -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "assembly.h"
+
+// void __aeabi_memcpy(void *dest, void *src, size_t n) { memcpy(dest, src, n); }
+
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_memcpy)
+ b memcpy
+END_COMPILERRT_FUNCTION(__aeabi_memcpy)
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcpy4, __aeabi_memcpy)
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcpy8, __aeabi_memcpy)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/arm/aeabi_memcpy_thumb1.S b/lib/arm/aeabi_memcpy_thumb1.S
@@ -0,0 +1,25 @@
+//===-- aeabi_memcpy_thumb1.S - Thumb1 (Cortex-M0/M1) variant ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Used on ARMv6-M (Cortex-M0/M0+/M1).
+//===----------------------------------------------------------------------===//
+
+#include "assembly.h"
+
+// void __aeabi_memcpy(void *dest, void *src, size_t n) { memcpy(dest, src, n); }
+
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_memcpy)
+ push {r7, lr}
+ bl memcpy
+ pop {r7, pc}
+END_COMPILERRT_FUNCTION(__aeabi_memcpy)
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcpy4, __aeabi_memcpy)
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcpy8, __aeabi_memcpy)
+
+NO_EXEC_STACK_DIRECTIVE
diff --git a/lib/arm/aeabi_memmove.S b/lib/arm/aeabi_memmove.S
@@ -0,0 +1,22 @@
+//===-- aeabi_memmove.S - EABI memmove implementation --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------------===//
+
+#include "assembly.h"
+
+// void __aeabi_memmove(void *dest, void *src, size_t n) { memmove(dest, src, n); }
+
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_memmove)
+ b memmove
+END_COMPILERRT_FUNCTION(__aeabi_memmove)
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memmove4, __aeabi_memmove)
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memmove8, __aeabi_memmove)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/arm/aeabi_memmove_thumb1.S b/lib/arm/aeabi_memmove_thumb1.S
@@ -0,0 +1,24 @@
+//===-- aeabi_memmove_thumb1.S - Thumb1 (Cortex-M0/M1) variant -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Used on ARMv6-M (Cortex-M0/M0+/M1).
+//===---------------------------------------------------------------------===//
+
+#include "assembly.h"
+
+// void __aeabi_memmove(void *dest, void *src, size_t n) { memmove(dest, src, n); }
+
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_memmove)
+ push {r7, lr}
+ bl memmove
+ pop {r7, pc}
+END_COMPILERRT_FUNCTION(__aeabi_memmove)
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memmove4, __aeabi_memmove)
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memmove8, __aeabi_memmove)
+
+NO_EXEC_STACK_DIRECTIVE
diff --git a/lib/arm/aeabi_memset.S b/lib/arm/aeabi_memset.S
@@ -0,0 +1,37 @@
+//===-- aeabi_memset.S - EABI memset implementation -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "assembly.h"
+
+// void __aeabi_memset(void *dest, size_t n, int c) { memset(dest, c, n); }
+// void __aeabi_memclr(void *dest, size_t n) { __aeabi_memset(dest, n, 0); }
+
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_memset)
+ mov r3, r1
+ mov r1, r2
+ mov r2, r3
+ b memset
+END_COMPILERRT_FUNCTION(__aeabi_memset)
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memset4, __aeabi_memset)
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memset8, __aeabi_memset)
+
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_memclr)
+ mov r2, r1
+ movs r1, #0
+ b memset
+END_COMPILERRT_FUNCTION(__aeabi_memclr)
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memclr4, __aeabi_memclr)
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memclr8, __aeabi_memclr)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/arm/aeabi_memset_thumb1.S b/lib/arm/aeabi_memset_thumb1.S
@@ -0,0 +1,42 @@
+//===-- aeabi_memset_thumb1.S - Thumb1 (Cortex-M0/M1) variant ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Thumb1 lacks an unconditional `b <reg>` to a far symbol, so we bl/pop
+// instead of tail-calling memset. Used on ARMv6-M (Cortex-M0/M0+/M1).
+//===----------------------------------------------------------------------===//
+
+#include "assembly.h"
+
+// void __aeabi_memset(void *dest, size_t n, int c) { memset(dest, c, n); }
+// void __aeabi_memclr(void *dest, size_t n) { __aeabi_memset(dest, n, 0); }
+
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_memset)
+ mov r3, r1
+ mov r1, r2
+ mov r2, r3
+ push {r7, lr}
+ bl memset
+ pop {r7, pc}
+END_COMPILERRT_FUNCTION(__aeabi_memset)
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memset4, __aeabi_memset)
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memset8, __aeabi_memset)
+
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_memclr)
+ mov r2, r1
+ movs r1, #0
+ push {r7, lr}
+ bl memset
+ pop {r7, pc}
+END_COMPILERRT_FUNCTION(__aeabi_memclr)
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memclr4, __aeabi_memclr)
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memclr8, __aeabi_memclr)
+
+NO_EXEC_STACK_DIRECTIVE
diff --git a/lib/arm/aeabi_uidivmod.S b/lib/arm/aeabi_uidivmod.S
@@ -0,0 +1,34 @@
+//===-- aeabi_uidivmod.S - EABI uidivmod implementation -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "assembly.h"
+
+// struct { unsigned quot, unsigned rem}
+// __aeabi_uidivmod(unsigned numerator, unsigned denominator) {
+// unsigned rem, quot;
+// quot = __udivmodsi4(numerator, denominator, &rem);
+// return {quot, rem};
+// }
+
+
+ .syntax unified
+ .text
+ DEFINE_CODE_STATE
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_uidivmod)
+ push { lr }
+ sub sp, sp, #4
+ mov r2, sp
+ bl SYMBOL_NAME(__udivmodsi4)
+ ldr r1, [sp]
+ add sp, sp, #4
+ pop { pc }
+END_COMPILERRT_FUNCTION(__aeabi_uidivmod)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/arm/aeabi_uidivmod_thumb1.S b/lib/arm/aeabi_uidivmod_thumb1.S
@@ -0,0 +1,31 @@
+//===-- aeabi_uidivmod_thumb1.S - Thumb1 (Cortex-M0/M1) variant ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Used on ARMv6-M (Cortex-M0/M0+/M1).
+//===----------------------------------------------------------------------===//
+
+#include "assembly.h"
+
+ .syntax unified
+ .text
+ DEFINE_CODE_STATE
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_uidivmod)
+ cmp r0, r1
+ bcc LOCAL_LABEL(case_denom_larger)
+ push {r0, r1, lr}
+ bl SYMBOL_NAME(__aeabi_uidiv)
+ pop {r1, r2, r3}
+ muls r2, r0, r2 // r2 = quot * denom
+ subs r1, r1, r2
+ JMP (r3)
+LOCAL_LABEL(case_denom_larger):
+ movs r1, r0
+ movs r0, #0
+ JMP (lr)
+END_COMPILERRT_FUNCTION(__aeabi_uidivmod)
+
+NO_EXEC_STACK_DIRECTIVE
diff --git a/lib/arm/aeabi_uldivmod.S b/lib/arm/aeabi_uldivmod.S
@@ -0,0 +1,34 @@
+//===-- aeabi_uldivmod.S - EABI uldivmod implementation -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "assembly.h"
+
+// struct { uint64_t quot, uint64_t rem}
+// __aeabi_uldivmod(uint64_t numerator, uint64_t denominator) {
+// uint64_t rem, quot;
+// quot = __udivmoddi4(numerator, denominator, &rem);
+// return {quot, rem};
+// }
+
+
+ .syntax unified
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_uldivmod)
+ push {r6, lr}
+ sub sp, sp, #16
+ add r6, sp, #8
+ str r6, [sp]
+ bl SYMBOL_NAME(__udivmoddi4)
+ ldr r2, [sp, #8]
+ ldr r3, [sp, #12]
+ add sp, sp, #16
+ pop {r6, pc}
+END_COMPILERRT_FUNCTION(__aeabi_uldivmod)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/atomic/atomic_common.inc b/lib/atomic/atomic_common.inc
@@ -0,0 +1,238 @@
+//===-- atomic_common.inc - shared body for atomic_*.c ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Includer must define BEFORE this include:
+// typedef ... Lock;
+// static inline void lock(Lock *l);
+// static inline void unlock(Lock *l);
+// static Lock locks[SPINLOCK_COUNT];
+//
+// Uses GCC-style __atomic_* builtins (the family cfree provides) rather than
+// Clang-specific __c11_atomic_*.
+//
+// The five generic entry points (load/store/exchange/compare_exchange/
+// is_lock_free) collide with clang builtins of the same name. We define them
+// under _c-suffixed symbols and rename them at link time via the standard
+// `#pragma redefine_extname` trick.
+//===----------------------------------------------------------------------===//
+
+#pragma redefine_extname __atomic_load_c __atomic_load
+#pragma redefine_extname __atomic_store_c __atomic_store
+#pragma redefine_extname __atomic_exchange_c __atomic_exchange
+#pragma redefine_extname __atomic_compare_exchange_c __atomic_compare_exchange
+#pragma redefine_extname __atomic_is_lock_free_c __atomic_is_lock_free
+
+static __inline Lock *lock_for_pointer(void *ptr) {
+ intptr_t hash = (intptr_t)ptr;
+ hash >>= 4;
+ intptr_t low = hash & SPINLOCK_MASK;
+ hash >>= 16;
+ hash ^= low;
+ return locks + (hash & SPINLOCK_MASK);
+}
+
+#define ATOMIC_ALWAYS_LOCK_FREE_OR_ALIGNED_LOCK_FREE(size, p) \
+ (__atomic_always_lock_free(size, p) || \
+ (__atomic_always_lock_free(size, 0) && ((uintptr_t)p % size) == 0))
+#define IS_LOCK_FREE_1(p) ATOMIC_ALWAYS_LOCK_FREE_OR_ALIGNED_LOCK_FREE(1, p)
+#define IS_LOCK_FREE_2(p) ATOMIC_ALWAYS_LOCK_FREE_OR_ALIGNED_LOCK_FREE(2, p)
+#define IS_LOCK_FREE_4(p) ATOMIC_ALWAYS_LOCK_FREE_OR_ALIGNED_LOCK_FREE(4, p)
+#define IS_LOCK_FREE_8(p) ATOMIC_ALWAYS_LOCK_FREE_OR_ALIGNED_LOCK_FREE(8, p)
+#define IS_LOCK_FREE_16(p) ATOMIC_ALWAYS_LOCK_FREE_OR_ALIGNED_LOCK_FREE(16, p)
+
+#define TRY_LOCK_FREE_CASE(n, type, ptr) \
+ case n: \
+ if (IS_LOCK_FREE_##n(ptr)) { LOCK_FREE_ACTION(type); } \
+ break;
+
+#if HAS_INT128
+#define TRY_LOCK_FREE_CASE_16(p) TRY_LOCK_FREE_CASE(16, __uint128_t, p)
+#define OPTIMISED_CASES \
+ OPTIMISED_CASE(1, IS_LOCK_FREE_1, uint8_t) \
+ OPTIMISED_CASE(2, IS_LOCK_FREE_2, uint16_t) \
+ OPTIMISED_CASE(4, IS_LOCK_FREE_4, uint32_t) \
+ OPTIMISED_CASE(8, IS_LOCK_FREE_8, uint64_t) \
+ OPTIMISED_CASE(16, IS_LOCK_FREE_16, __uint128_t)
+#else
+#define TRY_LOCK_FREE_CASE_16(p) /* no __uint128_t */
+#define OPTIMISED_CASES \
+ OPTIMISED_CASE(1, IS_LOCK_FREE_1, uint8_t) \
+ OPTIMISED_CASE(2, IS_LOCK_FREE_2, uint16_t) \
+ OPTIMISED_CASE(4, IS_LOCK_FREE_4, uint32_t) \
+ OPTIMISED_CASE(8, IS_LOCK_FREE_8, uint64_t)
+#endif
+
+#define LOCK_FREE_CASES(ptr) \
+ do { \
+ switch (size) { \
+ TRY_LOCK_FREE_CASE(1, uint8_t, ptr) \
+ TRY_LOCK_FREE_CASE(2, uint16_t, ptr) \
+ TRY_LOCK_FREE_CASE(4, uint32_t, ptr) \
+ TRY_LOCK_FREE_CASE(8, uint64_t, ptr) \
+ TRY_LOCK_FREE_CASE_16(ptr) \
+ default: break; \
+ } \
+ } while (0)
+
+bool __atomic_is_lock_free_c(size_t size, void *ptr) {
+#define LOCK_FREE_ACTION(type) return true;
+ LOCK_FREE_CASES(ptr);
+#undef LOCK_FREE_ACTION
+ return false;
+}
+
+void __atomic_load_c(int size, void *src, void *dest, int model) {
+#define LOCK_FREE_ACTION(type) \
+ *((type *)dest) = __atomic_load_n((type *)src, model); \
+ return;
+ LOCK_FREE_CASES(src);
+#undef LOCK_FREE_ACTION
+ Lock *l = lock_for_pointer(src);
+ lock(l);
+ __builtin_memcpy(dest, src, size);
+ unlock(l);
+}
+
+void __atomic_store_c(int size, void *dest, void *src, int model) {
+#define LOCK_FREE_ACTION(type) \
+ __atomic_store_n((type *)dest, *(type *)src, model); \
+ return;
+ LOCK_FREE_CASES(dest);
+#undef LOCK_FREE_ACTION
+ Lock *l = lock_for_pointer(dest);
+ lock(l);
+ __builtin_memcpy(dest, src, size);
+ unlock(l);
+}
+
+int __atomic_compare_exchange_c(int size, void *ptr, void *expected,
+ void *desired, int success, int failure) {
+#define LOCK_FREE_ACTION(type) \
+ return __atomic_compare_exchange_n((type *)ptr, (type *)expected, \
+ *(type *)desired, 0, success, failure);
+ LOCK_FREE_CASES(ptr);
+#undef LOCK_FREE_ACTION
+ Lock *l = lock_for_pointer(ptr);
+ lock(l);
+ if (__builtin_memcmp(ptr, expected, size) == 0) {
+ __builtin_memcpy(ptr, desired, size);
+ unlock(l);
+ return 1;
+ }
+ __builtin_memcpy(expected, ptr, size);
+ unlock(l);
+ return 0;
+}
+
+void __atomic_exchange_c(int size, void *ptr, void *val, void *old, int model) {
+#define LOCK_FREE_ACTION(type) \
+ *(type *)old = __atomic_exchange_n((type *)ptr, *(type *)val, model); \
+ return;
+ LOCK_FREE_CASES(ptr);
+#undef LOCK_FREE_ACTION
+ Lock *l = lock_for_pointer(ptr);
+ lock(l);
+ __builtin_memcpy(old, ptr, size);
+ __builtin_memcpy(ptr, val, size);
+ unlock(l);
+}
+
+#define OPTIMISED_CASE(n, lockfree, type) \
+ type __atomic_load_##n(type *src, int model) { \
+ if (lockfree(src)) return __atomic_load_n(src, model); \
+ Lock *l = lock_for_pointer(src); \
+ lock(l); \
+ type val = *src; \
+ unlock(l); \
+ return val; \
+ }
+OPTIMISED_CASES
+#undef OPTIMISED_CASE
+
+#define OPTIMISED_CASE(n, lockfree, type) \
+ void __atomic_store_##n(type *dest, type val, int model) { \
+ if (lockfree(dest)) { __atomic_store_n(dest, val, model); return; } \
+ Lock *l = lock_for_pointer(dest); \
+ lock(l); \
+ *dest = val; \
+ unlock(l); \
+ }
+OPTIMISED_CASES
+#undef OPTIMISED_CASE
+
+#define OPTIMISED_CASE(n, lockfree, type) \
+ type __atomic_exchange_##n(type *dest, type val, int model) { \
+ if (lockfree(dest)) return __atomic_exchange_n(dest, val, model); \
+ Lock *l = lock_for_pointer(dest); \
+ lock(l); \
+ type tmp = *dest; \
+ *dest = val; \
+ unlock(l); \
+ return tmp; \
+ }
+OPTIMISED_CASES
+#undef OPTIMISED_CASE
+
+#define OPTIMISED_CASE(n, lockfree, type) \
+ bool __atomic_compare_exchange_##n(type *ptr, type *expected, type desired, \
+ int success, int failure) { \
+ if (lockfree(ptr)) \
+ return __atomic_compare_exchange_n(ptr, expected, desired, 0, success, \
+ failure); \
+ Lock *l = lock_for_pointer(ptr); \
+ lock(l); \
+ if (*ptr == *expected) { \
+ *ptr = desired; \
+ unlock(l); \
+ return true; \
+ } \
+ *expected = *ptr; \
+ unlock(l); \
+ return false; \
+ }
+OPTIMISED_CASES
+#undef OPTIMISED_CASE
+
+#define ATOMIC_RMW(n, lockfree, type, opname, op) \
+ type __atomic_fetch_##opname##_##n(type *ptr, type val, int model) { \
+ if (lockfree(ptr)) return __atomic_fetch_##opname(ptr, val, model); \
+ Lock *l = lock_for_pointer(ptr); \
+ lock(l); \
+ type tmp = *ptr; \
+ *ptr = tmp op val; \
+ unlock(l); \
+ return tmp; \
+ }
+
+#define ATOMIC_RMW_NAND(n, lockfree, type) \
+ type __atomic_fetch_nand_##n(type *ptr, type val, int model) { \
+ if (lockfree(ptr)) return __atomic_fetch_nand(ptr, val, model); \
+ Lock *l = lock_for_pointer(ptr); \
+ lock(l); \
+ type tmp = *ptr; \
+ *ptr = ~(tmp & val); \
+ unlock(l); \
+ return tmp; \
+ }
+
+#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, add, +)
+OPTIMISED_CASES
+#undef OPTIMISED_CASE
+#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, sub, -)
+OPTIMISED_CASES
+#undef OPTIMISED_CASE
+#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, and, &)
+OPTIMISED_CASES
+#undef OPTIMISED_CASE
+#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, or, |)
+OPTIMISED_CASES
+#undef OPTIMISED_CASE
+#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, xor, ^)
+OPTIMISED_CASES
+#undef OPTIMISED_CASE
+#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW_NAND(n, lockfree, type)
+OPTIMISED_CASES
+#undef OPTIMISED_CASE
diff --git a/lib/atomic/atomic_freestanding.c b/lib/atomic/atomic_freestanding.c
@@ -0,0 +1,36 @@
+//===-- atomic_freestanding.c - lock fallbacks via C11 atomic spinlock ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Atomic fallback shim using a pointer-sized C11 atomic spinlock as the
+// lock primitive. Has no OS or libc dependency.
+//===----------------------------------------------------------------------===//
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#define SPINLOCK_COUNT (1 << 10)
+static const long SPINLOCK_MASK = SPINLOCK_COUNT - 1;
+
+// HAS_INT128 is defined by the build system: -DHAS_INT128=1 on 64-bit targets,
+// -DHAS_INT128=0 on 32-bit. atomic_common.inc keys 16-byte cases off it.
+
+typedef _Atomic(uintptr_t) Lock;
+
+static inline void unlock(Lock *l) {
+ __atomic_store_n((uintptr_t *)l, 0, __ATOMIC_RELEASE);
+}
+
+static inline void lock(Lock *l) {
+ uintptr_t old = 0;
+ while (!__atomic_compare_exchange_n((uintptr_t *)l, &old, 1, 1,
+ __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
+ old = 0;
+}
+
+static Lock locks[SPINLOCK_COUNT];
+
+#include "atomic_common.inc"
diff --git a/lib/fp/adddf3.c b/lib/fp/adddf3.c
@@ -0,0 +1,17 @@
+//===-- lib/adddf3.c - Double-precision addition ------------------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements double-precision soft-float addition.
+//
+//===----------------------------------------------------------------------===//
+
+#define DOUBLE_PRECISION
+#include "fp_add_impl.inc"
+
+COMPILER_RT_ABI double __adddf3(double a, double b) { return __addXf3__(a, b); }
+
diff --git a/lib/fp/addsf3.c b/lib/fp/addsf3.c
@@ -0,0 +1,17 @@
+//===-- lib/addsf3.c - Single-precision addition ------------------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements single-precision soft-float addition.
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "fp_add_impl.inc"
+
+COMPILER_RT_ABI float __addsf3(float a, float b) { return __addXf3__(a, b); }
+
diff --git a/lib/fp/comparedf2.c b/lib/fp/comparedf2.c
@@ -0,0 +1,52 @@
+//===-- lib/comparedf2.c - Double-precision comparisons -----------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// // This file implements the following soft-float comparison routines:
+//
+// __eqdf2 __gedf2 __unorddf2
+// __ledf2 __gtdf2
+// __ltdf2
+// __nedf2
+//
+// The semantics of the routines grouped in each column are identical, so there
+// is a single implementation for each, and wrappers to provide the other names.
+//
+// The main routines behave as follows:
+//
+// __ledf2(a,b) returns -1 if a < b
+// 0 if a == b
+// 1 if a > b
+// 1 if either a or b is NaN
+//
+// __gedf2(a,b) returns -1 if a < b
+// 0 if a == b
+// 1 if a > b
+// -1 if either a or b is NaN
+//
+// __unorddf2(a,b) returns 0 if both a and b are numbers
+// 1 if either a or b is NaN
+//
+// Note that __ledf2( ) and __gedf2( ) are identical except in their handling of
+// NaN values.
+//
+//===----------------------------------------------------------------------===//
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+
+#include "fp_compare_impl.inc"
+
+COMPILER_RT_ABI CMP_RESULT __ledf2(fp_t a, fp_t b) { return __leXf2__(a, b); }
+COMPILER_RT_ABI CMP_RESULT __eqdf2(fp_t a, fp_t b) { return __leXf2__(a, b); }
+COMPILER_RT_ABI CMP_RESULT __ltdf2(fp_t a, fp_t b) { return __leXf2__(a, b); }
+COMPILER_RT_ABI CMP_RESULT __nedf2(fp_t a, fp_t b) { return __leXf2__(a, b); }
+COMPILER_RT_ABI CMP_RESULT __gedf2(fp_t a, fp_t b) { return __geXf2__(a, b); }
+COMPILER_RT_ABI CMP_RESULT __gtdf2(fp_t a, fp_t b) { return __geXf2__(a, b); }
+COMPILER_RT_ABI CMP_RESULT __unorddf2(fp_t a, fp_t b) { return __unordXf2__(a, b); }
+
+
diff --git a/lib/fp/comparesf2.c b/lib/fp/comparesf2.c
@@ -0,0 +1,52 @@
+//===-- lib/comparesf2.c - Single-precision comparisons -----------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the following soft-fp_t comparison routines:
+//
+// __eqsf2 __gesf2 __unordsf2
+// __lesf2 __gtsf2
+// __ltsf2
+// __nesf2
+//
+// The semantics of the routines grouped in each column are identical, so there
+// is a single implementation for each, and wrappers to provide the other names.
+//
+// The main routines behave as follows:
+//
+// __lesf2(a,b) returns -1 if a < b
+// 0 if a == b
+// 1 if a > b
+// 1 if either a or b is NaN
+//
+// __gesf2(a,b) returns -1 if a < b
+// 0 if a == b
+// 1 if a > b
+// -1 if either a or b is NaN
+//
+// __unordsf2(a,b) returns 0 if both a and b are numbers
+// 1 if either a or b is NaN
+//
+// Note that __lesf2( ) and __gesf2( ) are identical except in their handling of
+// NaN values.
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+#include "fp_compare_impl.inc"
+
+COMPILER_RT_ABI CMP_RESULT __lesf2(fp_t a, fp_t b) { return __leXf2__(a, b); }
+COMPILER_RT_ABI CMP_RESULT __eqsf2(fp_t a, fp_t b) { return __leXf2__(a, b); }
+COMPILER_RT_ABI CMP_RESULT __ltsf2(fp_t a, fp_t b) { return __leXf2__(a, b); }
+COMPILER_RT_ABI CMP_RESULT __nesf2(fp_t a, fp_t b) { return __leXf2__(a, b); }
+COMPILER_RT_ABI CMP_RESULT __gesf2(fp_t a, fp_t b) { return __geXf2__(a, b); }
+COMPILER_RT_ABI CMP_RESULT __gtsf2(fp_t a, fp_t b) { return __geXf2__(a, b); }
+COMPILER_RT_ABI CMP_RESULT __unordsf2(fp_t a, fp_t b){ return __unordXf2__(a, b); }
+
+
diff --git a/lib/fp/divdf3.c b/lib/fp/divdf3.c
@@ -0,0 +1,22 @@
+//===-- lib/divdf3.c - Double-precision division ------------------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements double-precision soft-float division
+// with the IEEE-754 default rounding (to nearest, ties to even).
+//
+//===----------------------------------------------------------------------===//
+
+#define DOUBLE_PRECISION
+
+#define NUMBER_OF_HALF_ITERATIONS 3
+#define NUMBER_OF_FULL_ITERATIONS 1
+
+#include "fp_div_impl.inc"
+
+COMPILER_RT_ABI fp_t __divdf3(fp_t a, fp_t b) { return __divXf3__(a, b); }
+
diff --git a/lib/fp/divsf3.c b/lib/fp/divsf3.c
@@ -0,0 +1,23 @@
+//===-- lib/divsf3.c - Single-precision division ------------------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements single-precision soft-float division
+// with the IEEE-754 default rounding (to nearest, ties to even).
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+
+#define NUMBER_OF_HALF_ITERATIONS 0
+#define NUMBER_OF_FULL_ITERATIONS 3
+#define USE_NATIVE_FULL_ITERATIONS
+
+#include "fp_div_impl.inc"
+
+COMPILER_RT_ABI fp_t __divsf3(fp_t a, fp_t b) { return __divXf3__(a, b); }
+
diff --git a/lib/fp/extendsfdf2.c b/lib/fp/extendsfdf2.c
@@ -0,0 +1,14 @@
+//===-- lib/extendsfdf2.c - single -> double conversion -----------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#define SRC_SINGLE
+#define DST_DOUBLE
+#include "fp_extend_impl.inc"
+
+COMPILER_RT_ABI double __extendsfdf2(float a) { return __extendXfYf2__(a); }
+
diff --git a/lib/fp/fixdfdi.c b/lib/fp/fixdfdi.c
@@ -0,0 +1,21 @@
+//===-- fixdfdi.c - Implement __fixdfdi -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+
+// Support for systems that don't have hardware floating-point; there are no
+// flags to set, and we don't want to code-gen to an unknown soft-float
+// implementation.
+
+typedef di_int fixint_t;
+typedef du_int fixuint_t;
+#include "fp_fixint_impl.inc"
+
+COMPILER_RT_ABI di_int __fixdfdi(fp_t a) { return __fixint(a); }
+
diff --git a/lib/fp/fixdfsi.c b/lib/fp/fixdfsi.c
@@ -0,0 +1,16 @@
+//===-- fixdfsi.c - Implement __fixdfsi -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+typedef si_int fixint_t;
+typedef su_int fixuint_t;
+#include "fp_fixint_impl.inc"
+
+COMPILER_RT_ABI si_int __fixdfsi(fp_t a) { return __fixint(a); }
+
diff --git a/lib/fp/fixsfdi.c b/lib/fp/fixsfdi.c
@@ -0,0 +1,21 @@
+//===-- fixsfdi.c - Implement __fixsfdi -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+// Support for systems that don't have hardware floating-point; there are no
+// flags to set, and we don't want to code-gen to an unknown soft-float
+// implementation.
+
+typedef di_int fixint_t;
+typedef du_int fixuint_t;
+#include "fp_fixint_impl.inc"
+
+COMPILER_RT_ABI di_int __fixsfdi(fp_t a) { return __fixint(a); }
+
diff --git a/lib/fp/fixsfsi.c b/lib/fp/fixsfsi.c
@@ -0,0 +1,16 @@
+//===-- fixsfsi.c - Implement __fixsfsi -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+typedef si_int fixint_t;
+typedef su_int fixuint_t;
+#include "fp_fixint_impl.inc"
+
+COMPILER_RT_ABI si_int __fixsfsi(fp_t a) { return __fixint(a); }
+
diff --git a/lib/fp/fixunsdfdi.c b/lib/fp/fixunsdfdi.c
@@ -0,0 +1,20 @@
+//===-- fixunsdfdi.c - Implement __fixunsdfdi -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+
+// Support for systems that don't have hardware floating-point; there are no
+// flags to set, and we don't want to code-gen to an unknown soft-float
+// implementation.
+
+typedef du_int fixuint_t;
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI du_int __fixunsdfdi(fp_t a) { return __fixuint(a); }
+
diff --git a/lib/fp/fixunsdfsi.c b/lib/fp/fixunsdfsi.c
@@ -0,0 +1,15 @@
+//===-- fixunsdfsi.c - Implement __fixunsdfsi -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+typedef su_int fixuint_t;
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI su_int __fixunsdfsi(fp_t a) { return __fixuint(a); }
+
diff --git a/lib/fp/fixunssfdi.c b/lib/fp/fixunssfdi.c
@@ -0,0 +1,20 @@
+//===-- fixunssfdi.c - Implement __fixunssfdi -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+// Support for systems that don't have hardware floating-point; there are no
+// flags to set, and we don't want to code-gen to an unknown soft-float
+// implementation.
+
+typedef du_int fixuint_t;
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI du_int __fixunssfdi(fp_t a) { return __fixuint(a); }
+
diff --git a/lib/fp/fixunssfsi.c b/lib/fp/fixunssfsi.c
@@ -0,0 +1,19 @@
+//===-- fixunssfsi.c - Implement __fixunssfsi -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __fixunssfsi for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+typedef su_int fixuint_t;
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI su_int __fixunssfsi(fp_t a) { return __fixuint(a); }
+
diff --git a/lib/fp/floatdidf.c b/lib/fp/floatdidf.c
@@ -0,0 +1,32 @@
+//===-- floatdidf.c - Implement __floatdidf -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __floatdidf for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: convert a to a double, rounding toward even.
+
+// Assumption: double is a IEEE 64 bit floating point type
+// di_int is a 64 bit integral type
+
+// seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+// mmmm
+
+// Support for systems that don't have hardware floating-point; there are no
+// flags to set, and we don't want to code-gen to an unknown soft-float
+// implementation.
+
+#define SRC_I64
+#define DST_DOUBLE
+#include "int_to_fp_impl.inc"
+
+COMPILER_RT_ABI double __floatdidf(di_int a) { return __floatXiYf__(a); }
+
diff --git a/lib/fp/floatdisf.c b/lib/fp/floatdisf.c
@@ -0,0 +1,27 @@
+//===-- floatdisf.c - Implement __floatdisf -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __floatdisf for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+// Returns: convert a to a float, rounding toward even.
+
+// Assumption: float is a IEEE 32 bit floating point type
+// di_int is a 64 bit integral type
+
+// seee eeee emmm mmmm mmmm mmmm mmmm mmmm
+
+#include "int_lib.h"
+
+#define SRC_I64
+#define DST_SINGLE
+#include "int_to_fp_impl.inc"
+
+COMPILER_RT_ABI float __floatdisf(di_int a) { return __floatXiYf__(a); }
+
diff --git a/lib/fp/floatsidf.c b/lib/fp/floatsidf.c
@@ -0,0 +1,51 @@
+//===-- lib/floatsidf.c - integer -> double-precision conversion --*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements integer to double-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+
+#include "int_lib.h"
+
+COMPILER_RT_ABI fp_t __floatsidf(si_int a) {
+
+ const int aWidth = sizeof a * CHAR_BIT;
+
+ // Handle zero as a special case to protect clz
+ if (a == 0)
+ return fromRep(0);
+
+ // All other cases begin by extracting the sign and absolute value of a
+ rep_t sign = 0;
+ su_int aAbs = (su_int)a;
+ if (a < 0) {
+ sign = signBit;
+ aAbs = -aAbs;
+ }
+
+ // Exponent of (fp_t)a is the width of abs(a).
+ const int exponent = (aWidth - 1) - clzsi(aAbs);
+ rep_t result;
+
+ // Shift a into the significand field and clear the implicit bit. Extra
+ // cast to unsigned int is necessary to get the correct behavior for
+ // the input INT_MIN.
+ const int shift = significandBits - exponent;
+ result = (rep_t)aAbs << shift ^ implicitBit;
+
+ // Insert the exponent
+ result += (rep_t)(exponent + exponentBias) << significandBits;
+ // Insert the sign bit and return
+ return fromRep(result | sign);
+}
+
diff --git a/lib/fp/floatsisf.c b/lib/fp/floatsisf.c
@@ -0,0 +1,59 @@
+//===-- lib/floatsisf.c - integer -> single-precision conversion --*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements integer to single-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+#include "int_lib.h"
+
+COMPILER_RT_ABI fp_t __floatsisf(si_int a) {
+
+ const int aWidth = sizeof a * CHAR_BIT;
+
+ // Handle zero as a special case to protect clz
+ if (a == 0)
+ return fromRep(0);
+
+ // All other cases begin by extracting the sign and absolute value of a
+ rep_t sign = 0;
+ su_int aAbs = (su_int)a;
+ if (a < 0) {
+ sign = signBit;
+ aAbs = -aAbs;
+ }
+
+ // Exponent of (fp_t)a is the width of abs(a).
+ const int exponent = (aWidth - 1) - clzsi(aAbs);
+ rep_t result;
+
+ // Shift a into the significand field, rounding if it is a right-shift
+ if (exponent <= significandBits) {
+ const int shift = significandBits - exponent;
+ result = (rep_t)aAbs << shift ^ implicitBit;
+ } else {
+ const int shift = exponent - significandBits;
+ result = (rep_t)aAbs >> shift ^ implicitBit;
+ rep_t round = (rep_t)aAbs << (typeWidth - shift);
+ if (round > signBit)
+ result++;
+ if (round == signBit)
+ result += result & 1;
+ }
+
+ // Insert the exponent
+ result += (rep_t)(exponent + exponentBias) << significandBits;
+ // Insert the sign bit and return
+ return fromRep(result | sign);
+}
+
diff --git a/lib/fp/floatundidf.c b/lib/fp/floatundidf.c
@@ -0,0 +1,32 @@
+//===-- floatundidf.c - Implement __floatundidf ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __floatundidf for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+// Returns: convert a to a double, rounding toward even.
+
+// Assumption: double is a IEEE 64 bit floating point type
+// du_int is a 64 bit integral type
+
+// seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+// mmmm
+
+#include "int_lib.h"
+
+// Support for systems that don't have hardware floating-point; there are no
+// flags to set, and we don't want to code-gen to an unknown soft-float
+// implementation.
+
+#define SRC_U64
+#define DST_DOUBLE
+#include "int_to_fp_impl.inc"
+
+COMPILER_RT_ABI double __floatundidf(du_int a) { return __floatXiYf__(a); }
+
diff --git a/lib/fp/floatundisf.c b/lib/fp/floatundisf.c
@@ -0,0 +1,27 @@
+//===-- floatundisf.c - Implement __floatundisf ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __floatundisf for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+// Returns: convert a to a float, rounding toward even.
+
+// Assumption: float is a IEEE 32 bit floating point type
+// du_int is a 64 bit integral type
+
+// seee eeee emmm mmmm mmmm mmmm mmmm mmmm
+
+#include "int_lib.h"
+
+#define SRC_U64
+#define DST_SINGLE
+#include "int_to_fp_impl.inc"
+
+COMPILER_RT_ABI float __floatundisf(du_int a) { return __floatXiYf__(a); }
+
diff --git a/lib/fp/floatunsidf.c b/lib/fp/floatunsidf.c
@@ -0,0 +1,40 @@
+//===-- lib/floatunsidf.c - uint -> double-precision conversion ---*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements unsigned integer to double-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+
+#include "int_lib.h"
+
+COMPILER_RT_ABI fp_t __floatunsidf(su_int a) {
+
+ const int aWidth = sizeof a * CHAR_BIT;
+
+ // Handle zero as a special case to protect clz
+ if (a == 0)
+ return fromRep(0);
+
+ // Exponent of (fp_t)a is the width of abs(a).
+ const int exponent = (aWidth - 1) - clzsi(a);
+ rep_t result;
+
+ // Shift a into the significand field and clear the implicit bit.
+ const int shift = significandBits - exponent;
+ result = (rep_t)a << shift ^ implicitBit;
+
+ // Insert the exponent
+ result += (rep_t)(exponent + exponentBias) << significandBits;
+ return fromRep(result);
+}
+
diff --git a/lib/fp/floatunsisf.c b/lib/fp/floatunsisf.c
@@ -0,0 +1,50 @@
+//===-- lib/floatunsisf.c - uint -> single-precision conversion ---*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements unsigned integer to single-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+#include "int_lib.h"
+
+COMPILER_RT_ABI fp_t __floatunsisf(su_int a) {
+
+ const int aWidth = sizeof a * CHAR_BIT;
+
+ // Handle zero as a special case to protect clz
+ if (a == 0)
+ return fromRep(0);
+
+ // Exponent of (fp_t)a is the width of abs(a).
+ const int exponent = (aWidth - 1) - clzsi(a);
+ rep_t result;
+
+ // Shift a into the significand field, rounding if it is a right-shift
+ if (exponent <= significandBits) {
+ const int shift = significandBits - exponent;
+ result = (rep_t)a << shift ^ implicitBit;
+ } else {
+ const int shift = exponent - significandBits;
+ result = (rep_t)a >> shift ^ implicitBit;
+ rep_t round = (rep_t)a << (typeWidth - shift);
+ if (round > signBit)
+ result++;
+ if (round == signBit)
+ result += result & 1;
+ }
+
+ // Insert the exponent
+ result += (rep_t)(exponent + exponentBias) << significandBits;
+ return fromRep(result);
+}
+
diff --git a/lib/fp/fp_mode.c b/lib/fp/fp_mode.c
@@ -0,0 +1,22 @@
+//===----- lib/fp_mode.c - Floaing-point environment mode utilities --C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides a default implementation of fp_mode.h for architectures
+// that does not support or does not have an implementation of floating point
+// environment mode.
+//
+//===----------------------------------------------------------------------===//
+
+#include "fp_mode.h"
+
+// IEEE-754 default rounding (to nearest, ties to even).
+CRT_FE_ROUND_MODE __fe_getround(void) { return CRT_FE_TONEAREST; }
+
+int __fe_raise_inexact(void) {
+ return 0;
+}
diff --git a/lib/fp/muldf3.c b/lib/fp/muldf3.c
@@ -0,0 +1,18 @@
+//===-- lib/muldf3.c - Double-precision multiplication ------------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements double-precision soft-float multiplication
+// with the IEEE-754 default rounding (to nearest, ties to even).
+//
+//===----------------------------------------------------------------------===//
+
+#define DOUBLE_PRECISION
+#include "fp_mul_impl.inc"
+
+COMPILER_RT_ABI fp_t __muldf3(fp_t a, fp_t b) { return __mulXf3__(a, b); }
+
diff --git a/lib/fp/mulsf3.c b/lib/fp/mulsf3.c
@@ -0,0 +1,18 @@
+//===-- lib/mulsf3.c - Single-precision multiplication ------------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements single-precision soft-float multiplication
+// with the IEEE-754 default rounding (to nearest, ties to even).
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "fp_mul_impl.inc"
+
+COMPILER_RT_ABI fp_t __mulsf3(fp_t a, fp_t b) { return __mulXf3__(a, b); }
+
diff --git a/lib/fp/negdf2.c b/lib/fp/negdf2.c
@@ -0,0 +1,17 @@
+//===-- lib/negdf2.c - double-precision negation ------------------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements double-precision soft-float negation.
+//
+//===----------------------------------------------------------------------===//
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+
+COMPILER_RT_ABI fp_t __negdf2(fp_t a) { return fromRep(toRep(a) ^ signBit); }
+
diff --git a/lib/fp/negsf2.c b/lib/fp/negsf2.c
@@ -0,0 +1,17 @@
+//===-- lib/negsf2.c - single-precision negation ------------------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements single-precision soft-float negation.
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+COMPILER_RT_ABI fp_t __negsf2(fp_t a) { return fromRep(toRep(a) ^ signBit); }
+
diff --git a/lib/fp/subdf3.c b/lib/fp/subdf3.c
@@ -0,0 +1,20 @@
+//===-- lib/adddf3.c - Double-precision subtraction ---------------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements double-precision soft-float subtraction.
+//
+//===----------------------------------------------------------------------===//
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+
+// Subtraction; flip the sign bit of b and add.
+COMPILER_RT_ABI fp_t __subdf3(fp_t a, fp_t b) {
+ return __adddf3(a, fromRep(toRep(b) ^ signBit));
+}
+
diff --git a/lib/fp/subsf3.c b/lib/fp/subsf3.c
@@ -0,0 +1,20 @@
+//===-- lib/subsf3.c - Single-precision subtraction ---------------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements single-precision soft-float subtraction.
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+// Subtraction; flip the sign bit of b and add.
+COMPILER_RT_ABI fp_t __subsf3(fp_t a, fp_t b) {
+ return __addsf3(a, fromRep(toRep(b) ^ signBit));
+}
+
diff --git a/lib/fp/truncdfsf2.c b/lib/fp/truncdfsf2.c
@@ -0,0 +1,14 @@
+//===-- lib/truncdfsf2.c - double -> single conversion ------------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#define SRC_DOUBLE
+#define DST_SINGLE
+#include "fp_trunc_impl.inc"
+
+COMPILER_RT_ABI float __truncdfsf2(double a) { return __truncXfYf2__(a); }
+
diff --git a/lib/fp_tf/addtf3.c b/lib/fp_tf/addtf3.c
@@ -0,0 +1,21 @@
+//===-- lib/addtf3.c - Quad-precision addition --------------------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements quad-precision soft-float addition.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#include "fp_add_impl.inc"
+
+COMPILER_RT_ABI fp_t __addtf3(fp_t a, fp_t b) {
+ return __addXf3__(a, b);
+}
+
diff --git a/lib/fp_tf/comparetf2.c b/lib/fp_tf/comparetf2.c
@@ -0,0 +1,51 @@
+//===-- lib/comparetf2.c - Quad-precision comparisons -------------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// // This file implements the following soft-float comparison routines:
+//
+// __eqtf2 __getf2 __unordtf2
+// __letf2 __gttf2
+// __lttf2
+// __netf2
+//
+// The semantics of the routines grouped in each column are identical, so there
+// is a single implementation for each, and wrappers to provide the other names.
+//
+// The main routines behave as follows:
+//
+// __letf2(a,b) returns -1 if a < b
+// 0 if a == b
+// 1 if a > b
+// 1 if either a or b is NaN
+//
+// __getf2(a,b) returns -1 if a < b
+// 0 if a == b
+// 1 if a > b
+// -1 if either a or b is NaN
+//
+// __unordtf2(a,b) returns 0 if both a and b are numbers
+// 1 if either a or b is NaN
+//
+// Note that __letf2( ) and __getf2( ) are identical except in their handling of
+// NaN values.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#include "fp_compare_impl.inc"
+
+COMPILER_RT_ABI CMP_RESULT __letf2(fp_t a, fp_t b) { return __leXf2__(a, b); }
+COMPILER_RT_ABI CMP_RESULT __eqtf2(fp_t a, fp_t b) { return __leXf2__(a, b); }
+COMPILER_RT_ABI CMP_RESULT __lttf2(fp_t a, fp_t b) { return __leXf2__(a, b); }
+COMPILER_RT_ABI CMP_RESULT __netf2(fp_t a, fp_t b) { return __leXf2__(a, b); }
+COMPILER_RT_ABI CMP_RESULT __getf2(fp_t a, fp_t b) { return __geXf2__(a, b); }
+COMPILER_RT_ABI CMP_RESULT __gttf2(fp_t a, fp_t b) { return __geXf2__(a, b); }
+COMPILER_RT_ABI CMP_RESULT __unordtf2(fp_t a, fp_t b) { return __unordXf2__(a, b); }
+
diff --git a/lib/fp_tf/divtf3.c b/lib/fp_tf/divtf3.c
@@ -0,0 +1,24 @@
+//===-- lib/divtf3.c - Quad-precision division --------------------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements quad-precision soft-float division
+// with the IEEE-754 default rounding (to nearest, ties to even).
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+
+#define NUMBER_OF_HALF_ITERATIONS 4
+#define NUMBER_OF_FULL_ITERATIONS 1
+
+#include "fp_div_impl.inc"
+
+COMPILER_RT_ABI fp_t __divtf3(fp_t a, fp_t b) { return __divXf3__(a, b); }
+
diff --git a/lib/fp_tf/extenddftf2.c b/lib/fp_tf/extenddftf2.c
@@ -0,0 +1,17 @@
+//===-- lib/extenddftf2.c - double -> quad conversion -------------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#define SRC_DOUBLE
+#define DST_QUAD
+#include "fp_extend_impl.inc"
+
+COMPILER_RT_ABI dst_t __extenddftf2(src_t a) { return __extendXfYf2__(a); }
+
diff --git a/lib/fp_tf/extendsftf2.c b/lib/fp_tf/extendsftf2.c
@@ -0,0 +1,17 @@
+//===-- lib/extendsftf2.c - single -> quad conversion -------------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#define SRC_SINGLE
+#define DST_QUAD
+#include "fp_extend_impl.inc"
+
+COMPILER_RT_ABI dst_t __extendsftf2(src_t a) { return __extendXfYf2__(a); }
+
diff --git a/lib/fp_tf/fixtfdi.c b/lib/fp_tf/fixtfdi.c
@@ -0,0 +1,16 @@
+//===-- fixtfdi.c - Implement __fixtfdi -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+typedef di_int fixint_t;
+typedef du_int fixuint_t;
+#include "fp_fixint_impl.inc"
+
+COMPILER_RT_ABI di_int __fixtfdi(fp_t a) { return __fixint(a); }
diff --git a/lib/fp_tf/fixtfsi.c b/lib/fp_tf/fixtfsi.c
@@ -0,0 +1,16 @@
+//===-- fixtfsi.c - Implement __fixtfsi -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+typedef si_int fixint_t;
+typedef su_int fixuint_t;
+#include "fp_fixint_impl.inc"
+
+COMPILER_RT_ABI si_int __fixtfsi(fp_t a) { return __fixint(a); }
diff --git a/lib/fp_tf/fixtfti.c b/lib/fp_tf/fixtfti.c
@@ -0,0 +1,16 @@
+//===-- fixtfti.c - Implement __fixtfti -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+typedef ti_int fixint_t;
+typedef tu_int fixuint_t;
+#include "fp_fixint_impl.inc"
+
+COMPILER_RT_ABI ti_int __fixtfti(fp_t a) { return __fixint(a); }
diff --git a/lib/fp_tf/fixunstfdi.c b/lib/fp_tf/fixunstfdi.c
@@ -0,0 +1,15 @@
+//===-- fixunstfdi.c - Implement __fixunstfdi -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+typedef du_int fixuint_t;
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI du_int __fixunstfdi(fp_t a) { return __fixuint(a); }
diff --git a/lib/fp_tf/fixunstfsi.c b/lib/fp_tf/fixunstfsi.c
@@ -0,0 +1,15 @@
+//===-- fixunstfsi.c - Implement __fixunstfsi -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+typedef su_int fixuint_t;
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI su_int __fixunstfsi(fp_t a) { return __fixuint(a); }
diff --git a/lib/fp_tf/fixunstfti.c b/lib/fp_tf/fixunstfti.c
@@ -0,0 +1,15 @@
+//===-- fixunstfsi.c - Implement __fixunstfsi -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+typedef tu_int fixuint_t;
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI tu_int __fixunstfti(fp_t a) { return __fixuint(a); }
diff --git a/lib/fp_tf/floatditf.c b/lib/fp_tf/floatditf.c
@@ -0,0 +1,47 @@
+//===-- lib/floatditf.c - integer -> quad-precision conversion ----*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements di_int to quad-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+COMPILER_RT_ABI fp_t __floatditf(di_int a) {
+
+ const int aWidth = sizeof a * CHAR_BIT;
+
+ // Handle zero as a special case to protect clz
+ if (a == 0)
+ return fromRep(0);
+
+ // All other cases begin by extracting the sign and absolute value of a
+ rep_t sign = 0;
+ du_int aAbs = (du_int)a;
+ if (a < 0) {
+ sign = signBit;
+ aAbs = ~(du_int)a + 1U;
+ }
+
+ // Exponent of (fp_t)a is the width of abs(a).
+ const int exponent = (aWidth - 1) - __builtin_clzll(aAbs);
+ rep_t result;
+
+ // Shift a into the significand field, rounding if it is a right-shift
+ const int shift = significandBits - exponent;
+ result = (rep_t)aAbs << shift ^ implicitBit;
+
+ // Insert the exponent
+ result += (rep_t)(exponent + exponentBias) << significandBits;
+ // Insert the sign bit and return
+ return fromRep(result | sign);
+}
+
diff --git a/lib/fp_tf/floatsitf.c b/lib/fp_tf/floatsitf.c
@@ -0,0 +1,47 @@
+//===-- lib/floatsitf.c - integer -> quad-precision conversion ----*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements integer to quad-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+COMPILER_RT_ABI fp_t __floatsitf(si_int a) {
+
+ const int aWidth = sizeof a * CHAR_BIT;
+
+ // Handle zero as a special case to protect clz
+ if (a == 0)
+ return fromRep(0);
+
+ // All other cases begin by extracting the sign and absolute value of a
+ rep_t sign = 0;
+ su_int aAbs = (su_int)a;
+ if (a < 0) {
+ sign = signBit;
+ aAbs = -aAbs;
+ }
+
+ // Exponent of (fp_t)a is the width of abs(a).
+ const int exponent = (aWidth - 1) - clzsi(aAbs);
+ rep_t result;
+
+ // Shift a into the significand field and clear the implicit bit.
+ const int shift = significandBits - exponent;
+ result = (rep_t)aAbs << shift ^ implicitBit;
+
+ // Insert the exponent
+ result += (rep_t)(exponent + exponentBias) << significandBits;
+ // Insert the sign bit and return
+ return fromRep(result | sign);
+}
+
diff --git a/lib/fp_tf/floattitf.c b/lib/fp_tf/floattitf.c
@@ -0,0 +1,33 @@
+//===-- lib/floattitf.c - int128 -> quad-precision conversion -----*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ti_int to quad-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+#include "int_lib.h"
+
+#define SRC_I128
+#define DST_QUAD
+#include "int_to_fp_impl.inc"
+
+// Returns: convert a ti_int to a fp_t, rounding toward even.
+
+// Assumption: fp_t is a IEEE 128 bit floating point type
+// ti_int is a 128 bit integral type
+
+// seee eeee eeee eeee mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+// mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm
+// mmmm mmmm mmmm
+
+COMPILER_RT_ABI fp_t __floattitf(ti_int a) { return __floatXiYf__(a); }
+
diff --git a/lib/fp_tf/floatunditf.c b/lib/fp_tf/floatunditf.c
@@ -0,0 +1,38 @@
+//===-- lib/floatunditf.c - uint -> quad-precision conversion -----*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements du_int to quad-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+COMPILER_RT_ABI fp_t __floatunditf(du_int a) {
+
+ const int aWidth = sizeof a * CHAR_BIT;
+
+ // Handle zero as a special case to protect clz
+ if (a == 0)
+ return fromRep(0);
+
+ // Exponent of (fp_t)a is the width of abs(a).
+ const int exponent = (aWidth - 1) - __builtin_clzll(a);
+ rep_t result;
+
+ // Shift a into the significand field and clear the implicit bit.
+ const int shift = significandBits - exponent;
+ result = (rep_t)a << shift ^ implicitBit;
+
+ // Insert the exponent
+ result += (rep_t)(exponent + exponentBias) << significandBits;
+ return fromRep(result);
+}
+
diff --git a/lib/fp_tf/floatunsitf.c b/lib/fp_tf/floatunsitf.c
@@ -0,0 +1,38 @@
+//===-- lib/floatunsitf.c - uint -> quad-precision conversion -----*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements unsigned integer to quad-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+COMPILER_RT_ABI fp_t __floatunsitf(su_int a) {
+
+ const int aWidth = sizeof a * CHAR_BIT;
+
+ // Handle zero as a special case to protect clz
+ if (a == 0)
+ return fromRep(0);
+
+ // Exponent of (fp_t)a is the width of abs(a).
+ const int exponent = (aWidth - 1) - clzsi(a);
+ rep_t result;
+
+ // Shift a into the significand field and clear the implicit bit.
+ const int shift = significandBits - exponent;
+ result = (rep_t)a << shift ^ implicitBit;
+
+ // Insert the exponent
+ result += (rep_t)(exponent + exponentBias) << significandBits;
+ return fromRep(result);
+}
+
diff --git a/lib/fp_tf/floatuntitf.c b/lib/fp_tf/floatuntitf.c
@@ -0,0 +1,33 @@
+//===-- lib/floatuntitf.c - uint128 -> quad-precision conversion --*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements tu_int to quad-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+#include "int_lib.h"
+
+#define SRC_U128
+#define DST_QUAD
+#include "int_to_fp_impl.inc"
+
+// Returns: convert a tu_int to a fp_t, rounding toward even.
+
+// Assumption: fp_t is a IEEE 128 bit floating point type
+// tu_int is a 128 bit integral type
+
+// seee eeee eeee eeee mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+// mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm
+// mmmm mmmm mmmm
+
+COMPILER_RT_ABI fp_t __floatuntitf(tu_int a) { return __floatXiYf__(a); }
+
diff --git a/lib/fp_tf/multf3.c b/lib/fp_tf/multf3.c
@@ -0,0 +1,20 @@
+//===-- lib/multf3.c - Quad-precision multiplication --------------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements quad-precision soft-float multiplication
+// with the IEEE-754 default rounding (to nearest, ties to even).
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#include "fp_mul_impl.inc"
+
+COMPILER_RT_ABI fp_t __multf3(fp_t a, fp_t b) { return __mulXf3__(a, b); }
+
diff --git a/lib/fp_tf/subtf3.c b/lib/fp_tf/subtf3.c
@@ -0,0 +1,22 @@
+//===-- lib/subtf3.c - Quad-precision subtraction -----------------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements quad-precision soft-float subtraction.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+COMPILER_RT_ABI fp_t __addtf3(fp_t a, fp_t b);
+
+// Subtraction; flip the sign bit of b and add.
+COMPILER_RT_ABI fp_t __subtf3(fp_t a, fp_t b) {
+ return __addtf3(a, fromRep(toRep(b) ^ signBit));
+}
+
diff --git a/lib/fp_tf/trunctfdf2.c b/lib/fp_tf/trunctfdf2.c
@@ -0,0 +1,17 @@
+//===-- lib/truncdfsf2.c - quad -> double conversion --------------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#define SRC_QUAD
+#define DST_DOUBLE
+#include "fp_trunc_impl.inc"
+
+COMPILER_RT_ABI dst_t __trunctfdf2(src_t a) { return __truncXfYf2__(a); }
+
diff --git a/lib/fp_tf/trunctfsf2.c b/lib/fp_tf/trunctfsf2.c
@@ -0,0 +1,17 @@
+//===-- lib/trunctfsf2.c - quad -> single conversion --------------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#define SRC_QUAD
+#define DST_SINGLE
+#include "fp_trunc_impl.inc"
+
+COMPILER_RT_ABI dst_t __trunctfsf2(src_t a) { return __truncXfYf2__(a); }
+
diff --git a/lib/fp_ti/fixdfti.c b/lib/fp_ti/fixdfti.c
@@ -0,0 +1,19 @@
+//===-- fixdfti.c - Implement __fixdfti -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+
+typedef ti_int fixint_t;
+typedef tu_int fixuint_t;
+#include "fp_fixint_impl.inc"
+
+COMPILER_RT_ABI ti_int __fixdfti(fp_t a) { return __fixint(a); }
+
diff --git a/lib/fp_ti/fixsfti.c b/lib/fp_ti/fixsfti.c
@@ -0,0 +1,19 @@
+//===-- fixsfti.c - Implement __fixsfti -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+typedef ti_int fixint_t;
+typedef tu_int fixuint_t;
+#include "fp_fixint_impl.inc"
+
+COMPILER_RT_ABI ti_int __fixsfti(fp_t a) { return __fixint(a); }
+
diff --git a/lib/fp_ti/fixunsdfti.c b/lib/fp_ti/fixunsdfti.c
@@ -0,0 +1,16 @@
+//===-- fixunsdfti.c - Implement __fixunsdfti -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+typedef tu_int fixuint_t;
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI tu_int __fixunsdfti(fp_t a) { return __fixuint(a); }
diff --git a/lib/fp_ti/fixunssfti.c b/lib/fp_ti/fixunssfti.c
@@ -0,0 +1,19 @@
+//===-- fixunssfti.c - Implement __fixunssfti -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __fixunssfti for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+typedef tu_int fixuint_t;
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI tu_int __fixunssfti(fp_t a) { return __fixuint(a); }
diff --git a/lib/fp_ti/floattidf.c b/lib/fp_ti/floattidf.c
@@ -0,0 +1,29 @@
+//===-- floattidf.c - Implement __floattidf -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __floattidf for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+
+#define SRC_I128
+#define DST_DOUBLE
+#include "int_to_fp_impl.inc"
+
+// Returns: convert a to a double, rounding toward even.
+
+// Assumption: double is a IEEE 64 bit floating point type
+// ti_int is a 128 bit integral type
+
+// seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+// mmmm
+
+COMPILER_RT_ABI double __floattidf(ti_int a) { return __floatXiYf__(a); }
+
diff --git a/lib/fp_ti/floattisf.c b/lib/fp_ti/floattisf.c
@@ -0,0 +1,28 @@
+//===-- floattisf.c - Implement __floattisf -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __floattisf for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+
+#define SRC_I128
+#define DST_SINGLE
+#include "int_to_fp_impl.inc"
+
+// Returns: convert a to a float, rounding toward even.
+
+// Assumption: float is a IEEE 32 bit floating point type
+// ti_int is a 128 bit integral type
+
+// seee eeee emmm mmmm mmmm mmmm mmmm mmmm
+
+COMPILER_RT_ABI float __floattisf(ti_int a) { return __floatXiYf__(a); }
+
diff --git a/lib/fp_ti/floatuntidf.c b/lib/fp_ti/floatuntidf.c
@@ -0,0 +1,29 @@
+//===-- floatuntidf.c - Implement __floatuntidf ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __floatuntidf for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+
+#define SRC_U128
+#define DST_DOUBLE
+#include "int_to_fp_impl.inc"
+
+// Returns: convert a to a double, rounding toward even.
+
+// Assumption: double is a IEEE 64 bit floating point type
+// tu_int is a 128 bit integral type
+
+// seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+// mmmm
+
+COMPILER_RT_ABI double __floatuntidf(tu_int a) { return __floatXiYf__(a); }
+
diff --git a/lib/fp_ti/floatuntisf.c b/lib/fp_ti/floatuntisf.c
@@ -0,0 +1,28 @@
+//===-- floatuntisf.c - Implement __floatuntisf ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __floatuntisf for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+
+#define SRC_U128
+#define DST_SINGLE
+#include "int_to_fp_impl.inc"
+
+// Returns: convert a to a float, rounding toward even.
+
+// Assumption: float is a IEEE 32 bit floating point type
+// tu_int is a 128 bit integral type
+
+// seee eeee emmm mmmm mmmm mmmm mmmm mmmm
+
+COMPILER_RT_ABI float __floatuntisf(tu_int a) { return __floatXiYf__(a); }
+
diff --git a/lib/impl/fp_add_impl.inc b/lib/impl/fp_add_impl.inc
@@ -0,0 +1,172 @@
+//===----- lib/fp_add_impl.inc - floaing point addition -----------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements soft-float addition with the IEEE-754 default rounding
+// (to nearest, ties to even).
+//
+//===----------------------------------------------------------------------===//
+
+#include "fp_lib.h"
+#include "fp_mode.h"
+
+static __inline fp_t __addXf3__(fp_t a, fp_t b) {
+ rep_t aRep = toRep(a);
+ rep_t bRep = toRep(b);
+ const rep_t aAbs = aRep & absMask;
+ const rep_t bAbs = bRep & absMask;
+
+ // Detect if a or b is zero, infinity, or NaN.
+ if (aAbs - REP_C(1) >= infRep - REP_C(1) ||
+ bAbs - REP_C(1) >= infRep - REP_C(1)) {
+ // NaN + anything = qNaN
+ if (aAbs > infRep)
+ return fromRep(toRep(a) | quietBit);
+ // anything + NaN = qNaN
+ if (bAbs > infRep)
+ return fromRep(toRep(b) | quietBit);
+
+ if (aAbs == infRep) {
+ // +/-infinity + -/+infinity = qNaN
+ if ((toRep(a) ^ toRep(b)) == signBit)
+ return fromRep(qnanRep);
+ // +/-infinity + anything remaining = +/- infinity
+ else
+ return a;
+ }
+
+ // anything remaining + +/-infinity = +/-infinity
+ if (bAbs == infRep)
+ return b;
+
+ // zero + anything = anything
+ if (!aAbs) {
+ // We need to get the sign right for zero + zero.
+ if (!bAbs)
+ return fromRep(toRep(a) & toRep(b));
+ else
+ return b;
+ }
+
+ // anything + zero = anything
+ if (!bAbs)
+ return a;
+ }
+
+ // Swap a and b if necessary so that a has the larger absolute value.
+ if (bAbs > aAbs) {
+ const rep_t temp = aRep;
+ aRep = bRep;
+ bRep = temp;
+ }
+
+ // Extract the exponent and significand from the (possibly swapped) a and b.
+ int aExponent = aRep >> significandBits & maxExponent;
+ int bExponent = bRep >> significandBits & maxExponent;
+ rep_t aSignificand = aRep & significandMask;
+ rep_t bSignificand = bRep & significandMask;
+
+ // Normalize any denormals, and adjust the exponent accordingly.
+ if (aExponent == 0)
+ aExponent = normalize(&aSignificand);
+ if (bExponent == 0)
+ bExponent = normalize(&bSignificand);
+
+ // The sign of the result is the sign of the larger operand, a. If they
+ // have opposite signs, we are performing a subtraction. Otherwise, we
+ // perform addition.
+ const rep_t resultSign = aRep & signBit;
+ const bool subtraction = (aRep ^ bRep) & signBit;
+
+ // Shift the significands to give us round, guard and sticky, and set the
+ // implicit significand bit. If we fell through from the denormal path it
+ // was already set by normalize( ), but setting it twice won't hurt
+ // anything.
+ aSignificand = (aSignificand | implicitBit) << 3;
+ bSignificand = (bSignificand | implicitBit) << 3;
+
+ // Shift the significand of b by the difference in exponents, with a sticky
+ // bottom bit to get rounding correct.
+ const unsigned int align = aExponent - bExponent;
+ if (align) {
+ if (align < typeWidth) {
+ const bool sticky = (bSignificand << (typeWidth - align)) != 0;
+ bSignificand = bSignificand >> align | sticky;
+ } else {
+ bSignificand = 1; // Set the sticky bit. b is known to be non-zero.
+ }
+ }
+ if (subtraction) {
+ aSignificand -= bSignificand;
+ // If a == -b, return +zero.
+ if (aSignificand == 0)
+ return fromRep(0);
+
+ // If partial cancellation occured, we need to left-shift the result
+ // and adjust the exponent.
+ if (aSignificand < implicitBit << 3) {
+ const int shift = rep_clz(aSignificand) - rep_clz(implicitBit << 3);
+ aSignificand <<= shift;
+ aExponent -= shift;
+ }
+ } else /* addition */ {
+ aSignificand += bSignificand;
+
+ // If the addition carried up, we need to right-shift the result and
+ // adjust the exponent.
+ if (aSignificand & implicitBit << 4) {
+ const bool sticky = aSignificand & 1;
+ aSignificand = aSignificand >> 1 | sticky;
+ aExponent += 1;
+ }
+ }
+
+ // If we have overflowed the type, return +/- infinity.
+ if (aExponent >= maxExponent)
+ return fromRep(infRep | resultSign);
+
+ if (aExponent <= 0) {
+ // The result is denormal before rounding. The exponent is zero and we
+ // need to shift the significand.
+ const int shift = 1 - aExponent;
+ const bool sticky = (aSignificand << (typeWidth - shift)) != 0;
+ aSignificand = aSignificand >> shift | sticky;
+ aExponent = 0;
+ }
+
+ // Low three bits are round, guard, and sticky.
+ const int roundGuardSticky = aSignificand & 0x7;
+
+ // Shift the significand into place, and mask off the implicit bit.
+ rep_t result = aSignificand >> 3 & significandMask;
+
+ // Insert the exponent and sign.
+ result |= (rep_t)aExponent << significandBits;
+ result |= resultSign;
+
+ // Perform the final rounding. The result may overflow to infinity, but
+ // that is the correct result in that case.
+ switch (__fe_getround()) {
+ case CRT_FE_TONEAREST:
+ if (roundGuardSticky > 0x4)
+ result++;
+ if (roundGuardSticky == 0x4)
+ result += result & 1;
+ break;
+ case CRT_FE_DOWNWARD:
+ if (resultSign && roundGuardSticky) result++;
+ break;
+ case CRT_FE_UPWARD:
+ if (!resultSign && roundGuardSticky) result++;
+ break;
+ case CRT_FE_TOWARDZERO:
+ break;
+ }
+ if (roundGuardSticky)
+ __fe_raise_inexact();
+ return fromRep(result);
+}
diff --git a/lib/impl/fp_compare_impl.inc b/lib/impl/fp_compare_impl.inc
@@ -0,0 +1,119 @@
+//===-- lib/fp_compare_impl.inc - Floating-point comparison -------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "fp_lib.h"
+
+// GCC uses long (at least for x86_64) as the return type of the comparison
+// functions. We need to ensure that the return value is sign-extended in the
+// same way as GCC expects (since otherwise GCC-generated __builtin_isinf
+// returns true for finite 128-bit floating-point numbers).
+#ifdef __aarch64__
+// AArch64 GCC overrides libgcc_cmp_return to use int instead of long.
+typedef int CMP_RESULT;
+#elif __SIZEOF_POINTER__ == 8 && __SIZEOF_LONG__ == 4
+// LLP64 ABIs use long long instead of long.
+typedef long long CMP_RESULT;
+#elif __AVR__
+// AVR uses a single byte for the return value.
+typedef char CMP_RESULT;
+#else
+// Otherwise the comparison functions return long.
+typedef long CMP_RESULT;
+#endif
+
+#if !defined(__clang__) && defined(__GNUC__)
+// GCC uses a special __libgcc_cmp_return__ mode to define the return type, so
+// check that we are ABI-compatible when compiling the builtins with GCC.
+typedef int GCC_CMP_RESULT __attribute__((__mode__(__libgcc_cmp_return__)));
+_Static_assert(sizeof(GCC_CMP_RESULT) == sizeof(CMP_RESULT),
+ "SOFTFP ABI not compatible with GCC");
+#endif
+
+enum {
+ LE_LESS = -1,
+ LE_EQUAL = 0,
+ LE_GREATER = 1,
+ LE_UNORDERED = 1,
+};
+
+static inline CMP_RESULT __leXf2__(fp_t a, fp_t b) {
+ const srep_t aInt = toRep(a);
+ const srep_t bInt = toRep(b);
+ const rep_t aAbs = aInt & absMask;
+ const rep_t bAbs = bInt & absMask;
+
+ // If either a or b is NaN, they are unordered.
+ if (aAbs > infRep || bAbs > infRep)
+ return LE_UNORDERED;
+
+ // If a and b are both zeros, they are equal.
+ if ((aAbs | bAbs) == 0)
+ return LE_EQUAL;
+
+ // If at least one of a and b is positive, we get the same result comparing
+ // a and b as signed integers as we would with a floating-point compare.
+ if ((aInt & bInt) >= 0) {
+ if (aInt < bInt)
+ return LE_LESS;
+ else if (aInt == bInt)
+ return LE_EQUAL;
+ else
+ return LE_GREATER;
+ } else {
+ // Otherwise, both are negative, so we need to flip the sense of the
+ // comparison to get the correct result. (This assumes a twos- or ones-
+ // complement integer representation; if integers are represented in a
+ // sign-magnitude representation, then this flip is incorrect).
+ if (aInt > bInt)
+ return LE_LESS;
+ else if (aInt == bInt)
+ return LE_EQUAL;
+ else
+ return LE_GREATER;
+ }
+}
+
+enum {
+ GE_LESS = -1,
+ GE_EQUAL = 0,
+ GE_GREATER = 1,
+ GE_UNORDERED = -1 // Note: different from LE_UNORDERED
+};
+
+static inline CMP_RESULT __geXf2__(fp_t a, fp_t b) {
+ const srep_t aInt = toRep(a);
+ const srep_t bInt = toRep(b);
+ const rep_t aAbs = aInt & absMask;
+ const rep_t bAbs = bInt & absMask;
+
+ if (aAbs > infRep || bAbs > infRep)
+ return GE_UNORDERED;
+ if ((aAbs | bAbs) == 0)
+ return GE_EQUAL;
+ if ((aInt & bInt) >= 0) {
+ if (aInt < bInt)
+ return GE_LESS;
+ else if (aInt == bInt)
+ return GE_EQUAL;
+ else
+ return GE_GREATER;
+ } else {
+ if (aInt > bInt)
+ return GE_LESS;
+ else if (aInt == bInt)
+ return GE_EQUAL;
+ else
+ return GE_GREATER;
+ }
+}
+
+static inline CMP_RESULT __unordXf2__(fp_t a, fp_t b) {
+ const rep_t aAbs = toRep(a) & absMask;
+ const rep_t bAbs = toRep(b) & absMask;
+ return aAbs > infRep || bAbs > infRep;
+}
diff --git a/lib/impl/fp_div_impl.inc b/lib/impl/fp_div_impl.inc
@@ -0,0 +1,419 @@
+//===-- fp_div_impl.inc - Floating point division -----------------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements soft-float division with the IEEE-754 default
+// rounding (to nearest, ties to even).
+//
+//===----------------------------------------------------------------------===//
+
+#include "fp_lib.h"
+
+// The __divXf3__ function implements Newton-Raphson floating point division.
+// It uses 3 iterations for float32, 4 for float64 and 5 for float128,
+// respectively. Due to number of significant bits being roughly doubled
+// every iteration, the two modes are supported: N full-width iterations (as
+// it is done for float32 by default) and (N-1) half-width iteration plus one
+// final full-width iteration. It is expected that half-width integer
+// operations (w.r.t rep_t size) can be performed faster for some hardware but
+// they require error estimations to be computed separately due to larger
+// computational errors caused by truncating intermediate results.
+
+// Half the bit-size of rep_t
+#define HW (typeWidth / 2)
+// rep_t-sized bitmask with lower half of bits set to ones
+#define loMask (REP_C(-1) >> HW)
+
+#if NUMBER_OF_FULL_ITERATIONS < 1
+#error At least one full iteration is required
+#endif
+
+static __inline fp_t __divXf3__(fp_t a, fp_t b) {
+
+ const unsigned int aExponent = toRep(a) >> significandBits & maxExponent;
+ const unsigned int bExponent = toRep(b) >> significandBits & maxExponent;
+ const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit;
+
+ rep_t aSignificand = toRep(a) & significandMask;
+ rep_t bSignificand = toRep(b) & significandMask;
+ int scale = 0;
+
+ // Detect if a or b is zero, denormal, infinity, or NaN.
+ if (aExponent - 1U >= maxExponent - 1U ||
+ bExponent - 1U >= maxExponent - 1U) {
+
+ const rep_t aAbs = toRep(a) & absMask;
+ const rep_t bAbs = toRep(b) & absMask;
+
+ // NaN / anything = qNaN
+ if (aAbs > infRep)
+ return fromRep(toRep(a) | quietBit);
+ // anything / NaN = qNaN
+ if (bAbs > infRep)
+ return fromRep(toRep(b) | quietBit);
+
+ if (aAbs == infRep) {
+ // infinity / infinity = NaN
+ if (bAbs == infRep)
+ return fromRep(qnanRep);
+ // infinity / anything else = +/- infinity
+ else
+ return fromRep(aAbs | quotientSign);
+ }
+
+ // anything else / infinity = +/- 0
+ if (bAbs == infRep)
+ return fromRep(quotientSign);
+
+ if (!aAbs) {
+ // zero / zero = NaN
+ if (!bAbs)
+ return fromRep(qnanRep);
+ // zero / anything else = +/- zero
+ else
+ return fromRep(quotientSign);
+ }
+ // anything else / zero = +/- infinity
+ if (!bAbs)
+ return fromRep(infRep | quotientSign);
+
+ // One or both of a or b is denormal. The other (if applicable) is a
+ // normal number. Renormalize one or both of a and b, and set scale to
+ // include the necessary exponent adjustment.
+ if (aAbs < implicitBit)
+ scale += normalize(&aSignificand);
+ if (bAbs < implicitBit)
+ scale -= normalize(&bSignificand);
+ }
+
+ // Set the implicit significand bit. If we fell through from the
+ // denormal path it was already set by normalize( ), but setting it twice
+ // won't hurt anything.
+ aSignificand |= implicitBit;
+ bSignificand |= implicitBit;
+
+ int writtenExponent = (aExponent - bExponent + scale) + exponentBias;
+
+ const rep_t b_UQ1 = bSignificand << (typeWidth - significandBits - 1);
+
+ // Align the significand of b as a UQ1.(n-1) fixed-point number in the range
+ // [1.0, 2.0) and get a UQ0.n approximate reciprocal using a small minimax
+ // polynomial approximation: x0 = 3/4 + 1/sqrt(2) - b/2.
+ // The max error for this approximation is achieved at endpoints, so
+ // abs(x0(b) - 1/b) <= abs(x0(1) - 1/1) = 3/4 - 1/sqrt(2) = 0.04289...,
+ // which is about 4.5 bits.
+ // The initial approximation is between x0(1.0) = 0.9571... and x0(2.0) = 0.4571...
+
+ // Then, refine the reciprocal estimate using a quadratically converging
+ // Newton-Raphson iteration:
+ // x_{n+1} = x_n * (2 - x_n * b)
+ //
+ // Let b be the original divisor considered "in infinite precision" and
+ // obtained from IEEE754 representation of function argument (with the
+ // implicit bit set). Corresponds to rep_t-sized b_UQ1 represented in
+ // UQ1.(W-1).
+ //
+ // Let b_hw be an infinitely precise number obtained from the highest (HW-1)
+ // bits of divisor significand (with the implicit bit set). Corresponds to
+ // half_rep_t-sized b_UQ1_hw represented in UQ1.(HW-1) that is a **truncated**
+ // version of b_UQ1.
+ //
+ // Let e_n := x_n - 1/b_hw
+ // E_n := x_n - 1/b
+ // abs(E_n) <= abs(e_n) + (1/b_hw - 1/b)
+ // = abs(e_n) + (b - b_hw) / (b*b_hw)
+ // <= abs(e_n) + 2 * 2^-HW
+
+ // rep_t-sized iterations may be slower than the corresponding half-width
+ // variant depending on the handware and whether single/double/quad precision
+ // is selected.
+ // NB: Using half-width iterations increases computation errors due to
+ // rounding, so error estimations have to be computed taking the selected
+ // mode into account!
+#if NUMBER_OF_HALF_ITERATIONS > 0
+ // Starting with (n-1) half-width iterations
+ const half_rep_t b_UQ1_hw = bSignificand >> (significandBits + 1 - HW);
+
+ // C is (3/4 + 1/sqrt(2)) - 1 truncated to W0 fractional bits as UQ0.HW
+ // with W0 being either 16 or 32 and W0 <= HW.
+ // That is, C is the aforementioned 3/4 + 1/sqrt(2) constant (from which
+ // b/2 is subtracted to obtain x0) wrapped to [0, 1) range.
+#if defined(SINGLE_PRECISION)
+ // Use 16-bit initial estimation in case we are using half-width iterations
+ // for float32 division. This is expected to be useful for some 16-bit
+ // targets. Not used by default as it requires performing more work during
+ // rounding and would hardly help on regular 32- or 64-bit targets.
+ const half_rep_t C_hw = HALF_REP_C(0x7504);
+#else
+ // HW is at least 32. Shifting into the highest bits if needed.
+ const half_rep_t C_hw = HALF_REP_C(0x7504F333) << (HW - 32);
+#endif
+
+ // b >= 1, thus an upper bound for 3/4 + 1/sqrt(2) - b/2 is about 0.9572,
+ // so x0 fits to UQ0.HW without wrapping.
+ half_rep_t x_UQ0_hw = C_hw - (b_UQ1_hw /* exact b_hw/2 as UQ0.HW */);
+ // An e_0 error is comprised of errors due to
+ // * x0 being an inherently imprecise first approximation of 1/b_hw
+ // * C_hw being some (irrational) number **truncated** to W0 bits
+ // Please note that e_0 is calculated against the infinitely precise
+ // reciprocal of b_hw (that is, **truncated** version of b).
+ //
+ // e_0 <= 3/4 - 1/sqrt(2) + 2^-W0
+
+ // By construction, 1 <= b < 2
+ // f(x) = x * (2 - b*x) = 2*x - b*x^2
+ // f'(x) = 2 * (1 - b*x)
+ //
+ // On the [0, 1] interval, f(0) = 0,
+ // then it increses until f(1/b) = 1 / b, maximum on (0, 1),
+ // then it decreses to f(1) = 2 - b
+ //
+ // Let g(x) = x - f(x) = b*x^2 - x.
+ // On (0, 1/b), g(x) < 0 <=> f(x) > x
+ // On (1/b, 1], g(x) > 0 <=> f(x) < x
+ //
+ // For half-width iterations, b_hw is used instead of b.
+ REPEAT_N_TIMES(NUMBER_OF_HALF_ITERATIONS, {
+ // corr_UQ1_hw can be **larger** than 2 - b_hw*x by at most 1*Ulp
+ // of corr_UQ1_hw.
+ // "0.0 - (...)" is equivalent to "2.0 - (...)" in UQ1.(HW-1).
+ // On the other hand, corr_UQ1_hw should not overflow from 2.0 to 0.0 provided
+ // no overflow occurred earlier: ((rep_t)x_UQ0_hw * b_UQ1_hw >> HW) is
+ // expected to be strictly positive because b_UQ1_hw has its highest bit set
+ // and x_UQ0_hw should be rather large (it converges to 1/2 < 1/b_hw <= 1).
+ half_rep_t corr_UQ1_hw = 0 - ((rep_t)x_UQ0_hw * b_UQ1_hw >> HW);
+
+ // Now, we should multiply UQ0.HW and UQ1.(HW-1) numbers, naturally
+ // obtaining an UQ1.(HW-1) number and proving its highest bit could be
+ // considered to be 0 to be able to represent it in UQ0.HW.
+ // From the above analysis of f(x), if corr_UQ1_hw would be represented
+ // without any intermediate loss of precision (that is, in twice_rep_t)
+ // x_UQ0_hw could be at most [1.]000... if b_hw is exactly 1.0 and strictly
+ // less otherwise. On the other hand, to obtain [1.]000..., one have to pass
+ // 1/b_hw == 1.0 to f(x), so this cannot occur at all without overflow (due
+ // to 1.0 being not representable as UQ0.HW).
+ // The fact corr_UQ1_hw was virtually round up (due to result of
+ // multiplication being **first** truncated, then negated - to improve
+ // error estimations) can increase x_UQ0_hw by up to 2*Ulp of x_UQ0_hw.
+ x_UQ0_hw = (rep_t)x_UQ0_hw * corr_UQ1_hw >> (HW - 1);
+ // Now, either no overflow occurred or x_UQ0_hw is 0 or 1 in its half_rep_t
+ // representation. In the latter case, x_UQ0_hw will be either 0 or 1 after
+ // any number of iterations, so just subtract 2 from the reciprocal
+ // approximation after last iteration.
+
+ // In infinite precision, with 0 <= eps1, eps2 <= U = 2^-HW:
+ // corr_UQ1_hw = 2 - (1/b_hw + e_n) * b_hw + 2*eps1
+ // = 1 - e_n * b_hw + 2*eps1
+ // x_UQ0_hw = (1/b_hw + e_n) * (1 - e_n*b_hw + 2*eps1) - eps2
+ // = 1/b_hw - e_n + 2*eps1/b_hw + e_n - e_n^2*b_hw + 2*e_n*eps1 - eps2
+ // = 1/b_hw + 2*eps1/b_hw - e_n^2*b_hw + 2*e_n*eps1 - eps2
+ // e_{n+1} = -e_n^2*b_hw + 2*eps1/b_hw + 2*e_n*eps1 - eps2
+ // = 2*e_n*eps1 - (e_n^2*b_hw + eps2) + 2*eps1/b_hw
+ // \------ >0 -------/ \-- >0 ---/
+ // abs(e_{n+1}) <= 2*abs(e_n)*U + max(2*e_n^2 + U, 2 * U)
+ })
+ // For initial half-width iterations, U = 2^-HW
+ // Let abs(e_n) <= u_n * U,
+ // then abs(e_{n+1}) <= 2 * u_n * U^2 + max(2 * u_n^2 * U^2 + U, 2 * U)
+ // u_{n+1} <= 2 * u_n * U + max(2 * u_n^2 * U + 1, 2)
+
+ // Account for possible overflow (see above). For an overflow to occur for the
+ // first time, for "ideal" corr_UQ1_hw (that is, without intermediate
+ // truncation), the result of x_UQ0_hw * corr_UQ1_hw should be either maximum
+ // value representable in UQ0.HW or less by 1. This means that 1/b_hw have to
+ // be not below that value (see g(x) above), so it is safe to decrement just
+ // once after the final iteration. On the other hand, an effective value of
+ // divisor changes after this point (from b_hw to b), so adjust here.
+ x_UQ0_hw -= 1U;
+ rep_t x_UQ0 = (rep_t)x_UQ0_hw << HW;
+ x_UQ0 -= 1U;
+
+#else
+ // C is (3/4 + 1/sqrt(2)) - 1 truncated to 32 fractional bits as UQ0.n
+ const rep_t C = REP_C(0x7504F333) << (typeWidth - 32);
+ rep_t x_UQ0 = C - b_UQ1;
+ // E_0 <= 3/4 - 1/sqrt(2) + 2 * 2^-32
+#endif
+
+ // Error estimations for full-precision iterations are calculated just
+ // as above, but with U := 2^-W and taking extra decrementing into account.
+ // We need at least one such iteration.
+
+#ifdef USE_NATIVE_FULL_ITERATIONS
+ REPEAT_N_TIMES(NUMBER_OF_FULL_ITERATIONS, {
+ rep_t corr_UQ1 = 0 - ((twice_rep_t)x_UQ0 * b_UQ1 >> typeWidth);
+ x_UQ0 = (twice_rep_t)x_UQ0 * corr_UQ1 >> (typeWidth - 1);
+ })
+#else
+#if NUMBER_OF_FULL_ITERATIONS != 1
+#error Only a single emulated full iteration is supported
+#endif
+#if !(NUMBER_OF_HALF_ITERATIONS > 0)
+ // Cannot normally reach here: only one full-width iteration is requested and
+ // the total number of iterations should be at least 3 even for float32.
+#error Check NUMBER_OF_HALF_ITERATIONS, NUMBER_OF_FULL_ITERATIONS and USE_NATIVE_FULL_ITERATIONS.
+#endif
+ // Simulating operations on a twice_rep_t to perform a single final full-width
+ // iteration. Using ad-hoc multiplication implementations to take advantage
+ // of particular structure of operands.
+ rep_t blo = b_UQ1 & loMask;
+ // x_UQ0 = x_UQ0_hw * 2^HW - 1
+ // x_UQ0 * b_UQ1 = (x_UQ0_hw * 2^HW) * (b_UQ1_hw * 2^HW + blo) - b_UQ1
+ //
+ // <--- higher half ---><--- lower half --->
+ // [x_UQ0_hw * b_UQ1_hw]
+ // + [ x_UQ0_hw * blo ]
+ // - [ b_UQ1 ]
+ // = [ result ][.... discarded ...]
+ rep_t corr_UQ1 = 0U - ( (rep_t)x_UQ0_hw * b_UQ1_hw
+ + ((rep_t)x_UQ0_hw * blo >> HW)
+ - REP_C(1)); // account for *possible* carry
+ rep_t lo_corr = corr_UQ1 & loMask;
+ rep_t hi_corr = corr_UQ1 >> HW;
+ // x_UQ0 * corr_UQ1 = (x_UQ0_hw * 2^HW) * (hi_corr * 2^HW + lo_corr) - corr_UQ1
+ x_UQ0 = ((rep_t)x_UQ0_hw * hi_corr << 1)
+ + ((rep_t)x_UQ0_hw * lo_corr >> (HW - 1))
+ - REP_C(2); // 1 to account for the highest bit of corr_UQ1 can be 1
+ // 1 to account for possible carry
+ // Just like the case of half-width iterations but with possibility
+ // of overflowing by one extra Ulp of x_UQ0.
+ x_UQ0 -= 1U;
+ // ... and then traditional fixup by 2 should work
+
+ // On error estimation:
+ // abs(E_{N-1}) <= (u_{N-1} + 2 /* due to conversion e_n -> E_n */) * 2^-HW
+ // + (2^-HW + 2^-W))
+ // abs(E_{N-1}) <= (u_{N-1} + 3.01) * 2^-HW
+
+ // Then like for the half-width iterations:
+ // With 0 <= eps1, eps2 < 2^-W
+ // E_N = 4 * E_{N-1} * eps1 - (E_{N-1}^2 * b + 4 * eps2) + 4 * eps1 / b
+ // abs(E_N) <= 2^-W * [ 4 * abs(E_{N-1}) + max(2 * abs(E_{N-1})^2 * 2^W + 4, 8)) ]
+ // abs(E_N) <= 2^-W * [ 4 * (u_{N-1} + 3.01) * 2^-HW + max(4 + 2 * (u_{N-1} + 3.01)^2, 8) ]
+#endif
+
+ // Finally, account for possible overflow, as explained above.
+ x_UQ0 -= 2U;
+
+ // u_n for different precisions (with N-1 half-width iterations):
+ // W0 is the precision of C
+ // u_0 = (3/4 - 1/sqrt(2) + 2^-W0) * 2^HW
+
+ // Estimated with bc:
+ // define half1(un) { return 2.0 * (un + un^2) / 2.0^hw + 1.0; }
+ // define half2(un) { return 2.0 * un / 2.0^hw + 2.0; }
+ // define full1(un) { return 4.0 * (un + 3.01) / 2.0^hw + 2.0 * (un + 3.01)^2 + 4.0; }
+ // define full2(un) { return 4.0 * (un + 3.01) / 2.0^hw + 8.0; }
+
+ // | f32 (0 + 3) | f32 (2 + 1) | f64 (3 + 1) | f128 (4 + 1)
+ // u_0 | < 184224974 | < 2812.1 | < 184224974 | < 791240234244348797
+ // u_1 | < 15804007 | < 242.7 | < 15804007 | < 67877681371350440
+ // u_2 | < 116308 | < 2.81 | < 116308 | < 499533100252317
+ // u_3 | < 7.31 | | < 7.31 | < 27054456580
+ // u_4 | | | | < 80.4
+ // Final (U_N) | same as u_3 | < 72 | < 218 | < 13920
+
+ // Add 2 to U_N due to final decrement.
+
+#if defined(SINGLE_PRECISION) && NUMBER_OF_HALF_ITERATIONS == 2 && NUMBER_OF_FULL_ITERATIONS == 1
+#define RECIPROCAL_PRECISION REP_C(74)
+#elif defined(SINGLE_PRECISION) && NUMBER_OF_HALF_ITERATIONS == 0 && NUMBER_OF_FULL_ITERATIONS == 3
+#define RECIPROCAL_PRECISION REP_C(10)
+#elif defined(DOUBLE_PRECISION) && NUMBER_OF_HALF_ITERATIONS == 3 && NUMBER_OF_FULL_ITERATIONS == 1
+#define RECIPROCAL_PRECISION REP_C(220)
+#elif defined(QUAD_PRECISION) && NUMBER_OF_HALF_ITERATIONS == 4 && NUMBER_OF_FULL_ITERATIONS == 1
+#define RECIPROCAL_PRECISION REP_C(13922)
+#else
+#error Invalid number of iterations
+#endif
+
+ // Suppose 1/b - P * 2^-W < x < 1/b + P * 2^-W
+ x_UQ0 -= RECIPROCAL_PRECISION;
+ // Now 1/b - (2*P) * 2^-W < x < 1/b
+ // FIXME Is x_UQ0 still >= 0.5?
+
+ rep_t quotient_UQ1, dummy;
+ wideMultiply(x_UQ0, aSignificand << 1, "ient_UQ1, &dummy);
+ // Now, a/b - 4*P * 2^-W < q < a/b for q=<quotient_UQ1:dummy> in UQ1.(SB+1+W).
+
+ // quotient_UQ1 is in [0.5, 2.0) as UQ1.(SB+1),
+ // adjust it to be in [1.0, 2.0) as UQ1.SB.
+ rep_t residualLo;
+ if (quotient_UQ1 < (implicitBit << 1)) {
+ // Highest bit is 0, so just reinterpret quotient_UQ1 as UQ1.SB,
+ // effectively doubling its value as well as its error estimation.
+ residualLo = (aSignificand << (significandBits + 1)) - quotient_UQ1 * bSignificand;
+ writtenExponent -= 1;
+ aSignificand <<= 1;
+ } else {
+ // Highest bit is 1 (the UQ1.(SB+1) value is in [1, 2)), convert it
+ // to UQ1.SB by right shifting by 1. Least significant bit is omitted.
+ quotient_UQ1 >>= 1;
+ residualLo = (aSignificand << significandBits) - quotient_UQ1 * bSignificand;
+ }
+ // NB: residualLo is calculated above for the normal result case.
+ // It is re-computed on denormal path that is expected to be not so
+ // performance-sensitive.
+
+ // Now, q cannot be greater than a/b and can differ by at most 8*P * 2^-W + 2^-SB
+ // Each NextAfter() increments the floating point value by at least 2^-SB
+ // (more, if exponent was incremented).
+ // Different cases (<---> is of 2^-SB length, * = a/b that is shown as a midpoint):
+ // q
+ // | | * | | | | |
+ // <---> 2^t
+ // | | | | | * | |
+ // q
+ // To require at most one NextAfter(), an error should be less than 1.5 * 2^-SB.
+ // (8*P) * 2^-W + 2^-SB < 1.5 * 2^-SB
+ // (8*P) * 2^-W < 0.5 * 2^-SB
+ // P < 2^(W-4-SB)
+ // Generally, for at most R NextAfter() to be enough,
+ // P < (2*R - 1) * 2^(W-4-SB)
+ // For f32 (0+3): 10 < 32 (OK)
+ // For f32 (2+1): 32 < 74 < 32 * 3, so two NextAfter() are required
+ // For f64: 220 < 256 (OK)
+ // For f128: 4096 * 3 < 13922 < 4096 * 5 (three NextAfter() are required)
+
+ // If we have overflowed the exponent, return infinity
+ if (writtenExponent >= maxExponent)
+ return fromRep(infRep | quotientSign);
+
+ // Now, quotient_UQ1_SB <= the correctly-rounded result
+ // and may need taking NextAfter() up to 3 times (see error estimates above)
+ // r = a - b * q
+ rep_t absResult;
+ if (writtenExponent > 0) {
+ // Clear the implicit bit
+ absResult = quotient_UQ1 & significandMask;
+ // Insert the exponent
+ absResult |= (rep_t)writtenExponent << significandBits;
+ residualLo <<= 1;
+ } else {
+ // Prevent shift amount from being negative
+ if (significandBits + writtenExponent < 0)
+ return fromRep(quotientSign);
+
+ absResult = quotient_UQ1 >> (-writtenExponent + 1);
+
+ // multiplied by two to prevent shift amount to be negative
+ residualLo = (aSignificand << (significandBits + writtenExponent)) - (absResult * bSignificand << 1);
+ }
+
+ // Round
+ residualLo += absResult & 1; // tie to even
+ // The above line conditionally turns the below LT comparison into LTE
+ absResult += residualLo > bSignificand;
+#if defined(QUAD_PRECISION) || (defined(SINGLE_PRECISION) && NUMBER_OF_HALF_ITERATIONS > 0)
+ // Do not round Infinity to NaN
+ absResult += absResult < infRep && residualLo > (2 + 1) * bSignificand;
+#endif
+#if defined(QUAD_PRECISION)
+ absResult += absResult < infRep && residualLo > (4 + 1) * bSignificand;
+#endif
+ return fromRep(absResult | quotientSign);
+}
diff --git a/lib/impl/fp_extend.h b/lib/impl/fp_extend.h
@@ -0,0 +1,174 @@
+//===-lib/fp_extend.h - low precision -> high precision conversion -*- C
+//-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Set source and destination setting
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FP_EXTEND_HEADER
+#define FP_EXTEND_HEADER
+
+#include "int_lib.h"
+
+#if defined SRC_SINGLE
+typedef float src_t;
+typedef uint32_t src_rep_t;
+#define SRC_REP_C UINT32_C
+static const int srcBits = sizeof(src_t) * CHAR_BIT;
+static const int srcSigFracBits = 23;
+// -1 accounts for the sign bit.
+// srcBits - srcSigFracBits - 1
+static const int srcExpBits = 8;
+#define src_rep_t_clz clzsi
+
+#elif defined SRC_DOUBLE
+typedef double src_t;
+typedef uint64_t src_rep_t;
+#define SRC_REP_C UINT64_C
+static const int srcBits = sizeof(src_t) * CHAR_BIT;
+static const int srcSigFracBits = 52;
+// -1 accounts for the sign bit.
+// srcBits - srcSigFracBits - 1
+static const int srcExpBits = 11;
+
+static inline int src_rep_t_clz_impl(src_rep_t a) {
+#if defined __LP64__
+ return __builtin_clzl(a);
+#else
+ if (a & REP_C(0xffffffff00000000))
+ return clzsi(a >> 32);
+ else
+ return 32 + clzsi(a & REP_C(0xffffffff));
+#endif
+}
+#define src_rep_t_clz src_rep_t_clz_impl
+
+#elif defined SRC_80
+typedef xf_float src_t;
+typedef __uint128_t src_rep_t;
+#define SRC_REP_C (__uint128_t)
+// sign bit, exponent and significand occupy the lower 80 bits.
+static const int srcBits = 80;
+static const int srcSigFracBits = 63;
+// -1 accounts for the sign bit.
+// -1 accounts for the explicitly stored integer bit.
+// srcBits - srcSigFracBits - 1 - 1
+static const int srcExpBits = 15;
+
+#elif defined SRC_HALF
+#ifdef COMPILER_RT_HAS_FLOAT16
+typedef _Float16 src_t;
+#else
+typedef uint16_t src_t;
+#endif
+typedef uint16_t src_rep_t;
+#define SRC_REP_C UINT16_C
+static const int srcBits = sizeof(src_t) * CHAR_BIT;
+static const int srcSigFracBits = 10;
+// -1 accounts for the sign bit.
+// srcBits - srcSigFracBits - 1
+static const int srcExpBits = 5;
+
+static inline int src_rep_t_clz_impl(src_rep_t a) {
+ return __builtin_clz(a) - 16;
+}
+
+#define src_rep_t_clz src_rep_t_clz_impl
+
+#else
+#error Source should be half, single, or double precision!
+#endif // end source precision
+
+#if defined DST_SINGLE
+typedef float dst_t;
+typedef uint32_t dst_rep_t;
+#define DST_REP_C UINT32_C
+static const int dstBits = sizeof(dst_t) * CHAR_BIT;
+static const int dstSigFracBits = 23;
+// -1 accounts for the sign bit.
+// dstBits - dstSigFracBits - 1
+static const int dstExpBits = 8;
+
+#elif defined DST_DOUBLE
+typedef double dst_t;
+typedef uint64_t dst_rep_t;
+#define DST_REP_C UINT64_C
+static const int dstBits = sizeof(dst_t) * CHAR_BIT;
+static const int dstSigFracBits = 52;
+// -1 accounts for the sign bit.
+// dstBits - dstSigFracBits - 1
+static const int dstExpBits = 11;
+
+#elif defined DST_QUAD
+typedef tf_float dst_t;
+typedef __uint128_t dst_rep_t;
+#define DST_REP_C (__uint128_t)
+static const int dstBits = sizeof(dst_t) * CHAR_BIT;
+static const int dstSigFracBits = 112;
+// -1 accounts for the sign bit.
+// dstBits - dstSigFracBits - 1
+static const int dstExpBits = 15;
+
+#else
+#error Destination should be single, double, or quad precision!
+#endif // end destination precision
+
+// End of specialization parameters.
+
+// TODO: These helper routines should be placed into fp_lib.h
+// Currently they depend on macros/constants defined above.
+
+static inline src_rep_t extract_sign_from_src(src_rep_t x) {
+ const src_rep_t srcSignMask = SRC_REP_C(1) << (srcBits - 1);
+ return (x & srcSignMask) >> (srcBits - 1);
+}
+
+static inline src_rep_t extract_exp_from_src(src_rep_t x) {
+ const int srcSigBits = srcBits - 1 - srcExpBits;
+ const src_rep_t srcExpMask = ((SRC_REP_C(1) << srcExpBits) - 1) << srcSigBits;
+ return (x & srcExpMask) >> srcSigBits;
+}
+
+static inline src_rep_t extract_sig_frac_from_src(src_rep_t x) {
+ const src_rep_t srcSigFracMask = (SRC_REP_C(1) << srcSigFracBits) - 1;
+ return x & srcSigFracMask;
+}
+
+#ifdef src_rep_t_clz
+static inline int clz_in_sig_frac(src_rep_t sigFrac) {
+ const int skip = 1 + srcExpBits;
+ return src_rep_t_clz(sigFrac) - skip;
+}
+#endif
+
+static inline dst_rep_t construct_dst_rep(dst_rep_t sign, dst_rep_t exp, dst_rep_t sigFrac) {
+ return (sign << (dstBits - 1)) | (exp << (dstBits - 1 - dstExpBits)) | sigFrac;
+}
+
+// Two helper routines for conversion to and from the representation of
+// floating-point data as integer values follow.
+
+static inline src_rep_t srcToRep(src_t x) {
+ const union {
+ src_t f;
+ src_rep_t i;
+ } rep = {.f = x};
+ return rep.i;
+}
+
+static inline dst_t dstFromRep(dst_rep_t x) {
+ const union {
+ dst_t f;
+ dst_rep_t i;
+ } rep = {.i = x};
+ return rep.f;
+}
+// End helper routines. Conversion implementation follows.
+
+#endif // FP_EXTEND_HEADER
diff --git a/lib/impl/fp_extend_impl.inc b/lib/impl/fp_extend_impl.inc
@@ -0,0 +1,108 @@
+//=-lib/fp_extend_impl.inc - low precision -> high precision conversion -*-- -//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a fairly generic conversion from a narrower to a wider
+// IEEE-754 floating-point type. The constants and types defined following the
+// includes below parameterize the conversion.
+//
+// It does not support types that don't use the usual IEEE-754 interchange
+// formats; specifically, some work would be needed to adapt it to
+// (for example) the Intel 80-bit format or PowerPC double-double format.
+//
+// Note please, however, that this implementation is only intended to support
+// *widening* operations; if you need to convert to a *narrower* floating-point
+// type (e.g. double -> float), then this routine will not do what you want it
+// to.
+//
+// It also requires that integer types at least as large as both formats
+// are available on the target platform; this may pose a problem when trying
+// to add support for quad on some 32-bit systems, for example. You also may
+// run into trouble finding an appropriate CLZ function for wide source types;
+// you will likely need to roll your own on some platforms.
+//
+// Finally, the following assumptions are made:
+//
+// 1. Floating-point types and integer types have the same endianness on the
+// target platform.
+//
+// 2. Quiet NaNs, if supported, are indicated by the leading bit of the
+// significand field being set.
+//
+//===----------------------------------------------------------------------===//
+
+#include "fp_extend.h"
+
+// The source type may use a usual IEEE-754 interchange format or Intel 80-bit
+// format. In particular, for the source type srcSigFracBits may be not equal to
+// srcSigBits. The destination type is assumed to be one of IEEE-754 standard
+// types.
+static __inline dst_t __extendXfYf2__(src_t a) {
+ // Various constants whose values follow from the type parameters.
+ // Any reasonable optimizer will fold and propagate all of these.
+ const int srcInfExp = (1 << srcExpBits) - 1;
+ const int srcExpBias = srcInfExp >> 1;
+
+ const int dstInfExp = (1 << dstExpBits) - 1;
+ const int dstExpBias = dstInfExp >> 1;
+
+ // Break a into a sign and representation of the absolute value.
+ const src_rep_t aRep = srcToRep(a);
+ const src_rep_t srcSign = extract_sign_from_src(aRep);
+ const src_rep_t srcExp = extract_exp_from_src(aRep);
+ const src_rep_t srcSigFrac = extract_sig_frac_from_src(aRep);
+
+ dst_rep_t dstSign = srcSign;
+ dst_rep_t dstExp;
+ dst_rep_t dstSigFrac;
+
+ if (srcExp >= 1 && srcExp < (src_rep_t)srcInfExp) {
+ // a is a normal number.
+ dstExp = (dst_rep_t)srcExp + (dst_rep_t)(dstExpBias - srcExpBias);
+ dstSigFrac = (dst_rep_t)srcSigFrac << (dstSigFracBits - srcSigFracBits);
+ }
+
+ else if (srcExp == srcInfExp) {
+ // a is NaN or infinity.
+ dstExp = dstInfExp;
+ dstSigFrac = (dst_rep_t)srcSigFrac << (dstSigFracBits - srcSigFracBits);
+ }
+
+ else if (srcSigFrac) {
+ // a is denormal.
+ if (srcExpBits == dstExpBits) {
+ // The exponent fields are identical and this is a denormal number, so all
+ // the non-significand bits are zero. In particular, this branch is always
+ // taken when we extend a denormal F80 to F128.
+ dstExp = 0;
+ dstSigFrac = ((dst_rep_t)srcSigFrac) << (dstSigFracBits - srcSigFracBits);
+ } else {
+#ifndef src_rep_t_clz
+ // If src_rep_t_clz is not defined this branch must be unreachable.
+ __builtin_unreachable();
+#else
+ // Renormalize the significand and clear the leading bit.
+ // For F80 -> F128 this codepath is unused.
+ const int scale = clz_in_sig_frac(srcSigFrac) + 1;
+ dstExp = dstExpBias - srcExpBias - scale + 1;
+ dstSigFrac = (dst_rep_t)srcSigFrac
+ << (dstSigFracBits - srcSigFracBits + scale);
+ const dst_rep_t dstMinNormal = DST_REP_C(1) << (dstBits - 1 - dstExpBits);
+ dstSigFrac ^= dstMinNormal;
+#endif
+ }
+ }
+
+ else {
+ // a is zero.
+ dstExp = 0;
+ dstSigFrac = 0;
+ }
+
+ const dst_rep_t result = construct_dst_rep(dstSign, dstExp, dstSigFrac);
+ return dstFromRep(result);
+}
diff --git a/lib/impl/fp_fixint_impl.inc b/lib/impl/fp_fixint_impl.inc
@@ -0,0 +1,40 @@
+//===-- lib/fixdfsi.c - Double-precision -> integer conversion ----*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements float to integer conversion for the
+// compiler-rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "fp_lib.h"
+
+static __inline fixint_t __fixint(fp_t a) {
+ const fixint_t fixint_max = (fixint_t)((~(fixuint_t)0) / 2);
+ const fixint_t fixint_min = -fixint_max - 1;
+ // Break a into sign, exponent, significand parts.
+ const rep_t aRep = toRep(a);
+ const rep_t aAbs = aRep & absMask;
+ const fixint_t sign = aRep & signBit ? -1 : 1;
+ const int exponent = (aAbs >> significandBits) - exponentBias;
+ const rep_t significand = (aAbs & significandMask) | implicitBit;
+
+ // If exponent is negative, the result is zero.
+ if (exponent < 0)
+ return 0;
+
+ // If the value is too large for the integer type, saturate.
+ if ((unsigned)exponent >= sizeof(fixint_t) * CHAR_BIT)
+ return sign == 1 ? fixint_max : fixint_min;
+
+ // If 0 <= exponent < significandBits, right shift to get the result.
+ // Otherwise, shift left.
+ if (exponent < significandBits)
+ return sign * (significand >> (significandBits - exponent));
+ else
+ return sign * ((fixuint_t)significand << (exponent - significandBits));
+}
diff --git a/lib/impl/fp_fixuint_impl.inc b/lib/impl/fp_fixuint_impl.inc
@@ -0,0 +1,38 @@
+//===-- lib/fixdfsi.c - Double-precision -> integer conversion ----*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements float to unsigned integer conversion for the
+// compiler-rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "fp_lib.h"
+
+static __inline fixuint_t __fixuint(fp_t a) {
+ // Break a into sign, exponent, significand parts.
+ const rep_t aRep = toRep(a);
+ const rep_t aAbs = aRep & absMask;
+ const int sign = aRep & signBit ? -1 : 1;
+ const int exponent = (aAbs >> significandBits) - exponentBias;
+ const rep_t significand = (aAbs & significandMask) | implicitBit;
+
+ // If either the value or the exponent is negative, the result is zero.
+ if (sign == -1 || exponent < 0)
+ return 0;
+
+ // If the value is too large for the integer type, saturate.
+ if ((unsigned)exponent >= sizeof(fixuint_t) * CHAR_BIT)
+ return ~(fixuint_t)0;
+
+ // If 0 <= exponent < significandBits, right shift to get the result.
+ // Otherwise, shift left.
+ if (exponent < significandBits)
+ return significand >> (significandBits - exponent);
+ else
+ return (fixuint_t)significand << (exponent - significandBits);
+}
diff --git a/lib/impl/fp_mul_impl.inc b/lib/impl/fp_mul_impl.inc
@@ -0,0 +1,128 @@
+//===---- lib/fp_mul_impl.inc - floating point multiplication -----*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements soft-float multiplication with the IEEE-754 default
+// rounding (to nearest, ties to even).
+//
+//===----------------------------------------------------------------------===//
+
+#include "fp_lib.h"
+
+static __inline fp_t __mulXf3__(fp_t a, fp_t b) {
+ const unsigned int aExponent = toRep(a) >> significandBits & maxExponent;
+ const unsigned int bExponent = toRep(b) >> significandBits & maxExponent;
+ const rep_t productSign = (toRep(a) ^ toRep(b)) & signBit;
+
+ rep_t aSignificand = toRep(a) & significandMask;
+ rep_t bSignificand = toRep(b) & significandMask;
+ int scale = 0;
+
+ // Detect if a or b is zero, denormal, infinity, or NaN.
+ if (aExponent - 1U >= maxExponent - 1U ||
+ bExponent - 1U >= maxExponent - 1U) {
+
+ const rep_t aAbs = toRep(a) & absMask;
+ const rep_t bAbs = toRep(b) & absMask;
+
+ // NaN * anything = qNaN
+ if (aAbs > infRep)
+ return fromRep(toRep(a) | quietBit);
+ // anything * NaN = qNaN
+ if (bAbs > infRep)
+ return fromRep(toRep(b) | quietBit);
+
+ if (aAbs == infRep) {
+ // infinity * non-zero = +/- infinity
+ if (bAbs)
+ return fromRep(aAbs | productSign);
+ // infinity * zero = NaN
+ else
+ return fromRep(qnanRep);
+ }
+
+ if (bAbs == infRep) {
+ // non-zero * infinity = +/- infinity
+ if (aAbs)
+ return fromRep(bAbs | productSign);
+ // zero * infinity = NaN
+ else
+ return fromRep(qnanRep);
+ }
+
+ // zero * anything = +/- zero
+ if (!aAbs)
+ return fromRep(productSign);
+ // anything * zero = +/- zero
+ if (!bAbs)
+ return fromRep(productSign);
+
+ // One or both of a or b is denormal. The other (if applicable) is a
+ // normal number. Renormalize one or both of a and b, and set scale to
+ // include the necessary exponent adjustment.
+ if (aAbs < implicitBit)
+ scale += normalize(&aSignificand);
+ if (bAbs < implicitBit)
+ scale += normalize(&bSignificand);
+ }
+
+ // Set the implicit significand bit. If we fell through from the
+ // denormal path it was already set by normalize( ), but setting it twice
+ // won't hurt anything.
+ aSignificand |= implicitBit;
+ bSignificand |= implicitBit;
+
+ // Perform a basic multiplication on the significands. One of them must be
+ // shifted beforehand to be aligned with the exponent.
+ rep_t productHi, productLo;
+ wideMultiply(aSignificand, bSignificand << exponentBits, &productHi,
+ &productLo);
+
+ int productExponent = aExponent + bExponent - exponentBias + scale;
+
+ // Normalize the significand and adjust the exponent if needed.
+ if (productHi & implicitBit)
+ productExponent++;
+ else
+ wideLeftShift(&productHi, &productLo, 1);
+
+ // If we have overflowed the type, return +/- infinity.
+ if (productExponent >= maxExponent)
+ return fromRep(infRep | productSign);
+
+ if (productExponent <= 0) {
+ // The result is denormal before rounding.
+ //
+ // If the result is so small that it just underflows to zero, return
+ // zero with the appropriate sign. Mathematically, there is no need to
+ // handle this case separately, but we make it a special case to
+ // simplify the shift logic.
+ const unsigned int shift = REP_C(1) - (unsigned int)productExponent;
+ if (shift >= typeWidth)
+ return fromRep(productSign);
+
+ // Otherwise, shift the significand of the result so that the round
+ // bit is the high bit of productLo.
+ wideRightShiftWithSticky(&productHi, &productLo, shift);
+ } else {
+ // The result is normal before rounding. Insert the exponent.
+ productHi &= significandMask;
+ productHi |= (rep_t)productExponent << significandBits;
+ }
+
+ // Insert the sign of the result.
+ productHi |= productSign;
+
+ // Perform the final rounding. The final result may overflow to infinity,
+ // or underflow to zero, but those are the correct results in those cases.
+ // We use the default IEEE-754 round-to-nearest, ties-to-even rounding mode.
+ if (productLo > signBit)
+ productHi++;
+ if (productLo == signBit)
+ productHi += productHi & 1;
+ return fromRep(productHi);
+}
diff --git a/lib/impl/fp_trunc.h b/lib/impl/fp_trunc.h
@@ -0,0 +1,158 @@
+//=== lib/fp_trunc.h - high precision -> low precision conversion *- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Set source and destination precision setting
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FP_TRUNC_HEADER
+#define FP_TRUNC_HEADER
+
+#include "int_lib.h"
+
+#if defined SRC_SINGLE
+typedef float src_t;
+typedef uint32_t src_rep_t;
+#define SRC_REP_C UINT32_C
+static const int srcBits = sizeof(src_t) * CHAR_BIT;
+static const int srcSigFracBits = 23;
+// -1 accounts for the sign bit.
+// srcBits - srcSigFracBits - 1
+static const int srcExpBits = 8;
+
+#elif defined SRC_DOUBLE
+typedef double src_t;
+typedef uint64_t src_rep_t;
+#define SRC_REP_C UINT64_C
+static const int srcBits = sizeof(src_t) * CHAR_BIT;
+static const int srcSigFracBits = 52;
+// -1 accounts for the sign bit.
+// srcBits - srcSigFracBits - 1
+static const int srcExpBits = 11;
+
+#elif defined SRC_QUAD
+typedef tf_float src_t;
+typedef __uint128_t src_rep_t;
+#define SRC_REP_C (__uint128_t)
+static const int srcBits = sizeof(src_t) * CHAR_BIT;
+static const int srcSigFracBits = 112;
+// -1 accounts for the sign bit.
+// srcBits - srcSigFracBits - 1
+static const int srcExpBits = 15;
+
+#else
+#error Source should be double precision or quad precision!
+#endif // end source precision
+
+#if defined DST_DOUBLE
+typedef double dst_t;
+typedef uint64_t dst_rep_t;
+#define DST_REP_C UINT64_C
+static const int dstBits = sizeof(dst_t) * CHAR_BIT;
+static const int dstSigFracBits = 52;
+// -1 accounts for the sign bit.
+// dstBits - dstSigFracBits - 1
+static const int dstExpBits = 11;
+
+#elif defined DST_80
+typedef xf_float dst_t;
+typedef __uint128_t dst_rep_t;
+#define DST_REP_C (__uint128_t)
+static const int dstBits = 80;
+static const int dstSigFracBits = 63;
+// -1 accounts for the sign bit.
+// -1 accounts for the explicitly stored integer bit.
+// dstBits - dstSigFracBits - 1 - 1
+static const int dstExpBits = 15;
+
+#elif defined DST_SINGLE
+typedef float dst_t;
+typedef uint32_t dst_rep_t;
+#define DST_REP_C UINT32_C
+static const int dstBits = sizeof(dst_t) * CHAR_BIT;
+static const int dstSigFracBits = 23;
+// -1 accounts for the sign bit.
+// dstBits - dstSigFracBits - 1
+static const int dstExpBits = 8;
+
+#elif defined DST_HALF
+#ifdef COMPILER_RT_HAS_FLOAT16
+typedef _Float16 dst_t;
+#else
+typedef uint16_t dst_t;
+#endif
+typedef uint16_t dst_rep_t;
+#define DST_REP_C UINT16_C
+static const int dstBits = sizeof(dst_t) * CHAR_BIT;
+static const int dstSigFracBits = 10;
+// -1 accounts for the sign bit.
+// dstBits - dstSigFracBits - 1
+static const int dstExpBits = 5;
+
+#elif defined DST_BFLOAT
+typedef __bf16 dst_t;
+typedef uint16_t dst_rep_t;
+#define DST_REP_C UINT16_C
+static const int dstBits = sizeof(dst_t) * CHAR_BIT;
+static const int dstSigFracBits = 7;
+// -1 accounts for the sign bit.
+// dstBits - dstSigFracBits - 1
+static const int dstExpBits = 8;
+
+#else
+#error Destination should be single precision or double precision!
+#endif // end destination precision
+
+// TODO: These helper routines should be placed into fp_lib.h
+// Currently they depend on macros/constants defined above.
+
+static inline src_rep_t extract_sign_from_src(src_rep_t x) {
+ const src_rep_t srcSignMask = SRC_REP_C(1) << (srcBits - 1);
+ return (x & srcSignMask) >> (srcBits - 1);
+}
+
+static inline src_rep_t extract_exp_from_src(src_rep_t x) {
+ const int srcSigBits = srcBits - 1 - srcExpBits;
+ const src_rep_t srcExpMask = ((SRC_REP_C(1) << srcExpBits) - 1) << srcSigBits;
+ return (x & srcExpMask) >> srcSigBits;
+}
+
+static inline src_rep_t extract_sig_frac_from_src(src_rep_t x) {
+ const src_rep_t srcSigFracMask = (SRC_REP_C(1) << srcSigFracBits) - 1;
+ return x & srcSigFracMask;
+}
+
+static inline dst_rep_t construct_dst_rep(dst_rep_t sign, dst_rep_t exp, dst_rep_t sigFrac) {
+ dst_rep_t result = (sign << (dstBits - 1)) | (exp << (dstBits - 1 - dstExpBits)) | sigFrac;
+ // Set the explicit integer bit in F80 if present.
+ if (dstBits == 80 && exp) {
+ result |= (DST_REP_C(1) << dstSigFracBits);
+ }
+ return result;
+}
+
+// End of specialization parameters. Two helper routines for conversion to and
+// from the representation of floating-point data as integer values follow.
+
+static inline src_rep_t srcToRep(src_t x) {
+ const union {
+ src_t f;
+ src_rep_t i;
+ } rep = {.f = x};
+ return rep.i;
+}
+
+static inline dst_t dstFromRep(dst_rep_t x) {
+ const union {
+ dst_t f;
+ dst_rep_t i;
+ } rep = {.i = x};
+ return rep.f;
+}
+
+#endif // FP_TRUNC_HEADER
diff --git a/lib/impl/fp_trunc_impl.inc b/lib/impl/fp_trunc_impl.inc
@@ -0,0 +1,155 @@
+//= lib/fp_trunc_impl.inc - high precision -> low precision conversion *-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a fairly generic conversion from a wider to a narrower
+// IEEE-754 floating-point type in the default (round to nearest, ties to even)
+// rounding mode. The constants and types defined following the includes below
+// parameterize the conversion.
+//
+// This routine can be trivially adapted to support conversions to
+// half-precision or from quad-precision. It does not support types that don't
+// use the usual IEEE-754 interchange formats; specifically, some work would be
+// needed to adapt it to (for example) the Intel 80-bit format or PowerPC
+// double-double format.
+//
+// Note please, however, that this implementation is only intended to support
+// *narrowing* operations; if you need to convert to a *wider* floating-point
+// type (e.g. float -> double), then this routine will not do what you want it
+// to.
+//
+// It also requires that integer types at least as large as both formats
+// are available on the target platform; this may pose a problem when trying
+// to add support for quad on some 32-bit systems, for example.
+//
+// Finally, the following assumptions are made:
+//
+// 1. Floating-point types and integer types have the same endianness on the
+// target platform.
+//
+// 2. Quiet NaNs, if supported, are indicated by the leading bit of the
+// significand field being set.
+//
+//===----------------------------------------------------------------------===//
+
+#include "fp_trunc.h"
+
+// The destination type may use a usual IEEE-754 interchange format or Intel
+// 80-bit format. In particular, for the destination type dstSigFracBits may be
+// not equal to dstSigBits. The source type is assumed to be one of IEEE-754
+// standard types.
+static __inline dst_t __truncXfYf2__(src_t a) {
+ // Various constants whose values follow from the type parameters.
+ // Any reasonable optimizer will fold and propagate all of these.
+ const int srcInfExp = (1 << srcExpBits) - 1;
+ const int srcExpBias = srcInfExp >> 1;
+
+ const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigFracBits;
+ const src_rep_t roundMask =
+ (SRC_REP_C(1) << (srcSigFracBits - dstSigFracBits)) - 1;
+ const src_rep_t halfway = SRC_REP_C(1)
+ << (srcSigFracBits - dstSigFracBits - 1);
+ const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigFracBits - 1);
+ const src_rep_t srcNaNCode = srcQNaN - 1;
+
+ const int dstInfExp = (1 << dstExpBits) - 1;
+ const int dstExpBias = dstInfExp >> 1;
+ const int overflowExponent = srcExpBias + dstInfExp - dstExpBias;
+
+ const dst_rep_t dstQNaN = DST_REP_C(1) << (dstSigFracBits - 1);
+ const dst_rep_t dstNaNCode = dstQNaN - 1;
+
+ const src_rep_t aRep = srcToRep(a);
+ const src_rep_t srcSign = extract_sign_from_src(aRep);
+ const src_rep_t srcExp = extract_exp_from_src(aRep);
+ const src_rep_t srcSigFrac = extract_sig_frac_from_src(aRep);
+
+ dst_rep_t dstSign = srcSign;
+ dst_rep_t dstExp;
+ dst_rep_t dstSigFrac;
+
+ // Same size exponents and a's significand tail is 0.
+ // The significand can be truncated and the exponent can be copied over.
+ const int sigFracTailBits = srcSigFracBits - dstSigFracBits;
+ if (srcExpBits == dstExpBits &&
+ ((aRep >> sigFracTailBits) << sigFracTailBits) == aRep) {
+ dstExp = srcExp;
+ dstSigFrac = (dst_rep_t)(srcSigFrac >> sigFracTailBits);
+ return dstFromRep(construct_dst_rep(dstSign, dstExp, dstSigFrac));
+ }
+
+ const int dstExpCandidate = ((int)srcExp - srcExpBias) + dstExpBias;
+ if (dstExpCandidate >= 1 && dstExpCandidate < dstInfExp) {
+ // The exponent of a is within the range of normal numbers in the
+ // destination format. We can convert by simply right-shifting with
+ // rounding and adjusting the exponent.
+ dstExp = dstExpCandidate;
+ dstSigFrac = (dst_rep_t)(srcSigFrac >> sigFracTailBits);
+
+ const src_rep_t roundBits = srcSigFrac & roundMask;
+ // Round to nearest.
+ if (roundBits > halfway)
+ dstSigFrac++;
+ // Tie to even.
+ else if (roundBits == halfway)
+ dstSigFrac += dstSigFrac & 1;
+
+ // Rounding has changed the exponent.
+ if (dstSigFrac >= (DST_REP_C(1) << dstSigFracBits)) {
+ dstExp += 1;
+ dstSigFrac ^= (DST_REP_C(1) << dstSigFracBits);
+ }
+ } else if (srcExp == srcInfExp && srcSigFrac) {
+ // a is NaN.
+ // Conjure the result by beginning with infinity, setting the qNaN
+ // bit and inserting the (truncated) trailing NaN field.
+ dstExp = dstInfExp;
+ dstSigFrac = dstQNaN;
+ dstSigFrac |= ((srcSigFrac & srcNaNCode) >> sigFracTailBits) & dstNaNCode;
+ } else if ((int)srcExp >= overflowExponent) {
+ dstExp = dstInfExp;
+ dstSigFrac = 0;
+ } else {
+ // a underflows on conversion to the destination type or is an exact
+ // zero. The result may be a denormal or zero. Extract the exponent
+ // to get the shift amount for the denormalization.
+ src_rep_t significand = srcSigFrac;
+ int shift = srcExpBias - dstExpBias - srcExp;
+
+ if (srcExp) {
+ // Set the implicit integer bit if the source is a normal number.
+ significand |= srcMinNormal;
+ shift += 1;
+ }
+
+ // Right shift by the denormalization amount with sticky.
+ if (shift > srcSigFracBits) {
+ dstExp = 0;
+ dstSigFrac = 0;
+ } else {
+ dstExp = 0;
+ const bool sticky = shift && ((significand << (srcBits - shift)) != 0);
+ src_rep_t denormalizedSignificand = significand >> shift | sticky;
+ dstSigFrac = denormalizedSignificand >> sigFracTailBits;
+ const src_rep_t roundBits = denormalizedSignificand & roundMask;
+ // Round to nearest
+ if (roundBits > halfway)
+ dstSigFrac++;
+ // Ties to even
+ else if (roundBits == halfway)
+ dstSigFrac += dstSigFrac & 1;
+
+ // Rounding has changed the exponent.
+ if (dstSigFrac >= (DST_REP_C(1) << dstSigFracBits)) {
+ dstExp += 1;
+ dstSigFrac ^= (DST_REP_C(1) << dstSigFracBits);
+ }
+ }
+ }
+
+ return dstFromRep(construct_dst_rep(dstSign, dstExp, dstSigFrac));
+}
diff --git a/lib/impl/int_div_impl.inc b/lib/impl/int_div_impl.inc
@@ -0,0 +1,95 @@
+//===-- int_div_impl.inc - Integer division ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Helpers used by __udivsi3, __umodsi3, __udivdi3, and __umodsi3.
+//
+//===----------------------------------------------------------------------===//
+
+#define clz(a) (sizeof(a) == sizeof(unsigned long long) ? __builtin_clzll(a) : clzsi(a))
+
+// Adapted from Figure 3-40 of The PowerPC Compiler Writer's Guide
+static __inline fixuint_t __udivXi3(fixuint_t n, fixuint_t d) {
+ const unsigned N = sizeof(fixuint_t) * CHAR_BIT;
+ // d == 0 cases are unspecified.
+ unsigned sr = (d ? clz(d) : N) - (n ? clz(n) : N);
+ // 0 <= sr <= N - 1 or sr is very large.
+ if (sr > N - 1) // n < d
+ return 0;
+ if (sr == N - 1) // d == 1
+ return n;
+ ++sr;
+ // 1 <= sr <= N - 1. Shifts do not trigger UB.
+ fixuint_t r = n >> sr;
+ n <<= N - sr;
+ fixuint_t carry = 0;
+ for (; sr > 0; --sr) {
+ r = (r << 1) | (n >> (N - 1));
+ n = (n << 1) | carry;
+ // Branch-less version of:
+ // carry = 0;
+ // if (r >= d) r -= d, carry = 1;
+ const fixint_t s = (fixint_t)(d - r - 1) >> (N - 1);
+ carry = s & 1;
+ r -= d & s;
+ }
+ n = (n << 1) | carry;
+ return n;
+}
+
+// Mostly identical to __udivXi3 but the return values are different.
+static __inline fixuint_t __umodXi3(fixuint_t n, fixuint_t d) {
+ const unsigned N = sizeof(fixuint_t) * CHAR_BIT;
+ // d == 0 cases are unspecified.
+ unsigned sr = (d ? clz(d) : N) - (n ? clz(n) : N);
+ // 0 <= sr <= N - 1 or sr is very large.
+ if (sr > N - 1) // n < d
+ return n;
+ if (sr == N - 1) // d == 1
+ return 0;
+ ++sr;
+ // 1 <= sr <= N - 1. Shifts do not trigger UB.
+ fixuint_t r = n >> sr;
+ n <<= N - sr;
+ fixuint_t carry = 0;
+ for (; sr > 0; --sr) {
+ r = (r << 1) | (n >> (N - 1));
+ n = (n << 1) | carry;
+ // Branch-less version of:
+ // carry = 0;
+ // if (r >= d) r -= d, carry = 1;
+ const fixint_t s = (fixint_t)(d - r - 1) >> (N - 1);
+ carry = s & 1;
+ r -= d & s;
+ }
+ return r;
+}
+
+#ifdef COMPUTE_UDIV
+static __inline fixint_t __divXi3(fixint_t a, fixint_t b) {
+ const int N = (int)(sizeof(fixint_t) * CHAR_BIT) - 1;
+ fixint_t s_a = a >> N; // s_a = a < 0 ? -1 : 0
+ fixint_t s_b = b >> N; // s_b = b < 0 ? -1 : 0
+ fixuint_t a_u = (fixuint_t)(a ^ s_a) + (-s_a); // negate if s_a == -1
+ fixuint_t b_u = (fixuint_t)(b ^ s_b) + (-s_b); // negate if s_b == -1
+ s_a ^= s_b; // sign of quotient
+ return (COMPUTE_UDIV(a_u, b_u) ^ s_a) + (-s_a); // negate if s_a == -1
+}
+#endif // COMPUTE_UDIV
+
+#ifdef ASSIGN_UMOD
+static __inline fixint_t __modXi3(fixint_t a, fixint_t b) {
+ const int N = (int)(sizeof(fixint_t) * CHAR_BIT) - 1;
+ fixint_t s = b >> N; // s = b < 0 ? -1 : 0
+ fixuint_t b_u = (fixuint_t)(b ^ s) + (-s); // negate if s == -1
+ s = a >> N; // s = a < 0 ? -1 : 0
+ fixuint_t a_u = (fixuint_t)(a ^ s) + (-s); // negate if s == -1
+ fixuint_t res;
+ ASSIGN_UMOD(res, a_u, b_u);
+ return (res ^ s) + (-s); // negate if s == -1
+}
+#endif // ASSIGN_UMOD
diff --git a/lib/impl/int_to_fp.h b/lib/impl/int_to_fp.h
@@ -0,0 +1,82 @@
+//===-- int_to_fp.h - integer to floating point conversion ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Set source and destination defines in order to use a correctly
+// parameterised floatXiYf implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef INT_TO_FP_H
+#define INT_TO_FP_H
+
+#include "int_lib.h"
+
+#if defined SRC_I64
+typedef int64_t src_t;
+typedef uint64_t usrc_t;
+static __inline int clzSrcT(usrc_t x) { return __builtin_clzll(x); }
+
+#elif defined SRC_U64
+typedef uint64_t src_t;
+typedef uint64_t usrc_t;
+static __inline int clzSrcT(usrc_t x) { return __builtin_clzll(x); }
+
+#elif defined SRC_I128
+typedef __int128_t src_t;
+typedef __uint128_t usrc_t;
+static __inline int clzSrcT(usrc_t x) { return __clzti2(x); }
+
+#elif defined SRC_U128
+typedef __uint128_t src_t;
+typedef __uint128_t usrc_t;
+static __inline int clzSrcT(usrc_t x) { return __clzti2(x); }
+
+#else
+#error Source should be a handled integer type.
+#endif
+
+#if defined DST_SINGLE
+typedef float dst_t;
+typedef uint32_t dst_rep_t;
+#define DST_REP_C UINT32_C
+
+enum {
+ dstSigBits = 23,
+};
+
+#elif defined DST_DOUBLE
+typedef double dst_t;
+typedef uint64_t dst_rep_t;
+#define DST_REP_C UINT64_C
+
+enum {
+ dstSigBits = 52,
+};
+
+#elif defined DST_QUAD
+typedef tf_float dst_t;
+typedef __uint128_t dst_rep_t;
+#define DST_REP_C (__uint128_t)
+
+enum {
+ dstSigBits = 112,
+};
+
+#else
+#error Destination should be a handled floating point type
+#endif
+
+static __inline dst_t dstFromRep(dst_rep_t x) {
+ const union {
+ dst_t f;
+ dst_rep_t i;
+ } rep = {.i = x};
+ return rep.f;
+}
+
+#endif // INT_TO_FP_H
diff --git a/lib/impl/int_to_fp_impl.inc b/lib/impl/int_to_fp_impl.inc
@@ -0,0 +1,72 @@
+//===-- int_to_fp_impl.inc - integer to floating point conversion ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Thsi file implements a generic conversion from an integer type to an
+// IEEE-754 floating point type, allowing a common implementation to be hsared
+// without copy and paste.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_to_fp.h"
+
+static __inline dst_t __floatXiYf__(src_t a) {
+ if (a == 0)
+ return 0.0;
+
+ enum {
+ dstMantDig = dstSigBits + 1,
+ srcBits = sizeof(src_t) * CHAR_BIT,
+ srcIsSigned = ((src_t)-1) < 0,
+ };
+
+ const src_t s = srcIsSigned ? a >> (srcBits - 1) : 0;
+
+ a = (usrc_t)(a ^ s) - s;
+ int sd = srcBits - clzSrcT(a); // number of significant digits
+ int e = sd - 1; // exponent
+ if (sd > dstMantDig) {
+ // start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+ // finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+ // 12345678901234567890123456
+ // 1 = msb 1 bit
+ // P = bit dstMantDig-1 bits to the right of 1
+ // Q = bit dstMantDig bits to the right of 1
+ // R = "or" of all bits to the right of Q
+ if (sd == dstMantDig + 1) {
+ a <<= 1;
+ } else if (sd == dstMantDig + 2) {
+ // Do nothing.
+ } else {
+ a = ((usrc_t)a >> (sd - (dstMantDig + 2))) |
+ ((a & ((usrc_t)(-1) >> ((srcBits + dstMantDig + 2) - sd))) != 0);
+ }
+ // finish:
+ a |= (a & 4) != 0; // Or P into R
+ ++a; // round - this step may add a significant bit
+ a >>= 2; // dump Q and R
+ // a is now rounded to dstMantDig or dstMantDig+1 bits
+ if (a & ((usrc_t)1 << dstMantDig)) {
+ a >>= 1;
+ ++e;
+ }
+ // a is now rounded to dstMantDig bits
+ } else {
+ a <<= (dstMantDig - sd);
+ // a is now rounded to dstMantDig bits
+ }
+ const int dstBits = sizeof(dst_t) * CHAR_BIT;
+ const dst_rep_t dstSignMask = DST_REP_C(1) << (dstBits - 1);
+ const int dstExpBits = dstBits - dstSigBits - 1;
+ const int dstExpBias = (1 << (dstExpBits - 1)) - 1;
+ const dst_rep_t dstSignificandMask = (DST_REP_C(1) << dstSigBits) - 1;
+ // Combine sign, exponent, and mantissa.
+ const dst_rep_t result = ((dst_rep_t)s & dstSignMask) |
+ ((dst_rep_t)(e + dstExpBias) << dstSigBits) |
+ ((dst_rep_t)(a) & dstSignificandMask);
+ return dstFromRep(result);
+}
diff --git a/lib/include/common/assembly.h b/lib/include/common/assembly.h
@@ -0,0 +1,293 @@
+//===-- assembly.h - compiler-rt assembler support macros -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines macros for use in compiler-rt assembler source.
+// This file is not part of the interface of this library.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef COMPILERRT_ASSEMBLY_H
+#define COMPILERRT_ASSEMBLY_H
+
+#if defined(__linux__) && defined(__CET__)
+#if __has_include(<cet.h>)
+#include <cet.h>
+#endif
+#endif
+
+#if defined(__APPLE__) && defined(__aarch64__)
+#define SEPARATOR %%
+#else
+#define SEPARATOR ;
+#endif
+
+#if defined(__APPLE__)
+#define HIDDEN(name) .private_extern name
+#define LOCAL_LABEL(name) L_##name
+// tell linker it can break up file at label boundaries
+#define FILE_LEVEL_DIRECTIVE .subsections_via_symbols
+#define SYMBOL_IS_FUNC(name)
+#define CONST_SECTION .const
+
+#define NO_EXEC_STACK_DIRECTIVE
+
+#elif defined(__ELF__)
+
+#define HIDDEN(name) .hidden name
+#define LOCAL_LABEL(name) .L_##name
+#define FILE_LEVEL_DIRECTIVE
+#if defined(__arm__) || defined(__aarch64__)
+#define SYMBOL_IS_FUNC(name) .type name,%function
+#else
+#define SYMBOL_IS_FUNC(name) .type name,@function
+#endif
+#define CONST_SECTION .section .rodata
+
+#if defined(__GNU__) || defined(__FreeBSD__) || defined(__Fuchsia__) || \
+ defined(__linux__)
+#define NO_EXEC_STACK_DIRECTIVE .section .note.GNU-stack,"",%progbits
+#else
+#define NO_EXEC_STACK_DIRECTIVE
+#endif
+
+#else // !__APPLE__ && !__ELF__
+
+#define HIDDEN(name)
+#define LOCAL_LABEL(name) .L ## name
+#define FILE_LEVEL_DIRECTIVE
+#define SYMBOL_IS_FUNC(name) \
+ .def name SEPARATOR \
+ .scl 2 SEPARATOR \
+ .type 32 SEPARATOR \
+ .endef
+#define CONST_SECTION .section .rdata,"rd"
+
+#define NO_EXEC_STACK_DIRECTIVE
+
+#endif
+
+#if defined(__arm__) || defined(__aarch64__)
+#define FUNC_ALIGN \
+ .text SEPARATOR \
+ .balign 16 SEPARATOR
+#else
+#define FUNC_ALIGN
+#endif
+
+// BTI and PAC gnu property note
+#define NT_GNU_PROPERTY_TYPE_0 5
+#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000
+#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI 1
+#define GNU_PROPERTY_AARCH64_FEATURE_1_PAC 2
+
+#if defined(__ARM_FEATURE_BTI_DEFAULT)
+#define BTI_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_BTI
+#else
+#define BTI_FLAG 0
+#endif
+
+#if __ARM_FEATURE_PAC_DEFAULT & 3
+#define PAC_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_PAC
+#else
+#define PAC_FLAG 0
+#endif
+
+#define GNU_PROPERTY(type, value) \
+ .pushsection .note.gnu.property, "a" SEPARATOR \
+ .p2align 3 SEPARATOR \
+ .word 4 SEPARATOR \
+ .word 16 SEPARATOR \
+ .word NT_GNU_PROPERTY_TYPE_0 SEPARATOR \
+ .asciz "GNU" SEPARATOR \
+ .word type SEPARATOR \
+ .word 4 SEPARATOR \
+ .word value SEPARATOR \
+ .word 0 SEPARATOR \
+ .popsection
+
+#if BTI_FLAG != 0
+#define BTI_C hint #34
+#define BTI_J hint #36
+#else
+#define BTI_C
+#define BTI_J
+#endif
+
+#if (BTI_FLAG | PAC_FLAG) != 0
+#define GNU_PROPERTY_BTI_PAC \
+ GNU_PROPERTY(GNU_PROPERTY_AARCH64_FEATURE_1_AND, BTI_FLAG | PAC_FLAG)
+#else
+#define GNU_PROPERTY_BTI_PAC
+#endif
+
+#if defined(__clang__) || defined(__GCC_HAVE_DWARF2_CFI_ASM)
+#define CFI_START .cfi_startproc
+#define CFI_END .cfi_endproc
+#else
+#define CFI_START
+#define CFI_END
+#endif
+
+#if defined(__arm__)
+
+// Determine actual [ARM][THUMB[1][2]] ISA using compiler predefined macros:
+// - for '-mthumb -march=armv6' compiler defines '__thumb__'
+// - for '-mthumb -march=armv7' compiler defines '__thumb__' and '__thumb2__'
+#if defined(__thumb2__) || defined(__thumb__)
+#define DEFINE_CODE_STATE .thumb SEPARATOR
+#define DECLARE_FUNC_ENCODING .thumb_func SEPARATOR
+#if defined(__thumb2__)
+#define USE_THUMB_2
+#define IT(cond) it cond
+#define ITT(cond) itt cond
+#define ITE(cond) ite cond
+#else
+#define USE_THUMB_1
+#define IT(cond)
+#define ITT(cond)
+#define ITE(cond)
+#endif // defined(__thumb__2)
+#else // !defined(__thumb2__) && !defined(__thumb__)
+#define DEFINE_CODE_STATE .arm SEPARATOR
+#define DECLARE_FUNC_ENCODING
+#define IT(cond)
+#define ITT(cond)
+#define ITE(cond)
+#endif
+
+#if defined(USE_THUMB_1) && defined(USE_THUMB_2)
+#error "USE_THUMB_1 and USE_THUMB_2 can't be defined together."
+#endif
+
+#if defined(__ARM_ARCH_4T__) || __ARM_ARCH >= 5
+#define ARM_HAS_BX
+#endif
+#if !defined(__ARM_FEATURE_CLZ) && !defined(USE_THUMB_1) && \
+ (__ARM_ARCH >= 6 || (__ARM_ARCH == 5 && !defined(__ARM_ARCH_5__)))
+#define __ARM_FEATURE_CLZ
+#endif
+
+#ifdef ARM_HAS_BX
+#define JMP(r) bx r
+#define JMPc(r, c) bx##c r
+#else
+#define JMP(r) mov pc, r
+#define JMPc(r, c) mov##c pc, r
+#endif
+
+// pop {pc} can't switch Thumb mode on ARMv4T
+#if __ARM_ARCH >= 5
+#define POP_PC() pop {pc}
+#else
+#define POP_PC() \
+ pop {ip}; \
+ JMP(ip)
+#endif
+
+#if defined(USE_THUMB_2)
+#define WIDE(op) op.w
+#else
+#define WIDE(op) op
+#endif
+#else // !defined(__arm)
+#define DECLARE_FUNC_ENCODING
+#define DEFINE_CODE_STATE
+#endif
+
+#define GLUE2_(a, b) a##b
+#define GLUE(a, b) GLUE2_(a, b)
+#define GLUE2(a, b) GLUE2_(a, b)
+#define GLUE3_(a, b, c) a##b##c
+#define GLUE3(a, b, c) GLUE3_(a, b, c)
+#define GLUE4_(a, b, c, d) a##b##c##d
+#define GLUE4(a, b, c, d) GLUE4_(a, b, c, d)
+
+#define SYMBOL_NAME(name) GLUE(__USER_LABEL_PREFIX__, name)
+
+#ifdef VISIBILITY_HIDDEN
+#define DECLARE_SYMBOL_VISIBILITY(name) \
+ HIDDEN(SYMBOL_NAME(name)) SEPARATOR
+#define DECLARE_SYMBOL_VISIBILITY_UNMANGLED(name) \
+ HIDDEN(name) SEPARATOR
+#else
+#define DECLARE_SYMBOL_VISIBILITY(name)
+#define DECLARE_SYMBOL_VISIBILITY_UNMANGLED(name)
+#endif
+
+#define DEFINE_COMPILERRT_FUNCTION(name) \
+ DEFINE_CODE_STATE \
+ FILE_LEVEL_DIRECTIVE SEPARATOR \
+ .globl SYMBOL_NAME(name) SEPARATOR \
+ SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \
+ DECLARE_SYMBOL_VISIBILITY(name) \
+ DECLARE_FUNC_ENCODING \
+ SYMBOL_NAME(name):
+
+#define DEFINE_COMPILERRT_THUMB_FUNCTION(name) \
+ DEFINE_CODE_STATE \
+ FILE_LEVEL_DIRECTIVE SEPARATOR \
+ .globl SYMBOL_NAME(name) SEPARATOR \
+ SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \
+ DECLARE_SYMBOL_VISIBILITY(name) SEPARATOR \
+ .thumb_func SEPARATOR \
+ SYMBOL_NAME(name):
+
+#define DEFINE_COMPILERRT_PRIVATE_FUNCTION(name) \
+ DEFINE_CODE_STATE \
+ FILE_LEVEL_DIRECTIVE SEPARATOR \
+ .globl SYMBOL_NAME(name) SEPARATOR \
+ SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \
+ HIDDEN(SYMBOL_NAME(name)) SEPARATOR \
+ DECLARE_FUNC_ENCODING \
+ SYMBOL_NAME(name):
+
+#define DEFINE_COMPILERRT_PRIVATE_FUNCTION_UNMANGLED(name) \
+ DEFINE_CODE_STATE \
+ .globl name SEPARATOR \
+ SYMBOL_IS_FUNC(name) SEPARATOR \
+ HIDDEN(name) SEPARATOR \
+ DECLARE_FUNC_ENCODING \
+ name:
+
+#define DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(name) \
+ DEFINE_CODE_STATE \
+ FUNC_ALIGN \
+ .globl name SEPARATOR \
+ SYMBOL_IS_FUNC(name) SEPARATOR \
+ DECLARE_SYMBOL_VISIBILITY_UNMANGLED(name) SEPARATOR \
+ DECLARE_FUNC_ENCODING \
+ name: \
+ SEPARATOR CFI_START \
+ SEPARATOR BTI_C
+
+#define DEFINE_COMPILERRT_FUNCTION_ALIAS(name, target) \
+ .globl SYMBOL_NAME(name) SEPARATOR \
+ SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \
+ DECLARE_SYMBOL_VISIBILITY(name) SEPARATOR \
+ .set SYMBOL_NAME(name), SYMBOL_NAME(target) SEPARATOR
+
+#if defined(__ARM_EABI__)
+#define DEFINE_AEABI_FUNCTION_ALIAS(aeabi_name, name) \
+ DEFINE_COMPILERRT_FUNCTION_ALIAS(aeabi_name, name)
+#else
+#define DEFINE_AEABI_FUNCTION_ALIAS(aeabi_name, name)
+#endif
+
+#ifdef __ELF__
+#define END_COMPILERRT_FUNCTION(name) \
+ .size SYMBOL_NAME(name), . - SYMBOL_NAME(name)
+#define END_COMPILERRT_OUTLINE_FUNCTION(name) \
+ CFI_END SEPARATOR \
+ .size SYMBOL_NAME(name), . - SYMBOL_NAME(name)
+#else
+#define END_COMPILERRT_FUNCTION(name)
+#define END_COMPILERRT_OUTLINE_FUNCTION(name) \
+ CFI_END
+#endif
+
+#endif // COMPILERRT_ASSEMBLY_H
diff --git a/lib/include/common/fp_lib.h b/lib/include/common/fp_lib.h
@@ -0,0 +1,281 @@
+//===-- fp_lib.h - Floating-point utilities ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// cfree-tailored: little-endian, GCC/clang-compatible builtins, IEEE 754
+// binary32/binary64. binary128 (QUAD_PRECISION) requires tf_supplement.h to
+// have been pre-included so that CRT_HAS_TF_MODE / CRT_HAS_IEEE_TF and
+// tf_float are defined.
+//
+// Selected by the includer via #define SINGLE_PRECISION | DOUBLE_PRECISION
+// | QUAD_PRECISION before #include "fp_lib.h".
+//===----------------------------------------------------------------------===//
+
+#ifndef FP_LIB_HEADER
+#define FP_LIB_HEADER
+
+#include "int_lib.h"
+#include "int_math.h"
+#include "int_types.h"
+#include <limits.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+#if defined SINGLE_PRECISION
+
+typedef uint16_t half_rep_t;
+typedef uint32_t rep_t;
+typedef uint64_t twice_rep_t;
+typedef int32_t srep_t;
+typedef float fp_t;
+#define HALF_REP_C UINT16_C
+#define REP_C UINT32_C
+#define significandBits 23
+
+static __inline int rep_clz(rep_t a) { return clzsi(a); }
+
+static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
+ const uint64_t product = (uint64_t)a * b;
+ *hi = product >> 32;
+ *lo = product;
+}
+COMPILER_RT_ABI fp_t __addsf3(fp_t a, fp_t b);
+
+#elif defined DOUBLE_PRECISION
+
+typedef uint32_t half_rep_t;
+typedef uint64_t rep_t;
+typedef int64_t srep_t;
+typedef double fp_t;
+#define HALF_REP_C UINT32_C
+#define REP_C UINT64_C
+#define significandBits 52
+
+static __inline int rep_clz(rep_t a) { return __builtin_clzll(a); }
+
+#define loWord(a) (a & 0xffffffffU)
+#define hiWord(a) (a >> 32)
+
+static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
+ const uint64_t plolo = loWord(a) * loWord(b);
+ const uint64_t plohi = loWord(a) * hiWord(b);
+ const uint64_t philo = hiWord(a) * loWord(b);
+ const uint64_t phihi = hiWord(a) * hiWord(b);
+ const uint64_t r0 = loWord(plolo);
+ const uint64_t r1 = hiWord(plolo) + loWord(plohi) + loWord(philo);
+ *lo = r0 + (r1 << 32);
+ *hi = hiWord(plohi) + hiWord(philo) + hiWord(r1) + phihi;
+}
+#undef loWord
+#undef hiWord
+
+COMPILER_RT_ABI fp_t __adddf3(fp_t a, fp_t b);
+
+#elif defined QUAD_PRECISION
+// Requires tf_supplement.h to be pre-included so CRT_HAS_TF_MODE and
+// CRT_HAS_IEEE_TF are defined and tf_float is typedef'd.
+typedef uint64_t half_rep_t;
+typedef __uint128_t rep_t;
+typedef __int128_t srep_t;
+typedef tf_float fp_t;
+#define HALF_REP_C UINT64_C
+#define REP_C (__uint128_t)
+#define significandBits 112
+#define TF_MANT_DIG (significandBits + 1)
+
+static __inline int rep_clz(rep_t a) {
+ const union {
+ __uint128_t ll;
+ struct { uint64_t low, high; } s;
+ } uu = {.ll = a};
+ uint64_t word, add;
+ if (uu.s.high) { word = uu.s.high; add = 0; }
+ else { word = uu.s.low; add = 64; }
+ return __builtin_clzll(word) + add;
+}
+
+#define Word_LoMask UINT64_C(0x00000000ffffffff)
+#define Word_HiMask UINT64_C(0xffffffff00000000)
+#define Word_FullMask UINT64_C(0xffffffffffffffff)
+#define Word_1(a) (uint64_t)((a >> 96) & Word_LoMask)
+#define Word_2(a) (uint64_t)((a >> 64) & Word_LoMask)
+#define Word_3(a) (uint64_t)((a >> 32) & Word_LoMask)
+#define Word_4(a) (uint64_t)(a & Word_LoMask)
+
+static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
+ const uint64_t product11 = Word_1(a) * Word_1(b);
+ const uint64_t product12 = Word_1(a) * Word_2(b);
+ const uint64_t product13 = Word_1(a) * Word_3(b);
+ const uint64_t product14 = Word_1(a) * Word_4(b);
+ const uint64_t product21 = Word_2(a) * Word_1(b);
+ const uint64_t product22 = Word_2(a) * Word_2(b);
+ const uint64_t product23 = Word_2(a) * Word_3(b);
+ const uint64_t product24 = Word_2(a) * Word_4(b);
+ const uint64_t product31 = Word_3(a) * Word_1(b);
+ const uint64_t product32 = Word_3(a) * Word_2(b);
+ const uint64_t product33 = Word_3(a) * Word_3(b);
+ const uint64_t product34 = Word_3(a) * Word_4(b);
+ const uint64_t product41 = Word_4(a) * Word_1(b);
+ const uint64_t product42 = Word_4(a) * Word_2(b);
+ const uint64_t product43 = Word_4(a) * Word_3(b);
+ const uint64_t product44 = Word_4(a) * Word_4(b);
+
+ const __uint128_t sum0 = (__uint128_t)product44;
+ const __uint128_t sum1 = (__uint128_t)product34 + (__uint128_t)product43;
+ const __uint128_t sum2 = (__uint128_t)product24 + (__uint128_t)product33 +
+ (__uint128_t)product42;
+ const __uint128_t sum3 = (__uint128_t)product14 + (__uint128_t)product23 +
+ (__uint128_t)product32 + (__uint128_t)product41;
+ const __uint128_t sum4 = (__uint128_t)product13 + (__uint128_t)product22 +
+ (__uint128_t)product31;
+ const __uint128_t sum5 = (__uint128_t)product12 + (__uint128_t)product21;
+ const __uint128_t sum6 = (__uint128_t)product11;
+
+ const __uint128_t r0 = (sum0 & Word_FullMask) + ((sum1 & Word_LoMask) << 32);
+ const __uint128_t r1 = (sum0 >> 64) + ((sum1 >> 32) & Word_FullMask) +
+ (sum2 & Word_FullMask) + ((sum3 << 32) & Word_HiMask);
+ *lo = r0 + (r1 << 64);
+ *hi = (r1 >> 64) + (sum1 >> 96) + (sum2 >> 64) + (sum3 >> 32) + sum4 +
+ (sum5 << 32) + (sum6 << 64);
+}
+#undef Word_1
+#undef Word_2
+#undef Word_3
+#undef Word_4
+#undef Word_HiMask
+#undef Word_LoMask
+#undef Word_FullMask
+#else
+#error SINGLE_PRECISION, DOUBLE_PRECISION or QUAD_PRECISION must be defined.
+#endif
+
+#define typeWidth (sizeof(rep_t) * CHAR_BIT)
+
+static __inline rep_t toRep(fp_t x) {
+ const union { fp_t f; rep_t i; } rep = {.f = x};
+ return rep.i;
+}
+
+static __inline fp_t fromRep(rep_t x) {
+ const union { fp_t f; rep_t i; } rep = {.i = x};
+ return rep.f;
+}
+
+#define exponentBits (typeWidth - significandBits - 1)
+#define maxExponent ((1 << exponentBits) - 1)
+#define exponentBias (maxExponent >> 1)
+
+#define implicitBit (REP_C(1) << significandBits)
+#define significandMask (implicitBit - 1U)
+#define signBit (REP_C(1) << (significandBits + exponentBits))
+#define absMask (signBit - 1U)
+#define exponentMask (absMask ^ significandMask)
+#define oneRep ((rep_t)exponentBias << significandBits)
+#define infRep exponentMask
+#define quietBit (implicitBit >> 1)
+#define qnanRep (exponentMask | quietBit)
+
+static __inline int normalize(rep_t *significand) {
+ const int shift = rep_clz(*significand) - rep_clz(implicitBit);
+ *significand <<= shift;
+ return 1 - shift;
+}
+
+static __inline void wideLeftShift(rep_t *hi, rep_t *lo, int count) {
+ *hi = *hi << count | *lo >> (typeWidth - count);
+ *lo = *lo << count;
+}
+
+static __inline void wideRightShiftWithSticky(rep_t *hi, rep_t *lo,
+ unsigned int count) {
+ if (count < typeWidth) {
+ const bool sticky = (*lo << (typeWidth - count)) != 0;
+ *lo = *hi << (typeWidth - count) | *lo >> count | sticky;
+ *hi = *hi >> count;
+ } else if (count < 2 * typeWidth) {
+ const bool sticky = *hi << (2 * typeWidth - count) | *lo;
+ *lo = *hi >> (count - typeWidth) | sticky;
+ *hi = 0;
+ } else {
+ const bool sticky = *hi | *lo;
+ *lo = sticky;
+ *hi = 0;
+ }
+}
+
+static __inline fp_t __compiler_rt_logbX(fp_t x) {
+ rep_t rep = toRep(x);
+ int exp = (rep & exponentMask) >> significandBits;
+
+ if (exp == maxExponent) {
+ if (((rep & signBit) == 0) || (x != x)) return x;
+ else return -x;
+ } else if (x == 0.0) {
+ return fromRep(infRep | signBit);
+ }
+
+ if (exp != 0) {
+ return exp - exponentBias;
+ } else {
+ rep &= absMask;
+ const int shift = 1 - normalize(&rep);
+ exp = (rep & exponentMask) >> significandBits;
+ return exp - exponentBias - shift;
+ }
+}
+
+static __inline fp_t __compiler_rt_scalbnX(fp_t x, int y) {
+ const rep_t rep = toRep(x);
+ int exp = (rep & exponentMask) >> significandBits;
+
+ if (x == 0.0 || exp == maxExponent) return x;
+
+ rep_t sig = rep & significandMask;
+ if (exp == 0) {
+ exp += normalize(&sig);
+ sig &= ~implicitBit;
+ }
+
+ if (__builtin_sadd_overflow(exp, y, &exp)) {
+ exp = (y >= 0) ? INT_MAX : INT_MIN;
+ }
+
+ const rep_t sign = rep & signBit;
+ if (exp >= maxExponent) {
+ return fromRep(sign | ((rep_t)(maxExponent - 1) << significandBits)) * 2.0f;
+ } else if (exp <= 0) {
+ fp_t tmp = fromRep(sign | (REP_C(1) << significandBits) | sig);
+ exp += exponentBias - 1;
+ if (exp < 1) exp = 1;
+ tmp *= fromRep((rep_t)exp << significandBits);
+ return tmp;
+ } else
+ return fromRep(sign | ((rep_t)exp << significandBits) | sig);
+}
+
+static __inline fp_t __compiler_rt_fmaxX(fp_t x, fp_t y) {
+ return (crt_isnan(x) || x < y) ? y : x;
+}
+
+#if defined(SINGLE_PRECISION)
+static __inline fp_t __compiler_rt_logbf(fp_t x) { return __compiler_rt_logbX(x); }
+static __inline fp_t __compiler_rt_scalbnf(fp_t x, int y) { return __compiler_rt_scalbnX(x, y); }
+static __inline fp_t __compiler_rt_fmaxf(fp_t x, fp_t y) { return __compiler_rt_fmaxX(x, y); }
+#elif defined(DOUBLE_PRECISION)
+static __inline fp_t __compiler_rt_logb(fp_t x) { return __compiler_rt_logbX(x); }
+static __inline fp_t __compiler_rt_scalbn(fp_t x, int y) { return __compiler_rt_scalbnX(x, y); }
+static __inline fp_t __compiler_rt_fmax(fp_t x, fp_t y) { return __compiler_rt_fmaxX(x, y); }
+#elif defined(QUAD_PRECISION)
+static __inline tf_float __compiler_rt_logbtf(tf_float x) { return __compiler_rt_logbX(x); }
+static __inline tf_float __compiler_rt_scalbntf(tf_float x, int y){ return __compiler_rt_scalbnX(x, y); }
+static __inline tf_float __compiler_rt_fmaxtf(tf_float x, tf_float y){ return __compiler_rt_fmaxX(x, y); }
+#define __compiler_rt_logbl __compiler_rt_logbtf
+#define __compiler_rt_scalbnl __compiler_rt_scalbntf
+#define __compiler_rt_fmaxl __compiler_rt_fmaxtf
+#define crt_fabstf crt_fabsf128
+#define crt_copysigntf crt_copysignf128
+#endif
+
+#endif // FP_LIB_HEADER
diff --git a/lib/include/common/fp_mode.h b/lib/include/common/fp_mode.h
@@ -0,0 +1,29 @@
+//===----- lib/fp_mode.h - Floaing-point environment mode utilities --C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is not part of the interface of this library.
+//
+// This file defines an interface for accessing hardware floating point
+// environment mode.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FP_MODE_H
+#define FP_MODE_H
+
+typedef enum {
+ CRT_FE_TONEAREST,
+ CRT_FE_DOWNWARD,
+ CRT_FE_UPWARD,
+ CRT_FE_TOWARDZERO
+} CRT_FE_ROUND_MODE;
+
+CRT_FE_ROUND_MODE __fe_getround(void);
+int __fe_raise_inexact(void);
+
+#endif // FP_MODE_H
diff --git a/lib/include/common/int_math.h b/lib/include/common/int_math.h
@@ -0,0 +1,32 @@
+//===-- int_math.h - internal math inlines --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// cfree-tailored: assumes a GCC/clang-compatible compiler with the standard
+// __builtin_* family. MSVC paths and GCC <7 fallbacks have been dropped.
+//===----------------------------------------------------------------------===//
+
+#ifndef INT_MATH_H
+#define INT_MATH_H
+
+#define CRT_INFINITY __builtin_huge_valf()
+
+#define crt_isfinite(x) __builtin_isfinite((x))
+#define crt_isinf(x) __builtin_isinf((x))
+#define crt_isnan(x) __builtin_isnan((x))
+
+#define crt_copysign(x, y) __builtin_copysign((x), (y))
+#define crt_copysignf(x, y) __builtin_copysignf((x), (y))
+#define crt_copysignl(x, y) __builtin_copysignl((x), (y))
+
+#define crt_fabs(x) __builtin_fabs((x))
+#define crt_fabsf(x) __builtin_fabsf((x))
+#define crt_fabsl(x) __builtin_fabsl((x))
+
+#define crt_fmaxl(x, y) __builtin_fmaxl((x), (y))
+#define crt_logbl(x) __builtin_logbl((x))
+#define crt_scalbnl(x, y) __builtin_scalbnl((x), (y))
+
+#endif // INT_MATH_H
diff --git a/lib/include/common/int_util.h b/lib/include/common/int_util.h
@@ -0,0 +1,47 @@
+//===-- int_util.h - internal utility functions ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is not part of the interface of this library.
+//
+// This file defines non-inline utilities which are available for use in the
+// library. The function definitions themselves are all contained in int_util.c
+// which will always be compiled into any compiler-rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef INT_UTIL_H
+#define INT_UTIL_H
+
+/// \brief Trigger a program abort (or panic for kernel code).
+#define compilerrt_abort() __compilerrt_abort_impl(__FILE__, __LINE__, __func__)
+
+NORETURN void __compilerrt_abort_impl(const char *file, int line,
+ const char *function);
+
+#define COMPILE_TIME_ASSERT(expr) COMPILE_TIME_ASSERT1(expr, __COUNTER__)
+#define COMPILE_TIME_ASSERT1(expr, cnt) COMPILE_TIME_ASSERT2(expr, cnt)
+#define COMPILE_TIME_ASSERT2(expr, cnt) \
+ typedef char ct_assert_##cnt[(expr) ? 1 : -1] UNUSED
+
+// Force unrolling the code specified to be repeated N times.
+#define REPEAT_0_TIMES(code_to_repeat) /* do nothing */
+#define REPEAT_1_TIMES(code_to_repeat) code_to_repeat
+#define REPEAT_2_TIMES(code_to_repeat) \
+ REPEAT_1_TIMES(code_to_repeat) \
+ code_to_repeat
+#define REPEAT_3_TIMES(code_to_repeat) \
+ REPEAT_2_TIMES(code_to_repeat) \
+ code_to_repeat
+#define REPEAT_4_TIMES(code_to_repeat) \
+ REPEAT_3_TIMES(code_to_repeat) \
+ code_to_repeat
+
+#define REPEAT_N_TIMES_(N, code_to_repeat) REPEAT_##N##_TIMES(code_to_repeat)
+#define REPEAT_N_TIMES(N, code_to_repeat) REPEAT_N_TIMES_(N, code_to_repeat)
+
+#endif // INT_UTIL_H
diff --git a/lib/include/ilp32_le/int_endianness.h b/lib/include/ilp32_le/int_endianness.h
@@ -0,0 +1,13 @@
+//===-- int_endianness.h - LP64 little-endian ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//===----------------------------------------------------------------------===//
+#ifndef INT_ENDIANNESS_H
+#define INT_ENDIANNESS_H
+
+#define _YUGA_LITTLE_ENDIAN 1
+#define _YUGA_BIG_ENDIAN 0
+
+#endif
diff --git a/lib/include/ilp32_le/int_lib.h b/lib/include/ilp32_le/int_lib.h
@@ -0,0 +1,43 @@
+//===-- int_lib.h - ILP32 little-endian ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// cfree-tailored: ILP32 (long == 32, pointer == 32), little-endian, freestanding.
+// No 128-bit integer support (no ti_int / __int128).
+// Targets: Linux/Darwin x86, ARM32, RV32, WASM32.
+//===----------------------------------------------------------------------===//
+
+#ifndef INT_LIB_H
+#define INT_LIB_H
+
+#define COMPILER_RT_ABI
+
+// ARM AEABI runtime helpers explicitly use the AAPCS PCS regardless of the
+// translation unit default. cfree's ARM target is always AAPCS, so this is
+// effectively a no-op, but the symbol must exist for the arm/ source files.
+#define AEABI_RTABI __attribute__((__pcs__("aapcs")))
+
+#define ALWAYS_INLINE __attribute__((always_inline))
+#define NOINLINE __attribute__((noinline))
+#define NORETURN __attribute__((noreturn))
+#define UNUSED __attribute__((unused))
+
+#include <float.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "int_types.h"
+#include "int_util.h"
+
+COMPILER_RT_ABI int __paritysi2(si_int a);
+COMPILER_RT_ABI int __paritydi2(di_int a);
+COMPILER_RT_ABI di_int __divdi3(di_int a, di_int b);
+COMPILER_RT_ABI si_int __divsi3(si_int a, si_int b);
+COMPILER_RT_ABI su_int __udivsi3(su_int n, su_int d);
+COMPILER_RT_ABI su_int __udivmodsi4(su_int a, su_int b, su_int *rem);
+COMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int *rem);
+
+#endif // INT_LIB_H
diff --git a/lib/include/ilp32_le/int_types.h b/lib/include/ilp32_le/int_types.h
@@ -0,0 +1,48 @@
+//===-- int_types.h - ILP32 little-endian --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// cfree-tailored: ILP32 little-endian. No 128-bit integer support.
+//===----------------------------------------------------------------------===//
+
+#ifndef INT_TYPES_H
+#define INT_TYPES_H
+
+#include "int_endianness.h"
+
+#ifdef si_int
+#undef si_int
+#endif
+typedef int32_t si_int;
+typedef uint32_t su_int;
+#define clzsi __builtin_clz
+#define ctzsi __builtin_ctz
+
+typedef int64_t di_int;
+typedef uint64_t du_int;
+
+typedef union {
+ di_int all;
+ struct { su_int low; si_int high; } s;
+} dwords;
+
+typedef union {
+ du_int all;
+ struct { su_int low; su_int high; } s;
+} udwords;
+
+#define CRT_HAS_FLOATING_POINT 1
+
+typedef union { su_int u; float f; } float_bits;
+typedef union { udwords u; double f; } double_bits;
+typedef struct { udwords low; udwords high; } uqwords;
+
+typedef float _Complex Fcomplex;
+typedef double _Complex Dcomplex;
+typedef long double _Complex Lcomplex;
+#define COMPLEX_REAL(x) __real__(x)
+#define COMPLEX_IMAGINARY(x) __imag__(x)
+
+#endif // INT_TYPES_H
diff --git a/lib/include/llp64_le/int_endianness.h b/lib/include/llp64_le/int_endianness.h
@@ -0,0 +1,13 @@
+//===-- int_endianness.h - LP64 little-endian ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//===----------------------------------------------------------------------===//
+#ifndef INT_ENDIANNESS_H
+#define INT_ENDIANNESS_H
+
+#define _YUGA_LITTLE_ENDIAN 1
+#define _YUGA_BIG_ENDIAN 0
+
+#endif
diff --git a/lib/include/llp64_le/int_lib.h b/lib/include/llp64_le/int_lib.h
@@ -0,0 +1,39 @@
+//===-- int_lib.h - LLP64 little-endian ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// cfree-tailored: LLP64 (long == 32, pointer == 64), little-endian, freestanding.
+// Targets: Win64 x86_64.
+//===----------------------------------------------------------------------===//
+
+#ifndef INT_LIB_H
+#define INT_LIB_H
+
+#define COMPILER_RT_ABI
+
+#define ALWAYS_INLINE __attribute__((always_inline))
+#define NOINLINE __attribute__((noinline))
+#define NORETURN __attribute__((noreturn))
+#define UNUSED __attribute__((unused))
+
+#include <float.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "int_types.h"
+#include "int_util.h"
+
+COMPILER_RT_ABI int __paritysi2(si_int a);
+COMPILER_RT_ABI int __paritydi2(di_int a);
+COMPILER_RT_ABI di_int __divdi3(di_int a, di_int b);
+COMPILER_RT_ABI si_int __divsi3(si_int a, si_int b);
+COMPILER_RT_ABI su_int __udivsi3(su_int n, su_int d);
+COMPILER_RT_ABI su_int __udivmodsi4(su_int a, su_int b, su_int *rem);
+COMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int *rem);
+COMPILER_RT_ABI int __clzti2(ti_int a);
+COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int *rem);
+
+#endif // INT_LIB_H
diff --git a/lib/include/llp64_le/int_types.h b/lib/include/llp64_le/int_types.h
@@ -0,0 +1,69 @@
+//===-- int_types.h - LLP64 little-endian --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// cfree-tailored: LLP64 little-endian. Has 128-bit integer (__int128) support.
+//===----------------------------------------------------------------------===//
+
+#ifndef INT_TYPES_H
+#define INT_TYPES_H
+
+#include "int_endianness.h"
+
+#ifdef si_int
+#undef si_int
+#endif
+typedef int32_t si_int;
+typedef uint32_t su_int;
+#define clzsi __builtin_clz
+#define ctzsi __builtin_ctz
+
+typedef int64_t di_int;
+typedef uint64_t du_int;
+
+typedef union {
+ di_int all;
+ struct { su_int low; si_int high; } s;
+} dwords;
+
+typedef union {
+ du_int all;
+ struct { su_int low; su_int high; } s;
+} udwords;
+
+#define CRT_HAS_128BIT
+typedef int ti_int __attribute__((mode(TI)));
+typedef unsigned tu_int __attribute__((mode(TI)));
+
+typedef union {
+ ti_int all;
+ struct { du_int low; di_int high; } s;
+} twords;
+
+typedef union {
+ tu_int all;
+ struct { du_int low; du_int high; } s;
+} utwords;
+
+static __inline ti_int make_ti(di_int h, di_int l) {
+ twords r; r.s.high = h; r.s.low = l; return r.all;
+}
+static __inline tu_int make_tu(du_int h, du_int l) {
+ utwords r; r.s.high = h; r.s.low = l; return r.all;
+}
+
+#define CRT_HAS_FLOATING_POINT 1
+
+typedef union { su_int u; float f; } float_bits;
+typedef union { udwords u; double f; } double_bits;
+typedef struct { udwords low; udwords high; } uqwords;
+
+typedef float _Complex Fcomplex;
+typedef double _Complex Dcomplex;
+typedef long double _Complex Lcomplex;
+#define COMPLEX_REAL(x) __real__(x)
+#define COMPLEX_IMAGINARY(x) __imag__(x)
+
+#endif // INT_TYPES_H
diff --git a/lib/include/lp64_le/int_endianness.h b/lib/include/lp64_le/int_endianness.h
@@ -0,0 +1,13 @@
+//===-- int_endianness.h - LP64 little-endian ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//===----------------------------------------------------------------------===//
+#ifndef INT_ENDIANNESS_H
+#define INT_ENDIANNESS_H
+
+#define _YUGA_LITTLE_ENDIAN 1
+#define _YUGA_BIG_ENDIAN 0
+
+#endif
diff --git a/lib/include/lp64_le/int_lib.h b/lib/include/lp64_le/int_lib.h
@@ -0,0 +1,41 @@
+//===-- int_lib.h - LP64 little-endian -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// cfree-tailored: LP64 (long == 64, pointer == 64), little-endian, freestanding.
+// Targets: Linux/Darwin x86_64, Linux/Darwin aarch64, RV64.
+//===----------------------------------------------------------------------===//
+
+#ifndef INT_LIB_H
+#define INT_LIB_H
+
+// AEABI ABI is handled via lib/arm/ assembly files; non-ARM targets use the
+// platform's default C ABI for these helpers.
+#define COMPILER_RT_ABI
+
+#define ALWAYS_INLINE __attribute__((always_inline))
+#define NOINLINE __attribute__((noinline))
+#define NORETURN __attribute__((noreturn))
+#define UNUSED __attribute__((unused))
+
+#include <float.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "int_types.h"
+#include "int_util.h"
+
+COMPILER_RT_ABI int __paritysi2(si_int a);
+COMPILER_RT_ABI int __paritydi2(di_int a);
+COMPILER_RT_ABI di_int __divdi3(di_int a, di_int b);
+COMPILER_RT_ABI si_int __divsi3(si_int a, si_int b);
+COMPILER_RT_ABI su_int __udivsi3(su_int n, su_int d);
+COMPILER_RT_ABI su_int __udivmodsi4(su_int a, su_int b, su_int *rem);
+COMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int *rem);
+COMPILER_RT_ABI int __clzti2(ti_int a);
+COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int *rem);
+
+#endif // INT_LIB_H
diff --git a/lib/include/lp64_le/int_types.h b/lib/include/lp64_le/int_types.h
@@ -0,0 +1,74 @@
+//===-- int_types.h - LP64 little-endian ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// cfree-tailored: LP64 little-endian. Has 128-bit integer (__int128) support.
+//===----------------------------------------------------------------------===//
+
+#ifndef INT_TYPES_H
+#define INT_TYPES_H
+
+#include "int_endianness.h"
+
+#ifdef si_int
+#undef si_int
+#endif
+typedef int32_t si_int;
+typedef uint32_t su_int;
+#define clzsi __builtin_clz
+#define ctzsi __builtin_ctz
+
+typedef int64_t di_int;
+typedef uint64_t du_int;
+
+typedef union {
+ di_int all;
+ struct { su_int low; si_int high; } s;
+} dwords;
+
+typedef union {
+ du_int all;
+ struct { su_int low; su_int high; } s;
+} udwords;
+
+#define CRT_HAS_128BIT
+typedef int ti_int __attribute__((mode(TI)));
+typedef unsigned tu_int __attribute__((mode(TI)));
+
+typedef union {
+ ti_int all;
+ struct { du_int low; di_int high; } s;
+} twords;
+
+typedef union {
+ tu_int all;
+ struct { du_int low; du_int high; } s;
+} utwords;
+
+static __inline ti_int make_ti(di_int h, di_int l) {
+ twords r; r.s.high = h; r.s.low = l; return r.all;
+}
+static __inline tu_int make_tu(du_int h, du_int l) {
+ utwords r; r.s.high = h; r.s.low = l; return r.all;
+}
+
+#define CRT_HAS_FLOATING_POINT 1
+
+typedef union { su_int u; float f; } float_bits;
+typedef union { udwords u; double f; } double_bits;
+
+typedef struct { udwords low; udwords high; } uqwords;
+
+// IEEE binary128 (tf_float / CRT_HAS_TF_MODE) is supplied via
+// include/lp64_le_ldbl128/tf_supplement.h when compiling lib/fp_tf/. Not
+// available in this base header to keep it free of feature ifdefs.
+
+typedef float _Complex Fcomplex;
+typedef double _Complex Dcomplex;
+typedef long double _Complex Lcomplex;
+#define COMPLEX_REAL(x) __real__(x)
+#define COMPLEX_IMAGINARY(x) __imag__(x)
+
+#endif // INT_TYPES_H
diff --git a/lib/include/lp64_le_ldbl128/tf_supplement.h b/lib/include/lp64_le_ldbl128/tf_supplement.h
@@ -0,0 +1,32 @@
+//===-- tf_supplement.h - binary128 long double support ------------------===//
+//
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Include via -include when compiling lib/fp_tf/ on a target where
+// long double is IEEE binary128 (e.g. aarch64-linux with -mlong-double-128).
+//===----------------------------------------------------------------------===//
+#ifndef TF_SUPPLEMENT_H
+#define TF_SUPPLEMENT_H
+
+// Pre-included before the translation unit, so the int_types.h pull happens
+// before stdint.h has been brought in by int_lib.h. Bring it in directly.
+#include <stdint.h>
+
+#include "int_types.h"
+
+typedef long double tf_float;
+#define CRT_LDBL_128BIT
+#define CRT_HAS_F128
+#define CRT_HAS_IEEE_TF
+#define CRT_LDBL_IEEE_F128
+#define TF_C(x) x##L
+#define CRT_HAS_TF_MODE
+
+typedef union { uqwords u; tf_float f; } tf_bits;
+
+typedef Lcomplex Qcomplex;
+#define CRT_HAS_NATIVE_COMPLEX_F128
+#define COMPLEXTF_REAL(x) __real__(x)
+#define COMPLEXTF_IMAGINARY(x) __imag__(x)
+
+#endif
diff --git a/lib/int/absvdi2.c b/lib/int/absvdi2.c
@@ -0,0 +1,25 @@
+//===-- absvdi2.c - Implement __absvdi2 -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __absvdi2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: absolute value
+
+// Effects: aborts if abs(x) < 0
+
+COMPILER_RT_ABI di_int __absvdi2(di_int a) {
+ const int N = (int)(sizeof(di_int) * CHAR_BIT);
+ if (a == ((di_int)((du_int)1 << (N - 1))))
+ compilerrt_abort();
+ const di_int t = a >> (N - 1);
+ return (a ^ t) - t;
+}
diff --git a/lib/int/bswapdi2.c b/lib/int/bswapdi2.c
@@ -0,0 +1,25 @@
+//===-- bswapdi2.c - Implement __bswapdi2 ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __bswapdi2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+COMPILER_RT_ABI uint64_t __bswapdi2(uint64_t u) {
+ return (
+ (((u)&0xff00000000000000ULL) >> 56) |
+ (((u)&0x00ff000000000000ULL) >> 40) |
+ (((u)&0x0000ff0000000000ULL) >> 24) |
+ (((u)&0x000000ff00000000ULL) >> 8) |
+ (((u)&0x00000000ff000000ULL) << 8) |
+ (((u)&0x0000000000ff0000ULL) << 24) |
+ (((u)&0x000000000000ff00ULL) << 40) |
+ (((u)&0x00000000000000ffULL) << 56));
+}
diff --git a/lib/int/bswapsi2.c b/lib/int/bswapsi2.c
@@ -0,0 +1,20 @@
+//===-- bswapsi2.c - Implement __bswapsi2 ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __bswapsi2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+COMPILER_RT_ABI uint32_t __bswapsi2(uint32_t u) {
+ return ((((u)&0xff000000) >> 24) |
+ (((u)&0x00ff0000) >> 8) |
+ (((u)&0x0000ff00) << 8) |
+ (((u)&0x000000ff) << 24));
+}
diff --git a/lib/int/clzdi2.c b/lib/int/clzdi2.c
@@ -0,0 +1,25 @@
+//===-- clzdi2.c - Implement __clzdi2 -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __clzdi2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: the number of leading 0-bits
+
+// Precondition: a != 0
+
+COMPILER_RT_ABI int __clzdi2(di_int a) {
+ dwords x;
+ x.all = a;
+ const si_int f = -(x.s.high == 0);
+ return clzsi((x.s.high & ~f) | (x.s.low & f)) +
+ (f & ((si_int)(sizeof(si_int) * CHAR_BIT)));
+}
diff --git a/lib/int/clzsi2.c b/lib/int/clzsi2.c
@@ -0,0 +1,48 @@
+//===-- clzsi2.c - Implement __clzsi2 -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __clzsi2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: the number of leading 0-bits
+
+// Precondition: a != 0
+
+COMPILER_RT_ABI int __clzsi2(si_int a) {
+ su_int x = (su_int)a;
+ si_int t = ((x & 0xFFFF0000) == 0) << 4; // if (x is small) t = 16 else 0
+ x >>= 16 - t; // x = [0 - 0xFFFF]
+ su_int r = t; // r = [0, 16]
+ // return r + clz(x)
+ t = ((x & 0xFF00) == 0) << 3;
+ x >>= 8 - t; // x = [0 - 0xFF]
+ r += t; // r = [0, 8, 16, 24]
+ // return r + clz(x)
+ t = ((x & 0xF0) == 0) << 2;
+ x >>= 4 - t; // x = [0 - 0xF]
+ r += t; // r = [0, 4, 8, 12, 16, 20, 24, 28]
+ // return r + clz(x)
+ t = ((x & 0xC) == 0) << 1;
+ x >>= 2 - t; // x = [0 - 3]
+ r += t; // r = [0 - 30] and is even
+ // return r + clz(x)
+ // switch (x)
+ // {
+ // case 0:
+ // return r + 2;
+ // case 1:
+ // return r + 1;
+ // case 2:
+ // case 3:
+ // return r;
+ // }
+ return r + ((2 - x) & -((x & 2) == 0));
+}
diff --git a/lib/int/cmpdi2.c b/lib/int/cmpdi2.c
@@ -0,0 +1,34 @@
+//===-- cmpdi2.c - Implement __cmpdi2 -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __cmpdi2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: if (a < b) returns 0
+// if (a == b) returns 1
+// if (a > b) returns 2
+
+COMPILER_RT_ABI si_int __cmpdi2(di_int a, di_int b) {
+ dwords x;
+ x.all = a;
+ dwords y;
+ y.all = b;
+ if (x.s.high < y.s.high)
+ return 0;
+ if (x.s.high > y.s.high)
+ return 2;
+ if (x.s.low < y.s.low)
+ return 0;
+ if (x.s.low > y.s.low)
+ return 2;
+ return 1;
+}
+
diff --git a/lib/int/ctzdi2.c b/lib/int/ctzdi2.c
@@ -0,0 +1,25 @@
+//===-- ctzdi2.c - Implement __ctzdi2 -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __ctzdi2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: the number of trailing 0-bits
+
+// Precondition: a != 0
+
+COMPILER_RT_ABI int __ctzdi2(di_int a) {
+ dwords x;
+ x.all = a;
+ const si_int f = -(x.s.low == 0);
+ return ctzsi((x.s.high & f) | (x.s.low & ~f)) +
+ (f & ((si_int)(sizeof(si_int) * CHAR_BIT)));
+}
diff --git a/lib/int/ctzsi2.c b/lib/int/ctzsi2.c
@@ -0,0 +1,53 @@
+//===-- ctzsi2.c - Implement __ctzsi2 -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __ctzsi2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: the number of trailing 0-bits
+
+// Precondition: a != 0
+
+COMPILER_RT_ABI int __ctzsi2(si_int a) {
+ su_int x = (su_int)a;
+ si_int t = ((x & 0x0000FFFF) == 0)
+ << 4; // if (x has no small bits) t = 16 else 0
+ x >>= t; // x = [0 - 0xFFFF] + higher garbage bits
+ su_int r = t; // r = [0, 16]
+ // return r + ctz(x)
+ t = ((x & 0x00FF) == 0) << 3;
+ x >>= t; // x = [0 - 0xFF] + higher garbage bits
+ r += t; // r = [0, 8, 16, 24]
+ // return r + ctz(x)
+ t = ((x & 0x0F) == 0) << 2;
+ x >>= t; // x = [0 - 0xF] + higher garbage bits
+ r += t; // r = [0, 4, 8, 12, 16, 20, 24, 28]
+ // return r + ctz(x)
+ t = ((x & 0x3) == 0) << 1;
+ x >>= t;
+ x &= 3; // x = [0 - 3]
+ r += t; // r = [0 - 30] and is even
+ // return r + ctz(x)
+
+ // The branch-less return statement below is equivalent
+ // to the following switch statement:
+ // switch (x)
+ // {
+ // case 0:
+ // return r + 2;
+ // case 2:
+ // return r + 1;
+ // case 1:
+ // case 3:
+ // return r;
+ // }
+ return r + ((2 - (x >> 1)) & -((x & 1) == 0));
+}
diff --git a/lib/int/divdi3.c b/lib/int/divdi3.c
@@ -0,0 +1,22 @@
+//===-- divdi3.c - Implement __divdi3 -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __divdi3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: a / b
+
+#define fixint_t di_int
+#define fixuint_t du_int
+#define COMPUTE_UDIV(a, b) __udivmoddi4((a), (b), (du_int *)0)
+#include "int_div_impl.inc"
+
+COMPILER_RT_ABI di_int __divdi3(di_int a, di_int b) { return __divXi3(a, b); }
diff --git a/lib/int/divmoddi4.c b/lib/int/divmoddi4.c
@@ -0,0 +1,28 @@
+//===-- divmoddi4.c - Implement __divmoddi4 -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __divmoddi4 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: a / b, *rem = a % b
+
+COMPILER_RT_ABI di_int __divmoddi4(di_int a, di_int b, di_int *rem) {
+ const int bits_in_dword_m1 = (int)(sizeof(di_int) * CHAR_BIT) - 1;
+ di_int s_a = a >> bits_in_dword_m1; // s_a = a < 0 ? -1 : 0
+ di_int s_b = b >> bits_in_dword_m1; // s_b = b < 0 ? -1 : 0
+ a = (du_int)(a ^ s_a) - s_a; // negate if s_a == -1
+ b = (du_int)(b ^ s_b) - s_b; // negate if s_b == -1
+ s_b ^= s_a; // sign of quotient
+ du_int r;
+ di_int q = (__udivmoddi4(a, b, &r) ^ s_b) - s_b; // negate if s_b == -1
+ *rem = (r ^ s_a) - s_a; // negate if s_a == -1
+ return q;
+}
diff --git a/lib/int/ffsdi2.c b/lib/int/ffsdi2.c
@@ -0,0 +1,27 @@
+//===-- ffsdi2.c - Implement __ffsdi2 -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __ffsdi2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: the index of the least significant 1-bit in a, or
+// the value zero if a is zero. The least significant bit is index one.
+
+COMPILER_RT_ABI int __ffsdi2(di_int a) {
+ dwords x;
+ x.all = a;
+ if (x.s.low == 0) {
+ if (x.s.high == 0)
+ return 0;
+ return ctzsi(x.s.high) + (1 + sizeof(si_int) * CHAR_BIT);
+ }
+ return ctzsi(x.s.low) + 1;
+}
diff --git a/lib/int/int_util.c b/lib/int/int_util.c
@@ -0,0 +1,20 @@
+//===-- int_util.c - Internal utilities for compiler-rt ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// cfree-tailored: freestanding only (cfree predefines __STDC_HOSTED__ == 0).
+// Aborts via __builtin_trap() with no libc dependency.
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+__attribute__((weak))
+__attribute__((visibility("hidden")))
+void __compilerrt_abort_impl(const char *file, int line, const char *function) {
+ (void)file;
+ (void)line;
+ (void)function;
+ __builtin_trap();
+}
diff --git a/lib/int/moddi3.c b/lib/int/moddi3.c
@@ -0,0 +1,22 @@
+//===-- moddi3.c - Implement __moddi3 -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __moddi3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: a % b
+
+#define fixint_t di_int
+#define fixuint_t du_int
+#define ASSIGN_UMOD(res, a, b) __udivmoddi4((a), (b), &(res))
+#include "int_div_impl.inc"
+
+COMPILER_RT_ABI di_int __moddi3(di_int a, di_int b) { return __modXi3(a, b); }
diff --git a/lib/int/negdi2.c b/lib/int/negdi2.c
@@ -0,0 +1,21 @@
+//===-- negdi2.c - Implement __negdi2 -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __negdi2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: -a
+
+COMPILER_RT_ABI di_int __negdi2(di_int a) {
+ // Note: this routine is here for API compatibility; any sane compiler
+ // should expand it inline.
+ return -(du_int)a;
+}
diff --git a/lib/int/paritydi2.c b/lib/int/paritydi2.c
@@ -0,0 +1,25 @@
+//===-- paritydi2.c - Implement __paritydi2 -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __paritydi2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: 1 if number of bits is odd else returns 0
+
+COMPILER_RT_ABI int __paritydi2(di_int a) {
+ dwords x;
+ x.all = a;
+ su_int x2 = x.s.high ^ x.s.low;
+ x2 ^= x2 >> 16;
+ x2 ^= x2 >> 8;
+ x2 ^= x2 >> 4;
+ return (0x6996 >> (x2 & 0xF)) & 1;
+}
diff --git a/lib/int/paritysi2.c b/lib/int/paritysi2.c
@@ -0,0 +1,23 @@
+//===-- paritysi2.c - Implement __paritysi2 -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __paritysi2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: 1 if number of bits is odd else returns 0
+
+COMPILER_RT_ABI int __paritysi2(si_int a) {
+ su_int x = (su_int)a;
+ x ^= x >> 16;
+ x ^= x >> 8;
+ x ^= x >> 4;
+ return (0x6996 >> (x & 0xF)) & 1;
+}
diff --git a/lib/int/popcountdi2.c b/lib/int/popcountdi2.c
@@ -0,0 +1,32 @@
+//===-- popcountdi2.c - Implement __popcountdi2 ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __popcountdi2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: count of 1 bits
+
+COMPILER_RT_ABI int __popcountdi2(di_int a) {
+ du_int x2 = (du_int)a;
+ x2 = x2 - ((x2 >> 1) & 0x5555555555555555uLL);
+ // Every 2 bits holds the sum of every pair of bits (32)
+ x2 = ((x2 >> 2) & 0x3333333333333333uLL) + (x2 & 0x3333333333333333uLL);
+ // Every 4 bits holds the sum of every 4-set of bits (3 significant bits) (16)
+ x2 = (x2 + (x2 >> 4)) & 0x0F0F0F0F0F0F0F0FuLL;
+ // Every 8 bits holds the sum of every 8-set of bits (4 significant bits) (8)
+ su_int x = (su_int)(x2 + (x2 >> 32));
+ // The lower 32 bits hold four 16 bit sums (5 significant bits).
+ // Upper 32 bits are garbage
+ x = x + (x >> 16);
+ // The lower 16 bits hold two 32 bit sums (6 significant bits).
+ // Upper 16 bits are garbage
+ return (x + (x >> 8)) & 0x0000007F; // (7 significant bits)
+}
diff --git a/lib/int/popcountsi2.c b/lib/int/popcountsi2.c
@@ -0,0 +1,29 @@
+//===-- popcountsi2.c - Implement __popcountsi2 ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __popcountsi2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: count of 1 bits
+
+COMPILER_RT_ABI int __popcountsi2(si_int a) {
+ su_int x = (su_int)a;
+ x = x - ((x >> 1) & 0x55555555);
+ // Every 2 bits holds the sum of every pair of bits
+ x = ((x >> 2) & 0x33333333) + (x & 0x33333333);
+ // Every 4 bits holds the sum of every 4-set of bits (3 significant bits)
+ x = (x + (x >> 4)) & 0x0F0F0F0F;
+ // Every 8 bits holds the sum of every 8-set of bits (4 significant bits)
+ x = (x + (x >> 16));
+ // The lower 16 bits hold two 8 bit sums (5 significant bits).
+ // Upper 16 bits are garbage
+ return (x + (x >> 8)) & 0x0000003F; // (6 significant bits)
+}
diff --git a/lib/int/ucmpdi2.c b/lib/int/ucmpdi2.c
@@ -0,0 +1,34 @@
+//===-- ucmpdi2.c - Implement __ucmpdi2 -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __ucmpdi2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: if (a < b) returns 0
+// if (a == b) returns 1
+// if (a > b) returns 2
+
+COMPILER_RT_ABI si_int __ucmpdi2(du_int a, du_int b) {
+ udwords x;
+ x.all = a;
+ udwords y;
+ y.all = b;
+ if (x.s.high < y.s.high)
+ return 0;
+ if (x.s.high > y.s.high)
+ return 2;
+ if (x.s.low < y.s.low)
+ return 0;
+ if (x.s.low > y.s.low)
+ return 2;
+ return 1;
+}
+
diff --git a/lib/int/udivdi3.c b/lib/int/udivdi3.c
@@ -0,0 +1,23 @@
+//===-- udivdi3.c - Implement __udivdi3 -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __udivdi3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+typedef du_int fixuint_t;
+typedef di_int fixint_t;
+#include "int_div_impl.inc"
+
+// Returns: a / b
+
+COMPILER_RT_ABI du_int __udivdi3(du_int a, du_int b) {
+ return __udivXi3(a, b);
+}
diff --git a/lib/int/udivmoddi4.c b/lib/int/udivmoddi4.c
@@ -0,0 +1,191 @@
+//===-- udivmoddi4.c - Implement __udivmoddi4 -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __udivmoddi4 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Effects: if rem != 0, *rem = a % b
+// Returns: a / b
+
+// Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide
+
+
+COMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int *rem) {
+ const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT;
+ const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
+ udwords n;
+ n.all = a;
+ udwords d;
+ d.all = b;
+ udwords q;
+ udwords r;
+ unsigned sr;
+ // special cases, X is unknown, K != 0
+ if (n.s.high == 0) {
+ if (d.s.high == 0) {
+ // 0 X
+ // ---
+ // 0 X
+ if (rem)
+ *rem = n.s.low % d.s.low;
+ return n.s.low / d.s.low;
+ }
+ // 0 X
+ // ---
+ // K X
+ if (rem)
+ *rem = n.s.low;
+ return 0;
+ }
+ // n.s.high != 0
+ if (d.s.low == 0) {
+ if (d.s.high == 0) {
+ // K X
+ // ---
+ // 0 0
+ if (rem)
+ *rem = n.s.high % d.s.low;
+ return n.s.high / d.s.low;
+ }
+ // d.s.high != 0
+ if (n.s.low == 0) {
+ // K 0
+ // ---
+ // K 0
+ if (rem) {
+ r.s.high = n.s.high % d.s.high;
+ r.s.low = 0;
+ *rem = r.all;
+ }
+ return n.s.high / d.s.high;
+ }
+ // K K
+ // ---
+ // K 0
+ if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */ {
+ if (rem) {
+ r.s.low = n.s.low;
+ r.s.high = n.s.high & (d.s.high - 1);
+ *rem = r.all;
+ }
+ return n.s.high >> ctzsi(d.s.high);
+ }
+ // K K
+ // ---
+ // K 0
+ sr = clzsi(d.s.high) - clzsi(n.s.high);
+ // 0 <= sr <= n_uword_bits - 2 or sr large
+ if (sr > n_uword_bits - 2) {
+ if (rem)
+ *rem = n.all;
+ return 0;
+ }
+ ++sr;
+ // 1 <= sr <= n_uword_bits - 1
+ // q.all = n.all << (n_udword_bits - sr);
+ q.s.low = 0;
+ q.s.high = n.s.low << (n_uword_bits - sr);
+ // r.all = n.all >> sr;
+ r.s.high = n.s.high >> sr;
+ r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
+ } else /* d.s.low != 0 */ {
+ if (d.s.high == 0) {
+ // K X
+ // ---
+ // 0 K
+ if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */ {
+ if (rem)
+ *rem = n.s.low & (d.s.low - 1);
+ if (d.s.low == 1)
+ return n.all;
+ sr = ctzsi(d.s.low);
+ q.s.high = n.s.high >> sr;
+ q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
+ return q.all;
+ }
+ // K X
+ // ---
+ // 0 K
+ sr = 1 + n_uword_bits + clzsi(d.s.low) - clzsi(n.s.high);
+ // 2 <= sr <= n_udword_bits - 1
+ // q.all = n.all << (n_udword_bits - sr);
+ // r.all = n.all >> sr;
+ if (sr == n_uword_bits) {
+ q.s.low = 0;
+ q.s.high = n.s.low;
+ r.s.high = 0;
+ r.s.low = n.s.high;
+ } else if (sr < n_uword_bits) /* 2 <= sr <= n_uword_bits - 1 */ {
+ q.s.low = 0;
+ q.s.high = n.s.low << (n_uword_bits - sr);
+ r.s.high = n.s.high >> sr;
+ r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
+ } else /* n_uword_bits + 1 <= sr <= n_udword_bits - 1 */ {
+ q.s.low = n.s.low << (n_udword_bits - sr);
+ q.s.high = (n.s.high << (n_udword_bits - sr)) |
+ (n.s.low >> (sr - n_uword_bits));
+ r.s.high = 0;
+ r.s.low = n.s.high >> (sr - n_uword_bits);
+ }
+ } else {
+ // K X
+ // ---
+ // K K
+ sr = clzsi(d.s.high) - clzsi(n.s.high);
+ // 0 <= sr <= n_uword_bits - 1 or sr large
+ if (sr > n_uword_bits - 1) {
+ if (rem)
+ *rem = n.all;
+ return 0;
+ }
+ ++sr;
+ // 1 <= sr <= n_uword_bits
+ // q.all = n.all << (n_udword_bits - sr);
+ q.s.low = 0;
+ if (sr == n_uword_bits) {
+ q.s.high = n.s.low;
+ r.s.high = 0;
+ r.s.low = n.s.high;
+ } else {
+ q.s.high = n.s.low << (n_uword_bits - sr);
+ r.s.high = n.s.high >> sr;
+ r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
+ }
+ }
+ }
+ // Not a special case
+ // q and r are initialized with:
+ // q.all = n.all << (n_udword_bits - sr);
+ // r.all = n.all >> sr;
+ // 1 <= sr <= n_udword_bits - 1
+ su_int carry = 0;
+ for (; sr > 0; --sr) {
+ // r:q = ((r:q) << 1) | carry
+ r.s.high = (r.s.high << 1) | (r.s.low >> (n_uword_bits - 1));
+ r.s.low = (r.s.low << 1) | (q.s.high >> (n_uword_bits - 1));
+ q.s.high = (q.s.high << 1) | (q.s.low >> (n_uword_bits - 1));
+ q.s.low = (q.s.low << 1) | carry;
+ // carry = 0;
+ // if (r.all >= d.all)
+ // {
+ // r.all -= d.all;
+ // carry = 1;
+ // }
+ const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1);
+ carry = s & 1;
+ r.all -= d.all & s;
+ }
+ q.all = (q.all << 1) | carry;
+ if (rem)
+ *rem = r.all;
+ return q.all;
+}
+
diff --git a/lib/int/umoddi3.c b/lib/int/umoddi3.c
@@ -0,0 +1,23 @@
+//===-- umoddi3.c - Implement __umoddi3 -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __umoddi3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+typedef du_int fixuint_t;
+typedef di_int fixint_t;
+#include "int_div_impl.inc"
+
+// Returns: a % b
+
+COMPILER_RT_ABI du_int __umoddi3(du_int a, du_int b) {
+ return __umodXi3(a, b);
+}
diff --git a/lib/int32/ashldi3.c b/lib/int32/ashldi3.c
@@ -0,0 +1,36 @@
+// ====-- ashldi3.c - Implement __ashldi3 ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __ashldi3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: a << b
+
+// Precondition: 0 <= b < bits_in_dword
+
+COMPILER_RT_ABI di_int __ashldi3(di_int a, int b) {
+ const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT);
+ dwords input;
+ dwords result;
+ input.all = a;
+ if (b & bits_in_word) /* bits_in_word <= b < bits_in_dword */ {
+ result.s.low = 0;
+ result.s.high = input.s.low << (b - bits_in_word);
+ } else /* 0 <= b < bits_in_word */ {
+ if (b == 0)
+ return a;
+ result.s.low = input.s.low << b;
+ result.s.high =
+ ((su_int)input.s.high << b) | (input.s.low >> (bits_in_word - b));
+ }
+ return result.all;
+}
+
diff --git a/lib/int32/ashrdi3.c b/lib/int32/ashrdi3.c
@@ -0,0 +1,37 @@
+//===-- ashrdi3.c - Implement __ashrdi3 -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __ashrdi3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: arithmetic a >> b
+
+// Precondition: 0 <= b < bits_in_dword
+
+COMPILER_RT_ABI di_int __ashrdi3(di_int a, int b) {
+ const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT);
+ dwords input;
+ dwords result;
+ input.all = a;
+ if (b & bits_in_word) /* bits_in_word <= b < bits_in_dword */ {
+ // result.s.high = input.s.high < 0 ? -1 : 0
+ result.s.high = input.s.high >> (bits_in_word - 1);
+ result.s.low = input.s.high >> (b - bits_in_word);
+ } else /* 0 <= b < bits_in_word */ {
+ if (b == 0)
+ return a;
+ result.s.high = input.s.high >> b;
+ result.s.low =
+ ((su_int)input.s.high << (bits_in_word - b)) | (input.s.low >> b);
+ }
+ return result.all;
+}
+
diff --git a/lib/int32/lshrdi3.c b/lib/int32/lshrdi3.c
@@ -0,0 +1,35 @@
+//===-- lshrdi3.c - Implement __lshrdi3 -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __lshrdi3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: logical a >> b
+
+// Precondition: 0 <= b < bits_in_dword
+
+COMPILER_RT_ABI di_int __lshrdi3(di_int a, int b) {
+ const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT);
+ udwords input;
+ udwords result;
+ input.all = a;
+ if (b & bits_in_word) /* bits_in_word <= b < bits_in_dword */ {
+ result.s.high = 0;
+ result.s.low = input.s.high >> (b - bits_in_word);
+ } else /* 0 <= b < bits_in_word */ {
+ if (b == 0)
+ return a;
+ result.s.high = input.s.high >> b;
+ result.s.low = (input.s.high << (bits_in_word - b)) | (input.s.low >> b);
+ }
+ return result.all;
+}
+
diff --git a/lib/int32/muldi3.c b/lib/int32/muldi3.c
@@ -0,0 +1,48 @@
+//===-- muldi3.c - Implement __muldi3 -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __muldi3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: a * b
+
+static di_int __muldsi3(su_int a, su_int b) {
+ dwords r;
+ const int bits_in_word_2 = (int)(sizeof(si_int) * CHAR_BIT) / 2;
+ const su_int lower_mask = (su_int)~0 >> bits_in_word_2;
+ r.s.low = (a & lower_mask) * (b & lower_mask);
+ su_int t = r.s.low >> bits_in_word_2;
+ r.s.low &= lower_mask;
+ t += (a >> bits_in_word_2) * (b & lower_mask);
+ r.s.low += (t & lower_mask) << bits_in_word_2;
+ r.s.high = t >> bits_in_word_2;
+ t = r.s.low >> bits_in_word_2;
+ r.s.low &= lower_mask;
+ t += (b >> bits_in_word_2) * (a & lower_mask);
+ r.s.low += (t & lower_mask) << bits_in_word_2;
+ r.s.high += t >> bits_in_word_2;
+ r.s.high += (a >> bits_in_word_2) * (b >> bits_in_word_2);
+ return r.all;
+}
+
+// Returns: a * b
+
+COMPILER_RT_ABI di_int __muldi3(di_int a, di_int b) {
+ dwords x;
+ x.all = a;
+ dwords y;
+ y.all = b;
+ dwords r;
+ r.all = __muldsi3(x.s.low, y.s.low);
+ r.s.high += x.s.high * y.s.low + x.s.low * y.s.high;
+ return r.all;
+}
+
diff --git a/lib/int64/ashlti3.c b/lib/int64/ashlti3.c
@@ -0,0 +1,37 @@
+//===-- ashlti3.c - Implement __ashlti3 -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __ashlti3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+
+// Returns: a << b
+
+// Precondition: 0 <= b < bits_in_tword
+
+COMPILER_RT_ABI ti_int __ashlti3(ti_int a, int b) {
+ const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT);
+ twords input;
+ twords result;
+ input.all = a;
+ if (b & bits_in_dword) /* bits_in_dword <= b < bits_in_tword */ {
+ result.s.low = 0;
+ result.s.high = input.s.low << (b - bits_in_dword);
+ } else /* 0 <= b < bits_in_dword */ {
+ if (b == 0)
+ return a;
+ result.s.low = input.s.low << b;
+ result.s.high =
+ ((du_int)input.s.high << b) | (input.s.low >> (bits_in_dword - b));
+ }
+ return result.all;
+}
+
diff --git a/lib/int64/ashrti3.c b/lib/int64/ashrti3.c
@@ -0,0 +1,38 @@
+//===-- ashrti3.c - Implement __ashrti3 -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __ashrti3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+
+// Returns: arithmetic a >> b
+
+// Precondition: 0 <= b < bits_in_tword
+
+COMPILER_RT_ABI ti_int __ashrti3(ti_int a, int b) {
+ const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT);
+ twords input;
+ twords result;
+ input.all = a;
+ if (b & bits_in_dword) /* bits_in_dword <= b < bits_in_tword */ {
+ // result.s.high = input.s.high < 0 ? -1 : 0
+ result.s.high = input.s.high >> (bits_in_dword - 1);
+ result.s.low = input.s.high >> (b - bits_in_dword);
+ } else /* 0 <= b < bits_in_dword */ {
+ if (b == 0)
+ return a;
+ result.s.high = input.s.high >> b;
+ result.s.low =
+ ((du_int)input.s.high << (bits_in_dword - b)) | (input.s.low >> b);
+ }
+ return result.all;
+}
+
diff --git a/lib/int64/clzti2.c b/lib/int64/clzti2.c
@@ -0,0 +1,27 @@
+//===-- clzti2.c - Implement __clzti2 -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __clzti2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+
+// Returns: the number of leading 0-bits
+
+// Precondition: a != 0
+
+COMPILER_RT_ABI int __clzti2(ti_int a) {
+ twords x;
+ x.all = a;
+ const di_int f = -(x.s.high == 0);
+ return __builtin_clzll((x.s.high & ~f) | (x.s.low & f)) +
+ ((si_int)f & ((si_int)(sizeof(di_int) * CHAR_BIT)));
+}
+
diff --git a/lib/int64/ctzti2.c b/lib/int64/ctzti2.c
@@ -0,0 +1,27 @@
+//===-- ctzti2.c - Implement __ctzti2 -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __ctzti2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+
+// Returns: the number of trailing 0-bits
+
+// Precondition: a != 0
+
+COMPILER_RT_ABI int __ctzti2(ti_int a) {
+ twords x;
+ x.all = a;
+ const di_int f = -(x.s.low == 0);
+ return __builtin_ctzll((x.s.high & f) | (x.s.low & ~f)) +
+ ((si_int)f & ((si_int)(sizeof(di_int) * CHAR_BIT)));
+}
+
diff --git a/lib/int64/divmodti4.c b/lib/int64/divmodti4.c
@@ -0,0 +1,30 @@
+//===-- divmodti4.c - Implement __divmodti4 -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __divmodti4 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+
+// Returns: a / b, *rem = a % b
+
+COMPILER_RT_ABI ti_int __divmodti4(ti_int a, ti_int b, ti_int *rem) {
+ const int bits_in_tword_m1 = (int)(sizeof(ti_int) * CHAR_BIT) - 1;
+ ti_int s_a = a >> bits_in_tword_m1; // s_a = a < 0 ? -1 : 0
+ ti_int s_b = b >> bits_in_tword_m1; // s_b = b < 0 ? -1 : 0
+ a = (tu_int)(a ^ s_a) - s_a; // negate if s_a == -1
+ b = (tu_int)(b ^ s_b) - s_b; // negate if s_b == -1
+ s_b ^= s_a; // sign of quotient
+ tu_int r;
+ ti_int q = (__udivmodti4(a, b, &r) ^ s_b) - s_b; // negate if s_b == -1
+ *rem = (r ^ s_a) - s_a; // negate if s_a == -1
+ return q;
+}
+
diff --git a/lib/int64/divti3.c b/lib/int64/divti3.c
@@ -0,0 +1,24 @@
+//===-- divti3.c - Implement __divti3 -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __divti3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+
+// Returns: a / b
+
+#define fixint_t ti_int
+#define fixuint_t tu_int
+#define COMPUTE_UDIV(a, b) __udivmodti4((a), (b), (tu_int *)0)
+#include "int_div_impl.inc"
+
+COMPILER_RT_ABI ti_int __divti3(ti_int a, ti_int b) { return __divXi3(a, b); }
+
diff --git a/lib/int64/lshrti3.c b/lib/int64/lshrti3.c
@@ -0,0 +1,36 @@
+//===-- lshrti3.c - Implement __lshrti3 -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __lshrti3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+
+// Returns: logical a >> b
+
+// Precondition: 0 <= b < bits_in_tword
+
+COMPILER_RT_ABI ti_int __lshrti3(ti_int a, int b) {
+ const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT);
+ utwords input;
+ utwords result;
+ input.all = a;
+ if (b & bits_in_dword) /* bits_in_dword <= b < bits_in_tword */ {
+ result.s.high = 0;
+ result.s.low = input.s.high >> (b - bits_in_dword);
+ } else /* 0 <= b < bits_in_dword */ {
+ if (b == 0)
+ return a;
+ result.s.high = input.s.high >> b;
+ result.s.low = (input.s.high << (bits_in_dword - b)) | (input.s.low >> b);
+ }
+ return result.all;
+}
+
diff --git a/lib/int64/modti3.c b/lib/int64/modti3.c
@@ -0,0 +1,24 @@
+//===-- modti3.c - Implement __modti3 -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __modti3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+
+// Returns: a % b
+
+#define fixint_t ti_int
+#define fixuint_t tu_int
+#define ASSIGN_UMOD(res, a, b) __udivmodti4((a), (b), &(res))
+#include "int_div_impl.inc"
+
+COMPILER_RT_ABI ti_int __modti3(ti_int a, ti_int b) { return __modXi3(a, b); }
+
diff --git a/lib/int64/multi3.c b/lib/int64/multi3.c
@@ -0,0 +1,49 @@
+//===-- multi3.c - Implement __multi3 -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __multi3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+
+// Returns: a * b
+
+static ti_int __mulddi3(du_int a, du_int b) {
+ twords r;
+ const int bits_in_dword_2 = (int)(sizeof(di_int) * CHAR_BIT) / 2;
+ const du_int lower_mask = (du_int)~0 >> bits_in_dword_2;
+ r.s.low = (a & lower_mask) * (b & lower_mask);
+ du_int t = r.s.low >> bits_in_dword_2;
+ r.s.low &= lower_mask;
+ t += (a >> bits_in_dword_2) * (b & lower_mask);
+ r.s.low += (t & lower_mask) << bits_in_dword_2;
+ r.s.high = t >> bits_in_dword_2;
+ t = r.s.low >> bits_in_dword_2;
+ r.s.low &= lower_mask;
+ t += (b >> bits_in_dword_2) * (a & lower_mask);
+ r.s.low += (t & lower_mask) << bits_in_dword_2;
+ r.s.high += t >> bits_in_dword_2;
+ r.s.high += (a >> bits_in_dword_2) * (b >> bits_in_dword_2);
+ return r.all;
+}
+
+// Returns: a * b
+
+COMPILER_RT_ABI ti_int __multi3(ti_int a, ti_int b) {
+ twords x;
+ x.all = a;
+ twords y;
+ y.all = b;
+ twords r;
+ r.all = __mulddi3(x.s.low, y.s.low);
+ r.s.high += x.s.high * y.s.low + x.s.low * y.s.high;
+ return r.all;
+}
+
diff --git a/lib/int64/negti2.c b/lib/int64/negti2.c
@@ -0,0 +1,23 @@
+//===-- negti2.c - Implement __negti2 -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __negti2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+
+// Returns: -a
+
+COMPILER_RT_ABI ti_int __negti2(ti_int a) {
+ // Note: this routine is here for API compatibility; any sane compiler
+ // should expand it inline.
+ return -(tu_int)a;
+}
+
diff --git a/lib/int64/udivmodti4.c b/lib/int64/udivmodti4.c
@@ -0,0 +1,148 @@
+//===-- udivmodti4.c - Implement __udivmodti4 -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __udivmodti4 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+
+// Returns the 128 bit division result by 64 bit. Result must fit in 64 bits.
+// Remainder stored in r.
+// Taken and adjusted from libdivide libdivide_128_div_64_to_64 division
+// fallback. For a correctness proof see the reference for this algorithm
+// in Knuth, Volume 2, section 4.3.1, Algorithm D.
+UNUSED
+static inline du_int udiv128by64to64default(du_int u1, du_int u0, du_int v,
+ du_int *r) {
+ const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
+ const du_int b = (1ULL << (n_udword_bits / 2)); // Number base (32 bits)
+ du_int un1, un0; // Norm. dividend LSD's
+ du_int vn1, vn0; // Norm. divisor digits
+ du_int q1, q0; // Quotient digits
+ du_int un64, un21, un10; // Dividend digit pairs
+ du_int rhat; // A remainder
+ si_int s; // Shift amount for normalization
+
+ s = __builtin_clzll(v);
+ if (s > 0) {
+ // Normalize the divisor.
+ v = v << s;
+ un64 = (u1 << s) | (u0 >> (n_udword_bits - s));
+ un10 = u0 << s; // Shift dividend left
+ } else {
+ // Avoid undefined behavior of (u0 >> 64).
+ un64 = u1;
+ un10 = u0;
+ }
+
+ // Break divisor up into two 32-bit digits.
+ vn1 = v >> (n_udword_bits / 2);
+ vn0 = v & 0xFFFFFFFF;
+
+ // Break right half of dividend into two digits.
+ un1 = un10 >> (n_udword_bits / 2);
+ un0 = un10 & 0xFFFFFFFF;
+
+ // Compute the first quotient digit, q1.
+ q1 = un64 / vn1;
+ rhat = un64 - q1 * vn1;
+
+ // q1 has at most error 2. No more than 2 iterations.
+ while (q1 >= b || q1 * vn0 > b * rhat + un1) {
+ q1 = q1 - 1;
+ rhat = rhat + vn1;
+ if (rhat >= b)
+ break;
+ }
+
+ un21 = un64 * b + un1 - q1 * v;
+
+ // Compute the second quotient digit.
+ q0 = un21 / vn1;
+ rhat = un21 - q0 * vn1;
+
+ // q0 has at most error 2. No more than 2 iterations.
+ while (q0 >= b || q0 * vn0 > b * rhat + un0) {
+ q0 = q0 - 1;
+ rhat = rhat + vn1;
+ if (rhat >= b)
+ break;
+ }
+
+ *r = (un21 * b + un0 - q0 * v) >> s;
+ return q1 * b + q0;
+}
+
+static inline du_int udiv128by64to64(du_int u1, du_int u0, du_int v,
+ du_int *r) {
+ return udiv128by64to64default(u1, u0, v, r);
+}
+
+// Effects: if rem != 0, *rem = a % b
+// Returns: a / b
+
+COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int *rem) {
+ const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT;
+ utwords dividend;
+ dividend.all = a;
+ utwords divisor;
+ divisor.all = b;
+ utwords quotient;
+ utwords remainder;
+ if (divisor.all > dividend.all) {
+ if (rem)
+ *rem = dividend.all;
+ return 0;
+ }
+ // When the divisor fits in 64 bits, we can use an optimized path.
+ if (divisor.s.high == 0) {
+ remainder.s.high = 0;
+ if (dividend.s.high < divisor.s.low) {
+ // The result fits in 64 bits.
+ quotient.s.low = udiv128by64to64(dividend.s.high, dividend.s.low,
+ divisor.s.low, &remainder.s.low);
+ quotient.s.high = 0;
+ } else {
+ // First, divide with the high part to get the remainder in dividend.s.high.
+ // After that dividend.s.high < divisor.s.low.
+ quotient.s.high = dividend.s.high / divisor.s.low;
+ dividend.s.high = dividend.s.high % divisor.s.low;
+ quotient.s.low = udiv128by64to64(dividend.s.high, dividend.s.low,
+ divisor.s.low, &remainder.s.low);
+ }
+ if (rem)
+ *rem = remainder.all;
+ return quotient.all;
+ }
+ // 0 <= shift <= 63.
+ si_int shift =
+ __builtin_clzll(divisor.s.high) - __builtin_clzll(dividend.s.high);
+ divisor.all <<= shift;
+ quotient.s.high = 0;
+ quotient.s.low = 0;
+ for (; shift >= 0; --shift) {
+ quotient.s.low <<= 1;
+ // Branch free version of.
+ // if (dividend.all >= divisor.all)
+ // {
+ // dividend.all -= divisor.all;
+ // carry = 1;
+ // }
+ const ti_int s =
+ (ti_int)(divisor.all - dividend.all - 1) >> (n_utword_bits - 1);
+ quotient.s.low |= s & 1;
+ dividend.all -= divisor.all & s;
+ divisor.all >>= 1;
+ }
+ if (rem)
+ *rem = dividend.all;
+ return quotient.all;
+}
+
diff --git a/lib/int64/udivti3.c b/lib/int64/udivti3.c
@@ -0,0 +1,21 @@
+//===-- udivti3.c - Implement __udivti3 -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __udivti3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+
+// Returns: a / b
+
+COMPILER_RT_ABI tu_int __udivti3(tu_int a, tu_int b) {
+ return __udivmodti4(a, b, 0);
+}
+
diff --git a/lib/int64/umodti3.c b/lib/int64/umodti3.c
@@ -0,0 +1,23 @@
+//===-- umodti3.c - Implement __umodti3 -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __umodti3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+
+// Returns: a % b
+
+COMPILER_RT_ABI tu_int __umodti3(tu_int a, tu_int b) {
+ tu_int r;
+ __udivmodti4(a, b, &r);
+ return r;
+}
+
diff --git a/lib/mem/mem.c b/lib/mem/mem.c
@@ -0,0 +1,49 @@
+//===-- mem.c - cfree freestanding mem* primitives -----------------------===//
+//
+// SPDX-License-Identifier: 0BSD
+//
+// Portable C implementations of memcpy, memmove, memset, memcmp.
+// All are weak so a user libc (or a tuned arch-specific version) wins.
+// Targets without __builtin_trap can replace it; cfree always provides it.
+//===----------------------------------------------------------------------===//
+
+#include <stddef.h>
+
+__attribute__((weak))
+void *memcpy(void *restrict dst, const void *restrict src, size_t n) {
+ unsigned char *d = (unsigned char *)dst;
+ const unsigned char *s = (const unsigned char *)src;
+ for (size_t i = 0; i < n; i++) d[i] = s[i];
+ return dst;
+}
+
+__attribute__((weak))
+void *memmove(void *dst, const void *src, size_t n) {
+ unsigned char *d = (unsigned char *)dst;
+ const unsigned char *s = (const unsigned char *)src;
+ if (d == s || n == 0) return dst;
+ if (d < s) {
+ for (size_t i = 0; i < n; i++) d[i] = s[i];
+ } else {
+ for (size_t i = n; i-- > 0;) d[i] = s[i];
+ }
+ return dst;
+}
+
+__attribute__((weak))
+void *memset(void *dst, int c, size_t n) {
+ unsigned char *d = (unsigned char *)dst;
+ unsigned char v = (unsigned char)c;
+ for (size_t i = 0; i < n; i++) d[i] = v;
+ return dst;
+}
+
+__attribute__((weak))
+int memcmp(const void *a, const void *b, size_t n) {
+ const unsigned char *p = (const unsigned char *)a;
+ const unsigned char *q = (const unsigned char *)b;
+ for (size_t i = 0; i < n; i++) {
+ if (p[i] != q[i]) return (int)p[i] - (int)q[i];
+ }
+ return 0;
+}
diff --git a/lib/riscv/restore_rv32.S b/lib/riscv/restore_rv32.S
@@ -0,0 +1,73 @@
+//===-- restore_rv32.S - restore up to 12 callee-save registers (RV32) ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+ .text
+
+ .globl __riscv_restore_12
+ .type __riscv_restore_12,@function
+__riscv_restore_12:
+ lw s11, 12(sp)
+ addi sp, sp, 16
+ // fallthrough into __riscv_restore_11/10/9/8
+
+ .globl __riscv_restore_11
+ .type __riscv_restore_11,@function
+ .globl __riscv_restore_10
+ .type __riscv_restore_10,@function
+ .globl __riscv_restore_9
+ .type __riscv_restore_9,@function
+ .globl __riscv_restore_8
+ .type __riscv_restore_8,@function
+__riscv_restore_11:
+__riscv_restore_10:
+__riscv_restore_9:
+__riscv_restore_8:
+ lw s10, 0(sp)
+ lw s9, 4(sp)
+ lw s8, 8(sp)
+ lw s7, 12(sp)
+ addi sp, sp, 16
+ // fallthrough into __riscv_restore_7/6/5/4
+
+ .globl __riscv_restore_7
+ .type __riscv_restore_7,@function
+ .globl __riscv_restore_6
+ .type __riscv_restore_6,@function
+ .globl __riscv_restore_5
+ .type __riscv_restore_5,@function
+ .globl __riscv_restore_4
+ .type __riscv_restore_4,@function
+__riscv_restore_7:
+__riscv_restore_6:
+__riscv_restore_5:
+__riscv_restore_4:
+ lw s6, 0(sp)
+ lw s5, 4(sp)
+ lw s4, 8(sp)
+ lw s3, 12(sp)
+ addi sp, sp, 16
+ // fallthrough into __riscv_restore_3/2/1/0
+
+ .globl __riscv_restore_3
+ .type __riscv_restore_3,@function
+ .globl __riscv_restore_2
+ .type __riscv_restore_2,@function
+ .globl __riscv_restore_1
+ .type __riscv_restore_1,@function
+ .globl __riscv_restore_0
+ .type __riscv_restore_0,@function
+__riscv_restore_3:
+__riscv_restore_2:
+__riscv_restore_1:
+__riscv_restore_0:
+ lw s2, 0(sp)
+ lw s1, 4(sp)
+ lw s0, 8(sp)
+ lw ra, 12(sp)
+ addi sp, sp, 16
+ ret
diff --git a/lib/riscv/restore_rv64.S b/lib/riscv/restore_rv64.S
@@ -0,0 +1,82 @@
+//===-- restore_rv64.S - restore up to 12 callee-save registers (RV64) ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+ .text
+
+ .globl __riscv_restore_12
+ .type __riscv_restore_12,@function
+__riscv_restore_12:
+ ld s11, 8(sp)
+ addi sp, sp, 16
+ // fallthrough into __riscv_restore_11/10
+
+ .globl __riscv_restore_11
+ .type __riscv_restore_11,@function
+ .globl __riscv_restore_10
+ .type __riscv_restore_10,@function
+__riscv_restore_11:
+__riscv_restore_10:
+ ld s10, 0(sp)
+ ld s9, 8(sp)
+ addi sp, sp, 16
+ // fallthrough into __riscv_restore_9/8
+
+ .globl __riscv_restore_9
+ .type __riscv_restore_9,@function
+ .globl __riscv_restore_8
+ .type __riscv_restore_8,@function
+__riscv_restore_9:
+__riscv_restore_8:
+ ld s8, 0(sp)
+ ld s7, 8(sp)
+ addi sp, sp, 16
+ // fallthrough into __riscv_restore_7/6
+
+ .globl __riscv_restore_7
+ .type __riscv_restore_7,@function
+ .globl __riscv_restore_6
+ .type __riscv_restore_6,@function
+__riscv_restore_7:
+__riscv_restore_6:
+ ld s6, 0(sp)
+ ld s5, 8(sp)
+ addi sp, sp, 16
+ // fallthrough into __riscv_restore_5/4
+
+ .globl __riscv_restore_5
+ .type __riscv_restore_5,@function
+ .globl __riscv_restore_4
+ .type __riscv_restore_4,@function
+__riscv_restore_5:
+__riscv_restore_4:
+ ld s4, 0(sp)
+ ld s3, 8(sp)
+ addi sp, sp, 16
+ // fallthrough into __riscv_restore_3/2
+
+ .globl __riscv_restore_3
+ .type __riscv_restore_3,@function
+ .globl __riscv_restore_2
+ .type __riscv_restore_2,@function
+__riscv_restore_3:
+__riscv_restore_2:
+ ld s2, 0(sp)
+ ld s1, 8(sp)
+ addi sp, sp, 16
+ // fallthrough into __riscv_restore_1/0
+
+ .globl __riscv_restore_1
+ .type __riscv_restore_1,@function
+ .globl __riscv_restore_0
+ .type __riscv_restore_0,@function
+__riscv_restore_1:
+__riscv_restore_0:
+ ld s0, 0(sp)
+ ld ra, 8(sp)
+ addi sp, sp, 16
+ ret
diff --git a/lib/riscv/save_rv32.S b/lib/riscv/save_rv32.S
@@ -0,0 +1,88 @@
+//===-- save_rv32.S - save up to 12 callee-saved registers (RV32) --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Multiple entry points depending on number of registers to save.
+// Entry points are grouped in 4s for rv32 to maintain 16-byte stack alignment.
+//
+//===----------------------------------------------------------------------===//
+
+ .text
+
+ .globl __riscv_save_12
+ .type __riscv_save_12,@function
+__riscv_save_12:
+ addi sp, sp, -64
+ mv t1, zero
+ sw s11, 12(sp)
+ j .Lriscv_save_11_8
+
+ .globl __riscv_save_11
+ .type __riscv_save_11,@function
+ .globl __riscv_save_10
+ .type __riscv_save_10,@function
+ .globl __riscv_save_9
+ .type __riscv_save_9,@function
+ .globl __riscv_save_8
+ .type __riscv_save_8,@function
+__riscv_save_11:
+__riscv_save_10:
+__riscv_save_9:
+__riscv_save_8:
+ addi sp, sp, -64
+ li t1, 16
+.Lriscv_save_11_8:
+ sw s10, 16(sp)
+ sw s9, 20(sp)
+ sw s8, 24(sp)
+ sw s7, 28(sp)
+ j .Lriscv_save_7_4
+
+ .globl __riscv_save_7
+ .type __riscv_save_7,@function
+ .globl __riscv_save_6
+ .type __riscv_save_6,@function
+ .globl __riscv_save_5
+ .type __riscv_save_5,@function
+ .globl __riscv_save_4
+ .type __riscv_save_4,@function
+__riscv_save_7:
+__riscv_save_6:
+__riscv_save_5:
+__riscv_save_4:
+ addi sp, sp, -64
+ li t1, 32
+.Lriscv_save_7_4:
+ sw s6, 32(sp)
+ sw s5, 36(sp)
+ sw s4, 40(sp)
+ sw s3, 44(sp)
+ sw s2, 48(sp)
+ sw s1, 52(sp)
+ sw s0, 56(sp)
+ sw ra, 60(sp)
+ add sp, sp, t1
+ jr t0
+
+ .globl __riscv_save_3
+ .type __riscv_save_3,@function
+ .globl __riscv_save_2
+ .type __riscv_save_2,@function
+ .globl __riscv_save_1
+ .type __riscv_save_1,@function
+ .globl __riscv_save_0
+ .type __riscv_save_0,@function
+__riscv_save_3:
+__riscv_save_2:
+__riscv_save_1:
+__riscv_save_0:
+ addi sp, sp, -16
+ sw s2, 0(sp)
+ sw s1, 4(sp)
+ sw s0, 8(sp)
+ sw ra, 12(sp)
+ jr t0
diff --git a/lib/riscv/save_rv64.S b/lib/riscv/save_rv64.S
@@ -0,0 +1,101 @@
+//===-- save_rv64.S - save up to 12 callee-saved registers (RV64) --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Multiple entry points depending on number of registers to save.
+// Entry points are grouped in 2s for rv64 to maintain 16-byte stack alignment.
+//
+//===----------------------------------------------------------------------===//
+
+ .text
+
+ .globl __riscv_save_12
+ .type __riscv_save_12,@function
+__riscv_save_12:
+ addi sp, sp, -112
+ mv t1, zero
+ sd s11, 8(sp)
+ j .Lriscv_save_11_10
+
+ .globl __riscv_save_11
+ .type __riscv_save_11,@function
+ .globl __riscv_save_10
+ .type __riscv_save_10,@function
+__riscv_save_11:
+__riscv_save_10:
+ addi sp, sp, -112
+ li t1, 16
+.Lriscv_save_11_10:
+ sd s10, 16(sp)
+ sd s9, 24(sp)
+ j .Lriscv_save_9_8
+
+ .globl __riscv_save_9
+ .type __riscv_save_9,@function
+ .globl __riscv_save_8
+ .type __riscv_save_8,@function
+__riscv_save_9:
+__riscv_save_8:
+ addi sp, sp, -112
+ li t1, 32
+.Lriscv_save_9_8:
+ sd s8, 32(sp)
+ sd s7, 40(sp)
+ j .Lriscv_save_7_6
+
+ .globl __riscv_save_7
+ .type __riscv_save_7,@function
+ .globl __riscv_save_6
+ .type __riscv_save_6,@function
+__riscv_save_7:
+__riscv_save_6:
+ addi sp, sp, -112
+ li t1, 48
+.Lriscv_save_7_6:
+ sd s6, 48(sp)
+ sd s5, 56(sp)
+ j .Lriscv_save_5_4
+
+ .globl __riscv_save_5
+ .type __riscv_save_5,@function
+ .globl __riscv_save_4
+ .type __riscv_save_4,@function
+__riscv_save_5:
+__riscv_save_4:
+ addi sp, sp, -112
+ li t1, 64
+.Lriscv_save_5_4:
+ sd s4, 64(sp)
+ sd s3, 72(sp)
+ j .Lriscv_save_3_2
+
+ .globl __riscv_save_3
+ .type __riscv_save_3,@function
+ .globl __riscv_save_2
+ .type __riscv_save_2,@function
+__riscv_save_3:
+__riscv_save_2:
+ addi sp, sp, -112
+ li t1, 80
+.Lriscv_save_3_2:
+ sd s2, 80(sp)
+ sd s1, 88(sp)
+ sd s0, 96(sp)
+ sd ra, 104(sp)
+ add sp, sp, t1
+ jr t0
+
+ .globl __riscv_save_1
+ .type __riscv_save_1,@function
+ .globl __riscv_save_0
+ .type __riscv_save_0,@function
+__riscv_save_1:
+__riscv_save_0:
+ addi sp, sp, -16
+ sd s0, 0(sp)
+ sd ra, 8(sp)
+ jr t0