lib/ consolidation into fewer files and exhaustive build.sh via clang - kit

commit 10edd3fecf8e36b56b5b8b41b653755dd44de48c
parent 05db6b973f7e1398455d363ebd3ba815c3f82d53
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Thu,  7 May 2026 11:32:37 -0700

lib/ consolidation into fewer files and exhaustive build.sh via clang

Diffstat:
M lib/README.md  | 269 ++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------
A lib/arm/aeabi.c  | 20 ++++++++++++++++++++
D lib/arm/aeabi_dcmp.S  | 45 ---------------------------------------------
D lib/arm/aeabi_drsub.c  | 14 --------------
D lib/arm/aeabi_fcmp.S  | 45 ---------------------------------------------
D lib/arm/aeabi_frsub.c  | 14 --------------
D lib/arm/aeabi_idivmod.S  | 33 ---------------------------------
D lib/arm/aeabi_idivmod_thumb1.S  | 28 ----------------------------
D lib/arm/aeabi_ldivmod.S  | 34 ----------------------------------
D lib/arm/aeabi_memcmp.S  | 23 -----------------------
D lib/arm/aeabi_memcmp_thumb1.S  | 25 -------------------------
D lib/arm/aeabi_memcpy.S  | 23 -----------------------
D lib/arm/aeabi_memcpy_thumb1.S  | 25 -------------------------
D lib/arm/aeabi_memmove.S  | 22 ----------------------
D lib/arm/aeabi_memmove_thumb1.S  | 24 ------------------------
D lib/arm/aeabi_memset.S  | 37 -------------------------------------
D lib/arm/aeabi_memset_thumb1.S  | 42 ------------------------------------------
A lib/arm/aeabi_thumb1.S  | 261 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A lib/arm/aeabi_thumb2.S  | 268 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D lib/arm/aeabi_uidivmod.S  | 34 ----------------------------------
D lib/arm/aeabi_uidivmod_thumb1.S  | 31 -------------------------------
D lib/arm/aeabi_uldivmod.S  | 34 ----------------------------------
M lib/build.sh  | 172 ++++++++++++++++++++++++++++++-------------------------------------------------
D lib/fp/adddf3.c  | 17 -----------------
D lib/fp/addsf3.c  | 17 -----------------
D lib/fp/comparedf2.c  | 52 ----------------------------------------------------
D lib/fp/comparesf2.c  | 52 ----------------------------------------------------
D lib/fp/divdf3.c  | 25 -------------------------
D lib/fp/divsf3.c  | 27 ---------------------------
D lib/fp/extendsfdf2.c  | 17 -----------------
D lib/fp/fixdfdi.c  | 26 --------------------------
D lib/fp/fixdfsi.c  | 21 ---------------------
D lib/fp/fixsfdi.c  | 26 --------------------------
D lib/fp/fixsfsi.c  | 21 ---------------------
D lib/fp/fixunsdfdi.c  | 24 ------------------------
D lib/fp/fixunsdfsi.c  | 19 -------------------
D lib/fp/fixunssfdi.c  | 24 ------------------------
D lib/fp/fixunssfsi.c  | 23 -----------------------
D lib/fp/floatdidf.c  | 35 -----------------------------------
D lib/fp/floatdisf.c  | 30 ------------------------------
D lib/fp/floatsidf.c  | 51 ---------------------------------------------------
D lib/fp/floatsisf.c  | 59 -----------------------------------------------------------
D lib/fp/floatundidf.c  | 35 -----------------------------------
D lib/fp/floatundisf.c  | 30 ------------------------------
D lib/fp/floatunsidf.c  | 40 ----------------------------------------
D lib/fp/floatunsisf.c  | 50 --------------------------------------------------
A lib/fp/fp.c  | 512 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D lib/fp/fp_mode.c  | 22 ----------------------
D lib/fp/muldf3.c  | 18 ------------------
D lib/fp/mulsf3.c  | 18 ------------------
D lib/fp/negdf2.c  | 17 -----------------
D lib/fp/negsf2.c  | 17 -----------------
D lib/fp/subdf3.c  | 20 --------------------
D lib/fp/subsf3.c  | 20 --------------------
D lib/fp/truncdfsf2.c  | 17 -----------------
D lib/fp_tf/addtf3.c  | 21 ---------------------
D lib/fp_tf/comparetf2.c  | 51 ---------------------------------------------------
D lib/fp_tf/divtf3.c  | 27 ---------------------------
D lib/fp_tf/extenddftf2.c  | 20 --------------------
D lib/fp_tf/extendsftf2.c  | 20 --------------------
D lib/fp_tf/fixtfdi.c  | 21 ---------------------
D lib/fp_tf/fixtfsi.c  | 21 ---------------------
D lib/fp_tf/fixtfti.c  | 21 ---------------------
D lib/fp_tf/fixunstfdi.c  | 19 -------------------
D lib/fp_tf/fixunstfsi.c  | 19 -------------------
D lib/fp_tf/fixunstfti.c  | 19 -------------------
D lib/fp_tf/floatditf.c  | 47 -----------------------------------------------
D lib/fp_tf/floatsitf.c  | 47 -----------------------------------------------
D lib/fp_tf/floattitf.c  | 36 ------------------------------------
D lib/fp_tf/floatunditf.c  | 38 --------------------------------------
D lib/fp_tf/floatunsitf.c  | 38 --------------------------------------
D lib/fp_tf/floatuntitf.c  | 36 ------------------------------------
A lib/fp_tf/fp_tf.c  | 387 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D lib/fp_tf/multf3.c  | 20 --------------------
D lib/fp_tf/subtf3.c  | 22 ----------------------
D lib/fp_tf/trunctfdf2.c  | 20 --------------------
D lib/fp_tf/trunctfsf2.c  | 20 --------------------
D lib/fp_ti/fixdfti.c  | 24 ------------------------
D lib/fp_ti/fixsfti.c  | 24 ------------------------
D lib/fp_ti/fixunsdfti.c  | 20 --------------------
D lib/fp_ti/fixunssfti.c  | 23 -----------------------
D lib/fp_ti/floattidf.c  | 32 --------------------------------
D lib/fp_ti/floattisf.c  | 31 -------------------------------
D lib/fp_ti/floatuntidf.c  | 32 --------------------------------
D lib/fp_ti/floatuntisf.c  | 31 -------------------------------
A lib/fp_ti/fp_ti.c  | 158 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D lib/impl/fp_extend.h  | 277 -------------------------------------------------------------------------------
M lib/impl/fp_extend_impl.inc  | 259 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
D lib/impl/fp_trunc.h  | 259 -------------------------------------------------------------------------------
M lib/impl/fp_trunc_impl.inc  | 243 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
D lib/impl/int_to_fp.h  | 174 -------------------------------------------------------------------------------
M lib/impl/int_to_fp_impl.inc  | 158 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
M lib/include/common/fp_lib.h  | 1 -
D lib/include/ilp32_le/int_endianness.h  | 13 -------------
M lib/include/ilp32_le/int_lib.h  | 43 ++++++++++++++++++++++++++++++++++++++++++-
D lib/include/ilp32_le/int_types.h  | 48 ------------------------------------------------
D lib/include/llp64_le/int_endianness.h  | 13 -------------
M lib/include/llp64_le/int_lib.h  | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
D lib/include/llp64_le/int_types.h  | 69 ---------------------------------------------------------------------
D lib/include/lp64_le/int_endianness.h  | 13 -------------
M lib/include/lp64_le/int_lib.h  | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
D lib/include/lp64_le/int_types.h  | 74 --------------------------------------------------------------------------
M lib/include/lp64_le_ldbl128/tf_supplement.h  | 6 +++---
D lib/int/absvdi2.c  | 25 -------------------------
D lib/int/bswapdi2.c  | 25 -------------------------
D lib/int/bswapsi2.c  | 20 --------------------
D lib/int/clzdi2.c  | 25 -------------------------
D lib/int/clzsi2.c  | 48 ------------------------------------------------
D lib/int/cmpdi2.c  | 34 ----------------------------------
D lib/int/ctzdi2.c  | 25 -------------------------
D lib/int/ctzsi2.c  | 53 -----------------------------------------------------
D lib/int/divdi3.c  | 25 -------------------------
D lib/int/divmoddi4.c  | 28 ----------------------------
D lib/int/ffsdi2.c  | 27 ---------------------------
A lib/int/int.c  | 558 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D lib/int/int_util.c  | 20 --------------------
D lib/int/moddi3.c  | 25 -------------------------
D lib/int/negdi2.c  | 21 ---------------------
D lib/int/paritydi2.c  | 25 -------------------------
D lib/int/paritysi2.c  | 23 -----------------------
D lib/int/popcountdi2.c  | 32 --------------------------------
D lib/int/popcountsi2.c  | 29 -----------------------------
D lib/int/ucmpdi2.c  | 34 ----------------------------------
D lib/int/udivdi3.c  | 24 ------------------------
D lib/int/udivmoddi4.c  | 191 -------------------------------------------------------------------------------
D lib/int/umoddi3.c  | 24 ------------------------
D lib/int32/ashldi3.c  | 36 ------------------------------------
D lib/int32/ashrdi3.c  | 37 -------------------------------------
A lib/int32/int32.c  | 119 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D lib/int32/lshrdi3.c  | 35 -----------------------------------
D lib/int32/muldi3.c  | 48 ------------------------------------------------
D lib/int64/ashlti3.c  | 37 -------------------------------------
D lib/int64/ashrti3.c  | 38 --------------------------------------
D lib/int64/clzti2.c  | 27 ---------------------------
D lib/int64/ctzti2.c  | 27 ---------------------------
D lib/int64/divmodti4.c  | 30 ------------------------------
D lib/int64/divti3.c  | 27 ---------------------------
A lib/int64/int64.c  | 383 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D lib/int64/lshrti3.c  | 36 ------------------------------------
D lib/int64/modti3.c  | 27 ---------------------------
D lib/int64/multi3.c  | 49 -------------------------------------------------
D lib/int64/negti2.c  | 23 -----------------------
D lib/int64/udivmodti4.c  | 148 -------------------------------------------------------------------------------
D lib/int64/udivti3.c  | 21 ---------------------
D lib/int64/umodti3.c  | 23 -----------------------
D lib/riscv/restore_rv32.S  | 73 -------------------------------------------------------------------------
D lib/riscv/restore_rv64.S  | 82 -------------------------------------------------------------------------------
A lib/riscv/rv32.S  | 147 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A lib/riscv/rv64.S  | 169 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D lib/riscv/save_rv32.S  | 88 -------------------------------------------------------------------------------
D lib/riscv/save_rv64.S  | 101 -------------------------------------------------------------------------------

151 files changed, 4042 insertions(+), 5163 deletions(-)
diff --git a/lib/README.md b/lib/README.md
@@ -1,77 +1,84 @@
 # lib/ — `libcfree_rt.a` source
 
-Per-op runtime helpers for cfree, copied from
+Runtime helpers for cfree, derived from
 [compiler-rt 18.1.8](https://github.com/llvm/llvm-project/releases/tag/llvmorg-18.1.8)
-(`lib/builtins/`) and stripped to remove ifdefs that select between targets.
-The build system picks files by directory; each `.c` / `.S` file compiles to
-exactly one target without preprocessor branching.
+(`lib/builtins/`) and stripped of all target-dispatch ifdefs. Every helper
+that varies across targets is selected by directory + compile flags, not
+preprocessor branches inside source.
+
+The build compiles **exactly one master `.c` (and/or `.S`) file per feature
+flag** — no globbing of per-op files. Per-op snippets are inlined directly
+into the master, with the per-precision / per-(src,dst) machinery from
+`impl/` re-applied for each section.
 
 License: Apache-2.0 WITH LLVM-exception (see `LICENSE-compiler-rt.txt`). The
 hand-written `mem/mem.c` is 0BSD; relicense as desired.
 
 ## Layout
 
-### Source dirs (each `.c` / `.S` becomes one object in `libcfree_rt.a`)
-
-| Directory      | Purpose                                                   | Built on                                  |
-| -------------- | --------------------------------------------------------- | ----------------------------------------- |
-| `int/`         | Integer helpers needed on every target                    | All                                       |
-| `int32/`       | 64-bit ops synthesized from 32-bit                        | ILP32 only                                |
-| `int64/`       | 128-bit ops via `__int128`                                | LP64 / LLP64 only                         |
-| `fp/`          | Soft-float `sf` (binary32) and `df` (binary64)            | FPU-less targets (RV{32,64}I, ARM softfp, WASM) |
-| `fp_tf/`       | Soft-float `tf` (binary128)                               | Targets with binary128 long double (e.g. aarch64 `-mlong-double-128`) |
-| `fp_ti/`       | `__int128` ↔ float                                        | LP64 / LLP64 + soft-float                 |
-| `arm/`         | ARM AAPCS / AEABI aliases & wrappers                      | 32-bit ARM                                |
-| `riscv/`       | `__riscv_save_*` / `__riscv_restore_*` (`-msave-restore`) | RISC-V (per-xlen file)                    |
-| `mem/`         | `memcpy` / `memmove` / `memset` / `memcmp` (weak)         | All; user libc overrides                  |
-| `atomic/`      | `__atomic_*` fallback shim                                | All                                       |
-
-### Build-time include dirs (consumed by the source dirs above; nothing here lands in `libcfree_rt.a`)
-
-| Directory                  | Consumed by                                                                                |
-| -------------------------- | ------------------------------------------------------------------------------------------ |
-| `impl/`                    | `int/divdi3.c` etc. (via `int_div_impl.inc`); every `fp*/`, `fp_tf/`, `fp_ti/` file        |
-| `include/common/`          | All `.c` (transitively, via the per-target `int_lib.h`); `arm/*.S` includes `assembly.h`   |
-| `include/lp64_le/`         | LP64 builds; selected via `-Iinclude/lp64_le`                                              |
-| `include/llp64_le/`        | LLP64 (Win64) builds                                                                       |
-| `include/ilp32_le/`        | ILP32 builds                                                                               |
-| `include/lp64_le_ldbl128/` | Extra `-include tf_supplement.h` when compiling `fp_tf/` on binary128-long-double targets  |
-| `atomic/atomic_common.inc` | `atomic/atomic_freestanding.c`                                                             |
+### Master files (each becomes one object in `libcfree_rt.a`)
+
+| File                       | Purpose                                                     | Built on                                            |
+| -------------------------- | ----------------------------------------------------------- | --------------------------------------------------- |
+| `int/int.c`                | Integer helpers needed on every target                      | All                                                 |
+| `int32/int32.c`            | 64-bit ops synthesized from 32-bit                          | ILP32 only                                          |
+| `int64/int64.c`            | 128-bit ops via `__int128`                                  | LP64 / LLP64 only                                   |
+| `fp/fp.c`                  | Soft-float `sf` (binary32) + `df` (binary64) + sf↔df + `fp_mode` | FPU-less (RV{32,64}I, ARM softfp, WASM)         |
+| `fp_tf/fp_tf.c`            | Soft-float `tf` (binary128) + sf↔tf + df↔tf + i128↔tf       | Targets with binary128 long double (e.g. aarch64 `-mlong-double-128`) |
+| `fp_ti/fp_ti.c`            | `__int128` ↔ sf/df + sf/df → ti fix                         | LP64 / LLP64 + soft-float                           |
+| `arm/aeabi_thumb2.S`       | AEABI div/mod/mem* + soft-float compares (ARMv7+/Thumb2)    | 32-bit ARM, ARMv7+/Thumb2                           |
+| `arm/aeabi_thumb1.S`       | Same, Thumb1-tuned (no tail-calls, simpler instr forms)     | 32-bit ARM, ARMv6-M (Cortex-M0/M0+/M1)              |
+| `arm/aeabi.c`              | AEABI `__aeabi_drsub` / `__aeabi_frsub` (ISA-agnostic)      | 32-bit ARM (both ISA modes)                         |
+| `riscv/rv32.S`             | `__riscv_save_*` + `__riscv_restore_*` (rv32)               | RISC-V rv32 with `-msave-restore`                   |
+| `riscv/rv64.S`             | `__riscv_save_*` + `__riscv_restore_*` (rv64)               | RISC-V rv64 with `-msave-restore`                   |
+| `mem/mem.c`                | `memcpy` / `memmove` / `memset` / `memcmp` (weak)           | All; user libc overrides                            |
+| `atomic/atomic_freestanding.c` | `__atomic_*` fallback shim                              | All                                                 |
+
+### Build-time include dirs (consumed by the masters; nothing here lands in `libcfree_rt.a`)
+
+| Directory                  | Consumed by                                                                                                |
+| -------------------------- | ---------------------------------------------------------------------------------------------------------- |
+| `impl/`                    | `int/int.c` (via `int_div_impl.inc`); every `fp*` master (via `fp_*_impl.inc`, `fp_extend_impl.inc`, `fp_trunc_impl.inc`, `int_to_fp_impl.inc`) |
+| `include/common/`          | All masters (transitively, via the per-target `int_lib.h`); `arm/aeabi_thumb*.S` includes `assembly.h`     |
+| `include/lp64_le/`         | LP64 builds; selected via `-Iinclude/lp64_le`                                                              |
+| `include/llp64_le/`        | LLP64 (Win64) builds                                                                                       |
+| `include/ilp32_le/`        | ILP32 builds                                                                                               |
+| `include/lp64_le_ldbl128/` | Extra `-include tf_supplement.h` when compiling `fp_tf/fp_tf.c` on binary128-long-double targets           |
+| `atomic/atomic_common.inc` | `atomic/atomic_freestanding.c`                                                                             |
 
 ## How the build picks files
 
 ```text
 target tuple                              ⟶ compile
 
-x86_64-linux  / aarch64-linux  / aarch64-darwin / rv64
-                                          ⟶ int/* int64/* fp/*
+x86_64-linux  / x86_64-darwin / aarch64-darwin / rv64
+                                          ⟶ int/int.c int64/int64.c fp/fp.c
                                             atomic/atomic_freestanding.c
                                             mem/mem.c
                                           -Iinclude/lp64_le
                                           -DHAS_INT128=1
 
-x86_64-windows                            ⟶ int/* int64/* fp/* atomic/* mem/*
-                                          -Iinclude/llp64_le
+x86_64-windows                            ⟶ same set, -Iinclude/llp64_le
                                           -DHAS_INT128=1
 
-i386-* / arm32-* / rv32 / wasm32          ⟶ int/* int32/* fp/* atomic/* mem/*
+i386-* / arm32-* / rv32 / wasm32          ⟶ int/int.c int32/int32.c fp/fp.c
+                                            atomic/atomic_freestanding.c
+                                            mem/mem.c
                                           -Iinclude/ilp32_le
                                           -DHAS_INT128=0
 
-aarch64-* with binary128 long double      ⟶ above + fp_tf/* fp_ti/*
+aarch64-linux (binary128 long double)     ⟶ above LP64 set + fp_tf/fp_tf.c
+                                            + fp_ti/fp_ti.c
                                           -include include/lp64_le_ldbl128/tf_supplement.h
 
-rv32 with -msave-restore                  ⟶ above + riscv/save_rv32.S riscv/restore_rv32.S
-rv64 with -msave-restore                  ⟶ above + riscv/save_rv64.S riscv/restore_rv64.S
-arm32 ARMv7+/Thumb2 (AEABI)               ⟶ above + arm/aeabi_*.{S,c} (excluding *_thumb1.S)
-arm32 ARMv6-M Thumb1 (Cortex-M0/M0+/M1)   ⟶ above + arm/aeabi_*_thumb1.S
-                                                  + the AEABI files with no Thumb1 variant
-                                                    (aeabi_ldivmod.S, aeabi_uldivmod.S,
-                                                     aeabi_dcmp.S, aeabi_fcmp.S,
-                                                     aeabi_drsub.c, aeabi_frsub.c)
+rv32 with -msave-restore                  ⟶ ILP32 set + riscv/rv32.S
+rv64 with -msave-restore                  ⟶ LP64 set + riscv/rv64.S
+arm32 ARMv7+/Thumb2 (AEABI)               ⟶ ILP32 set + arm/aeabi_thumb2.S + arm/aeabi.c
+arm32 ARMv6-M Thumb1 (Cortex-M0/M0+/M1)   ⟶ ILP32 set + arm/aeabi_thumb1.S + arm/aeabi.c
 ```
 
-`-Iinclude/common` is always added.
+`-Iinclude/common` and `-Iimpl` are always added. The full set of variants
+is in `build.sh`.
 
 ## Endianness
 
@@ -79,68 +86,105 @@ All headers assume `__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__`. cfree's
 supported targets (x86, ARM-LE, RISC-V, WASM) are LE in practice. Big-endian
 support would need a parallel `*_be/` header set; not provided.
 
+## Re-includable templates in `impl/` and `include/common/fp_lib.h`
+
+Each master compiles to one TU. To support multiple precisions and multiple
+(src, dst) pairs in that single TU, upstream compiler-rt's
+one-precision-per-TU model was extended:
+
+- **`include/common/fp_lib.h`** is re-includable. Caller `#define`s
+  `SINGLE_PRECISION`, `DOUBLE_PRECISION`, or `QUAD_PRECISION`, then
+  `#include "fp_lib.h"`. The header emits suffix-renamed typedefs and
+  static inlines (`fp_t_sf`, `rep_clz_df`, ...) once per (TU, precision),
+  and sets bare-name `#define` aliases (`fp_t`, `rep_clz`, ...) so caller
+  code uses bare names that resolve to the right suffixed entity.
+  `include/common/fp_lib_undef.h` clears the bare aliases between sections
+  that switch precision or (src,dst) pair.
+- **`impl/fp_*_impl.inc`** (add, mul, div, compare, fixint, fixuint) are
+  re-includable. Their static `__addXf3__` / `__leXf2__` / etc. are
+  suffix-renamed via `_FP_NAME(...)`; emission is gated per (TU, precision)
+  via `FP_<OP>_<SUFFIX>_EMITTED`. `fp_fixint_impl.inc` / `fp_fixuint_impl.inc`
+  take an additional caller-supplied `FP_FIX_SUFFIX` so each fix call site
+  gets its own helper instance.
+- **`impl/fp_extend_impl.inc`**, **`fp_trunc_impl.inc`**, and
+  **`int_to_fp_impl.inc`** suffix-rename by the (src, dst) pair token
+  (`sfdf`, `sftf`, `dftf`, ...). One emission per (TU, pair). Each
+  inc bundles its own type/helper setup at the top (formerly the
+  separate `fp_extend.h` / `fp_trunc.h` / `int_to_fp.h` partner headers).
+- **`impl/int_div_impl.inc`** takes a caller-supplied `INT_DIV_SUFFIX` and
+  emits suffixed `__udivXi3_<suf>` / `__divXi3_<suf>` etc. so the master
+  can include the inc multiple times in one TU (e.g. once for each of
+  `divdi3`, `udivdi3`, `moddi3`, `umoddi3` in `int.c`).
+
+In short: every template that the master includes more than once per TU
+either uses a precision-derived suffix (auto, via `FP_LIB_SUFFIX`) or a
+caller-defined suffix.
+
 ## Files with surviving preprocessor logic
 
-The "no ifdefs" rule is applied to every `.c` and `.S` source — none branch on
-target. Some headers and shared `.inc` files retain preprocessor logic that is
-*not* target dispatch:
+The "no target-dispatch ifdefs" rule is applied to every master. Some
+templates retain preprocessor logic that is *not* target dispatch:
 
-- `impl/*.inc`, `impl/*.h`, `include/common/fp_lib.h` — parameterized via
+- `impl/*.inc`, `include/common/fp_lib.h` — parameterized via
   `SINGLE_PRECISION` / `DOUBLE_PRECISION` / `QUAD_PRECISION` and the
-  `SRC_*` / `DST_*` selectors. Each `.c` file `#define`s exactly one before
-  the include; this is the metaprogramming model compiler-rt uses for fp ops.
-- `include/common/assembly.h` — abstracts assembler syntax (ELF vs Mach-O vs
-  COFF symbol decoration, ARM/Thumb mode markers, etc.). Heavily ifdef'd by
-  design; consumed only by `arm/*.S`.
-- `atomic/atomic_common.inc` — keys the 16-byte cases off `HAS_INT128`, set
-  by the build (`-DHAS_INT128=1` on 64-bit, `-DHAS_INT128=0` on 32-bit).
+  `SRC_*` / `DST_*` selectors set by the master before each inclusion.
+- `include/common/assembly.h` — abstracts assembler syntax (ELF vs Mach-O
+  vs COFF symbol decoration, ARM/Thumb mode markers, etc.). Heavily
+  ifdef'd by design; consumed only by `arm/aeabi_thumb{1,2}.S`.
+- `atomic/atomic_common.inc` — keys the 16-byte cases off `HAS_INT128`,
+  set by the build (`-DHAS_INT128=1` on 64-bit, `-DHAS_INT128=0` on 32-bit).
 
-## Notes per directory
+## Notes per master
 
 ### `mem/mem.c`
-Hand-written portable C (not from compiler-rt). All four functions are weak so
-a user libc, or a tuned arch-specific replacement, wins at link time. The
-existing `arm/aeabi_mem*.S` files forward to these symbols.
-
-### `atomic/`
-`atomic_freestanding.c` defines a pointer-sized `_Atomic(uintptr_t)` spinlock
-as the lock primitive (no OS dependency) then `#include`s `atomic_common.inc`,
-which contains the dispatch logic and all `__atomic_*_N` expansions. The
-shim calls the GCC `__atomic_*` builtin family (the one cfree documents in
-`doc/builtins.md`); upstream's Clang-only `__c11_atomic_*` calls were
-translated. Public symbols are exported via `#pragma redefine_extname` from
-`_c`-suffixed names so they don't collide with the clang builtins of the
-same name.
-
-### `arm/`
-AEABI aliases for div/mod, soft-float compares, and the `aeabi_mem{cpy,move,set,clr}`
-size-specialized wrappers. Six files have a `*_thumb1.S` companion for
-ARMv6-M (Cortex-M0/M0+/M1), where the ISA can't tail-call (`b memcpy`) or
-fold the `subs/muls` form into one instruction:
-`aeabi_idivmod`, `aeabi_uidivmod`, `aeabi_memcpy`, `aeabi_memmove`,
-`aeabi_memset`, `aeabi_memcmp`. The build picks one variant per symbol —
-the Thumb1 file for ARMv6-M, the base file for ARMv7+/Thumb2. The remaining
-AEABI files (`aeabi_ldivmod`, `aeabi_uldivmod`, `aeabi_{d,f}cmp`,
-`aeabi_{d,f}rsub`) are ISA-agnostic and used as-is on both.
-
-### `riscv/`
-`save.S` / `restore.S` upstream are one file each, gated on `__riscv_xlen`.
-Split into `save_rv32.S` / `save_rv64.S` and `restore_rv32.S` / `restore_rv64.S`.
-The embedded ABI variants (`__riscv_32e` / `__riscv_64e`) were not carried over.
-
-### `fp_tf/`
+Hand-written portable C (not from compiler-rt). All four functions are weak
+so a user libc, or a tuned arch-specific replacement, wins at link time.
+`arm/aeabi_thumb{1,2}.S`'s `aeabi_mem*` symbols forward to these.
+
+### `atomic/atomic_freestanding.c`
+Defines a pointer-sized `_Atomic(uintptr_t)` spinlock as the lock primitive
+(no OS dependency) then `#include`s `atomic_common.inc`, which contains the
+dispatch logic and all `__atomic_*_N` expansions. The shim calls the GCC
+`__atomic_*` builtin family (the one cfree documents in `doc/builtins.md`);
+upstream's Clang-only `__c11_atomic_*` calls were translated. Public symbols
+are exported via `#pragma redefine_extname` from `_c`-suffixed names so they
+don't collide with the clang builtins of the same name.
+
+### `arm/aeabi_thumb{1,2}.S`
+AEABI aliases for div/mod, soft-float compares, and the
+`aeabi_mem{cpy,move,set,clr}` size-specialized wrappers. Two ISA-mode
+variants:
+- **Thumb2** — used on ARMv7+/Thumb2; tail-calls into `memcpy` etc., uses
+  `subs/muls` folding.
+- **Thumb1** — used on ARMv6-M (Cortex-M0/M0+/M1); avoids tail-calls (no
+  `b memcpy`) and the folded `subs/muls` form.
+Both files contain the same ISA-agnostic helpers (`aeabi_ldivmod`,
+`aeabi_uldivmod`, `aeabi_{d,f}cmp`) inline, plus their own ISA-tuned
+versions of the helpers that differ between the two ISAs (`aeabi_idivmod`,
+`aeabi_uidivmod`, and the four `aeabi_mem*`).
+
+### `arm/aeabi.c`
+`__aeabi_drsub` and `__aeabi_frsub`. Built alongside whichever
+`aeabi_thumb{1,2}.S` is selected.
+
+### `riscv/rv{32,64}.S`
+Combined `save_*` + `restore_*`. Upstream's `save.S` / `restore.S` are
+gated on `__riscv_xlen`; cfree splits per xlen. The embedded ABI variants
+(`__riscv_32e` / `__riscv_64e`) are not carried over.
+
+### `fp_tf/fp_tf.c`
 Compile only on targets where `long double` is IEEE binary128 (typically
-`aarch64` with `-mlong-double-128`). The build must `-include` 
-`lp64_le_ldbl128/tf_supplement.h` so `tf_float`, `CRT_HAS_TF_MODE`, etc. are
-defined before `fp_lib.h` is processed.
+`aarch64` with `-mlong-double-128`). The build must `-include`
+`lp64_le_ldbl128/tf_supplement.h` so `tf_float`, `CRT_HAS_TF_MODE`, etc.
+are defined before `fp_lib.h` is processed.
 
-### Compare files
-`fp/comparesf2.c`, `fp/comparedf2.c`, `fp_tf/comparetf2.c` define every variant
-(`__eqXf2`, `__ltXf2`, `__neXf2`, `__gtXf2`) as a separate function rather than
-using `COMPILER_RT_ALIAS`. Replaces an object-format-conditional macro with a
-handful of one-line wrappers.
+### Compare helpers
+The `comparesf2` / `comparedf2` / `comparetf2` sections of the fp masters
+define every variant (`__eqXf2`, `__ltXf2`, `__neXf2`, `__gtXf2`) as a
+separate function rather than using `COMPILER_RT_ALIAS`. Replaces an
+object-format-conditional macro with a handful of one-line wrappers.
 
-### `int/int_util.c`
+### `int_util` section of `int/int.c`
 Replaced upstream's hosted/kernel/Apple/Win32 abort cascade with a single
 freestanding `__compilerrt_abort_impl` that calls `__builtin_trap()`.
 
@@ -156,12 +200,25 @@ These are documented in `doc/builtins.md` but not provided here:
 
 ## Updating from a newer compiler-rt
 
-When pulling in a new release:
-1. Diff `lib/builtins/*.c` against the corresponding files here. Per-op files
-   typically need only the `__ARM_EABI__` / `__MINGW32__` / `__SOFTFP__` blocks
-   re-stripped.
-2. Re-strip `int_util.c` to the freestanding-only abort path.
-3. Re-split any new `riscv/` or `arm/` files that combine modes.
-4. Diff `int_lib.h` / `int_types.h` / `fp_lib.h` against the per-target copies
-   under `include/`; update for any new feature gates that aren't legitimately
-   target-orthogonal.
+The per-op layout that lived in upstream's `lib/builtins/<op>.c` is now
+inlined into masters here. To pull in a newer release:
+
+1. Identify which builtins changed between releases (`git log` or release
+   notes). For each changed builtin:
+   - Find its inlined block in the matching cfree master (each block is
+     prefixed by `// ---- <upstream filename> ----`).
+   - Diff that block against upstream `lib/builtins/<op>.c`. Drop any
+     re-introduced target-dispatch ifdefs (`__ARM_EABI__`, `__MINGW32__`,
+     `__SOFTFP__`).
+2. If `int_util.c` changed upstream, re-strip to the freestanding-only
+   abort path.
+3. If `riscv/save.S` / `restore.S` changed, re-split into the per-xlen
+   inline blocks of `riscv/rv{32,64}.S`.
+4. Diff `int_lib.h` / `fp_lib.h` against the per-target copies under
+   `include/`; update for any new feature gates that aren't legitimately
+   target-orthogonal. Note: upstream's `int_endianness.h` and
+   `int_types.h` are folded into each per-target `int_lib.h`. Note also
+   that cfree's `fp_lib.h` has been refactored into a re-includable form
+   (per-precision suffix-renaming plus bare-name aliases) — pull upstream
+   changes into the precision blocks; don't revert the suffix machinery.
+5. Run `bash build.sh` and confirm all 13 variants pass.
diff --git a/lib/arm/aeabi.c b/lib/arm/aeabi.c
@@ -0,0 +1,20 @@
+// Consolidated AEABI soft-float C helpers for cfree's libcfree_rt.a.
+// The build compiles only this one file for 32-bit ARM (either ISA mode); the
+// former per-op files (aeabi_drsub.c, aeabi_frsub.c) are inlined below.
+// License: Apache-2.0 WITH LLVM-exception (see lib/LICENSE-compiler-rt.txt).
+
+// ---- aeabi_drsub.c ----
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+
+AEABI_RTABI fp_t __aeabi_dsub(fp_t, fp_t); // declared only; not defined in this file
+
+AEABI_RTABI fp_t __aeabi_drsub(fp_t a, fp_t b) { return __aeabi_dsub(b, a); } // operands swapped
+#include "fp_lib_undef.h" // clear the bare fp_lib.h aliases (fp_t, ...) before switching precision
+// ---- aeabi_frsub.c ----
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+AEABI_RTABI fp_t __aeabi_fsub(fp_t, fp_t); // declared only; not defined in this file
+
+AEABI_RTABI fp_t __aeabi_frsub(fp_t a, fp_t b) { return __aeabi_fsub(b, a); } // operands swapped
diff --git a/lib/arm/aeabi_dcmp.S b/lib/arm/aeabi_dcmp.S
@@ -1,45 +0,0 @@
-//===-- aeabi_dcmp.S - EABI dcmp* implementation ---------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "assembly.h"
-
-// int __aeabi_dcmp{eq,lt,le,ge,gt}(double a, double b) {
-//   int result = __{eq,lt,le,ge,gt}df2(a, b);
-//   if (result {==,<,<=,>=,>} 0) {
-//     return 1;
-//   } else {
-//     return 0;
-//   }
-// }
-
-#  define CONVERT_DCMP_ARGS_TO_DF2_ARGS
-
-#define DEFINE_AEABI_DCMP(cond)                            \
-        .syntax unified                          SEPARATOR \
-        .p2align 2                               SEPARATOR \
-DEFINE_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond)           \
-        push      { r4, lr }                     SEPARATOR \
-        CONVERT_DCMP_ARGS_TO_DF2_ARGS            SEPARATOR \
-        bl        SYMBOL_NAME(__ ## cond ## df2) SEPARATOR \
-        cmp       r0, #0                         SEPARATOR \
-        b ## cond 1f                             SEPARATOR \
-        movs      r0, #0                         SEPARATOR \
-        pop       { r4, pc }                     SEPARATOR \
-1:                                               SEPARATOR \
-        movs      r0, #1                         SEPARATOR \
-        pop       { r4, pc }                     SEPARATOR \
-END_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond)
-
-DEFINE_AEABI_DCMP(eq)
-DEFINE_AEABI_DCMP(lt)
-DEFINE_AEABI_DCMP(le)
-DEFINE_AEABI_DCMP(ge)
-DEFINE_AEABI_DCMP(gt)
-
-NO_EXEC_STACK_DIRECTIVE
-
diff --git a/lib/arm/aeabi_drsub.c b/lib/arm/aeabi_drsub.c
@@ -1,14 +0,0 @@
-//===-- lib/arm/aeabi_drsub.c - Double-precision subtraction --------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#define DOUBLE_PRECISION
-#include "fp_lib.h"
-
-AEABI_RTABI fp_t __aeabi_dsub(fp_t, fp_t);
-
-AEABI_RTABI fp_t __aeabi_drsub(fp_t a, fp_t b) { return __aeabi_dsub(b, a); }
diff --git a/lib/arm/aeabi_fcmp.S b/lib/arm/aeabi_fcmp.S
@@ -1,45 +0,0 @@
-//===-- aeabi_fcmp.S - EABI fcmp* implementation ---------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "assembly.h"
-
-// int __aeabi_fcmp{eq,lt,le,ge,gt}(float a, float b) {
-//   int result = __{eq,lt,le,ge,gt}sf2(a, b);
-//   if (result {==,<,<=,>=,>} 0) {
-//     return 1;
-//   } else {
-//     return 0;
-//   }
-// }
-
-#  define CONVERT_FCMP_ARGS_TO_SF2_ARGS
-
-#define DEFINE_AEABI_FCMP(cond)                            \
-        .syntax unified                          SEPARATOR \
-        .p2align 2                               SEPARATOR \
-DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond)           \
-        push      { r4, lr }                     SEPARATOR \
-        CONVERT_FCMP_ARGS_TO_SF2_ARGS            SEPARATOR \
-        bl        SYMBOL_NAME(__ ## cond ## sf2) SEPARATOR \
-        cmp       r0, #0                         SEPARATOR \
-        b ## cond 1f                             SEPARATOR \
-        movs      r0, #0                         SEPARATOR \
-        pop       { r4, pc }                     SEPARATOR \
-1:                                               SEPARATOR \
-        movs      r0, #1                         SEPARATOR \
-        pop       { r4, pc }                     SEPARATOR \
-END_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond)
-
-DEFINE_AEABI_FCMP(eq)
-DEFINE_AEABI_FCMP(lt)
-DEFINE_AEABI_FCMP(le)
-DEFINE_AEABI_FCMP(ge)
-DEFINE_AEABI_FCMP(gt)
-
-NO_EXEC_STACK_DIRECTIVE
-
diff --git a/lib/arm/aeabi_frsub.c b/lib/arm/aeabi_frsub.c
@@ -1,14 +0,0 @@
-//===-- lib/arm/aeabi_frsub.c - Single-precision subtraction --------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#define SINGLE_PRECISION
-#include "fp_lib.h"
-
-AEABI_RTABI fp_t __aeabi_fsub(fp_t, fp_t);
-
-AEABI_RTABI fp_t __aeabi_frsub(fp_t a, fp_t b) { return __aeabi_fsub(b, a); }
diff --git a/lib/arm/aeabi_idivmod.S b/lib/arm/aeabi_idivmod.S
@@ -1,33 +0,0 @@
-//===-- aeabi_idivmod.S - EABI idivmod implementation ---------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "assembly.h"
-
-// struct { int quot, int rem} __aeabi_idivmod(int numerator, int denominator) {
-//   int rem, quot;
-//   quot = __divmodsi4(numerator, denominator, &rem);
-//   return {quot, rem};
-// }
-
-
-        .syntax unified
-        .text
-        DEFINE_CODE_STATE
-        .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__aeabi_idivmod)
-        push    { lr }
-        sub     sp, sp, #4
-        mov     r2, sp
-        bl      SYMBOL_NAME(__divmodsi4)
-        ldr     r1, [sp]
-        add     sp, sp, #4
-        pop     { pc }
-END_COMPILERRT_FUNCTION(__aeabi_idivmod)
-
-NO_EXEC_STACK_DIRECTIVE
-
diff --git a/lib/arm/aeabi_idivmod_thumb1.S b/lib/arm/aeabi_idivmod_thumb1.S
@@ -1,28 +0,0 @@
-//===-- aeabi_idivmod_thumb1.S - Thumb1 (Cortex-M0/M1) variant -----------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-// Thumb1 has no muls-with-3-operands and limited register-set instructions,
-// so __aeabi_idivmod is implemented as: quot = __divsi3(num, denom);
-//                                       rem  = num - quot * denom.
-// Used on ARMv6-M (Cortex-M0/M0+/M1).
-//===----------------------------------------------------------------------===//
-
-#include "assembly.h"
-
-        .syntax unified
-        .text
-        DEFINE_CODE_STATE
-        .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__aeabi_idivmod)
-        push    {r0, r1, lr}
-        bl      SYMBOL_NAME(__divsi3)
-        pop     {r1, r2, r3} // now r0 = quot, r1 = num, r2 = denom
-        muls    r2, r0, r2   // r2 = quot * denom
-        subs    r1, r1, r2
-        JMP     (r3)
-END_COMPILERRT_FUNCTION(__aeabi_idivmod)
-
-NO_EXEC_STACK_DIRECTIVE
diff --git a/lib/arm/aeabi_ldivmod.S b/lib/arm/aeabi_ldivmod.S
@@ -1,34 +0,0 @@
-//===-- aeabi_ldivmod.S - EABI ldivmod implementation ---------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "assembly.h"
-
-// struct { int64_t quot, int64_t rem}
-//        __aeabi_ldivmod(int64_t numerator, int64_t denominator) {
-//   int64_t rem, quot;
-//   quot = __divmoddi4(numerator, denominator, &rem);
-//   return {quot, rem};
-// }
-
-
-        .syntax unified
-        .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__aeabi_ldivmod)
-        push    {r6, lr}
-        sub     sp, sp, #16
-        add     r6, sp, #8
-        str     r6, [sp]
-        bl      SYMBOL_NAME(__divmoddi4)
-        ldr     r2, [sp, #8]
-        ldr     r3, [sp, #12]
-        add     sp, sp, #16
-        pop     {r6, pc}
-END_COMPILERRT_FUNCTION(__aeabi_ldivmod)
-
-NO_EXEC_STACK_DIRECTIVE
-
diff --git a/lib/arm/aeabi_memcmp.S b/lib/arm/aeabi_memcmp.S
@@ -1,23 +0,0 @@
-//===-- aeabi_memcmp.S - EABI memcmp implementation -----------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "assembly.h"
-
-//  void __aeabi_memcmp(void *dest, void *src, size_t n) { memcmp(dest, src, n); }
-
-        .syntax unified
-        .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__aeabi_memcmp)
-        b       memcmp
-END_COMPILERRT_FUNCTION(__aeabi_memcmp)
-
-DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcmp4, __aeabi_memcmp)
-DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcmp8, __aeabi_memcmp)
-
-NO_EXEC_STACK_DIRECTIVE
-
diff --git a/lib/arm/aeabi_memcmp_thumb1.S b/lib/arm/aeabi_memcmp_thumb1.S
@@ -1,25 +0,0 @@
-//===-- aeabi_memcmp_thumb1.S - Thumb1 (Cortex-M0/M1) variant ------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-// Used on ARMv6-M (Cortex-M0/M0+/M1).
-//===----------------------------------------------------------------------===//
-
-#include "assembly.h"
-
-//  void __aeabi_memcmp(void *dest, void *src, size_t n) { memcmp(dest, src, n); }
-
-        .syntax unified
-        .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__aeabi_memcmp)
-        push    {r7, lr}
-        bl      memcmp
-        pop     {r7, pc}
-END_COMPILERRT_FUNCTION(__aeabi_memcmp)
-
-DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcmp4, __aeabi_memcmp)
-DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcmp8, __aeabi_memcmp)
-
-NO_EXEC_STACK_DIRECTIVE
diff --git a/lib/arm/aeabi_memcpy.S b/lib/arm/aeabi_memcpy.S
@@ -1,23 +0,0 @@
-//===-- aeabi_memcpy.S - EABI memcpy implementation -----------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "assembly.h"
-
-//  void __aeabi_memcpy(void *dest, void *src, size_t n) { memcpy(dest, src, n); }
-
-        .syntax unified
-        .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__aeabi_memcpy)
-        b       memcpy
-END_COMPILERRT_FUNCTION(__aeabi_memcpy)
-
-DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcpy4, __aeabi_memcpy)
-DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcpy8, __aeabi_memcpy)
-
-NO_EXEC_STACK_DIRECTIVE
-
diff --git a/lib/arm/aeabi_memcpy_thumb1.S b/lib/arm/aeabi_memcpy_thumb1.S
@@ -1,25 +0,0 @@
-//===-- aeabi_memcpy_thumb1.S - Thumb1 (Cortex-M0/M1) variant ------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-// Used on ARMv6-M (Cortex-M0/M0+/M1).
-//===----------------------------------------------------------------------===//
-
-#include "assembly.h"
-
-//  void __aeabi_memcpy(void *dest, void *src, size_t n) { memcpy(dest, src, n); }
-
-        .syntax unified
-        .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__aeabi_memcpy)
-        push    {r7, lr}
-        bl      memcpy
-        pop     {r7, pc}
-END_COMPILERRT_FUNCTION(__aeabi_memcpy)
-
-DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcpy4, __aeabi_memcpy)
-DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcpy8, __aeabi_memcpy)
-
-NO_EXEC_STACK_DIRECTIVE
diff --git a/lib/arm/aeabi_memmove.S b/lib/arm/aeabi_memmove.S
@@ -1,22 +0,0 @@
-//===-- aeabi_memmove.S - EABI memmove implementation --------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===---------------------------------------------------------------------===//
-
-#include "assembly.h"
-
-//  void __aeabi_memmove(void *dest, void *src, size_t n) { memmove(dest, src, n); }
-
-        .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__aeabi_memmove)
-        b       memmove
-END_COMPILERRT_FUNCTION(__aeabi_memmove)
-
-DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memmove4, __aeabi_memmove)
-DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memmove8, __aeabi_memmove)
-
-NO_EXEC_STACK_DIRECTIVE
-
diff --git a/lib/arm/aeabi_memmove_thumb1.S b/lib/arm/aeabi_memmove_thumb1.S
@@ -1,24 +0,0 @@
-//===-- aeabi_memmove_thumb1.S - Thumb1 (Cortex-M0/M1) variant -----------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-// Used on ARMv6-M (Cortex-M0/M0+/M1).
-//===---------------------------------------------------------------------===//
-
-#include "assembly.h"
-
-//  void __aeabi_memmove(void *dest, void *src, size_t n) { memmove(dest, src, n); }
-
-        .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__aeabi_memmove)
-        push    {r7, lr}
-        bl      memmove
-        pop     {r7, pc}
-END_COMPILERRT_FUNCTION(__aeabi_memmove)
-
-DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memmove4, __aeabi_memmove)
-DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memmove8, __aeabi_memmove)
-
-NO_EXEC_STACK_DIRECTIVE
diff --git a/lib/arm/aeabi_memset.S b/lib/arm/aeabi_memset.S
@@ -1,37 +0,0 @@
-//===-- aeabi_memset.S - EABI memset implementation -----------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "assembly.h"
-
-//  void __aeabi_memset(void *dest, size_t n, int c) { memset(dest, c, n); }
-//  void __aeabi_memclr(void *dest, size_t n) { __aeabi_memset(dest, n, 0); }
-
-        .syntax unified
-        .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__aeabi_memset)
-        mov     r3, r1
-        mov     r1, r2
-        mov     r2, r3
-        b       memset
-END_COMPILERRT_FUNCTION(__aeabi_memset)
-
-DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memset4, __aeabi_memset)
-DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memset8, __aeabi_memset)
-
-        .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__aeabi_memclr)
-        mov     r2, r1
-        movs    r1, #0
-        b       memset
-END_COMPILERRT_FUNCTION(__aeabi_memclr)
-
-DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memclr4, __aeabi_memclr)
-DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memclr8, __aeabi_memclr)
-
-NO_EXEC_STACK_DIRECTIVE
-
diff --git a/lib/arm/aeabi_memset_thumb1.S b/lib/arm/aeabi_memset_thumb1.S
@@ -1,42 +0,0 @@
-//===-- aeabi_memset_thumb1.S - Thumb1 (Cortex-M0/M1) variant ------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-// Thumb1 lacks an unconditional `b <reg>` to a far symbol, so we bl/pop
-// instead of tail-calling memset. Used on ARMv6-M (Cortex-M0/M0+/M1).
-//===----------------------------------------------------------------------===//
-
-#include "assembly.h"
-
-//  void __aeabi_memset(void *dest, size_t n, int c) { memset(dest, c, n); }
-//  void __aeabi_memclr(void *dest, size_t n) { __aeabi_memset(dest, n, 0); }
-
-        .syntax unified
-        .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__aeabi_memset)
-        mov     r3, r1
-        mov     r1, r2
-        mov     r2, r3
-        push    {r7, lr}
-        bl      memset
-        pop     {r7, pc}
-END_COMPILERRT_FUNCTION(__aeabi_memset)
-
-DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memset4, __aeabi_memset)
-DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memset8, __aeabi_memset)
-
-        .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__aeabi_memclr)
-        mov     r2, r1
-        movs    r1, #0
-        push    {r7, lr}
-        bl      memset
-        pop     {r7, pc}
-END_COMPILERRT_FUNCTION(__aeabi_memclr)
-
-DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memclr4, __aeabi_memclr)
-DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memclr8, __aeabi_memclr)
-
-NO_EXEC_STACK_DIRECTIVE
diff --git a/lib/arm/aeabi_thumb1.S b/lib/arm/aeabi_thumb1.S
@@ -0,0 +1,261 @@
+// Consolidated AEABI helpers for cfree's libcfree_rt.a (ARM Thumb1 mode).
+// The build assembles only this one file for the Thumb1 ISA mode. The former
+// per-op files are inlined below, each after a `// ---- <filename> ----` line.
+// License: Apache-2.0 WITH LLVM-exception (see lib/LICENSE-compiler-rt.txt).
+
+// ISA-agnostic helpers
+// ---- aeabi_ldivmod.S ----
+#include "assembly.h"
+
+// struct { int64_t quot, int64_t rem}
+//        __aeabi_ldivmod(int64_t numerator, int64_t denominator) {
+//   int64_t rem, quot;
+//   quot = __divmoddi4(numerator, denominator, &rem);
+//   return {quot, rem};
+// }
+
+
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_ldivmod)
+        push    {r6, lr}
+        sub     sp, sp, #16
+        add     r6, sp, #8
+        str     r6, [sp]
+        bl      SYMBOL_NAME(__divmoddi4)
+        ldr     r2, [sp, #8]
+        ldr     r3, [sp, #12]
+        add     sp, sp, #16
+        pop     {r6, pc}
+END_COMPILERRT_FUNCTION(__aeabi_ldivmod)
+
+NO_EXEC_STACK_DIRECTIVE
+
+// ---- aeabi_uldivmod.S ----
+#include "assembly.h"
+
+// struct { uint64_t quot, uint64_t rem}
+//        __aeabi_uldivmod(uint64_t numerator, uint64_t denominator) {
+//   uint64_t rem, quot;
+//   quot = __udivmoddi4(numerator, denominator, &rem);
+//   return {quot, rem};
+// }
+
+
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_uldivmod)
+        push	{r6, lr}
+        sub	sp, sp, #16
+        add	r6, sp, #8
+        str	r6, [sp]
+        bl	SYMBOL_NAME(__udivmoddi4)
+        ldr	r2, [sp, #8]
+        ldr	r3, [sp, #12]
+        add	sp, sp, #16
+        pop	{r6, pc}
+END_COMPILERRT_FUNCTION(__aeabi_uldivmod)
+
+NO_EXEC_STACK_DIRECTIVE
+
+// ---- aeabi_dcmp.S ----
+#include "assembly.h"
+
+// int __aeabi_dcmp{eq,lt,le,ge,gt}(double a, double b) {
+//   int result = __{eq,lt,le,ge,gt}df2(a, b);
+//   if (result {==,<,<=,>=,>} 0) {
+//     return 1;
+//   } else {
+//     return 0;
+//   }
+// }
+
+#  define CONVERT_DCMP_ARGS_TO_DF2_ARGS
+
+#define DEFINE_AEABI_DCMP(cond)                            \
+        .syntax unified                          SEPARATOR \
+        .p2align 2                               SEPARATOR \
+DEFINE_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond)           \
+        push      { r4, lr }                     SEPARATOR \
+        CONVERT_DCMP_ARGS_TO_DF2_ARGS            SEPARATOR \
+        bl        SYMBOL_NAME(__ ## cond ## df2) SEPARATOR \
+        cmp       r0, #0                         SEPARATOR \
+        b ## cond 1f                             SEPARATOR \
+        movs      r0, #0                         SEPARATOR \
+        pop       { r4, pc }                     SEPARATOR \
+1:                                               SEPARATOR \
+        movs      r0, #1                         SEPARATOR \
+        pop       { r4, pc }                     SEPARATOR \
+END_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond)
+
+DEFINE_AEABI_DCMP(eq)
+DEFINE_AEABI_DCMP(lt)
+DEFINE_AEABI_DCMP(le)
+DEFINE_AEABI_DCMP(ge)
+DEFINE_AEABI_DCMP(gt)
+
+NO_EXEC_STACK_DIRECTIVE
+
+// ---- aeabi_fcmp.S ----
+#include "assembly.h"
+
+// int __aeabi_fcmp{eq,lt,le,ge,gt}(float a, float b) {
+//   int result = __{eq,lt,le,ge,gt}sf2(a, b);
+//   if (result {==,<,<=,>=,>} 0) {
+//     return 1;
+//   } else {
+//     return 0;
+//   }
+// }
+
+#  define CONVERT_FCMP_ARGS_TO_SF2_ARGS
+
+#define DEFINE_AEABI_FCMP(cond)                            \
+        .syntax unified                          SEPARATOR \
+        .p2align 2                               SEPARATOR \
+DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond)           \
+        push      { r4, lr }                     SEPARATOR \
+        CONVERT_FCMP_ARGS_TO_SF2_ARGS            SEPARATOR \
+        bl        SYMBOL_NAME(__ ## cond ## sf2) SEPARATOR \
+        cmp       r0, #0                         SEPARATOR \
+        b ## cond 1f                             SEPARATOR \
+        movs      r0, #0                         SEPARATOR \
+        pop       { r4, pc }                     SEPARATOR \
+1:                                               SEPARATOR \
+        movs      r0, #1                         SEPARATOR \
+        pop       { r4, pc }                     SEPARATOR \
+END_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond)
+
+DEFINE_AEABI_FCMP(eq)
+DEFINE_AEABI_FCMP(lt)
+DEFINE_AEABI_FCMP(le)
+DEFINE_AEABI_FCMP(ge)
+DEFINE_AEABI_FCMP(gt)
+
+NO_EXEC_STACK_DIRECTIVE
+
+
+// Thumb1-specific helpers
+// ---- aeabi_idivmod_thumb1.S ----
+#include "assembly.h"
+
+        .syntax unified
+        .text
+        DEFINE_CODE_STATE
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_idivmod)
+        push    {r0, r1, lr}
+        bl      SYMBOL_NAME(__divsi3)
+        pop     {r1, r2, r3} // now r0 = quot, r1 = num, r2 = denom
+        muls    r2, r0, r2   // r2 = quot * denom
+        subs    r1, r1, r2
+        JMP     (r3)
+END_COMPILERRT_FUNCTION(__aeabi_idivmod)
+
+NO_EXEC_STACK_DIRECTIVE
+// ---- aeabi_uidivmod_thumb1.S ----
+#include "assembly.h"
+
+        .syntax unified
+        .text
+        DEFINE_CODE_STATE
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_uidivmod)
+        cmp     r0, r1
+        bcc     LOCAL_LABEL(case_denom_larger)
+        push    {r0, r1, lr}
+        bl      SYMBOL_NAME(__aeabi_uidiv)
+        pop     {r1, r2, r3}
+        muls    r2, r0, r2 // r2 = quot * denom
+        subs    r1, r1, r2
+        JMP     (r3)
+LOCAL_LABEL(case_denom_larger):
+        movs    r1, r0
+        movs    r0, #0
+        JMP     (lr)
+END_COMPILERRT_FUNCTION(__aeabi_uidivmod)
+
+NO_EXEC_STACK_DIRECTIVE
+// ---- aeabi_memcpy_thumb1.S ----
+#include "assembly.h"
+
+//  void __aeabi_memcpy(void *dest, void *src, size_t n) { memcpy(dest, src, n); }
+
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_memcpy)
+        push    {r7, lr}
+        bl      memcpy
+        pop     {r7, pc}
+END_COMPILERRT_FUNCTION(__aeabi_memcpy)
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcpy4, __aeabi_memcpy)
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcpy8, __aeabi_memcpy)
+
+NO_EXEC_STACK_DIRECTIVE
+// ---- aeabi_memmove_thumb1.S ----
+#include "assembly.h"
+
+//  void __aeabi_memmove(void *dest, void *src, size_t n) { memmove(dest, src, n); }
+
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_memmove)
+        push    {r7, lr}
+        bl      memmove
+        pop     {r7, pc}
+END_COMPILERRT_FUNCTION(__aeabi_memmove)
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memmove4, __aeabi_memmove)
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memmove8, __aeabi_memmove)
+
+NO_EXEC_STACK_DIRECTIVE
+// ---- aeabi_memset_thumb1.S ----
+#include "assembly.h"
+
+//  void __aeabi_memset(void *dest, size_t n, int c) { memset(dest, c, n); }
+//  void __aeabi_memclr(void *dest, size_t n) { __aeabi_memset(dest, n, 0); }
+
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_memset)
+        mov     r3, r1
+        mov     r1, r2
+        mov     r2, r3
+        push    {r7, lr}
+        bl      memset
+        pop     {r7, pc}
+END_COMPILERRT_FUNCTION(__aeabi_memset)
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memset4, __aeabi_memset)
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memset8, __aeabi_memset)
+
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_memclr)
+        mov     r2, r1
+        movs    r1, #0
+        push    {r7, lr}
+        bl      memset
+        pop     {r7, pc}
+END_COMPILERRT_FUNCTION(__aeabi_memclr)
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memclr4, __aeabi_memclr)
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memclr8, __aeabi_memclr)
+
+NO_EXEC_STACK_DIRECTIVE
+// ---- aeabi_memcmp_thumb1.S ----
+#include "assembly.h"
+
+//  int __aeabi_memcmp(void *dest, void *src, size_t n) { return memcmp(dest, src, n); }
+
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_memcmp)
+        push    {r7, lr}
+        bl      memcmp
+        pop     {r7, pc}
+END_COMPILERRT_FUNCTION(__aeabi_memcmp)
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcmp4, __aeabi_memcmp)
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcmp8, __aeabi_memcmp)
+
+NO_EXEC_STACK_DIRECTIVE
diff --git a/lib/arm/aeabi_thumb2.S b/lib/arm/aeabi_thumb2.S
@@ -0,0 +1,268 @@
+// Consolidated AEABI helpers for cfree's libcfree_rt.a (ARM Thumb2 mode).
+// The build assembles only this one file for the Thumb2 ISA mode. The former
+// per-op files are inlined below, each after a `// ---- <filename> ----` line.
+// License: Apache-2.0 WITH LLVM-exception (see lib/LICENSE-compiler-rt.txt).
+
+// ISA-agnostic helpers
+// ---- aeabi_ldivmod.S ----
+#include "assembly.h"
+
+// struct { int64_t quot, int64_t rem}
+//        __aeabi_ldivmod(int64_t numerator, int64_t denominator) {
+//   int64_t rem, quot;
+//   quot = __divmoddi4(numerator, denominator, &rem);
+//   return {quot, rem};
+// }
+
+
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_ldivmod)
+        push    {r6, lr}
+        sub     sp, sp, #16
+        add     r6, sp, #8
+        str     r6, [sp]
+        bl      SYMBOL_NAME(__divmoddi4)
+        ldr     r2, [sp, #8]
+        ldr     r3, [sp, #12]
+        add     sp, sp, #16
+        pop     {r6, pc}
+END_COMPILERRT_FUNCTION(__aeabi_ldivmod)
+
+NO_EXEC_STACK_DIRECTIVE
+
+// ---- aeabi_uldivmod.S ----
+#include "assembly.h"
+
+// struct { uint64_t quot, uint64_t rem}
+//        __aeabi_uldivmod(uint64_t numerator, uint64_t denominator) {
+//   uint64_t rem, quot;
+//   quot = __udivmoddi4(numerator, denominator, &rem);
+//   return {quot, rem};
+// }
+
+
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_uldivmod)
+        push	{r6, lr}
+        sub	sp, sp, #16
+        add	r6, sp, #8
+        str	r6, [sp]
+        bl	SYMBOL_NAME(__udivmoddi4)
+        ldr	r2, [sp, #8]
+        ldr	r3, [sp, #12]
+        add	sp, sp, #16
+        pop	{r6, pc}
+END_COMPILERRT_FUNCTION(__aeabi_uldivmod)
+
+NO_EXEC_STACK_DIRECTIVE
+
+// ---- aeabi_dcmp.S ----
+#include "assembly.h"
+
+// int __aeabi_dcmp{eq,lt,le,ge,gt}(double a, double b) {
+//   int result = __{eq,lt,le,ge,gt}df2(a, b);
+//   if (result {==,<,<=,>=,>} 0) {
+//     return 1;
+//   } else {
+//     return 0;
+//   }
+// }
+
+#  define CONVERT_DCMP_ARGS_TO_DF2_ARGS
+
+#define DEFINE_AEABI_DCMP(cond)                            \
+        .syntax unified                          SEPARATOR \
+        .p2align 2                               SEPARATOR \
+DEFINE_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond)           \
+        push      { r4, lr }                     SEPARATOR \
+        CONVERT_DCMP_ARGS_TO_DF2_ARGS            SEPARATOR \
+        bl        SYMBOL_NAME(__ ## cond ## df2) SEPARATOR \
+        cmp       r0, #0                         SEPARATOR \
+        b ## cond 1f                             SEPARATOR \
+        movs      r0, #0                         SEPARATOR \
+        pop       { r4, pc }                     SEPARATOR \
+1:                                               SEPARATOR \
+        movs      r0, #1                         SEPARATOR \
+        pop       { r4, pc }                     SEPARATOR \
+END_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond)
+
+DEFINE_AEABI_DCMP(eq)
+DEFINE_AEABI_DCMP(lt)
+DEFINE_AEABI_DCMP(le)
+DEFINE_AEABI_DCMP(ge)
+DEFINE_AEABI_DCMP(gt)
+
+NO_EXEC_STACK_DIRECTIVE
+
+// ---- aeabi_fcmp.S ----
+#include "assembly.h"
+
+// int __aeabi_fcmp{eq,lt,le,ge,gt}(float a, float b) {
+//   int result = __{eq,lt,le,ge,gt}sf2(a, b);
+//   if (result {==,<,<=,>=,>} 0) {
+//     return 1;
+//   } else {
+//     return 0;
+//   }
+// }
+
+#  define CONVERT_FCMP_ARGS_TO_SF2_ARGS
+
+#define DEFINE_AEABI_FCMP(cond)                            \
+        .syntax unified                          SEPARATOR \
+        .p2align 2                               SEPARATOR \
+DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond)           \
+        push      { r4, lr }                     SEPARATOR \
+        CONVERT_FCMP_ARGS_TO_SF2_ARGS            SEPARATOR \
+        bl        SYMBOL_NAME(__ ## cond ## sf2) SEPARATOR \
+        cmp       r0, #0                         SEPARATOR \
+        b ## cond 1f                             SEPARATOR \
+        movs      r0, #0                         SEPARATOR \
+        pop       { r4, pc }                     SEPARATOR \
+1:                                               SEPARATOR \
+        movs      r0, #1                         SEPARATOR \
+        pop       { r4, pc }                     SEPARATOR \
+END_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond)
+
+DEFINE_AEABI_FCMP(eq)
+DEFINE_AEABI_FCMP(lt)
+DEFINE_AEABI_FCMP(le)
+DEFINE_AEABI_FCMP(ge)
+DEFINE_AEABI_FCMP(gt)
+
+NO_EXEC_STACK_DIRECTIVE
+
+
+// Thumb2-specific helpers
+// ---- aeabi_idivmod.S ----
+#include "assembly.h"
+
+// struct { int quot, int rem} __aeabi_idivmod(int numerator, int denominator) {
+//   int rem, quot;
+//   quot = __divmodsi4(numerator, denominator, &rem);
+//   return {quot, rem};
+// }
+
+
+        .syntax unified
+        .text
+        DEFINE_CODE_STATE
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_idivmod)
+        push    { lr }
+        sub     sp, sp, #4
+        mov     r2, sp
+        bl      SYMBOL_NAME(__divmodsi4)
+        ldr     r1, [sp]
+        add     sp, sp, #4
+        pop     { pc }
+END_COMPILERRT_FUNCTION(__aeabi_idivmod)
+
+NO_EXEC_STACK_DIRECTIVE
+
+// ---- aeabi_uidivmod.S ----
+#include "assembly.h"
+
+// struct { unsigned quot, unsigned rem}
+//        __aeabi_uidivmod(unsigned numerator, unsigned denominator) {
+//   unsigned rem, quot;
+//   quot = __udivmodsi4(numerator, denominator, &rem);
+//   return {quot, rem};
+// }
+
+
+        .syntax unified
+        .text
+        DEFINE_CODE_STATE
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_uidivmod)
+        push    { lr }
+        sub     sp, sp, #4
+        mov     r2, sp
+        bl      SYMBOL_NAME(__udivmodsi4)
+        ldr     r1, [sp]
+        add     sp, sp, #4
+        pop     { pc }
+END_COMPILERRT_FUNCTION(__aeabi_uidivmod)
+
+NO_EXEC_STACK_DIRECTIVE
+
+// ---- aeabi_memcpy.S ----
+#include "assembly.h"
+
+//  void __aeabi_memcpy(void *dest, void *src, size_t n) { memcpy(dest, src, n); }
+
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_memcpy)
+        b       memcpy
+END_COMPILERRT_FUNCTION(__aeabi_memcpy)
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcpy4, __aeabi_memcpy)
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcpy8, __aeabi_memcpy)
+
+NO_EXEC_STACK_DIRECTIVE
+
+// ---- aeabi_memmove.S ----
+#include "assembly.h"
+
+//  void __aeabi_memmove(void *dest, void *src, size_t n) { memmove(dest, src, n); }
+
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_memmove)
+        b       memmove
+END_COMPILERRT_FUNCTION(__aeabi_memmove)
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memmove4, __aeabi_memmove)
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memmove8, __aeabi_memmove)
+
+NO_EXEC_STACK_DIRECTIVE
+
+// ---- aeabi_memset.S ----
+#include "assembly.h"
+
+//  void __aeabi_memset(void *dest, size_t n, int c) { memset(dest, c, n); }
+//  void __aeabi_memclr(void *dest, size_t n) { __aeabi_memset(dest, n, 0); }
+
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_memset)
+        mov     r3, r1
+        mov     r1, r2
+        mov     r2, r3
+        b       memset
+END_COMPILERRT_FUNCTION(__aeabi_memset)
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memset4, __aeabi_memset)
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memset8, __aeabi_memset)
+
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_memclr)
+        mov     r2, r1
+        movs    r1, #0
+        b       memset
+END_COMPILERRT_FUNCTION(__aeabi_memclr)
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memclr4, __aeabi_memclr)
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memclr8, __aeabi_memclr)
+
+NO_EXEC_STACK_DIRECTIVE
+
+// ---- aeabi_memcmp.S ----
+#include "assembly.h"
+
+//  int __aeabi_memcmp(void *dest, void *src, size_t n) { return memcmp(dest, src, n); }
+
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__aeabi_memcmp)
+        b       memcmp
+END_COMPILERRT_FUNCTION(__aeabi_memcmp)
+
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcmp4, __aeabi_memcmp)
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcmp8, __aeabi_memcmp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/lib/arm/aeabi_uidivmod.S b/lib/arm/aeabi_uidivmod.S
@@ -1,34 +0,0 @@
-//===-- aeabi_uidivmod.S - EABI uidivmod implementation -------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "assembly.h"
-
-// struct { unsigned quot, unsigned rem}
-//        __aeabi_uidivmod(unsigned numerator, unsigned denominator) {
-//   unsigned rem, quot;
-//   quot = __udivmodsi4(numerator, denominator, &rem);
-//   return {quot, rem};
-// }
-
-
-        .syntax unified
-        .text
-        DEFINE_CODE_STATE
-        .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__aeabi_uidivmod)
-        push    { lr }
-        sub     sp, sp, #4
-        mov     r2, sp
-        bl      SYMBOL_NAME(__udivmodsi4)
-        ldr     r1, [sp]
-        add     sp, sp, #4
-        pop     { pc }
-END_COMPILERRT_FUNCTION(__aeabi_uidivmod)
-
-NO_EXEC_STACK_DIRECTIVE
-
diff --git a/lib/arm/aeabi_uidivmod_thumb1.S b/lib/arm/aeabi_uidivmod_thumb1.S
@@ -1,31 +0,0 @@
-//===-- aeabi_uidivmod_thumb1.S - Thumb1 (Cortex-M0/M1) variant ----------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-// Used on ARMv6-M (Cortex-M0/M0+/M1).
-//===----------------------------------------------------------------------===//
-
-#include "assembly.h"
-
-        .syntax unified
-        .text
-        DEFINE_CODE_STATE
-        .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__aeabi_uidivmod)
-        cmp     r0, r1
-        bcc     LOCAL_LABEL(case_denom_larger)
-        push    {r0, r1, lr}
-        bl      SYMBOL_NAME(__aeabi_uidiv)
-        pop     {r1, r2, r3}
-        muls    r2, r0, r2 // r2 = quot * denom
-        subs    r1, r1, r2
-        JMP     (r3)
-LOCAL_LABEL(case_denom_larger):
-        movs    r1, r0
-        movs    r0, #0
-        JMP     (lr)
-END_COMPILERRT_FUNCTION(__aeabi_uidivmod)
-
-NO_EXEC_STACK_DIRECTIVE
diff --git a/lib/arm/aeabi_uldivmod.S b/lib/arm/aeabi_uldivmod.S
@@ -1,34 +0,0 @@
-//===-- aeabi_uldivmod.S - EABI uldivmod implementation -------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "assembly.h"
-
-// struct { uint64_t quot, uint64_t rem}
-//        __aeabi_uldivmod(uint64_t numerator, uint64_t denominator) {
-//   uint64_t rem, quot;
-//   quot = __udivmoddi4(numerator, denominator, &rem);
-//   return {quot, rem};
-// }
-
-
-        .syntax unified
-        .p2align 2
-DEFINE_COMPILERRT_FUNCTION(__aeabi_uldivmod)
-        push	{r6, lr}
-        sub	sp, sp, #16
-        add	r6, sp, #8
-        str	r6, [sp]
-        bl	SYMBOL_NAME(__udivmoddi4)
-        ldr	r2, [sp, #8]
-        ldr	r3, [sp, #12]
-        add	sp, sp, #16
-        pop	{r6, pc}
-END_COMPILERRT_FUNCTION(__aeabi_uldivmod)
-
-NO_EXEC_STACK_DIRECTIVE
-
diff --git a/lib/build.sh b/lib/build.sh
@@ -1,16 +1,10 @@
-#!/usr/bin/env bash
+#!/bin/sh
 #
-# build.sh — exhaustive build of libcfree_rt.a for every (target, mode) tuple
-# cfree supports. Compiles every required source file with clang for each
-# variant; on success, archives the objects with llvm-ar (if present) into
-# $OUT/libcfree_rt-<variant>.a.
-#
-# Reports OK / FAIL per variant; exits non-zero if any fail.
-# Override defaults via env: CC=clang OUT=/tmp/cfree_rt_build AR=llvm-ar.
-#
-# Run from anywhere — the script self-locates and cd's to lib/ first.
+# build.sh — POSIX sh port of the former bash build.sh. Same behavior, no
+# arrays, no pipefail, no local. Source/flag lists are space-separated
+# strings; unquoted expansion does the splitting.
 
-set -uo pipefail
+set -u
 
 cd "$(dirname "$0")" || exit 1
 
@@ -18,57 +12,51 @@ CC=${CC:-clang}
 AR=${AR:-llvm-ar}
 OUT=${OUT:-/tmp/cfree_rt_build}
 
-# Freestanding, warnings on, common headers reachable. -fno-builtin keeps the
-# compiler from rewriting our memcpy/memset/etc. into recursive calls.
-COMMON_FLAGS=(-c -ffreestanding -fno-builtin -Wall -Wextra
-              -Iinclude/common -Iimpl)
+COMMON_FLAGS="-c -ffreestanding -fno-builtin -Wall -Wextra -Iinclude/common -Iimpl"
 
 mkdir -p "$OUT"
 OK=0
 FAIL=0
-FAILED_VARIANTS=()
+FAILED_VARIANTS=""
 
 #-------------------------------------------------------------------------------
-# build_variant <name> "<extra cc flags>" <source files...>
+# build_variant <name> <extra-cc-flags> <sources>
 #-------------------------------------------------------------------------------
 build_variant() {
-    local name=$1
-    local flags_str=$2
-    shift 2
-    local sources=("$@")
-    # shellcheck disable=SC2206
-    local flags=( $flags_str )
-
-    local out="$OUT/$name"
+    name=$1
+    flags=$2
+    sources=$3
+
+    out="$OUT/$name"
     mkdir -p "$out"
-    local log="$out/build.log"
+    log="$out/build.log"
     : >"$log"
 
-    local objs=()
-    local nfail=0
-    for src in "${sources[@]}"; do
-        local obj="$out/$(echo "$src" | tr / _).o"
-        if "$CC" "${COMMON_FLAGS[@]}" "${flags[@]}" -o "$obj" "$src" >>"$log" 2>&1; then
-            objs+=("$obj")
+    objs=""
+    nfail=0
+    total=0
+    for src in $sources; do
+        total=$((total+1))
+        obj="$out/$(echo "$src" | tr / _).o"
+        if $CC $COMMON_FLAGS $flags -o "$obj" "$src" >>"$log" 2>&1; then
+            objs="$objs $obj"
         else
             nfail=$((nfail+1))
             echo "FAIL: $src" >>"$log"
         fi
     done
 
-    local total=${#sources[@]}
     if [ $nfail -ne 0 ]; then
         printf "  %-32s FAIL  (%d/%d sources failed; see %s)\n" \
             "$name" "$nfail" "$total" "$log"
         FAIL=$((FAIL+1))
-        FAILED_VARIANTS+=("$name")
+        FAILED_VARIANTS="$FAILED_VARIANTS $name"
         return
     fi
 
-    # Archive (best-effort; not fatal if AR is missing).
     if command -v "$AR" >/dev/null 2>&1; then
-        local archive="$OUT/libcfree_rt-$name.a"
-        if "$AR" rcs "$archive" "${objs[@]}" 2>>"$log"; then
+        archive="$OUT/libcfree_rt-$name.a"
+        if rm -f "$archive" && $AR rcs "$archive" $objs 2>>"$log"; then  # recreate from scratch: ar 'r' only adds/replaces, so an archive left in $OUT by an earlier run would keep stale members
             printf "  %-32s OK    (%d objs → %s)\n" \
                 "$name" "$total" "$(basename "$archive")"
         else
@@ -83,142 +71,110 @@ build_variant() {
 }
 
 #-------------------------------------------------------------------------------
-# Source sets
+# Source sets — one master .c (and/or .S) per feature flag. Each master
+# #include's its per-op snippets in dependency order; the per-op files
+# are NOT compiled directly.
 #-------------------------------------------------------------------------------
-
-INT_C=(    int/*.c )
-INT32_C=(  int32/*.c )
-INT64_C=(  int64/*.c )
-FP_C=(     fp/*.c )
-FP_TF_C=(  fp_tf/*.c )
-FP_TI_C=(  fp_ti/*.c )
-MEM_C=(    mem/mem.c )
-ATOMIC_C=( atomic/atomic_freestanding.c )
-
-# ARM AEABI: 6 base files have a *_thumb1.S companion (idivmod, uidivmod,
-# memcpy, memmove, memset, memcmp). The Thumb2 build uses base files only;
-# the Thumb1 build uses *_thumb1.S in place of those base files, plus the
-# ISA-agnostic ones (ldivmod, uldivmod, dcmp, fcmp, drsub, frsub).
-ARM_AEABI_THUMB2=( arm/aeabi_*.S arm/aeabi_*.c )
-# Filter out *_thumb1.S from the Thumb2 list.
-_t2=()
-for f in "${ARM_AEABI_THUMB2[@]}"; do
-    case "$f" in *_thumb1.S) ;; *) _t2+=("$f") ;; esac
-done
-ARM_AEABI_THUMB2=("${_t2[@]}")
-
-# Build the Thumb1 list: every *_thumb1.S, plus any base file whose stem has
-# no *_thumb1.S partner.
-_thumb1_stems=()
-for f in arm/aeabi_*_thumb1.S; do
-    _thumb1_stems+=( "$(basename "$f" _thumb1.S)" )
-done
-ARM_AEABI_THUMB1=( arm/aeabi_*_thumb1.S )
-for f in "${ARM_AEABI_THUMB2[@]}"; do
-    stem=$(basename "$f" .S); stem=$(basename "$stem" .c)
-    has_t1=0
-    for s in "${_thumb1_stems[@]}"; do
-        [ "$stem" = "$s" ] && { has_t1=1; break; }
-    done
-    [ $has_t1 -eq 0 ] && ARM_AEABI_THUMB1+=("$f")
-done
-
-RV32_SR=( riscv/save_rv32.S riscv/restore_rv32.S )
-RV64_SR=( riscv/save_rv64.S riscv/restore_rv64.S )
+INT_C="int/int.c"
+INT32_C="int32/int32.c"
+INT64_C="int64/int64.c"
+FP_C="fp/fp.c"
+FP_TF_C="fp_tf/fp_tf.c"
+FP_TI_C="fp_ti/fp_ti.c"
+MEM_C="mem/mem.c"
+ATOMIC_C="atomic/atomic_freestanding.c"
+
+# ARM AEABI: one master .S per ISA mode (Thumb2 vs Thumb1) plus one
+# ISA-agnostic .c (drsub + frsub).
+ARM_AEABI_THUMB2="arm/aeabi_thumb2.S arm/aeabi.c"
+ARM_AEABI_THUMB1="arm/aeabi_thumb1.S arm/aeabi.c"
+
+# RISC-V save/restore: one master .S per xlen.
+RV32_SR="riscv/rv32.S"
+RV64_SR="riscv/rv64.S"
 
 #-------------------------------------------------------------------------------
-# Variants — each combination of (data model, target, mode) cfree supports.
+# Variants
 #-------------------------------------------------------------------------------
 echo "Building libcfree_rt for every supported (target, mode) tuple"
 echo "  CC=$CC, AR=$AR, OUT=$OUT"
 echo
 
 # ---- LP64 little-endian ------------------------------------------------------
-
-# x86_64 Linux / Darwin / RV64 / aarch64 base: int + int64 + fp + atomic + mem.
-# binary64 long double (no fp_tf).
-LP64_BASE=( "${INT_C[@]}" "${INT64_C[@]}" "${FP_C[@]}" "${MEM_C[@]}" "${ATOMIC_C[@]}" )
+LP64_BASE="$INT_C $INT64_C $FP_C $MEM_C $ATOMIC_C"
 
 build_variant x86_64-linux \
     "--target=x86_64-linux-gnu -Iinclude/lp64_le -DHAS_INT128=1" \
-    "${LP64_BASE[@]}"
+    "$LP64_BASE"
 
 build_variant x86_64-apple-darwin \
     "--target=x86_64-apple-darwin -Iinclude/lp64_le -DHAS_INT128=1" \
-    "${LP64_BASE[@]}"
+    "$LP64_BASE"
 
-# aarch64-linux: long double is binary128 by default (no flag to change),
-# so the build pulls in fp_tf + fp_ti and pre-includes tf_supplement.h.
+# aarch64-linux: long double is binary128; needs fp_tf + fp_ti and the
+# tf_supplement.h pre-include.
 build_variant aarch64-linux \
     "--target=aarch64-linux-gnu \
      -Iinclude/lp64_le_ldbl128 -Iinclude/lp64_le -DHAS_INT128=1 \
      -include include/lp64_le_ldbl128/tf_supplement.h" \
-    "${INT_C[@]}" "${INT64_C[@]}" "${FP_C[@]}" "${FP_TF_C[@]}" "${FP_TI_C[@]}" \
-    "${MEM_C[@]}" "${ATOMIC_C[@]}"
+    "$INT_C $INT64_C $FP_C $FP_TF_C $FP_TI_C $MEM_C $ATOMIC_C"
 
-# aarch64-apple-darwin: long double is binary64 (no fp_tf needed).
 build_variant aarch64-apple-darwin \
     "--target=aarch64-apple-darwin -Iinclude/lp64_le -DHAS_INT128=1" \
-    "${LP64_BASE[@]}"
+    "$LP64_BASE"
 
-# RISC-V 64 (soft-float) — with and without -msave-restore.
 build_variant riscv64-elf \
     "--target=riscv64-unknown-elf -mabi=lp64 -march=rv64imafd \
      -Iinclude/lp64_le -DHAS_INT128=1" \
-    "${LP64_BASE[@]}"
+    "$LP64_BASE"
 
 build_variant riscv64-elf-save-restore \
     "--target=riscv64-unknown-elf -mabi=lp64 -march=rv64imafd -msave-restore \
      -Iinclude/lp64_le -DHAS_INT128=1" \
-    "${LP64_BASE[@]}" "${RV64_SR[@]}"
+    "$LP64_BASE $RV64_SR"
 
 # ---- LLP64 little-endian (Win64) --------------------------------------------
-
 build_variant x86_64-pc-windows \
     "--target=x86_64-pc-windows-msvc -Iinclude/llp64_le -DHAS_INT128=1" \
-    "${INT_C[@]}" "${INT64_C[@]}" "${FP_C[@]}" "${MEM_C[@]}" "${ATOMIC_C[@]}"
+    "$INT_C $INT64_C $FP_C $MEM_C $ATOMIC_C"
 
 # ---- ILP32 little-endian -----------------------------------------------------
-
-ILP32_BASE=( "${INT_C[@]}" "${INT32_C[@]}" "${FP_C[@]}" "${MEM_C[@]}" "${ATOMIC_C[@]}" )
+ILP32_BASE="$INT_C $INT32_C $FP_C $MEM_C $ATOMIC_C"
 
 build_variant i386-linux \
     "--target=i386-linux-gnu -Iinclude/ilp32_le -DHAS_INT128=0" \
-    "${ILP32_BASE[@]}"
+    "$ILP32_BASE"
 
 build_variant wasm32 \
     "--target=wasm32-unknown-unknown -Iinclude/ilp32_le -DHAS_INT128=0" \
-    "${ILP32_BASE[@]}"
+    "$ILP32_BASE"
 
 build_variant riscv32-elf \
     "--target=riscv32-unknown-elf -mabi=ilp32 -march=rv32imafd \
      -Iinclude/ilp32_le -DHAS_INT128=0" \
-    "${ILP32_BASE[@]}"
+    "$ILP32_BASE"
 
 build_variant riscv32-elf-save-restore \
     "--target=riscv32-unknown-elf -mabi=ilp32 -march=rv32imafd -msave-restore \
      -Iinclude/ilp32_le -DHAS_INT128=0" \
-    "${ILP32_BASE[@]}" "${RV32_SR[@]}"
+    "$ILP32_BASE $RV32_SR"
 
-# ARM32 ARMv7+/Thumb2 (AEABI). The .S files use the AEABI base PCS regardless
-# of FPU presence, so one variant covers soft- and hard-float targets.
 build_variant arm-eabi-thumb2 \
     "--target=arm-none-eabi -march=armv7-a -mthumb -mfloat-abi=soft \
      -Iinclude/ilp32_le -DHAS_INT128=0" \
-    "${ILP32_BASE[@]}" "${ARM_AEABI_THUMB2[@]}"
+    "$ILP32_BASE $ARM_AEABI_THUMB2"
 
-# ARM32 ARMv6-M / Cortex-M0/M0+/M1 (Thumb1).
 build_variant arm-eabi-thumb1 \
     "--target=arm-none-eabi -march=armv6-m -mthumb -mfloat-abi=soft \
      -Iinclude/ilp32_le -DHAS_INT128=0" \
-    "${ILP32_BASE[@]}" "${ARM_AEABI_THUMB1[@]}"
+    "$ILP32_BASE $ARM_AEABI_THUMB1"
 
 #-------------------------------------------------------------------------------
 echo
 echo "Summary: $OK ok, $FAIL failed"
 if [ $FAIL -ne 0 ]; then
     echo "Failed variants:"
-    for v in "${FAILED_VARIANTS[@]}"; do
+    for v in $FAILED_VARIANTS; do
         echo "  - $v ($OUT/$v/build.log)"
     done
     exit 1
diff --git a/lib/fp/adddf3.c b/lib/fp/adddf3.c
@@ -1,17 +0,0 @@
-//===-- lib/adddf3.c - Double-precision addition ------------------*- C -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements double-precision soft-float addition.
-//
-//===----------------------------------------------------------------------===//
-
-#define DOUBLE_PRECISION
-#include "fp_add_impl.inc"
-
-COMPILER_RT_ABI double __adddf3(double a, double b) { return __addXf3__(a, b); }
-
diff --git a/lib/fp/addsf3.c b/lib/fp/addsf3.c
@@ -1,17 +0,0 @@
-//===-- lib/addsf3.c - Single-precision addition ------------------*- C -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements single-precision soft-float addition.
-//
-//===----------------------------------------------------------------------===//
-
-#define SINGLE_PRECISION
-#include "fp_add_impl.inc"
-
-COMPILER_RT_ABI float __addsf3(float a, float b) { return __addXf3__(a, b); }
-
diff --git a/lib/fp/comparedf2.c b/lib/fp/comparedf2.c
@@ -1,52 +0,0 @@
-//===-- lib/comparedf2.c - Double-precision comparisons -----------*- C -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// // This file implements the following soft-float comparison routines:
-//
-//   __eqdf2   __gedf2   __unorddf2
-//   __ledf2   __gtdf2
-//   __ltdf2
-//   __nedf2
-//
-// The semantics of the routines grouped in each column are identical, so there
-// is a single implementation for each, and wrappers to provide the other names.
-//
-// The main routines behave as follows:
-//
-//   __ledf2(a,b) returns -1 if a < b
-//                         0 if a == b
-//                         1 if a > b
-//                         1 if either a or b is NaN
-//
-//   __gedf2(a,b) returns -1 if a < b
-//                         0 if a == b
-//                         1 if a > b
-//                        -1 if either a or b is NaN
-//
-//   __unorddf2(a,b) returns 0 if both a and b are numbers
-//                           1 if either a or b is NaN
-//
-// Note that __ledf2( ) and __gedf2( ) are identical except in their handling of
-// NaN values.
-//
-//===----------------------------------------------------------------------===//
-
-#define DOUBLE_PRECISION
-#include "fp_lib.h"
-
-#include "fp_compare_impl.inc"
-
-COMPILER_RT_ABI CMP_RESULT __ledf2(fp_t a, fp_t b)    { return __leXf2__(a, b); }
-COMPILER_RT_ABI CMP_RESULT __eqdf2(fp_t a, fp_t b)    { return __leXf2__(a, b); }
-COMPILER_RT_ABI CMP_RESULT __ltdf2(fp_t a, fp_t b)    { return __leXf2__(a, b); }
-COMPILER_RT_ABI CMP_RESULT __nedf2(fp_t a, fp_t b)    { return __leXf2__(a, b); }
-COMPILER_RT_ABI CMP_RESULT __gedf2(fp_t a, fp_t b)    { return __geXf2__(a, b); }
-COMPILER_RT_ABI CMP_RESULT __gtdf2(fp_t a, fp_t b)    { return __geXf2__(a, b); }
-COMPILER_RT_ABI CMP_RESULT __unorddf2(fp_t a, fp_t b) { return __unordXf2__(a, b); }
-
-
diff --git a/lib/fp/comparesf2.c b/lib/fp/comparesf2.c
@@ -1,52 +0,0 @@
-//===-- lib/comparesf2.c - Single-precision comparisons -----------*- C -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the following soft-fp_t comparison routines:
-//
-//   __eqsf2   __gesf2   __unordsf2
-//   __lesf2   __gtsf2
-//   __ltsf2
-//   __nesf2
-//
-// The semantics of the routines grouped in each column are identical, so there
-// is a single implementation for each, and wrappers to provide the other names.
-//
-// The main routines behave as follows:
-//
-//   __lesf2(a,b) returns -1 if a < b
-//                         0 if a == b
-//                         1 if a > b
-//                         1 if either a or b is NaN
-//
-//   __gesf2(a,b) returns -1 if a < b
-//                         0 if a == b
-//                         1 if a > b
-//                        -1 if either a or b is NaN
-//
-//   __unordsf2(a,b) returns 0 if both a and b are numbers
-//                           1 if either a or b is NaN
-//
-// Note that __lesf2( ) and __gesf2( ) are identical except in their handling of
-// NaN values.
-//
-//===----------------------------------------------------------------------===//
-
-#define SINGLE_PRECISION
-#include "fp_lib.h"
-
-#include "fp_compare_impl.inc"
-
-COMPILER_RT_ABI CMP_RESULT __lesf2(fp_t a, fp_t b)   { return __leXf2__(a, b); }
-COMPILER_RT_ABI CMP_RESULT __eqsf2(fp_t a, fp_t b)   { return __leXf2__(a, b); }
-COMPILER_RT_ABI CMP_RESULT __ltsf2(fp_t a, fp_t b)   { return __leXf2__(a, b); }
-COMPILER_RT_ABI CMP_RESULT __nesf2(fp_t a, fp_t b)   { return __leXf2__(a, b); }
-COMPILER_RT_ABI CMP_RESULT __gesf2(fp_t a, fp_t b)   { return __geXf2__(a, b); }
-COMPILER_RT_ABI CMP_RESULT __gtsf2(fp_t a, fp_t b)   { return __geXf2__(a, b); }
-COMPILER_RT_ABI CMP_RESULT __unordsf2(fp_t a, fp_t b){ return __unordXf2__(a, b); }
-
-
diff --git a/lib/fp/divdf3.c b/lib/fp/divdf3.c
@@ -1,25 +0,0 @@
-//===-- lib/divdf3.c - Double-precision division ------------------*- C -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements double-precision soft-float division
-// with the IEEE-754 default rounding (to nearest, ties to even).
-//
-//===----------------------------------------------------------------------===//
-
-#define DOUBLE_PRECISION
-
-#define NUMBER_OF_HALF_ITERATIONS 3
-#define NUMBER_OF_FULL_ITERATIONS 1
-
-#include "fp_div_impl.inc"
-
-COMPILER_RT_ABI fp_t __divdf3(fp_t a, fp_t b) { return __divXf3__(a, b); }
-
-
-#undef NUMBER_OF_HALF_ITERATIONS
-#undef NUMBER_OF_FULL_ITERATIONS
diff --git a/lib/fp/divsf3.c b/lib/fp/divsf3.c
@@ -1,27 +0,0 @@
-//===-- lib/divsf3.c - Single-precision division ------------------*- C -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements single-precision soft-float division
-// with the IEEE-754 default rounding (to nearest, ties to even).
-//
-//===----------------------------------------------------------------------===//
-
-#define SINGLE_PRECISION
-
-#define NUMBER_OF_HALF_ITERATIONS 0
-#define NUMBER_OF_FULL_ITERATIONS 3
-#define USE_NATIVE_FULL_ITERATIONS
-
-#include "fp_div_impl.inc"
-
-COMPILER_RT_ABI fp_t __divsf3(fp_t a, fp_t b) { return __divXf3__(a, b); }
-
-
-#undef NUMBER_OF_HALF_ITERATIONS
-#undef NUMBER_OF_FULL_ITERATIONS
-#undef USE_NATIVE_FULL_ITERATIONS
diff --git a/lib/fp/extendsfdf2.c b/lib/fp/extendsfdf2.c
@@ -1,17 +0,0 @@
-//===-- lib/extendsfdf2.c - single -> double conversion -----------*- C -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#define SRC_SINGLE
-#define DST_DOUBLE
-#include "fp_extend_impl.inc"
-
-COMPILER_RT_ABI double __extendsfdf2(float a) { return __extendXfYf2__(a); }
-
-
-#undef SRC_SINGLE
-#undef DST_DOUBLE
diff --git a/lib/fp/fixdfdi.c b/lib/fp/fixdfdi.c
@@ -1,26 +0,0 @@
-//===-- fixdfdi.c - Implement __fixdfdi -----------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#define DOUBLE_PRECISION
-#include "fp_lib.h"
-
-// Support for systems that don't have hardware floating-point; there are no
-// flags to set, and we don't want to code-gen to an unknown soft-float
-// implementation.
-
-#define fixint_t di_int
-#define fixuint_t du_int
-#define FP_FIX_SUFFIX fixdfdi
-#include "fp_fixint_impl.inc"
-
-COMPILER_RT_ABI di_int __fixdfdi(fp_t a) { return __fixint(a); }
-
-
-#undef fixint_t
-#undef fixuint_t
-#undef FP_FIX_SUFFIX
diff --git a/lib/fp/fixdfsi.c b/lib/fp/fixdfsi.c
@@ -1,21 +0,0 @@
-//===-- fixdfsi.c - Implement __fixdfsi -----------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#define DOUBLE_PRECISION
-#include "fp_lib.h"
-#define fixint_t si_int
-#define fixuint_t su_int
-#define FP_FIX_SUFFIX fixdfsi
-#include "fp_fixint_impl.inc"
-
-COMPILER_RT_ABI si_int __fixdfsi(fp_t a) { return __fixint(a); }
-
-
-#undef fixint_t
-#undef fixuint_t
-#undef FP_FIX_SUFFIX
diff --git a/lib/fp/fixsfdi.c b/lib/fp/fixsfdi.c
@@ -1,26 +0,0 @@
-//===-- fixsfdi.c - Implement __fixsfdi -----------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#define SINGLE_PRECISION
-#include "fp_lib.h"
-
-// Support for systems that don't have hardware floating-point; there are no
-// flags to set, and we don't want to code-gen to an unknown soft-float
-// implementation.
-
-#define fixint_t di_int
-#define fixuint_t du_int
-#define FP_FIX_SUFFIX fixsfdi
-#include "fp_fixint_impl.inc"
-
-COMPILER_RT_ABI di_int __fixsfdi(fp_t a) { return __fixint(a); }
-
-
-#undef fixint_t
-#undef fixuint_t
-#undef FP_FIX_SUFFIX
diff --git a/lib/fp/fixsfsi.c b/lib/fp/fixsfsi.c
@@ -1,21 +0,0 @@
-//===-- fixsfsi.c - Implement __fixsfsi -----------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#define SINGLE_PRECISION
-#include "fp_lib.h"
-#define fixint_t si_int
-#define fixuint_t su_int
-#define FP_FIX_SUFFIX fixsfsi
-#include "fp_fixint_impl.inc"
-
-COMPILER_RT_ABI si_int __fixsfsi(fp_t a) { return __fixint(a); }
-
-
-#undef fixint_t
-#undef fixuint_t
-#undef FP_FIX_SUFFIX
diff --git a/lib/fp/fixunsdfdi.c b/lib/fp/fixunsdfdi.c
@@ -1,24 +0,0 @@
-//===-- fixunsdfdi.c - Implement __fixunsdfdi -----------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#define DOUBLE_PRECISION
-#include "fp_lib.h"
-
-// Support for systems that don't have hardware floating-point; there are no
-// flags to set, and we don't want to code-gen to an unknown soft-float
-// implementation.
-
-#define fixuint_t du_int
-#define FP_FIX_SUFFIX fixunsdfdi
-#include "fp_fixuint_impl.inc"
-
-COMPILER_RT_ABI du_int __fixunsdfdi(fp_t a) { return __fixuint(a); }
-
-
-#undef fixuint_t
-#undef FP_FIX_SUFFIX
diff --git a/lib/fp/fixunsdfsi.c b/lib/fp/fixunsdfsi.c
@@ -1,19 +0,0 @@
-//===-- fixunsdfsi.c - Implement __fixunsdfsi -----------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#define DOUBLE_PRECISION
-#include "fp_lib.h"
-#define fixuint_t su_int
-#define FP_FIX_SUFFIX fixunsdfsi
-#include "fp_fixuint_impl.inc"
-
-COMPILER_RT_ABI su_int __fixunsdfsi(fp_t a) { return __fixuint(a); }
-
-
-#undef fixuint_t
-#undef FP_FIX_SUFFIX
diff --git a/lib/fp/fixunssfdi.c b/lib/fp/fixunssfdi.c
@@ -1,24 +0,0 @@
-//===-- fixunssfdi.c - Implement __fixunssfdi -----------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#define SINGLE_PRECISION
-#include "fp_lib.h"
-
-// Support for systems that don't have hardware floating-point; there are no
-// flags to set, and we don't want to code-gen to an unknown soft-float
-// implementation.
-
-#define fixuint_t du_int
-#define FP_FIX_SUFFIX fixunssfdi
-#include "fp_fixuint_impl.inc"
-
-COMPILER_RT_ABI du_int __fixunssfdi(fp_t a) { return __fixuint(a); }
-
-
-#undef fixuint_t
-#undef FP_FIX_SUFFIX
diff --git a/lib/fp/fixunssfsi.c b/lib/fp/fixunssfsi.c
@@ -1,23 +0,0 @@
-//===-- fixunssfsi.c - Implement __fixunssfsi -----------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __fixunssfsi for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#define SINGLE_PRECISION
-#include "fp_lib.h"
-#define fixuint_t su_int
-#define FP_FIX_SUFFIX fixunssfsi
-#include "fp_fixuint_impl.inc"
-
-COMPILER_RT_ABI su_int __fixunssfsi(fp_t a) { return __fixuint(a); }
-
-
-#undef fixuint_t
-#undef FP_FIX_SUFFIX
diff --git a/lib/fp/floatdidf.c b/lib/fp/floatdidf.c
@@ -1,35 +0,0 @@
-//===-- floatdidf.c - Implement __floatdidf -------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __floatdidf for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-// Returns: convert a to a double, rounding toward even.
-
-// Assumption: double is a IEEE 64 bit floating point type
-//             di_int is a 64 bit integral type
-
-// seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm
-// mmmm
-
-// Support for systems that don't have hardware floating-point; there are no
-// flags to set, and we don't want to code-gen to an unknown soft-float
-// implementation.
-
-#define SRC_I64
-#define DST_DOUBLE
-#include "int_to_fp_impl.inc"
-
-COMPILER_RT_ABI double __floatdidf(di_int a) { return __floatXiYf__(a); }
-
-
-#undef SRC_I64
-#undef DST_DOUBLE
diff --git a/lib/fp/floatdisf.c b/lib/fp/floatdisf.c
@@ -1,30 +0,0 @@
-//===-- floatdisf.c - Implement __floatdisf -------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __floatdisf for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-// Returns: convert a to a float, rounding toward even.
-
-// Assumption: float is a IEEE 32 bit floating point type
-//             di_int is a 64 bit integral type
-
-// seee eeee emmm mmmm mmmm mmmm mmmm mmmm
-
-#include "int_lib.h"
-
-#define SRC_I64
-#define DST_SINGLE
-#include "int_to_fp_impl.inc"
-
-COMPILER_RT_ABI float __floatdisf(di_int a) { return __floatXiYf__(a); }
-
-
-#undef SRC_I64
-#undef DST_SINGLE
diff --git a/lib/fp/floatsidf.c b/lib/fp/floatsidf.c
@@ -1,51 +0,0 @@
-//===-- lib/floatsidf.c - integer -> double-precision conversion --*- C -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements integer to double-precision conversion for the
-// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
-// mode.
-//
-//===----------------------------------------------------------------------===//
-
-#define DOUBLE_PRECISION
-#include "fp_lib.h"
-
-#include "int_lib.h"
-
-COMPILER_RT_ABI fp_t __floatsidf(si_int a) {
-
-  const int aWidth = sizeof a * CHAR_BIT;
-
-  // Handle zero as a special case to protect clz
-  if (a == 0)
-    return fromRep(0);
-
-  // All other cases begin by extracting the sign and absolute value of a
-  rep_t sign = 0;
-  su_int aAbs = (su_int)a;
-  if (a < 0) {
-    sign = signBit;
-    aAbs = -aAbs;
-  }
-
-  // Exponent of (fp_t)a is the width of abs(a).
-  const int exponent = (aWidth - 1) - clzsi(aAbs);
-  rep_t result;
-
-  // Shift a into the significand field and clear the implicit bit.  Extra
-  // cast to unsigned int is necessary to get the correct behavior for
-  // the input INT_MIN.
-  const int shift = significandBits - exponent;
-  result = (rep_t)aAbs << shift ^ implicitBit;
-
-  // Insert the exponent
-  result += (rep_t)(exponent + exponentBias) << significandBits;
-  // Insert the sign bit and return
-  return fromRep(result | sign);
-}
-
diff --git a/lib/fp/floatsisf.c b/lib/fp/floatsisf.c
@@ -1,59 +0,0 @@
-//===-- lib/floatsisf.c - integer -> single-precision conversion --*- C -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements integer to single-precision conversion for the
-// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
-// mode.
-//
-//===----------------------------------------------------------------------===//
-
-#define SINGLE_PRECISION
-#include "fp_lib.h"
-
-#include "int_lib.h"
-
-COMPILER_RT_ABI fp_t __floatsisf(si_int a) {
-
-  const int aWidth = sizeof a * CHAR_BIT;
-
-  // Handle zero as a special case to protect clz
-  if (a == 0)
-    return fromRep(0);
-
-  // All other cases begin by extracting the sign and absolute value of a
-  rep_t sign = 0;
-  su_int aAbs = (su_int)a;
-  if (a < 0) {
-    sign = signBit;
-    aAbs = -aAbs;
-  }
-
-  // Exponent of (fp_t)a is the width of abs(a).
-  const int exponent = (aWidth - 1) - clzsi(aAbs);
-  rep_t result;
-
-  // Shift a into the significand field, rounding if it is a right-shift
-  if (exponent <= significandBits) {
-    const int shift = significandBits - exponent;
-    result = (rep_t)aAbs << shift ^ implicitBit;
-  } else {
-    const int shift = exponent - significandBits;
-    result = (rep_t)aAbs >> shift ^ implicitBit;
-    rep_t round = (rep_t)aAbs << (typeWidth - shift);
-    if (round > signBit)
-      result++;
-    if (round == signBit)
-      result += result & 1;
-  }
-
-  // Insert the exponent
-  result += (rep_t)(exponent + exponentBias) << significandBits;
-  // Insert the sign bit and return
-  return fromRep(result | sign);
-}
-
diff --git a/lib/fp/floatundidf.c b/lib/fp/floatundidf.c
@@ -1,35 +0,0 @@
-//===-- floatundidf.c - Implement __floatundidf ---------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __floatundidf for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-// Returns: convert a to a double, rounding toward even.
-
-// Assumption: double is a IEEE 64 bit floating point type
-//             du_int is a 64 bit integral type
-
-// seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm
-// mmmm
-
-#include "int_lib.h"
-
-// Support for systems that don't have hardware floating-point; there are no
-// flags to set, and we don't want to code-gen to an unknown soft-float
-// implementation.
-
-#define SRC_U64
-#define DST_DOUBLE
-#include "int_to_fp_impl.inc"
-
-COMPILER_RT_ABI double __floatundidf(du_int a) { return __floatXiYf__(a); }
-
-
-#undef SRC_U64
-#undef DST_DOUBLE
diff --git a/lib/fp/floatundisf.c b/lib/fp/floatundisf.c
@@ -1,30 +0,0 @@
-//===-- floatundisf.c - Implement __floatundisf ---------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __floatundisf for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-// Returns: convert a to a float, rounding toward even.
-
-// Assumption: float is a IEEE 32 bit floating point type
-//            du_int is a 64 bit integral type
-
-// seee eeee emmm mmmm mmmm mmmm mmmm mmmm
-
-#include "int_lib.h"
-
-#define SRC_U64
-#define DST_SINGLE
-#include "int_to_fp_impl.inc"
-
-COMPILER_RT_ABI float __floatundisf(du_int a) { return __floatXiYf__(a); }
-
-
-#undef SRC_U64
-#undef DST_SINGLE
diff --git a/lib/fp/floatunsidf.c b/lib/fp/floatunsidf.c
@@ -1,40 +0,0 @@
-//===-- lib/floatunsidf.c - uint -> double-precision conversion ---*- C -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements unsigned integer to double-precision conversion for the
-// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
-// mode.
-//
-//===----------------------------------------------------------------------===//
-
-#define DOUBLE_PRECISION
-#include "fp_lib.h"
-
-#include "int_lib.h"
-
-COMPILER_RT_ABI fp_t __floatunsidf(su_int a) {
-
-  const int aWidth = sizeof a * CHAR_BIT;
-
-  // Handle zero as a special case to protect clz
-  if (a == 0)
-    return fromRep(0);
-
-  // Exponent of (fp_t)a is the width of abs(a).
-  const int exponent = (aWidth - 1) - clzsi(a);
-  rep_t result;
-
-  // Shift a into the significand field and clear the implicit bit.
-  const int shift = significandBits - exponent;
-  result = (rep_t)a << shift ^ implicitBit;
-
-  // Insert the exponent
-  result += (rep_t)(exponent + exponentBias) << significandBits;
-  return fromRep(result);
-}
-
diff --git a/lib/fp/floatunsisf.c b/lib/fp/floatunsisf.c
@@ -1,50 +0,0 @@
-//===-- lib/floatunsisf.c - uint -> single-precision conversion ---*- C -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements unsigned integer to single-precision conversion for the
-// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
-// mode.
-//
-//===----------------------------------------------------------------------===//
-
-#define SINGLE_PRECISION
-#include "fp_lib.h"
-
-#include "int_lib.h"
-
-COMPILER_RT_ABI fp_t __floatunsisf(su_int a) {
-
-  const int aWidth = sizeof a * CHAR_BIT;
-
-  // Handle zero as a special case to protect clz
-  if (a == 0)
-    return fromRep(0);
-
-  // Exponent of (fp_t)a is the width of abs(a).
-  const int exponent = (aWidth - 1) - clzsi(a);
-  rep_t result;
-
-  // Shift a into the significand field, rounding if it is a right-shift
-  if (exponent <= significandBits) {
-    const int shift = significandBits - exponent;
-    result = (rep_t)a << shift ^ implicitBit;
-  } else {
-    const int shift = exponent - significandBits;
-    result = (rep_t)a >> shift ^ implicitBit;
-    rep_t round = (rep_t)a << (typeWidth - shift);
-    if (round > signBit)
-      result++;
-    if (round == signBit)
-      result += result & 1;
-  }
-
-  // Insert the exponent
-  result += (rep_t)(exponent + exponentBias) << significandBits;
-  return fromRep(result);
-}
-
diff --git a/lib/fp/fp.c b/lib/fp/fp.c
@@ -0,0 +1,512 @@
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Consolidated soft-float runtime helpers for cfree's libcfree_rt.a.
+// The build compiles only this file: the former per-op .c files are inlined
+// below (see the "---- name.c ----" markers). The fp_lib_undef.h reset header
+// is included between sections that switch precision or (src,dst) pair.
+// License: Apache-2.0 WITH LLVM-exception (see lib/LICENSE-compiler-rt.txt).
+
+// ============================================================
+// Section 1: SINGLE precision arith / compare / conv
+// ============================================================
+// ---- addsf3.c ----
+#define SINGLE_PRECISION
+#include "fp_add_impl.inc"
+
+COMPILER_RT_ABI float __addsf3(float a, float b) { return __addXf3__(a, b); }
+
+// ---- subsf3.c ----
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+// Subtraction; flip the sign bit of b and add.
+COMPILER_RT_ABI fp_t __subsf3(fp_t a, fp_t b) {
+  return __addsf3(a, fromRep(toRep(b) ^ signBit));
+}
+
+// ---- mulsf3.c ----
+#define SINGLE_PRECISION
+#include "fp_mul_impl.inc"
+
+COMPILER_RT_ABI fp_t __mulsf3(fp_t a, fp_t b) { return __mulXf3__(a, b); }
+
+// ---- divsf3.c ----
+#define SINGLE_PRECISION
+
+#define NUMBER_OF_HALF_ITERATIONS 0
+#define NUMBER_OF_FULL_ITERATIONS 3
+#define USE_NATIVE_FULL_ITERATIONS
+
+#include "fp_div_impl.inc"
+
+COMPILER_RT_ABI fp_t __divsf3(fp_t a, fp_t b) { return __divXf3__(a, b); }
+
+
+#undef NUMBER_OF_HALF_ITERATIONS
+#undef NUMBER_OF_FULL_ITERATIONS
+#undef USE_NATIVE_FULL_ITERATIONS
+// ---- negsf2.c ----
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+COMPILER_RT_ABI fp_t __negsf2(fp_t a) { return fromRep(toRep(a) ^ signBit); }
+
+// ---- comparesf2.c ----
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+#include "fp_compare_impl.inc"
+
+COMPILER_RT_ABI CMP_RESULT __lesf2(fp_t a, fp_t b)   { return __leXf2__(a, b); }
+COMPILER_RT_ABI CMP_RESULT __eqsf2(fp_t a, fp_t b)   { return __leXf2__(a, b); }
+COMPILER_RT_ABI CMP_RESULT __ltsf2(fp_t a, fp_t b)   { return __leXf2__(a, b); }
+COMPILER_RT_ABI CMP_RESULT __nesf2(fp_t a, fp_t b)   { return __leXf2__(a, b); }
+COMPILER_RT_ABI CMP_RESULT __gesf2(fp_t a, fp_t b)   { return __geXf2__(a, b); }
+COMPILER_RT_ABI CMP_RESULT __gtsf2(fp_t a, fp_t b)   { return __geXf2__(a, b); }
+COMPILER_RT_ABI CMP_RESULT __unordsf2(fp_t a, fp_t b){ return __unordXf2__(a, b); }
+
+
+// ---- floatsisf.c ----
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+#include "int_lib.h"
+
+COMPILER_RT_ABI fp_t __floatsisf(si_int a) {
+  // Converts a to fp_t; bits that do not fit round to nearest, ties to even.
+  const int aWidth = sizeof a * CHAR_BIT;
+
+  // Zero is special-cased so clzsi never sees 0; the result is all-zero bits.
+  if (a == 0)
+    return fromRep(0);
+
+  // All other cases begin by extracting the sign and absolute value of a
+  rep_t sign = 0;
+  su_int aAbs = (su_int)a;
+  if (a < 0) {
+    sign = signBit;
+    aAbs = -aAbs; // NOTE(review): relies on su_int being unsigned (no overflow)
+  }
+
+  // Unbiased exponent = index of aAbs's top set bit (clzsi presumably = clz).
+  const int exponent = (aWidth - 1) - clzsi(aAbs);
+  rep_t result;
+
+  // Align aAbs's top bit with implicitBit, then XOR that bit away.
+  if (exponent <= significandBits) {
+    const int shift = significandBits - exponent; // fits: left shift, exact
+    result = (rep_t)aAbs << shift ^ implicitBit;
+  } else {
+    const int shift = exponent - significandBits; // too wide: drop low bits
+    result = (rep_t)aAbs >> shift ^ implicitBit;
+    rep_t round = (rep_t)aAbs << (typeWidth - shift); // dropped bits, left-aligned
+    if (round > signBit) // above half (signBit presumably = top bit): round up
+      result++;
+    if (round == signBit) // exactly half: round up only if result is odd
+      result += result & 1;
+  }
+
+  // Insert the exponent; += lets a carry from rounding increment the exponent
+  result += (rep_t)(exponent + exponentBias) << significandBits;
+  // Insert the sign bit and return
+  return fromRep(result | sign);
+}
+
+// ---- floatunsisf.c ----
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+#include "int_lib.h"
+
+COMPILER_RT_ABI fp_t __floatunsisf(su_int a) {
+  // Converts a to fp_t; bits that do not fit round to nearest, ties to even.
+  const int aWidth = sizeof a * CHAR_BIT;
+
+  // Zero is special-cased so clzsi never sees 0; the result is all-zero bits.
+  if (a == 0)
+    return fromRep(0);
+
+  // Unbiased exponent = index of a's top set bit (clzsi presumably = clz).
+  const int exponent = (aWidth - 1) - clzsi(a);
+  rep_t result;
+
+  // Align a's top bit with implicitBit, then XOR that bit away.
+  if (exponent <= significandBits) {
+    const int shift = significandBits - exponent; // fits: left shift, exact
+    result = (rep_t)a << shift ^ implicitBit;
+  } else {
+    const int shift = exponent - significandBits; // too wide: drop low bits
+    result = (rep_t)a >> shift ^ implicitBit;
+    rep_t round = (rep_t)a << (typeWidth - shift); // dropped bits, left-aligned
+    if (round > signBit) // above half (signBit presumably = top bit): round up
+      result++;
+    if (round == signBit) // exactly half: round up only if result is odd
+      result += result & 1;
+  }
+
+  // Insert the exponent; += lets a carry from rounding increment the exponent
+  result += (rep_t)(exponent + exponentBias) << significandBits;
+  return fromRep(result);
+}
+
+// ---- floatdisf.c ----
+#include "int_lib.h"
+
+#define SRC_I64
+#define DST_SINGLE
+#include "int_to_fp_impl.inc"
+
+COMPILER_RT_ABI float __floatdisf(di_int a) { return __floatXiYf__(a); }
+
+
+#undef SRC_I64
+#undef DST_SINGLE
+// ---- floatundisf.c ----
+#include "int_lib.h"
+
+#define SRC_U64
+#define DST_SINGLE
+#include "int_to_fp_impl.inc"
+
+COMPILER_RT_ABI float __floatundisf(du_int a) { return __floatXiYf__(a); }
+
+
+#undef SRC_U64
+#undef DST_SINGLE
+// ---- fixsfsi.c ----
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+#define fixint_t si_int
+#define fixuint_t su_int
+#define FP_FIX_SUFFIX fixsfsi
+#include "fp_fixint_impl.inc"
+
+COMPILER_RT_ABI si_int __fixsfsi(fp_t a) { return __fixint(a); }
+
+
+#undef fixint_t
+#undef fixuint_t
+#undef FP_FIX_SUFFIX
+// ---- fixsfdi.c ----
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+// Support for systems that don't have hardware floating-point; there are no
+// flags to set, and we don't want to code-gen to an unknown soft-float
+// implementation.
+
+#define fixint_t di_int
+#define fixuint_t du_int
+#define FP_FIX_SUFFIX fixsfdi
+#include "fp_fixint_impl.inc"
+
+COMPILER_RT_ABI di_int __fixsfdi(fp_t a) { return __fixint(a); }
+
+
+#undef fixint_t
+#undef fixuint_t
+#undef FP_FIX_SUFFIX
+// ---- fixunssfsi.c ----
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+#define fixuint_t su_int
+#define FP_FIX_SUFFIX fixunssfsi
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI su_int __fixunssfsi(fp_t a) { return __fixuint(a); }
+
+
+#undef fixuint_t
+#undef FP_FIX_SUFFIX
+// ---- fixunssfdi.c ----
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+// Support for systems that don't have hardware floating-point; there are no
+// flags to set, and we don't want to code-gen to an unknown soft-float
+// implementation.
+
+#define fixuint_t du_int
+#define FP_FIX_SUFFIX fixunssfdi
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI du_int __fixunssfdi(fp_t a) { return __fixuint(a); }
+
+
+#undef fixuint_t
+#undef FP_FIX_SUFFIX
+
+#include "fp_lib_undef.h"
+
+// ============================================================
+// Section 2: DOUBLE precision arith / compare / conv
+// ============================================================
+// ---- adddf3.c ----
+#define DOUBLE_PRECISION
+#include "fp_add_impl.inc"
+
+COMPILER_RT_ABI double __adddf3(double a, double b) { return __addXf3__(a, b); }
+
+// ---- subdf3.c ----
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+
+// Subtraction; flip the sign bit of b and add.
+COMPILER_RT_ABI fp_t __subdf3(fp_t a, fp_t b) {
+  return __adddf3(a, fromRep(toRep(b) ^ signBit));
+}
+
+// ---- muldf3.c ----
+#define DOUBLE_PRECISION
+#include "fp_mul_impl.inc"
+
+COMPILER_RT_ABI fp_t __muldf3(fp_t a, fp_t b) { return __mulXf3__(a, b); }
+
+// ---- divdf3.c ----
+#define DOUBLE_PRECISION
+
+#define NUMBER_OF_HALF_ITERATIONS 3
+#define NUMBER_OF_FULL_ITERATIONS 1
+
+#include "fp_div_impl.inc"
+
+COMPILER_RT_ABI fp_t __divdf3(fp_t a, fp_t b) { return __divXf3__(a, b); }
+
+
+#undef NUMBER_OF_HALF_ITERATIONS
+#undef NUMBER_OF_FULL_ITERATIONS
+// ---- negdf2.c ----
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+
+COMPILER_RT_ABI fp_t __negdf2(fp_t a) { return fromRep(toRep(a) ^ signBit); }
+
+// ---- comparedf2.c ----
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+
+#include "fp_compare_impl.inc"
+
+COMPILER_RT_ABI CMP_RESULT __ledf2(fp_t a, fp_t b)    { return __leXf2__(a, b); }
+COMPILER_RT_ABI CMP_RESULT __eqdf2(fp_t a, fp_t b)    { return __leXf2__(a, b); }
+COMPILER_RT_ABI CMP_RESULT __ltdf2(fp_t a, fp_t b)    { return __leXf2__(a, b); }
+COMPILER_RT_ABI CMP_RESULT __nedf2(fp_t a, fp_t b)    { return __leXf2__(a, b); }
+COMPILER_RT_ABI CMP_RESULT __gedf2(fp_t a, fp_t b)    { return __geXf2__(a, b); }
+COMPILER_RT_ABI CMP_RESULT __gtdf2(fp_t a, fp_t b)    { return __geXf2__(a, b); }
+COMPILER_RT_ABI CMP_RESULT __unorddf2(fp_t a, fp_t b) { return __unordXf2__(a, b); }
+
+
+// ---- floatsidf.c ----
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+
+#include "int_lib.h"
+
+COMPILER_RT_ABI fp_t __floatsidf(si_int a) {
+  // Converts a to fp_t; this variant has no rounding step (see note below).
+  const int aWidth = sizeof a * CHAR_BIT;
+
+  // Zero is special-cased so clzsi never sees 0; the result is all-zero bits.
+  if (a == 0)
+    return fromRep(0);
+
+  // All other cases begin by extracting the sign and absolute value of a
+  rep_t sign = 0;
+  su_int aAbs = (su_int)a; // unsigned copy keeps INT_MIN's negation well-defined
+  if (a < 0) {
+    sign = signBit;
+    aAbs = -aAbs;
+  }
+
+  // Unbiased exponent = index of aAbs's top set bit (clzsi presumably = clz).
+  const int exponent = (aWidth - 1) - clzsi(aAbs);
+  rep_t result;
+
+  // Shift aAbs up into the significand field and clear the implicit bit (the
+  // XOR, applied after the shift).  NOTE(review): this presumes
+  // significandBits >= exponent for every input, so shift >= 0 -- confirm.
+  const int shift = significandBits - exponent;
+  result = (rep_t)aAbs << shift ^ implicitBit;
+
+  // Insert the exponent
+  result += (rep_t)(exponent + exponentBias) << significandBits;
+  // Insert the sign bit and return
+  return fromRep(result | sign);
+}
+
+// ---- floatunsidf.c ----
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+
+#include "int_lib.h"
+
+COMPILER_RT_ABI fp_t __floatunsidf(su_int a) {
+  // Converts a to fp_t; this variant has no rounding step (see note below).
+  const int aWidth = sizeof a * CHAR_BIT;
+
+  // Zero is special-cased so clzsi never sees 0; the result is all-zero bits.
+  if (a == 0)
+    return fromRep(0);
+
+  // Unbiased exponent = index of a's top set bit (clzsi presumably = clz).
+  const int exponent = (aWidth - 1) - clzsi(a);
+  rep_t result;
+
+  // Shift a up and XOR away the implicit bit; NOTE(review): presumes shift >= 0.
+  const int shift = significandBits - exponent;
+  result = (rep_t)a << shift ^ implicitBit;
+
+  // Insert the exponent
+  result += (rep_t)(exponent + exponentBias) << significandBits;
+  return fromRep(result);
+}
+
+// ---- floatdidf.c ----
+#include "int_lib.h"
+
+// Returns: convert a to a double, rounding toward even.
+
+// Assumption: double is an IEEE 64-bit floating point type
+//             di_int is a 64 bit integral type
+
+// seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+// mmmm
+
+// Support for systems that don't have hardware floating-point; there are no
+// flags to set, and we don't want to code-gen to an unknown soft-float
+// implementation.
+
+#define SRC_I64
+#define DST_DOUBLE
+#include "int_to_fp_impl.inc"
+
+COMPILER_RT_ABI double __floatdidf(di_int a) { return __floatXiYf__(a); }
+
+
+#undef SRC_I64
+#undef DST_DOUBLE
+// ---- floatundidf.c ----
+#include "int_lib.h"
+
+// Support for systems that don't have hardware floating-point; there are no
+// flags to set, and we don't want to code-gen to an unknown soft-float
+// implementation.
+
+#define SRC_U64
+#define DST_DOUBLE
+#include "int_to_fp_impl.inc"
+
+COMPILER_RT_ABI double __floatundidf(du_int a) { return __floatXiYf__(a); }
+
+
+#undef SRC_U64
+#undef DST_DOUBLE
+// ---- fixdfsi.c ----
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+#define fixint_t si_int
+#define fixuint_t su_int
+#define FP_FIX_SUFFIX fixdfsi
+#include "fp_fixint_impl.inc"
+
+COMPILER_RT_ABI si_int __fixdfsi(fp_t a) { return __fixint(a); }
+
+
+#undef fixint_t
+#undef fixuint_t
+#undef FP_FIX_SUFFIX
+// ---- fixdfdi.c ----
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+
+// Support for systems that don't have hardware floating-point; there are no
+// flags to set, and we don't want to code-gen to an unknown soft-float
+// implementation.
+
+#define fixint_t di_int
+#define fixuint_t du_int
+#define FP_FIX_SUFFIX fixdfdi
+#include "fp_fixint_impl.inc"
+
+COMPILER_RT_ABI di_int __fixdfdi(fp_t a) { return __fixint(a); }
+
+
+#undef fixint_t
+#undef fixuint_t
+#undef FP_FIX_SUFFIX
+// ---- fixunsdfsi.c ----
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+#define fixuint_t su_int
+#define FP_FIX_SUFFIX fixunsdfsi
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI su_int __fixunsdfsi(fp_t a) { return __fixuint(a); }
+
+
+#undef fixuint_t
+#undef FP_FIX_SUFFIX
+// ---- fixunsdfdi.c ----
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+
+// Support for systems that don't have hardware floating-point; there are no
+// flags to set, and we don't want to code-gen to an unknown soft-float
+// implementation.
+
+#define fixuint_t du_int
+#define FP_FIX_SUFFIX fixunsdfdi
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI du_int __fixunsdfdi(fp_t a) { return __fixuint(a); }
+
+
+#undef fixuint_t
+#undef FP_FIX_SUFFIX
+
+#include "fp_lib_undef.h"
+
+// ============================================================
+// Section 3: sf -> df extend
+// ============================================================
+// ---- extendsfdf2.c ----
+#define SRC_SINGLE
+#define DST_DOUBLE
+#include "fp_extend_impl.inc"
+
+COMPILER_RT_ABI double __extendsfdf2(float a) { return __extendXfYf2__(a); }
+
+
+#undef SRC_SINGLE
+#undef DST_DOUBLE
+
+#include "fp_lib_undef.h"
+
+// ============================================================
+// Section 4: df -> sf truncate
+// ============================================================
+// ---- truncdfsf2.c ----
+#define SRC_DOUBLE
+#define DST_SINGLE
+#include "fp_trunc_impl.inc"
+
+COMPILER_RT_ABI float __truncdfsf2(double a) { return __truncXfYf2__(a); }
+
+
+#undef SRC_DOUBLE
+#undef DST_SINGLE
+
+#include "fp_lib_undef.h"
+
+// ============================================================
+// Section 5: fp_mode (precision-independent)
+// ============================================================
+// ---- fp_mode.c ----
+#include "fp_mode.h"
+
+// IEEE-754 default rounding (to nearest, ties to even).
+CRT_FE_ROUND_MODE __fe_getround(void) { return CRT_FE_TONEAREST; }
+
+int __fe_raise_inexact(void) {
+  return 0;
+}
diff --git a/lib/fp/fp_mode.c b/lib/fp/fp_mode.c
@@ -1,22 +0,0 @@
-//===----- lib/fp_mode.c - Floaing-point environment mode utilities --C -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides a default implementation of fp_mode.h for architectures
-// that does not support or does not have an implementation of floating point
-// environment mode.
-//
-//===----------------------------------------------------------------------===//
-
-#include "fp_mode.h"
-
-// IEEE-754 default rounding (to nearest, ties to even).
-CRT_FE_ROUND_MODE __fe_getround(void) { return CRT_FE_TONEAREST; }
-
-int __fe_raise_inexact(void) {
-  return 0;
-}
diff --git a/lib/fp/muldf3.c b/lib/fp/muldf3.c
@@ -1,18 +0,0 @@
-//===-- lib/muldf3.c - Double-precision multiplication ------------*- C -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements double-precision soft-float multiplication
-// with the IEEE-754 default rounding (to nearest, ties to even).
-//
-//===----------------------------------------------------------------------===//
-
-#define DOUBLE_PRECISION
-#include "fp_mul_impl.inc"
-
-COMPILER_RT_ABI fp_t __muldf3(fp_t a, fp_t b) { return __mulXf3__(a, b); }
-
diff --git a/lib/fp/mulsf3.c b/lib/fp/mulsf3.c
@@ -1,18 +0,0 @@
-//===-- lib/mulsf3.c - Single-precision multiplication ------------*- C -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements single-precision soft-float multiplication
-// with the IEEE-754 default rounding (to nearest, ties to even).
-//
-//===----------------------------------------------------------------------===//
-
-#define SINGLE_PRECISION
-#include "fp_mul_impl.inc"
-
-COMPILER_RT_ABI fp_t __mulsf3(fp_t a, fp_t b) { return __mulXf3__(a, b); }
-
diff --git a/lib/fp/negdf2.c b/lib/fp/negdf2.c
@@ -1,17 +0,0 @@
-//===-- lib/negdf2.c - double-precision negation ------------------*- C -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements double-precision soft-float negation.
-//
-//===----------------------------------------------------------------------===//
-
-#define DOUBLE_PRECISION
-#include "fp_lib.h"
-
-COMPILER_RT_ABI fp_t __negdf2(fp_t a) { return fromRep(toRep(a) ^ signBit); }
-
diff --git a/lib/fp/negsf2.c b/lib/fp/negsf2.c
@@ -1,17 +0,0 @@
-//===-- lib/negsf2.c - single-precision negation ------------------*- C -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements single-precision soft-float negation.
-//
-//===----------------------------------------------------------------------===//
-
-#define SINGLE_PRECISION
-#include "fp_lib.h"
-
-COMPILER_RT_ABI fp_t __negsf2(fp_t a) { return fromRep(toRep(a) ^ signBit); }
-
diff --git a/lib/fp/subdf3.c b/lib/fp/subdf3.c
@@ -1,20 +0,0 @@
-//===-- lib/adddf3.c - Double-precision subtraction ---------------*- C -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements double-precision soft-float subtraction.
-//
-//===----------------------------------------------------------------------===//
-
-#define DOUBLE_PRECISION
-#include "fp_lib.h"
-
-// Subtraction; flip the sign bit of b and add.
-COMPILER_RT_ABI fp_t __subdf3(fp_t a, fp_t b) {
-  return __adddf3(a, fromRep(toRep(b) ^ signBit));
-}
-
diff --git a/lib/fp/subsf3.c b/lib/fp/subsf3.c
@@ -1,20 +0,0 @@
-//===-- lib/subsf3.c - Single-precision subtraction ---------------*- C -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements single-precision soft-float subtraction.
-//
-//===----------------------------------------------------------------------===//
-
-#define SINGLE_PRECISION
-#include "fp_lib.h"
-
-// Subtraction; flip the sign bit of b and add.
-COMPILER_RT_ABI fp_t __subsf3(fp_t a, fp_t b) {
-  return __addsf3(a, fromRep(toRep(b) ^ signBit));
-}
-
diff --git a/lib/fp/truncdfsf2.c b/lib/fp/truncdfsf2.c
@@ -1,17 +0,0 @@
-//===-- lib/truncdfsf2.c - double -> single conversion ------------*- C -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#define SRC_DOUBLE
-#define DST_SINGLE
-#include "fp_trunc_impl.inc"
-
-COMPILER_RT_ABI float __truncdfsf2(double a) { return __truncXfYf2__(a); }
-
-
-#undef SRC_DOUBLE
-#undef DST_SINGLE
diff --git a/lib/fp_tf/addtf3.c b/lib/fp_tf/addtf3.c
@@ -1,21 +0,0 @@
-//===-- lib/addtf3.c - Quad-precision addition --------------------*- C -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements quad-precision soft-float addition.
-//
-//===----------------------------------------------------------------------===//
-
-#define QUAD_PRECISION
-#include "fp_lib.h"
-
-#include "fp_add_impl.inc"
-
-COMPILER_RT_ABI fp_t __addtf3(fp_t a, fp_t b) {
-  return __addXf3__(a, b);
-}
-
diff --git a/lib/fp_tf/comparetf2.c b/lib/fp_tf/comparetf2.c
@@ -1,51 +0,0 @@
-//===-- lib/comparetf2.c - Quad-precision comparisons -------------*- C -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// // This file implements the following soft-float comparison routines:
-//
-//   __eqtf2   __getf2   __unordtf2
-//   __letf2   __gttf2
-//   __lttf2
-//   __netf2
-//
-// The semantics of the routines grouped in each column are identical, so there
-// is a single implementation for each, and wrappers to provide the other names.
-//
-// The main routines behave as follows:
-//
-//   __letf2(a,b) returns -1 if a < b
-//                         0 if a == b
-//                         1 if a > b
-//                         1 if either a or b is NaN
-//
-//   __getf2(a,b) returns -1 if a < b
-//                         0 if a == b
-//                         1 if a > b
-//                        -1 if either a or b is NaN
-//
-//   __unordtf2(a,b) returns 0 if both a and b are numbers
-//                           1 if either a or b is NaN
-//
-// Note that __letf2( ) and __getf2( ) are identical except in their handling of
-// NaN values.
-//
-//===----------------------------------------------------------------------===//
-
-#define QUAD_PRECISION
-#include "fp_lib.h"
-
-#include "fp_compare_impl.inc"
-
-COMPILER_RT_ABI CMP_RESULT __letf2(fp_t a, fp_t b)    { return __leXf2__(a, b); }
-COMPILER_RT_ABI CMP_RESULT __eqtf2(fp_t a, fp_t b)    { return __leXf2__(a, b); }
-COMPILER_RT_ABI CMP_RESULT __lttf2(fp_t a, fp_t b)    { return __leXf2__(a, b); }
-COMPILER_RT_ABI CMP_RESULT __netf2(fp_t a, fp_t b)    { return __leXf2__(a, b); }
-COMPILER_RT_ABI CMP_RESULT __getf2(fp_t a, fp_t b)    { return __geXf2__(a, b); }
-COMPILER_RT_ABI CMP_RESULT __gttf2(fp_t a, fp_t b)    { return __geXf2__(a, b); }
-COMPILER_RT_ABI CMP_RESULT __unordtf2(fp_t a, fp_t b) { return __unordXf2__(a, b); }
-
diff --git a/lib/fp_tf/divtf3.c b/lib/fp_tf/divtf3.c
@@ -1,27 +0,0 @@
-//===-- lib/divtf3.c - Quad-precision division --------------------*- C -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements quad-precision soft-float division
-// with the IEEE-754 default rounding (to nearest, ties to even).
-//
-//===----------------------------------------------------------------------===//
-
-#define QUAD_PRECISION
-#include "fp_lib.h"
-
-
-#define NUMBER_OF_HALF_ITERATIONS 4
-#define NUMBER_OF_FULL_ITERATIONS 1
-
-#include "fp_div_impl.inc"
-
-COMPILER_RT_ABI fp_t __divtf3(fp_t a, fp_t b) { return __divXf3__(a, b); }
-
-
-#undef NUMBER_OF_HALF_ITERATIONS
-#undef NUMBER_OF_FULL_ITERATIONS
diff --git a/lib/fp_tf/extenddftf2.c b/lib/fp_tf/extenddftf2.c
@@ -1,20 +0,0 @@
-//===-- lib/extenddftf2.c - double -> quad conversion -------------*- C -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#define QUAD_PRECISION
-#include "fp_lib.h"
-
-#define SRC_DOUBLE
-#define DST_QUAD
-#include "fp_extend_impl.inc"
-
-COMPILER_RT_ABI dst_t __extenddftf2(src_t a) { return __extendXfYf2__(a); }
-
-
-#undef SRC_DOUBLE
-#undef DST_QUAD
diff --git a/lib/fp_tf/extendsftf2.c b/lib/fp_tf/extendsftf2.c
@@ -1,20 +0,0 @@
-//===-- lib/extendsftf2.c - single -> quad conversion -------------*- C -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#define QUAD_PRECISION
-#include "fp_lib.h"
-
-#define SRC_SINGLE
-#define DST_QUAD
-#include "fp_extend_impl.inc"
-
-COMPILER_RT_ABI dst_t __extendsftf2(src_t a) { return __extendXfYf2__(a); }
-
-
-#undef SRC_SINGLE
-#undef DST_QUAD
diff --git a/lib/fp_tf/fixtfdi.c b/lib/fp_tf/fixtfdi.c
@@ -1,21 +0,0 @@
-//===-- fixtfdi.c - Implement __fixtfdi -----------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#define QUAD_PRECISION
-#include "fp_lib.h"
-
-#define fixint_t di_int
-#define fixuint_t du_int
-#define FP_FIX_SUFFIX fixtfdi
-#include "fp_fixint_impl.inc"
-
-COMPILER_RT_ABI di_int __fixtfdi(fp_t a) { return __fixint(a); }
-
-#undef fixint_t
-#undef fixuint_t
-#undef FP_FIX_SUFFIX
diff --git a/lib/fp_tf/fixtfsi.c b/lib/fp_tf/fixtfsi.c
@@ -1,21 +0,0 @@
-//===-- fixtfsi.c - Implement __fixtfsi -----------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#define QUAD_PRECISION
-#include "fp_lib.h"
-
-#define fixint_t si_int
-#define fixuint_t su_int
-#define FP_FIX_SUFFIX fixtfsi
-#include "fp_fixint_impl.inc"
-
-COMPILER_RT_ABI si_int __fixtfsi(fp_t a) { return __fixint(a); }
-
-#undef fixint_t
-#undef fixuint_t
-#undef FP_FIX_SUFFIX
diff --git a/lib/fp_tf/fixtfti.c b/lib/fp_tf/fixtfti.c
@@ -1,21 +0,0 @@
-//===-- fixtfti.c - Implement __fixtfti -----------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#define QUAD_PRECISION
-#include "fp_lib.h"
-
-#define fixint_t ti_int
-#define fixuint_t tu_int
-#define FP_FIX_SUFFIX fixtfti
-#include "fp_fixint_impl.inc"
-
-COMPILER_RT_ABI ti_int __fixtfti(fp_t a) { return __fixint(a); }
-
-#undef fixint_t
-#undef fixuint_t
-#undef FP_FIX_SUFFIX
diff --git a/lib/fp_tf/fixunstfdi.c b/lib/fp_tf/fixunstfdi.c
@@ -1,19 +0,0 @@
-//===-- fixunstfdi.c - Implement __fixunstfdi -----------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#define QUAD_PRECISION
-#include "fp_lib.h"
-
-#define fixuint_t du_int
-#define FP_FIX_SUFFIX fixunstfdi
-#include "fp_fixuint_impl.inc"
-
-COMPILER_RT_ABI du_int __fixunstfdi(fp_t a) { return __fixuint(a); }
-
-#undef fixuint_t
-#undef FP_FIX_SUFFIX
diff --git a/lib/fp_tf/fixunstfsi.c b/lib/fp_tf/fixunstfsi.c
@@ -1,19 +0,0 @@
-//===-- fixunstfsi.c - Implement __fixunstfsi -----------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#define QUAD_PRECISION
-#include "fp_lib.h"
-
-#define fixuint_t su_int
-#define FP_FIX_SUFFIX fixunstfsi
-#include "fp_fixuint_impl.inc"
-
-COMPILER_RT_ABI su_int __fixunstfsi(fp_t a) { return __fixuint(a); }
-
-#undef fixuint_t
-#undef FP_FIX_SUFFIX
diff --git a/lib/fp_tf/fixunstfti.c b/lib/fp_tf/fixunstfti.c
@@ -1,19 +0,0 @@
-//===-- fixunstfsi.c - Implement __fixunstfsi -----------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#define QUAD_PRECISION
-#include "fp_lib.h"
-
-#define fixuint_t tu_int
-#define FP_FIX_SUFFIX fixunstfti
-#include "fp_fixuint_impl.inc"
-
-COMPILER_RT_ABI tu_int __fixunstfti(fp_t a) { return __fixuint(a); }
-
-#undef fixuint_t
-#undef FP_FIX_SUFFIX
diff --git a/lib/fp_tf/floatditf.c b/lib/fp_tf/floatditf.c
@@ -1,47 +0,0 @@
-//===-- lib/floatditf.c - integer -> quad-precision conversion ----*- C -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements di_int to quad-precision conversion for the
-// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
-// mode.
-//
-//===----------------------------------------------------------------------===//
-
-#define QUAD_PRECISION
-#include "fp_lib.h"
-
-COMPILER_RT_ABI fp_t __floatditf(di_int a) {
-
-  const int aWidth = sizeof a * CHAR_BIT;
-
-  // Handle zero as a special case to protect clz
-  if (a == 0)
-    return fromRep(0);
-
-  // All other cases begin by extracting the sign and absolute value of a
-  rep_t sign = 0;
-  du_int aAbs = (du_int)a;
-  if (a < 0) {
-    sign = signBit;
-    aAbs = ~(du_int)a + 1U;
-  }
-
-  // Exponent of (fp_t)a is the width of abs(a).
-  const int exponent = (aWidth - 1) - __builtin_clzll(aAbs);
-  rep_t result;
-
-  // Shift a into the significand field, rounding if it is a right-shift
-  const int shift = significandBits - exponent;
-  result = (rep_t)aAbs << shift ^ implicitBit;
-
-  // Insert the exponent
-  result += (rep_t)(exponent + exponentBias) << significandBits;
-  // Insert the sign bit and return
-  return fromRep(result | sign);
-}
-
diff --git a/lib/fp_tf/floatsitf.c b/lib/fp_tf/floatsitf.c
@@ -1,47 +0,0 @@
-//===-- lib/floatsitf.c - integer -> quad-precision conversion ----*- C -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements integer to quad-precision conversion for the
-// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
-// mode.
-//
-//===----------------------------------------------------------------------===//
-
-#define QUAD_PRECISION
-#include "fp_lib.h"
-
-COMPILER_RT_ABI fp_t __floatsitf(si_int a) {
-
-  const int aWidth = sizeof a * CHAR_BIT;
-
-  // Handle zero as a special case to protect clz
-  if (a == 0)
-    return fromRep(0);
-
-  // All other cases begin by extracting the sign and absolute value of a
-  rep_t sign = 0;
-  su_int aAbs = (su_int)a;
-  if (a < 0) {
-    sign = signBit;
-    aAbs = -aAbs;
-  }
-
-  // Exponent of (fp_t)a is the width of abs(a).
-  const int exponent = (aWidth - 1) - clzsi(aAbs);
-  rep_t result;
-
-  // Shift a into the significand field and clear the implicit bit.
-  const int shift = significandBits - exponent;
-  result = (rep_t)aAbs << shift ^ implicitBit;
-
-  // Insert the exponent
-  result += (rep_t)(exponent + exponentBias) << significandBits;
-  // Insert the sign bit and return
-  return fromRep(result | sign);
-}
-
diff --git a/lib/fp_tf/floattitf.c b/lib/fp_tf/floattitf.c
@@ -1,36 +0,0 @@
-//===-- lib/floattitf.c - int128 -> quad-precision conversion -----*- C -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements ti_int to quad-precision conversion for the
-// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
-// mode.
-//
-//===----------------------------------------------------------------------===//
-
-#define QUAD_PRECISION
-#include "fp_lib.h"
-#include "int_lib.h"
-
-#define SRC_I128
-#define DST_QUAD
-#include "int_to_fp_impl.inc"
-
-// Returns: convert a ti_int to a fp_t, rounding toward even.
-
-// Assumption: fp_t is a IEEE 128 bit floating point type
-//             ti_int is a 128 bit integral type
-
-// seee eeee eeee eeee mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm
-// mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm
-// mmmm mmmm mmmm
-
-COMPILER_RT_ABI fp_t __floattitf(ti_int a) { return __floatXiYf__(a); }
-
-
-#undef SRC_I128
-#undef DST_QUAD
diff --git a/lib/fp_tf/floatunditf.c b/lib/fp_tf/floatunditf.c
@@ -1,38 +0,0 @@
-//===-- lib/floatunditf.c - uint -> quad-precision conversion -----*- C -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements du_int to quad-precision conversion for the
-// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
-// mode.
-//
-//===----------------------------------------------------------------------===//
-
-#define QUAD_PRECISION
-#include "fp_lib.h"
-
-COMPILER_RT_ABI fp_t __floatunditf(du_int a) {
-
-  const int aWidth = sizeof a * CHAR_BIT;
-
-  // Handle zero as a special case to protect clz
-  if (a == 0)
-    return fromRep(0);
-
-  // Exponent of (fp_t)a is the width of abs(a).
-  const int exponent = (aWidth - 1) - __builtin_clzll(a);
-  rep_t result;
-
-  // Shift a into the significand field and clear the implicit bit.
-  const int shift = significandBits - exponent;
-  result = (rep_t)a << shift ^ implicitBit;
-
-  // Insert the exponent
-  result += (rep_t)(exponent + exponentBias) << significandBits;
-  return fromRep(result);
-}
-
diff --git a/lib/fp_tf/floatunsitf.c b/lib/fp_tf/floatunsitf.c
@@ -1,38 +0,0 @@
-//===-- lib/floatunsitf.c - uint -> quad-precision conversion -----*- C -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements unsigned integer to quad-precision conversion for the
-// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
-// mode.
-//
-//===----------------------------------------------------------------------===//
-
-#define QUAD_PRECISION
-#include "fp_lib.h"
-
-COMPILER_RT_ABI fp_t __floatunsitf(su_int a) {
-
-  const int aWidth = sizeof a * CHAR_BIT;
-
-  // Handle zero as a special case to protect clz
-  if (a == 0)
-    return fromRep(0);
-
-  // Exponent of (fp_t)a is the width of abs(a).
-  const int exponent = (aWidth - 1) - clzsi(a);
-  rep_t result;
-
-  // Shift a into the significand field and clear the implicit bit.
-  const int shift = significandBits - exponent;
-  result = (rep_t)a << shift ^ implicitBit;
-
-  // Insert the exponent
-  result += (rep_t)(exponent + exponentBias) << significandBits;
-  return fromRep(result);
-}
-
diff --git a/lib/fp_tf/floatuntitf.c b/lib/fp_tf/floatuntitf.c
@@ -1,36 +0,0 @@
-//===-- lib/floatuntitf.c - uint128 -> quad-precision conversion --*- C -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements tu_int to quad-precision conversion for the
-// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
-// mode.
-//
-//===----------------------------------------------------------------------===//
-
-#define QUAD_PRECISION
-#include "fp_lib.h"
-#include "int_lib.h"
-
-#define SRC_U128
-#define DST_QUAD
-#include "int_to_fp_impl.inc"
-
-// Returns: convert a tu_int to a fp_t, rounding toward even.
-
-// Assumption: fp_t is a IEEE 128 bit floating point type
-//             tu_int is a 128 bit integral type
-
-// seee eeee eeee eeee mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm
-// mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm
-// mmmm mmmm mmmm
-
-COMPILER_RT_ABI fp_t __floatuntitf(tu_int a) { return __floatXiYf__(a); }
-
-
-#undef SRC_U128
-#undef DST_QUAD
diff --git a/lib/fp_tf/fp_tf.c b/lib/fp_tf/fp_tf.c
@@ -0,0 +1,387 @@
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Consolidated soft-float runtime helpers for cfree's libcfree_rt.a.
+// The build compiles only this file; the former per-op .c files are inlined
+// below under "---- <name>.c ----" markers. The fp_lib_undef.h reset header
+// is included between sections that switch precision or (src,dst) pair.
+// License: Apache-2.0 WITH LLVM-exception (see lib/LICENSE-compiler-rt.txt).
+
+// ============================================================
+// Section 1: QUAD precision arith / compare / conv / fix
+// ============================================================
+// ---- addtf3.c ----
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#include "fp_add_impl.inc"
+
+COMPILER_RT_ABI fp_t __addtf3(fp_t a, fp_t b) {
+  return __addXf3__(a, b);
+}
+
+// ---- subtf3.c ----
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+COMPILER_RT_ABI fp_t __addtf3(fp_t a, fp_t b);
+
+// Subtraction; flip the sign bit of b and add.
+COMPILER_RT_ABI fp_t __subtf3(fp_t a, fp_t b) {
+  return __addtf3(a, fromRep(toRep(b) ^ signBit));
+}
+
+// ---- multf3.c ----
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#include "fp_mul_impl.inc"
+
+COMPILER_RT_ABI fp_t __multf3(fp_t a, fp_t b) { return __mulXf3__(a, b); }
+
+// ---- divtf3.c ----
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+
+#define NUMBER_OF_HALF_ITERATIONS 4
+#define NUMBER_OF_FULL_ITERATIONS 1
+
+#include "fp_div_impl.inc"
+
+COMPILER_RT_ABI fp_t __divtf3(fp_t a, fp_t b) { return __divXf3__(a, b); }
+
+
+#undef NUMBER_OF_HALF_ITERATIONS
+#undef NUMBER_OF_FULL_ITERATIONS
+// ---- comparetf2.c ----
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#include "fp_compare_impl.inc"
+// __eqtf2/__lttf2/__netf2 forward to __leXf2__ just like __letf2; __gttf2 forwards to __geXf2__ like __getf2.
+COMPILER_RT_ABI CMP_RESULT __letf2(fp_t a, fp_t b)    { return __leXf2__(a, b); }
+COMPILER_RT_ABI CMP_RESULT __eqtf2(fp_t a, fp_t b)    { return __leXf2__(a, b); }
+COMPILER_RT_ABI CMP_RESULT __lttf2(fp_t a, fp_t b)    { return __leXf2__(a, b); }
+COMPILER_RT_ABI CMP_RESULT __netf2(fp_t a, fp_t b)    { return __leXf2__(a, b); }
+COMPILER_RT_ABI CMP_RESULT __getf2(fp_t a, fp_t b)    { return __geXf2__(a, b); }
+COMPILER_RT_ABI CMP_RESULT __gttf2(fp_t a, fp_t b)    { return __geXf2__(a, b); }
+COMPILER_RT_ABI CMP_RESULT __unordtf2(fp_t a, fp_t b) { return __unordXf2__(a, b); }
+
+// ---- floatsitf.c ----
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+COMPILER_RT_ABI fp_t __floatsitf(si_int a) {
+  // Builds the fp_t bit pattern for a directly: sign | biased exponent | significand.
+  const int aWidth = sizeof a * CHAR_BIT;
+
+  // Zero has no set bit, so return the all-zero pattern before clzsi is reached.
+  if (a == 0)
+    return fromRep(0);
+
+  // Split a into a sign mask and a magnitude; the negation is done on su_int, so it wraps instead of overflowing.
+  rep_t sign = 0;
+  su_int aAbs = (su_int)a;
+  if (a < 0) {
+    sign = signBit;
+    aAbs = -aAbs;
+  }
+
+  // exponent = index of the highest set bit of aAbs (bit width - 1 - leading zeros).
+  const int exponent = (aWidth - 1) - clzsi(aAbs);
+  rep_t result;
+
+  // Shift the magnitude into the significand field; the XOR with implicitBit clears its leading 1.
+  const int shift = significandBits - exponent;
+  result = (rep_t)aAbs << shift ^ implicitBit;
+
+  // Add the biased exponent into the bits above the significand field.
+  result += (rep_t)(exponent + exponentBias) << significandBits;
+  // OR in the sign mask and convert the representation back to fp_t.
+  return fromRep(result | sign);
+}
+
+// ---- floatunsitf.c ----
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+COMPILER_RT_ABI fp_t __floatunsitf(su_int a) {
+  // Builds the fp_t bit pattern for an unsigned a directly: biased exponent | significand.
+  const int aWidth = sizeof a * CHAR_BIT;
+
+  // Zero has no set bit, so return the all-zero pattern before clzsi is reached.
+  if (a == 0)
+    return fromRep(0);
+
+  // exponent = index of the highest set bit of a (bit width - 1 - leading zeros).
+  const int exponent = (aWidth - 1) - clzsi(a);
+  rep_t result;
+
+  // Shift a into the significand field; the XOR with implicitBit clears its leading 1.
+  const int shift = significandBits - exponent;
+  result = (rep_t)a << shift ^ implicitBit;
+
+  // Add the biased exponent into the bits above the significand field.
+  result += (rep_t)(exponent + exponentBias) << significandBits;
+  return fromRep(result);
+}
+
+// ---- floatditf.c ----
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+COMPILER_RT_ABI fp_t __floatditf(di_int a) {
+  // Builds the fp_t bit pattern for a directly: sign | biased exponent | significand.
+  const int aWidth = sizeof a * CHAR_BIT;
+
+  // __builtin_clzll is undefined for 0, so return the all-zero pattern first.
+  if (a == 0)
+    return fromRep(0);
+
+  // Split a into a sign mask and a magnitude; ~x + 1 negates in du_int, so it wraps instead of overflowing.
+  rep_t sign = 0;
+  du_int aAbs = (du_int)a;
+  if (a < 0) {
+    sign = signBit;
+    aAbs = ~(du_int)a + 1U;
+  }
+
+  // exponent = index of the highest set bit of aAbs (bit width - 1 - leading zeros).
+  const int exponent = (aWidth - 1) - __builtin_clzll(aAbs);
+  rep_t result;
+
+  // Shift aAbs into the significand field and clear the implicit bit; the code only left-shifts, no rounding.
+  const int shift = significandBits - exponent;
+  result = (rep_t)aAbs << shift ^ implicitBit;
+
+  // Add the biased exponent into the bits above the significand field.
+  result += (rep_t)(exponent + exponentBias) << significandBits;
+  // OR in the sign mask and convert the representation back to fp_t.
+  return fromRep(result | sign);
+}
+
+// ---- floatunditf.c ----
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+COMPILER_RT_ABI fp_t __floatunditf(du_int a) {
+  // Builds the fp_t bit pattern for an unsigned a directly: biased exponent | significand.
+  const int aWidth = sizeof a * CHAR_BIT;
+
+  // __builtin_clzll is undefined for 0, so return the all-zero pattern first.
+  if (a == 0)
+    return fromRep(0);
+
+  // exponent = index of the highest set bit of a (bit width - 1 - leading zeros).
+  const int exponent = (aWidth - 1) - __builtin_clzll(a);
+  rep_t result;
+
+  // Shift a into the significand field; the XOR with implicitBit clears its leading 1.
+  const int shift = significandBits - exponent;
+  result = (rep_t)a << shift ^ implicitBit;
+
+  // Add the biased exponent into the bits above the significand field.
+  result += (rep_t)(exponent + exponentBias) << significandBits;
+  return fromRep(result);
+}
+
+// ---- floattitf.c ----
+#define QUAD_PRECISION
+#include "fp_lib.h"
+#include "int_lib.h"
+
+#define SRC_I128
+#define DST_QUAD
+#include "int_to_fp_impl.inc"
+
+// Returns: a converted to fp_t by __floatXiYf__ (round-to-nearest, ties-to-even per the original file header).
+
+// Assumption: fp_t is an IEEE 128-bit floating point type
+//             ti_int is a 128-bit integral type
+// fp_t bit layout (presumably s = sign, e = exponent, m = significand):
+// seee eeee eeee eeee mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+// mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm
+// mmmm mmmm mmmm
+
+COMPILER_RT_ABI fp_t __floattitf(ti_int a) { return __floatXiYf__(a); }
+
+
+#undef SRC_I128
+#undef DST_QUAD
+// ---- floatuntitf.c ----
+#define QUAD_PRECISION
+#include "fp_lib.h"
+#include "int_lib.h"
+
+#define SRC_U128
+#define DST_QUAD
+#include "int_to_fp_impl.inc"
+
+// Returns: a converted to fp_t by __floatXiYf__ (round-to-nearest, ties-to-even per the original file header).
+
+// Assumption: fp_t is an IEEE 128-bit floating point type
+//             tu_int is a 128-bit integral type
+// fp_t bit layout (presumably s = sign, e = exponent, m = significand):
+// seee eeee eeee eeee mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+// mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm
+// mmmm mmmm mmmm
+
+COMPILER_RT_ABI fp_t __floatuntitf(tu_int a) { return __floatXiYf__(a); }
+
+
+#undef SRC_U128
+#undef DST_QUAD
+// ---- fixtfsi.c ----
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#define fixint_t si_int
+#define fixuint_t su_int
+#define FP_FIX_SUFFIX fixtfsi
+#include "fp_fixint_impl.inc"
+
+COMPILER_RT_ABI si_int __fixtfsi(fp_t a) { return __fixint(a); }
+
+#undef fixint_t
+#undef fixuint_t
+#undef FP_FIX_SUFFIX
+// ---- fixtfdi.c ----
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#define fixint_t di_int
+#define fixuint_t du_int
+#define FP_FIX_SUFFIX fixtfdi
+#include "fp_fixint_impl.inc"
+
+COMPILER_RT_ABI di_int __fixtfdi(fp_t a) { return __fixint(a); }
+
+#undef fixint_t
+#undef fixuint_t
+#undef FP_FIX_SUFFIX
+// ---- fixtfti.c ----
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#define fixint_t ti_int
+#define fixuint_t tu_int
+#define FP_FIX_SUFFIX fixtfti
+#include "fp_fixint_impl.inc"
+
+COMPILER_RT_ABI ti_int __fixtfti(fp_t a) { return __fixint(a); }
+
+#undef fixint_t
+#undef fixuint_t
+#undef FP_FIX_SUFFIX
+// ---- fixunstfsi.c ----
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#define fixuint_t su_int
+#define FP_FIX_SUFFIX fixunstfsi
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI su_int __fixunstfsi(fp_t a) { return __fixuint(a); }
+
+#undef fixuint_t
+#undef FP_FIX_SUFFIX
+// ---- fixunstfdi.c ----
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#define fixuint_t du_int
+#define FP_FIX_SUFFIX fixunstfdi
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI du_int __fixunstfdi(fp_t a) { return __fixuint(a); }
+
+#undef fixuint_t
+#undef FP_FIX_SUFFIX
+// ---- fixunstfti.c ----
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#define fixuint_t tu_int
+#define FP_FIX_SUFFIX fixunstfti
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI tu_int __fixunstfti(fp_t a) { return __fixuint(a); }
+
+#undef fixuint_t
+#undef FP_FIX_SUFFIX
+
+#include "fp_lib_undef.h"
+
+// ============================================================
+// Section 2: sf -> tf extend
+// ============================================================
+// ---- extendsftf2.c ----
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#define SRC_SINGLE
+#define DST_QUAD
+#include "fp_extend_impl.inc"
+
+COMPILER_RT_ABI dst_t __extendsftf2(src_t a) { return __extendXfYf2__(a); }
+
+
+#undef SRC_SINGLE
+#undef DST_QUAD
+
+#include "fp_lib_undef.h"
+
+// ============================================================
+// Section 3: df -> tf extend
+// ============================================================
+// ---- extenddftf2.c ----
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#define SRC_DOUBLE
+#define DST_QUAD
+#include "fp_extend_impl.inc"
+
+COMPILER_RT_ABI dst_t __extenddftf2(src_t a) { return __extendXfYf2__(a); }
+
+
+#undef SRC_DOUBLE
+#undef DST_QUAD
+
+#include "fp_lib_undef.h"
+
+// ============================================================
+// Section 4: tf -> sf truncate
+// ============================================================
+// ---- trunctfsf2.c ----
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#define SRC_QUAD
+#define DST_SINGLE
+#include "fp_trunc_impl.inc"
+
+COMPILER_RT_ABI dst_t __trunctfsf2(src_t a) { return __truncXfYf2__(a); }
+
+
+#undef SRC_QUAD
+#undef DST_SINGLE
+
+#include "fp_lib_undef.h"
+
+// ============================================================
+// Section 5: tf -> df truncate
+// ============================================================
+// ---- trunctfdf2.c ----
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#define SRC_QUAD
+#define DST_DOUBLE
+#include "fp_trunc_impl.inc"
+
+COMPILER_RT_ABI dst_t __trunctfdf2(src_t a) { return __truncXfYf2__(a); }
+
+
+#undef SRC_QUAD
+#undef DST_DOUBLE
diff --git a/lib/fp_tf/multf3.c b/lib/fp_tf/multf3.c
@@ -1,20 +0,0 @@
-//===-- lib/multf3.c - Quad-precision multiplication --------------*- C -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements quad-precision soft-float multiplication
-// with the IEEE-754 default rounding (to nearest, ties to even).
-//
-//===----------------------------------------------------------------------===//
-
-#define QUAD_PRECISION
-#include "fp_lib.h"
-
-#include "fp_mul_impl.inc"
-
-COMPILER_RT_ABI fp_t __multf3(fp_t a, fp_t b) { return __mulXf3__(a, b); }
-
diff --git a/lib/fp_tf/subtf3.c b/lib/fp_tf/subtf3.c
@@ -1,22 +0,0 @@
-//===-- lib/subtf3.c - Quad-precision subtraction -----------------*- C -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements quad-precision soft-float subtraction.
-//
-//===----------------------------------------------------------------------===//
-
-#define QUAD_PRECISION
-#include "fp_lib.h"
-
-COMPILER_RT_ABI fp_t __addtf3(fp_t a, fp_t b);
-
-// Subtraction; flip the sign bit of b and add.
-COMPILER_RT_ABI fp_t __subtf3(fp_t a, fp_t b) {
-  return __addtf3(a, fromRep(toRep(b) ^ signBit));
-}
-
diff --git a/lib/fp_tf/trunctfdf2.c b/lib/fp_tf/trunctfdf2.c
@@ -1,20 +0,0 @@
-//===-- lib/truncdfsf2.c - quad -> double conversion --------------*- C -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#define QUAD_PRECISION
-#include "fp_lib.h"
-
-#define SRC_QUAD
-#define DST_DOUBLE
-#include "fp_trunc_impl.inc"
-
-COMPILER_RT_ABI dst_t __trunctfdf2(src_t a) { return __truncXfYf2__(a); }
-
-
-#undef SRC_QUAD
-#undef DST_DOUBLE
diff --git a/lib/fp_tf/trunctfsf2.c b/lib/fp_tf/trunctfsf2.c
@@ -1,20 +0,0 @@
-//===-- lib/trunctfsf2.c - quad -> single conversion --------------*- C -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#define QUAD_PRECISION
-#include "fp_lib.h"
-
-#define SRC_QUAD
-#define DST_SINGLE
-#include "fp_trunc_impl.inc"
-
-COMPILER_RT_ABI dst_t __trunctfsf2(src_t a) { return __truncXfYf2__(a); }
-
-
-#undef SRC_QUAD
-#undef DST_SINGLE
diff --git a/lib/fp_ti/fixdfti.c b/lib/fp_ti/fixdfti.c
@@ -1,24 +0,0 @@
-//===-- fixdfti.c - Implement __fixdfti -----------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-#define DOUBLE_PRECISION
-#include "fp_lib.h"
-
-#define fixint_t ti_int
-#define fixuint_t tu_int
-#define FP_FIX_SUFFIX fixdfti
-#include "fp_fixint_impl.inc"
-
-COMPILER_RT_ABI ti_int __fixdfti(fp_t a) { return __fixint(a); }
-
-
-#undef fixint_t
-#undef fixuint_t
-#undef FP_FIX_SUFFIX
diff --git a/lib/fp_ti/fixsfti.c b/lib/fp_ti/fixsfti.c
@@ -1,24 +0,0 @@
-//===-- fixsfti.c - Implement __fixsfti -----------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-#define SINGLE_PRECISION
-#include "fp_lib.h"
-
-#define fixint_t ti_int
-#define fixuint_t tu_int
-#define FP_FIX_SUFFIX fixsfti
-#include "fp_fixint_impl.inc"
-
-COMPILER_RT_ABI ti_int __fixsfti(fp_t a) { return __fixint(a); }
-
-
-#undef fixint_t
-#undef fixuint_t
-#undef FP_FIX_SUFFIX
diff --git a/lib/fp_ti/fixunsdfti.c b/lib/fp_ti/fixunsdfti.c
@@ -1,20 +0,0 @@
-//===-- fixunsdfti.c - Implement __fixunsdfti -----------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-#define DOUBLE_PRECISION
-#include "fp_lib.h"
-#define fixuint_t tu_int
-#define FP_FIX_SUFFIX fixunsdfti
-#include "fp_fixuint_impl.inc"
-
-COMPILER_RT_ABI tu_int __fixunsdfti(fp_t a) { return __fixuint(a); }
-
-#undef fixuint_t
-#undef FP_FIX_SUFFIX
diff --git a/lib/fp_ti/fixunssfti.c b/lib/fp_ti/fixunssfti.c
@@ -1,23 +0,0 @@
-//===-- fixunssfti.c - Implement __fixunssfti -----------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __fixunssfti for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#define SINGLE_PRECISION
-#include "fp_lib.h"
-
-#define fixuint_t tu_int
-#define FP_FIX_SUFFIX fixunssfti
-#include "fp_fixuint_impl.inc"
-
-COMPILER_RT_ABI tu_int __fixunssfti(fp_t a) { return __fixuint(a); }
-
-#undef fixuint_t
-#undef FP_FIX_SUFFIX
diff --git a/lib/fp_ti/floattidf.c b/lib/fp_ti/floattidf.c
@@ -1,32 +0,0 @@
-//===-- floattidf.c - Implement __floattidf -------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __floattidf for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-
-#define SRC_I128
-#define DST_DOUBLE
-#include "int_to_fp_impl.inc"
-
-// Returns: convert a to a double, rounding toward even.
-
-// Assumption: double is a IEEE 64 bit floating point type
-//            ti_int is a 128 bit integral type
-
-// seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm
-// mmmm
-
-COMPILER_RT_ABI double __floattidf(ti_int a) { return __floatXiYf__(a); }
-
-
-#undef SRC_I128
-#undef DST_DOUBLE
diff --git a/lib/fp_ti/floattisf.c b/lib/fp_ti/floattisf.c
@@ -1,31 +0,0 @@
-//===-- floattisf.c - Implement __floattisf -------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __floattisf for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-
-#define SRC_I128
-#define DST_SINGLE
-#include "int_to_fp_impl.inc"
-
-// Returns: convert a to a float, rounding toward even.
-
-// Assumption: float is a IEEE 32 bit floating point type
-//             ti_int is a 128 bit integral type
-
-// seee eeee emmm mmmm mmmm mmmm mmmm mmmm
-
-COMPILER_RT_ABI float __floattisf(ti_int a) { return __floatXiYf__(a); }
-
-
-#undef SRC_I128
-#undef DST_SINGLE
diff --git a/lib/fp_ti/floatuntidf.c b/lib/fp_ti/floatuntidf.c
@@ -1,32 +0,0 @@
-//===-- floatuntidf.c - Implement __floatuntidf ---------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __floatuntidf for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-
-#define SRC_U128
-#define DST_DOUBLE
-#include "int_to_fp_impl.inc"
-
-// Returns: convert a to a double, rounding toward even.
-
-// Assumption: double is a IEEE 64 bit floating point type
-//             tu_int is a 128 bit integral type
-
-// seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm
-// mmmm
-
-COMPILER_RT_ABI double __floatuntidf(tu_int a) { return __floatXiYf__(a); }
-
-
-#undef SRC_U128
-#undef DST_DOUBLE
diff --git a/lib/fp_ti/floatuntisf.c b/lib/fp_ti/floatuntisf.c
@@ -1,31 +0,0 @@
-//===-- floatuntisf.c - Implement __floatuntisf ---------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __floatuntisf for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-
-#define SRC_U128
-#define DST_SINGLE
-#include "int_to_fp_impl.inc"
-
-// Returns: convert a to a float, rounding toward even.
-
-// Assumption: float is a IEEE 32 bit floating point type
-//             tu_int is a 128 bit integral type
-
-// seee eeee emmm mmmm mmmm mmmm mmmm mmmm
-
-COMPILER_RT_ABI float __floatuntisf(tu_int a) { return __floatXiYf__(a); }
-
-
-#undef SRC_U128
-#undef DST_SINGLE
diff --git a/lib/fp_ti/fp_ti.c b/lib/fp_ti/fp_ti.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Consolidated 128-bit integer <-> float/double conversion helpers for
+// cfree's libcfree_rt.a. The build compiles only this one file; the former
+// per-op .c files are inlined below, each under a `// ---- <name>.c ----`
+// marker. fp_lib_undef.h is included between sections that switch precision.
+// License: Apache-2.0 WITH LLVM-exception (see lib/LICENSE-compiler-rt.txt).
+
+// ============================================================
+// Section 1: SINGLE precision — i128/u128 -> sf, and sf -> ti/tu fix
+// ============================================================
+// ---- floattisf.c ----
+#include "int_lib.h"
+
+
+#define SRC_I128
+#define DST_SINGLE
+#include "int_to_fp_impl.inc"
+
+// Returns: convert a to a float, rounding toward even.
+
+// Assumption: float is a IEEE 32 bit floating point type
+//             ti_int is a 128 bit integral type
+
+// seee eeee emmm mmmm mmmm mmmm mmmm mmmm
+
+COMPILER_RT_ABI float __floattisf(ti_int a) { return __floatXiYf__(a); }
+
+
+#undef SRC_I128
+#undef DST_SINGLE
+// ---- floatuntisf.c ----
+#include "int_lib.h"
+
+
+#define SRC_U128
+#define DST_SINGLE
+#include "int_to_fp_impl.inc"
+
+// Returns: convert a to a float, rounding toward even.
+
+// Assumption: float is a IEEE 32 bit floating point type
+//             tu_int is a 128 bit integral type
+
+// seee eeee emmm mmmm mmmm mmmm mmmm mmmm
+
+COMPILER_RT_ABI float __floatuntisf(tu_int a) { return __floatXiYf__(a); }
+
+
+#undef SRC_U128
+#undef DST_SINGLE
+// ---- fixsfti.c ----
+#include "int_lib.h"
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+#define fixint_t ti_int
+#define fixuint_t tu_int
+#define FP_FIX_SUFFIX fixsfti
+#include "fp_fixint_impl.inc"
+
+COMPILER_RT_ABI ti_int __fixsfti(fp_t a) { return __fixint(a); }
+
+
+#undef fixint_t
+#undef fixuint_t
+#undef FP_FIX_SUFFIX
+// ---- fixunssfti.c ----
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+#define fixuint_t tu_int
+#define FP_FIX_SUFFIX fixunssfti
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI tu_int __fixunssfti(fp_t a) { return __fixuint(a); }
+
+#undef fixuint_t
+#undef FP_FIX_SUFFIX
+
+#include "fp_lib_undef.h"
+
+// ============================================================
+// Section 2: DOUBLE precision — i128/u128 -> df, and df -> ti/tu fix
+// ============================================================
+// ---- floattidf.c ----
+#include "int_lib.h"
+
+
+#define SRC_I128
+#define DST_DOUBLE
+#include "int_to_fp_impl.inc"
+
+// Returns: convert a to a double, rounding toward even.
+
+// Assumption: double is a IEEE 64 bit floating point type
+//            ti_int is a 128 bit integral type
+
+// seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+// mmmm
+
+COMPILER_RT_ABI double __floattidf(ti_int a) { return __floatXiYf__(a); }
+
+
+#undef SRC_I128
+#undef DST_DOUBLE
+// ---- floatuntidf.c ----
+#include "int_lib.h"
+
+
+#define SRC_U128
+#define DST_DOUBLE
+#include "int_to_fp_impl.inc"
+
+// Returns: convert a to a double, rounding toward even.
+
+// Assumption: double is a IEEE 64 bit floating point type
+//             tu_int is a 128 bit integral type
+
+// seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+// mmmm
+
+COMPILER_RT_ABI double __floatuntidf(tu_int a) { return __floatXiYf__(a); }
+
+
+#undef SRC_U128
+#undef DST_DOUBLE
+// ---- fixdfti.c ----
+#include "int_lib.h"
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+
+#define fixint_t ti_int
+#define fixuint_t tu_int
+#define FP_FIX_SUFFIX fixdfti
+#include "fp_fixint_impl.inc"
+
+COMPILER_RT_ABI ti_int __fixdfti(fp_t a) { return __fixint(a); }
+
+
+#undef fixint_t
+#undef fixuint_t
+#undef FP_FIX_SUFFIX
+// ---- fixunsdfti.c ----
+#include "int_lib.h"
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+#define fixuint_t tu_int
+#define FP_FIX_SUFFIX fixunsdfti
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI tu_int __fixunsdfti(fp_t a) { return __fixuint(a); }
+
+#undef fixuint_t
+#undef FP_FIX_SUFFIX
diff --git a/lib/impl/fp_extend.h b/lib/impl/fp_extend.h
@@ -1,277 +0,0 @@
-//===-lib/fp_extend.h - low precision -> high precision conversion -*- C -*-=//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Source/destination type setup for the extending FP conversions. Caller
-// defines SRC_<X> (SINGLE/DOUBLE/80/HALF) and DST_<Y> (SINGLE/DOUBLE/QUAD)
-// before each inclusion.
-//
-// Re-includable. Names that depend on the (src, dst) pair are emitted with
-// a `_<src><dst>` suffix exactly once per (TU, pair); bare-name #define
-// aliases are set on every inclusion so the body in `fp_extend_impl.inc`
-// (and the caller) uses bare names that resolve to the right suffixed
-// entity. Use the umbrella `fp_lib_undef.h` between sections in one TU to
-// clear the bare-name aliases.
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-// Self-clean any prior inclusion's per-(src, dst) state. May coexist with
-// fp_trunc.h / int_to_fp.h which define overlapping bare-name aliases.
-#undef _FP_EXT_SRC_SUF
-#undef _FP_EXT_DST_SUF
-#undef SRC_REP_C
-#undef DST_REP_C
-#undef src_t
-#undef src_rep_t
-#undef dst_t
-#undef dst_rep_t
-#undef srcBits
-#undef srcSigFracBits
-#undef srcExpBits
-#undef dstBits
-#undef dstSigFracBits
-#undef dstExpBits
-#undef src_rep_t_clz
-#undef src_rep_t_clz_impl
-#undef srcToRep
-#undef dstFromRep
-#undef extract_sign_from_src
-#undef extract_exp_from_src
-#undef extract_sig_frac_from_src
-#undef clz_in_sig_frac
-#undef construct_dst_rep
-
-#if defined SRC_SINGLE
-#define _FP_EXT_SRC_SUF sf
-#elif defined SRC_DOUBLE
-#define _FP_EXT_SRC_SUF df
-#elif defined SRC_80
-#define _FP_EXT_SRC_SUF xf
-#elif defined SRC_HALF
-#define _FP_EXT_SRC_SUF hf
-#else
-#error Source should be half, single, or double precision!
-#endif
-
-#if defined DST_SINGLE
-#define _FP_EXT_DST_SUF sf
-#elif defined DST_DOUBLE
-#define _FP_EXT_DST_SUF df
-#elif defined DST_QUAD
-#define _FP_EXT_DST_SUF tf
-#else
-#error Destination should be single, double, or quad precision!
-#endif
-
-#define _FP_EXT_PASTE4_(a, b, c, d) a##b##c##d
-#define _FP_EXT_PASTE4(a, b, c, d)  _FP_EXT_PASTE4_(a, b, c, d)
-#define _FP_EXT_PAIR(stem)          _FP_EXT_PASTE4(stem, _, _FP_EXT_SRC_SUF, _FP_EXT_DST_SUF)
-
-// ---- Bare-name aliases (re-set every inclusion). ------------------------
-
-#define src_t                       _FP_EXT_PAIR(src_t)
-#define src_rep_t                   _FP_EXT_PAIR(src_rep_t)
-#define dst_t                       _FP_EXT_PAIR(dst_t)
-#define dst_rep_t                   _FP_EXT_PAIR(dst_rep_t)
-#define srcBits                     _FP_EXT_PAIR(srcBits)
-#define srcSigFracBits              _FP_EXT_PAIR(srcSigFracBits)
-#define srcExpBits                  _FP_EXT_PAIR(srcExpBits)
-#define dstBits                     _FP_EXT_PAIR(dstBits)
-#define dstSigFracBits              _FP_EXT_PAIR(dstSigFracBits)
-#define dstExpBits                  _FP_EXT_PAIR(dstExpBits)
-#define src_rep_t_clz_impl          _FP_EXT_PAIR(src_rep_t_clz_impl)
-#define srcToRep                    _FP_EXT_PAIR(srcToRep)
-#define dstFromRep                  _FP_EXT_PAIR(dstFromRep)
-#define extract_sign_from_src       _FP_EXT_PAIR(extract_sign_from_src)
-#define extract_exp_from_src        _FP_EXT_PAIR(extract_exp_from_src)
-#define extract_sig_frac_from_src   _FP_EXT_PAIR(extract_sig_frac_from_src)
-#define clz_in_sig_frac             _FP_EXT_PAIR(clz_in_sig_frac)
-#define construct_dst_rep           _FP_EXT_PAIR(construct_dst_rep)
-
-// SRC_REP_C / DST_REP_C: textual macros (UINT32_C etc.); same body each
-// inclusion within a precision.
-
-#if defined SRC_SINGLE
-#define SRC_REP_C UINT32_C
-#elif defined SRC_DOUBLE
-#define SRC_REP_C UINT64_C
-#elif defined SRC_80
-#define SRC_REP_C (__uint128_t)
-#elif defined SRC_HALF
-#define SRC_REP_C UINT16_C
-#endif
-
-#if defined DST_SINGLE
-#define DST_REP_C UINT32_C
-#elif defined DST_DOUBLE
-#define DST_REP_C UINT64_C
-#elif defined DST_QUAD
-#define DST_REP_C (__uint128_t)
-#endif
-
-// ---- One-time emission per (TU, src+dst pair). --------------------------
-// Enumerate the pairs cfree actually uses (sf→df, sf→tf, df→tf).
-
-#if defined SRC_SINGLE && defined DST_DOUBLE && !defined FP_EXT_SFDF_EMITTED
-#define FP_EXT_SFDF_EMITTED
-#define _FP_EXT_EMIT 1
-#elif defined SRC_SINGLE && defined DST_QUAD && !defined FP_EXT_SFTF_EMITTED
-#define FP_EXT_SFTF_EMITTED
-#define _FP_EXT_EMIT 1
-#elif defined SRC_DOUBLE && defined DST_QUAD && !defined FP_EXT_DFTF_EMITTED
-#define FP_EXT_DFTF_EMITTED
-#define _FP_EXT_EMIT 1
-#endif
-
-#ifdef _FP_EXT_EMIT
-#undef _FP_EXT_EMIT
-
-#if defined SRC_SINGLE
-typedef float src_t;
-typedef uint32_t src_rep_t;
-static const int srcBits = sizeof(src_t) * CHAR_BIT;
-static const int srcSigFracBits = 23;
-// -1 accounts for the sign bit.
-// srcBits - srcSigFracBits - 1
-static const int srcExpBits = 8;
-#define src_rep_t_clz clzsi
-
-#elif defined SRC_DOUBLE
-typedef double src_t;
-typedef uint64_t src_rep_t;
-static const int srcBits = sizeof(src_t) * CHAR_BIT;
-static const int srcSigFracBits = 52;
-// -1 accounts for the sign bit.
-// srcBits - srcSigFracBits - 1
-static const int srcExpBits = 11;
-
-static inline int src_rep_t_clz_impl(src_rep_t a) {
-#if defined __LP64__
-  return __builtin_clzl(a);
-#else
-  if (a & REP_C(0xffffffff00000000))
-    return clzsi(a >> 32);
-  else
-    return 32 + clzsi(a & REP_C(0xffffffff));
-#endif
-}
-#define src_rep_t_clz src_rep_t_clz_impl
-
-#elif defined SRC_80
-typedef xf_float src_t;
-typedef __uint128_t src_rep_t;
-// sign bit, exponent and significand occupy the lower 80 bits.
-static const int srcBits = 80;
-static const int srcSigFracBits = 63;
-// -1 accounts for the sign bit.
-// -1 accounts for the explicitly stored integer bit.
-// srcBits - srcSigFracBits - 1 - 1
-static const int srcExpBits = 15;
-
-#elif defined SRC_HALF
-#ifdef COMPILER_RT_HAS_FLOAT16
-typedef _Float16 src_t;
-#else
-typedef uint16_t src_t;
-#endif
-typedef uint16_t src_rep_t;
-static const int srcBits = sizeof(src_t) * CHAR_BIT;
-static const int srcSigFracBits = 10;
-// -1 accounts for the sign bit.
-// srcBits - srcSigFracBits - 1
-static const int srcExpBits = 5;
-
-static inline int src_rep_t_clz_impl(src_rep_t a) {
-  return __builtin_clz(a) - 16;
-}
-#define src_rep_t_clz src_rep_t_clz_impl
-
-#endif // end source precision
-
-#if defined DST_SINGLE
-typedef float dst_t;
-typedef uint32_t dst_rep_t;
-static const int dstBits = sizeof(dst_t) * CHAR_BIT;
-static const int dstSigFracBits = 23;
-// -1 accounts for the sign bit.
-// dstBits - dstSigFracBits - 1
-static const int dstExpBits = 8;
-
-#elif defined DST_DOUBLE
-typedef double dst_t;
-typedef uint64_t dst_rep_t;
-static const int dstBits = sizeof(dst_t) * CHAR_BIT;
-static const int dstSigFracBits = 52;
-// -1 accounts for the sign bit.
-// dstBits - dstSigFracBits - 1
-static const int dstExpBits = 11;
-
-#elif defined DST_QUAD
-typedef tf_float dst_t;
-typedef __uint128_t dst_rep_t;
-static const int dstBits = sizeof(dst_t) * CHAR_BIT;
-static const int dstSigFracBits = 112;
-// -1 accounts for the sign bit.
-// dstBits - dstSigFracBits - 1
-static const int dstExpBits = 15;
-
-#endif // end destination precision
-
-// End of specialization parameters.
-
-// TODO: These helper routines should be placed into fp_lib.h
-// Currently they depend on macros/constants defined above.
-
-static inline src_rep_t extract_sign_from_src(src_rep_t x) {
-  const src_rep_t srcSignMask = SRC_REP_C(1) << (srcBits - 1);
-  return (x & srcSignMask) >> (srcBits - 1);
-}
-
-static inline src_rep_t extract_exp_from_src(src_rep_t x) {
-  const int srcSigBits = srcBits - 1 - srcExpBits;
-  const src_rep_t srcExpMask = ((SRC_REP_C(1) << srcExpBits) - 1) << srcSigBits;
-  return (x & srcExpMask) >> srcSigBits;
-}
-
-static inline src_rep_t extract_sig_frac_from_src(src_rep_t x) {
-  const src_rep_t srcSigFracMask = (SRC_REP_C(1) << srcSigFracBits) - 1;
-  return x & srcSigFracMask;
-}
-
-#ifdef src_rep_t_clz
-static inline int clz_in_sig_frac(src_rep_t sigFrac) {
-      const int skip = 1 + srcExpBits;
-      return src_rep_t_clz(sigFrac) - skip;
-}
-#endif
-
-static inline dst_rep_t construct_dst_rep(dst_rep_t sign, dst_rep_t exp, dst_rep_t sigFrac) {
-  return (sign << (dstBits - 1)) | (exp << (dstBits - 1 - dstExpBits)) | sigFrac;
-}
-
-// Two helper routines for conversion to and from the representation of
-// floating-point data as integer values follow.
-
-static inline src_rep_t srcToRep(src_t x) {
-  const union {
-    src_t f;
-    src_rep_t i;
-  } rep = {.f = x};
-  return rep.i;
-}
-
-static inline dst_t dstFromRep(dst_rep_t x) {
-  const union {
-    dst_t f;
-    dst_rep_t i;
-  } rep = {.i = x};
-  return rep.f;
-}
-// End helper routines.  Conversion implementation follows.
-
-#endif // _FP_EXT_EMIT
diff --git a/lib/impl/fp_extend_impl.inc b/lib/impl/fp_extend_impl.inc
@@ -35,7 +35,264 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "fp_extend.h"
+// ---- fp_extend.h (was a separate header; merged) ----
+#include "int_lib.h"
+
+// Self-clean any prior inclusion's per-(src, dst) state. May coexist with
+// fp_trunc_impl.inc / int_to_fp.h which define overlapping bare-name aliases.
+#undef _FP_EXT_SRC_SUF
+#undef _FP_EXT_DST_SUF
+#undef SRC_REP_C
+#undef DST_REP_C
+#undef src_t
+#undef src_rep_t
+#undef dst_t
+#undef dst_rep_t
+#undef srcBits
+#undef srcSigFracBits
+#undef srcExpBits
+#undef dstBits
+#undef dstSigFracBits
+#undef dstExpBits
+#undef src_rep_t_clz
+#undef src_rep_t_clz_impl
+#undef srcToRep
+#undef dstFromRep
+#undef extract_sign_from_src
+#undef extract_exp_from_src
+#undef extract_sig_frac_from_src
+#undef clz_in_sig_frac
+#undef construct_dst_rep
+
+#if defined SRC_SINGLE // choose the source suffix token pasted by _FP_EXT_PAIR
+#define _FP_EXT_SRC_SUF sf
+#elif defined SRC_DOUBLE
+#define _FP_EXT_SRC_SUF df
+#elif defined SRC_80
+#define _FP_EXT_SRC_SUF xf
+#elif defined SRC_HALF
+#define _FP_EXT_SRC_SUF hf
+#else
+#error Source should be half, single, double, or 80-bit extended precision!
+#endif
+
+#if defined DST_SINGLE
+#define _FP_EXT_DST_SUF sf
+#elif defined DST_DOUBLE
+#define _FP_EXT_DST_SUF df
+#elif defined DST_QUAD
+#define _FP_EXT_DST_SUF tf
+#else
+#error Destination should be single, double, or quad precision!
+#endif
+
+#define _FP_EXT_PASTE4_(a, b, c, d) a##b##c##d
+#define _FP_EXT_PASTE4(a, b, c, d)  _FP_EXT_PASTE4_(a, b, c, d)
+#define _FP_EXT_PAIR(stem)          _FP_EXT_PASTE4(stem, _, _FP_EXT_SRC_SUF, _FP_EXT_DST_SUF)
+
+// ---- Bare-name aliases (re-set every inclusion). ------------------------
+
+#define src_t                       _FP_EXT_PAIR(src_t)
+#define src_rep_t                   _FP_EXT_PAIR(src_rep_t)
+#define dst_t                       _FP_EXT_PAIR(dst_t)
+#define dst_rep_t                   _FP_EXT_PAIR(dst_rep_t)
+#define srcBits                     _FP_EXT_PAIR(srcBits)
+#define srcSigFracBits              _FP_EXT_PAIR(srcSigFracBits)
+#define srcExpBits                  _FP_EXT_PAIR(srcExpBits)
+#define dstBits                     _FP_EXT_PAIR(dstBits)
+#define dstSigFracBits              _FP_EXT_PAIR(dstSigFracBits)
+#define dstExpBits                  _FP_EXT_PAIR(dstExpBits)
+#define src_rep_t_clz_impl          _FP_EXT_PAIR(src_rep_t_clz_impl)
+#define srcToRep                    _FP_EXT_PAIR(srcToRep)
+#define dstFromRep                  _FP_EXT_PAIR(dstFromRep)
+#define extract_sign_from_src       _FP_EXT_PAIR(extract_sign_from_src)
+#define extract_exp_from_src        _FP_EXT_PAIR(extract_exp_from_src)
+#define extract_sig_frac_from_src   _FP_EXT_PAIR(extract_sig_frac_from_src)
+#define clz_in_sig_frac             _FP_EXT_PAIR(clz_in_sig_frac)
+#define construct_dst_rep           _FP_EXT_PAIR(construct_dst_rep)
+
+// SRC_REP_C / DST_REP_C: textual macros (UINT32_C etc.); same body each
+// inclusion within a precision.
+
+#if defined SRC_SINGLE
+#define SRC_REP_C UINT32_C
+#elif defined SRC_DOUBLE
+#define SRC_REP_C UINT64_C
+#elif defined SRC_80
+#define SRC_REP_C (__uint128_t)
+#elif defined SRC_HALF
+#define SRC_REP_C UINT16_C
+#endif
+
+#if defined DST_SINGLE
+#define DST_REP_C UINT32_C
+#elif defined DST_DOUBLE
+#define DST_REP_C UINT64_C
+#elif defined DST_QUAD
+#define DST_REP_C (__uint128_t)
+#endif
+
+// ---- One-time emission per (TU, src+dst pair). --------------------------
+// Enumerate the pairs cfree actually uses (sf→df, sf→tf, df→tf).
+
+#if defined SRC_SINGLE && defined DST_DOUBLE && !defined FP_EXT_SFDF_EMITTED
+#define FP_EXT_SFDF_EMITTED
+#define _FP_EXT_EMIT 1
+#elif defined SRC_SINGLE && defined DST_QUAD && !defined FP_EXT_SFTF_EMITTED
+#define FP_EXT_SFTF_EMITTED
+#define _FP_EXT_EMIT 1
+#elif defined SRC_DOUBLE && defined DST_QUAD && !defined FP_EXT_DFTF_EMITTED
+#define FP_EXT_DFTF_EMITTED
+#define _FP_EXT_EMIT 1
+#endif
+
+#ifdef _FP_EXT_EMIT
+#undef _FP_EXT_EMIT
+
+#if defined SRC_SINGLE
+typedef float src_t;
+typedef uint32_t src_rep_t;
+static const int srcBits = sizeof(src_t) * CHAR_BIT;
+static const int srcSigFracBits = 23;
+// -1 accounts for the sign bit.
+// srcBits - srcSigFracBits - 1
+static const int srcExpBits = 8;
+#define src_rep_t_clz clzsi
+
+#elif defined SRC_DOUBLE
+typedef double src_t;
+typedef uint64_t src_rep_t;
+static const int srcBits = sizeof(src_t) * CHAR_BIT;
+static const int srcSigFracBits = 52;
+// -1 accounts for the sign bit.
+// srcBits - srcSigFracBits - 1
+static const int srcExpBits = 11;
+
+static inline int src_rep_t_clz_impl(src_rep_t a) { // leading zeros of a 64-bit rep
+#if defined __LP64__
+  return __builtin_clzl(a); // LP64: long is 64 bits, so it spans the whole rep
+#else
+  if (a & SRC_REP_C(0xffffffff00000000)) // SRC_REP_C is set above; REP_C needs fp_lib.h
+    return clzsi(a >> 32);
+  else // high word is zero: count its 32 zeros plus those of the low word
+    return 32 + clzsi(a & SRC_REP_C(0xffffffff));
+#endif
+}
+#define src_rep_t_clz src_rep_t_clz_impl
+
+#elif defined SRC_80
+typedef xf_float src_t;
+typedef __uint128_t src_rep_t;
+// sign bit, exponent and significand occupy the lower 80 bits.
+static const int srcBits = 80;
+static const int srcSigFracBits = 63;
+// -1 accounts for the sign bit.
+// -1 accounts for the explicitly stored integer bit.
+// srcBits - srcSigFracBits - 1 - 1
+static const int srcExpBits = 15;
+
+#elif defined SRC_HALF
+#ifdef COMPILER_RT_HAS_FLOAT16
+typedef _Float16 src_t;
+#else
+typedef uint16_t src_t;
+#endif
+typedef uint16_t src_rep_t;
+static const int srcBits = sizeof(src_t) * CHAR_BIT;
+static const int srcSigFracBits = 10;
+// -1 accounts for the sign bit.
+// srcBits - srcSigFracBits - 1
+static const int srcExpBits = 5;
+
+static inline int src_rep_t_clz_impl(src_rep_t a) {
+  return __builtin_clz(a) - 16;
+}
+#define src_rep_t_clz src_rep_t_clz_impl
+
+#endif // end source precision
+
+#if defined DST_SINGLE
+typedef float dst_t;
+typedef uint32_t dst_rep_t;
+static const int dstBits = sizeof(dst_t) * CHAR_BIT;
+static const int dstSigFracBits = 23;
+// -1 accounts for the sign bit.
+// dstBits - dstSigFracBits - 1
+static const int dstExpBits = 8;
+
+#elif defined DST_DOUBLE
+typedef double dst_t;
+typedef uint64_t dst_rep_t;
+static const int dstBits = sizeof(dst_t) * CHAR_BIT;
+static const int dstSigFracBits = 52;
+// -1 accounts for the sign bit.
+// dstBits - dstSigFracBits - 1
+static const int dstExpBits = 11;
+
+#elif defined DST_QUAD
+typedef tf_float dst_t;
+typedef __uint128_t dst_rep_t;
+static const int dstBits = sizeof(dst_t) * CHAR_BIT;
+static const int dstSigFracBits = 112;
+// -1 accounts for the sign bit.
+// dstBits - dstSigFracBits - 1
+static const int dstExpBits = 15;
+
+#endif // end destination precision
+
+// End of specialization parameters.
+
+// TODO: These helper routines should be placed into fp_lib.h
+// Currently they depend on macros/constants defined above.
+
+static inline src_rep_t extract_sign_from_src(src_rep_t x) {
+  const src_rep_t srcSignMask = SRC_REP_C(1) << (srcBits - 1);
+  return (x & srcSignMask) >> (srcBits - 1);
+}
+
+static inline src_rep_t extract_exp_from_src(src_rep_t x) {
+  const int srcSigBits = srcBits - 1 - srcExpBits;
+  const src_rep_t srcExpMask = ((SRC_REP_C(1) << srcExpBits) - 1) << srcSigBits;
+  return (x & srcExpMask) >> srcSigBits;
+}
+
+static inline src_rep_t extract_sig_frac_from_src(src_rep_t x) {
+  const src_rep_t srcSigFracMask = (SRC_REP_C(1) << srcSigFracBits) - 1;
+  return x & srcSigFracMask;
+}
+
+#ifdef src_rep_t_clz
+static inline int clz_in_sig_frac(src_rep_t sigFrac) {
+      const int skip = 1 + srcExpBits;
+      return src_rep_t_clz(sigFrac) - skip;
+}
+#endif
+
+static inline dst_rep_t construct_dst_rep(dst_rep_t sign, dst_rep_t exp, dst_rep_t sigFrac) {
+  return (sign << (dstBits - 1)) | (exp << (dstBits - 1 - dstExpBits)) | sigFrac;
+}
+
+// Two helper routines for conversion to and from the representation of
+// floating-point data as integer values follow.
+
+static inline src_rep_t srcToRep(src_t x) {
+  const union {
+    src_t f;
+    src_rep_t i;
+  } rep = {.f = x};
+  return rep.i;
+}
+
+static inline dst_t dstFromRep(dst_rep_t x) {
+  const union {
+    dst_t f;
+    dst_rep_t i;
+  } rep = {.i = x};
+  return rep.f;
+}
+// End helper routines.  Conversion implementation follows.
+
+#endif // _FP_EXT_EMIT
 
 #define __extendXfYf2__ _FP_EXT_PAIR(__extendXfYf2__)
 
diff --git a/lib/impl/fp_trunc.h b/lib/impl/fp_trunc.h
@@ -1,259 +0,0 @@
-//=== lib/fp_trunc.h - high precision -> low precision conversion *- C -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Source/destination precision setup for the truncating FP conversions.
-// Caller defines SRC_<X> (SINGLE/DOUBLE/QUAD) and DST_<Y>
-// (SINGLE/DOUBLE/80/HALF/BFLOAT) before each inclusion.
-//
-// Re-includable. Names that depend on the (src, dst) pair are emitted with
-// a `_<src><dst>` suffix exactly once per (TU, pair); bare-name #define
-// aliases are set every inclusion. Use the umbrella `fp_lib_undef.h` to
-// clear the bare-name aliases between sections in one TU.
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-// Self-clean any prior inclusion's per-(src, dst) state. May coexist with
-// fp_extend.h / int_to_fp.h which define overlapping bare-name aliases.
-#undef _FP_TRUNC_SRC_SUF
-#undef _FP_TRUNC_DST_SUF
-#undef SRC_REP_C
-#undef DST_REP_C
-#undef src_t
-#undef src_rep_t
-#undef dst_t
-#undef dst_rep_t
-#undef srcBits
-#undef srcSigFracBits
-#undef srcExpBits
-#undef dstBits
-#undef dstSigFracBits
-#undef dstExpBits
-#undef srcToRep
-#undef dstFromRep
-#undef extract_sign_from_src
-#undef extract_exp_from_src
-#undef extract_sig_frac_from_src
-#undef construct_dst_rep
-
-#if defined SRC_SINGLE
-#define _FP_TRUNC_SRC_SUF sf
-#elif defined SRC_DOUBLE
-#define _FP_TRUNC_SRC_SUF df
-#elif defined SRC_QUAD
-#define _FP_TRUNC_SRC_SUF tf
-#else
-#error Source should be double precision or quad precision!
-#endif
-
-#if defined DST_SINGLE
-#define _FP_TRUNC_DST_SUF sf
-#elif defined DST_DOUBLE
-#define _FP_TRUNC_DST_SUF df
-#elif defined DST_80
-#define _FP_TRUNC_DST_SUF xf
-#elif defined DST_HALF
-#define _FP_TRUNC_DST_SUF hf
-#elif defined DST_BFLOAT
-#define _FP_TRUNC_DST_SUF bf
-#else
-#error Destination should be single precision or double precision!
-#endif
-
-#define _FP_TRUNC_PASTE4_(a, b, c, d) a##b##c##d
-#define _FP_TRUNC_PASTE4(a, b, c, d)  _FP_TRUNC_PASTE4_(a, b, c, d)
-#define _FP_TRUNC_PAIR(stem)          _FP_TRUNC_PASTE4(stem, _, _FP_TRUNC_SRC_SUF, _FP_TRUNC_DST_SUF)
-
-// ---- Bare-name aliases (re-set every inclusion). ------------------------
-// fp_extend.h uses identical bare names; the aliases here suffix-rename
-// to fp_trunc-specific symbols (different pair tokens), so the two
-// headers can coexist in one TU without colliding.
-
-#define src_t                       _FP_TRUNC_PAIR(src_t)
-#define src_rep_t                   _FP_TRUNC_PAIR(src_rep_t)
-#define dst_t                       _FP_TRUNC_PAIR(dst_t)
-#define dst_rep_t                   _FP_TRUNC_PAIR(dst_rep_t)
-#define srcBits                     _FP_TRUNC_PAIR(srcBits)
-#define srcSigFracBits              _FP_TRUNC_PAIR(srcSigFracBits)
-#define srcExpBits                  _FP_TRUNC_PAIR(srcExpBits)
-#define dstBits                     _FP_TRUNC_PAIR(dstBits)
-#define dstSigFracBits              _FP_TRUNC_PAIR(dstSigFracBits)
-#define dstExpBits                  _FP_TRUNC_PAIR(dstExpBits)
-#define srcToRep                    _FP_TRUNC_PAIR(srcToRep)
-#define dstFromRep                  _FP_TRUNC_PAIR(dstFromRep)
-#define extract_sign_from_src       _FP_TRUNC_PAIR(extract_sign_from_src)
-#define extract_exp_from_src        _FP_TRUNC_PAIR(extract_exp_from_src)
-#define extract_sig_frac_from_src   _FP_TRUNC_PAIR(extract_sig_frac_from_src)
-#define construct_dst_rep           _FP_TRUNC_PAIR(construct_dst_rep)
-
-#if defined SRC_SINGLE
-#define SRC_REP_C UINT32_C
-#elif defined SRC_DOUBLE
-#define SRC_REP_C UINT64_C
-#elif defined SRC_QUAD
-#define SRC_REP_C (__uint128_t)
-#endif
-
-#if defined DST_SINGLE
-#define DST_REP_C UINT32_C
-#elif defined DST_DOUBLE
-#define DST_REP_C UINT64_C
-#elif defined DST_80
-#define DST_REP_C (__uint128_t)
-#elif defined DST_HALF
-#define DST_REP_C UINT16_C
-#elif defined DST_BFLOAT
-#define DST_REP_C UINT16_C
-#endif
-
-// ---- One-time emission per (TU, src+dst pair). --------------------------
-// Pairs cfree uses: df→sf, tf→df, tf→sf.
-
-#if defined SRC_DOUBLE && defined DST_SINGLE && !defined FP_TRUNC_DFSF_EMITTED
-#define FP_TRUNC_DFSF_EMITTED
-#define _FP_TRUNC_EMIT 1
-#elif defined SRC_QUAD && defined DST_DOUBLE && !defined FP_TRUNC_TFDF_EMITTED
-#define FP_TRUNC_TFDF_EMITTED
-#define _FP_TRUNC_EMIT 1
-#elif defined SRC_QUAD && defined DST_SINGLE && !defined FP_TRUNC_TFSF_EMITTED
-#define FP_TRUNC_TFSF_EMITTED
-#define _FP_TRUNC_EMIT 1
-#endif
-
-#ifdef _FP_TRUNC_EMIT
-#undef _FP_TRUNC_EMIT
-
-#if defined SRC_SINGLE
-typedef float src_t;
-typedef uint32_t src_rep_t;
-static const int srcBits = sizeof(src_t) * CHAR_BIT;
-static const int srcSigFracBits = 23;
-// -1 accounts for the sign bit.
-// srcBits - srcSigFracBits - 1
-static const int srcExpBits = 8;
-
-#elif defined SRC_DOUBLE
-typedef double src_t;
-typedef uint64_t src_rep_t;
-static const int srcBits = sizeof(src_t) * CHAR_BIT;
-static const int srcSigFracBits = 52;
-// -1 accounts for the sign bit.
-// srcBits - srcSigFracBits - 1
-static const int srcExpBits = 11;
-
-#elif defined SRC_QUAD
-typedef tf_float src_t;
-typedef __uint128_t src_rep_t;
-static const int srcBits = sizeof(src_t) * CHAR_BIT;
-static const int srcSigFracBits = 112;
-// -1 accounts for the sign bit.
-// srcBits - srcSigFracBits - 1
-static const int srcExpBits = 15;
-
-#endif // end source precision
-
-#if defined DST_DOUBLE
-typedef double dst_t;
-typedef uint64_t dst_rep_t;
-static const int dstBits = sizeof(dst_t) * CHAR_BIT;
-static const int dstSigFracBits = 52;
-// -1 accounts for the sign bit.
-// dstBits - dstSigFracBits - 1
-static const int dstExpBits = 11;
-
-#elif defined DST_80
-typedef xf_float dst_t;
-typedef __uint128_t dst_rep_t;
-static const int dstBits = 80;
-static const int dstSigFracBits = 63;
-// -1 accounts for the sign bit.
-// -1 accounts for the explicitly stored integer bit.
-// dstBits - dstSigFracBits - 1 - 1
-static const int dstExpBits = 15;
-
-#elif defined DST_SINGLE
-typedef float dst_t;
-typedef uint32_t dst_rep_t;
-static const int dstBits = sizeof(dst_t) * CHAR_BIT;
-static const int dstSigFracBits = 23;
-// -1 accounts for the sign bit.
-// dstBits - dstSigFracBits - 1
-static const int dstExpBits = 8;
-
-#elif defined DST_HALF
-#ifdef COMPILER_RT_HAS_FLOAT16
-typedef _Float16 dst_t;
-#else
-typedef uint16_t dst_t;
-#endif
-typedef uint16_t dst_rep_t;
-static const int dstBits = sizeof(dst_t) * CHAR_BIT;
-static const int dstSigFracBits = 10;
-// -1 accounts for the sign bit.
-// dstBits - dstSigFracBits - 1
-static const int dstExpBits = 5;
-
-#elif defined DST_BFLOAT
-typedef __bf16 dst_t;
-typedef uint16_t dst_rep_t;
-static const int dstBits = sizeof(dst_t) * CHAR_BIT;
-static const int dstSigFracBits = 7;
-// -1 accounts for the sign bit.
-// dstBits - dstSigFracBits - 1
-static const int dstExpBits = 8;
-
-#endif // end destination precision
-
-// TODO: These helper routines should be placed into fp_lib.h
-// Currently they depend on macros/constants defined above.
-
-static inline src_rep_t extract_sign_from_src(src_rep_t x) {
-  const src_rep_t srcSignMask = SRC_REP_C(1) << (srcBits - 1);
-  return (x & srcSignMask) >> (srcBits - 1);
-}
-
-static inline src_rep_t extract_exp_from_src(src_rep_t x) {
-  const int srcSigBits = srcBits - 1 - srcExpBits;
-  const src_rep_t srcExpMask = ((SRC_REP_C(1) << srcExpBits) - 1) << srcSigBits;
-  return (x & srcExpMask) >> srcSigBits;
-}
-
-static inline src_rep_t extract_sig_frac_from_src(src_rep_t x) {
-  const src_rep_t srcSigFracMask = (SRC_REP_C(1) << srcSigFracBits) - 1;
-  return x & srcSigFracMask;
-}
-
-static inline dst_rep_t construct_dst_rep(dst_rep_t sign, dst_rep_t exp, dst_rep_t sigFrac) {
-  dst_rep_t result = (sign << (dstBits - 1)) | (exp << (dstBits - 1 - dstExpBits)) | sigFrac;
-  // Set the explicit integer bit in F80 if present.
-  if (dstBits == 80 && exp) {
-    result |= (DST_REP_C(1) << dstSigFracBits);
-  }
-  return result;
-}
-
-// End of specialization parameters.  Two helper routines for conversion to and
-// from the representation of floating-point data as integer values follow.
-
-static inline src_rep_t srcToRep(src_t x) {
-  const union {
-    src_t f;
-    src_rep_t i;
-  } rep = {.f = x};
-  return rep.i;
-}
-
-static inline dst_t dstFromRep(dst_rep_t x) {
-  const union {
-    dst_t f;
-    dst_rep_t i;
-  } rep = {.i = x};
-  return rep.f;
-}
-
-#endif // _FP_TRUNC_EMIT
diff --git a/lib/impl/fp_trunc_impl.inc b/lib/impl/fp_trunc_impl.inc
@@ -36,7 +36,248 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "fp_trunc.h"
+// ---- fp_trunc.h (was a separate header; merged) ----
+#include "int_lib.h"
+
+// Self-clean any prior inclusion's per-(src, dst) state. May coexist with the
+// fp_extend / int_to_fp setup, which defines overlapping bare-name aliases.
+#undef _FP_TRUNC_SRC_SUF
+#undef _FP_TRUNC_DST_SUF
+#undef SRC_REP_C
+#undef DST_REP_C
+#undef src_t
+#undef src_rep_t
+#undef dst_t
+#undef dst_rep_t
+#undef srcBits
+#undef srcSigFracBits
+#undef srcExpBits
+#undef dstBits
+#undef dstSigFracBits
+#undef dstExpBits
+#undef srcToRep
+#undef dstFromRep
+#undef extract_sign_from_src
+#undef extract_exp_from_src
+#undef extract_sig_frac_from_src
+#undef construct_dst_rep
+
+#if defined SRC_SINGLE
+#define _FP_TRUNC_SRC_SUF sf
+#elif defined SRC_DOUBLE
+#define _FP_TRUNC_SRC_SUF df
+#elif defined SRC_QUAD
+#define _FP_TRUNC_SRC_SUF tf
+#else
+#error Source should be double precision or quad precision!
+#endif
+
+#if defined DST_SINGLE
+#define _FP_TRUNC_DST_SUF sf
+#elif defined DST_DOUBLE
+#define _FP_TRUNC_DST_SUF df
+#elif defined DST_80
+#define _FP_TRUNC_DST_SUF xf
+#elif defined DST_HALF
+#define _FP_TRUNC_DST_SUF hf
+#elif defined DST_BFLOAT
+#define _FP_TRUNC_DST_SUF bf
+#else
+#error Destination should be single precision or double precision!
+#endif
+
+#define _FP_TRUNC_PASTE4_(a, b, c, d) a##b##c##d
+#define _FP_TRUNC_PASTE4(a, b, c, d)  _FP_TRUNC_PASTE4_(a, b, c, d)
+#define _FP_TRUNC_PAIR(stem)          _FP_TRUNC_PASTE4(stem, _, _FP_TRUNC_SRC_SUF, _FP_TRUNC_DST_SUF)
+
+// ---- Bare-name aliases (re-set every inclusion). ------------------------
+// The fp_extend setup uses identical bare names; the aliases here
+// suffix-rename to fp_trunc-specific symbols (different pair tokens), so
+// the two can coexist in one TU without colliding.
+
+#define src_t                       _FP_TRUNC_PAIR(src_t)
+#define src_rep_t                   _FP_TRUNC_PAIR(src_rep_t)
+#define dst_t                       _FP_TRUNC_PAIR(dst_t)
+#define dst_rep_t                   _FP_TRUNC_PAIR(dst_rep_t)
+#define srcBits                     _FP_TRUNC_PAIR(srcBits)
+#define srcSigFracBits              _FP_TRUNC_PAIR(srcSigFracBits)
+#define srcExpBits                  _FP_TRUNC_PAIR(srcExpBits)
+#define dstBits                     _FP_TRUNC_PAIR(dstBits)
+#define dstSigFracBits              _FP_TRUNC_PAIR(dstSigFracBits)
+#define dstExpBits                  _FP_TRUNC_PAIR(dstExpBits)
+#define srcToRep                    _FP_TRUNC_PAIR(srcToRep)
+#define dstFromRep                  _FP_TRUNC_PAIR(dstFromRep)
+#define extract_sign_from_src       _FP_TRUNC_PAIR(extract_sign_from_src)
+#define extract_exp_from_src        _FP_TRUNC_PAIR(extract_exp_from_src)
+#define extract_sig_frac_from_src   _FP_TRUNC_PAIR(extract_sig_frac_from_src)
+#define construct_dst_rep           _FP_TRUNC_PAIR(construct_dst_rep)
+
+#if defined SRC_SINGLE
+#define SRC_REP_C UINT32_C
+#elif defined SRC_DOUBLE
+#define SRC_REP_C UINT64_C
+#elif defined SRC_QUAD
+#define SRC_REP_C (__uint128_t)
+#endif
+
+#if defined DST_SINGLE
+#define DST_REP_C UINT32_C
+#elif defined DST_DOUBLE
+#define DST_REP_C UINT64_C
+#elif defined DST_80
+#define DST_REP_C (__uint128_t)
+#elif defined DST_HALF
+#define DST_REP_C UINT16_C
+#elif defined DST_BFLOAT
+#define DST_REP_C UINT16_C
+#endif
+
+// ---- One-time emission per (TU, src+dst pair). --------------------------
+// Pairs cfree uses: df→sf, tf→df, tf→sf; any other pair emits no definitions.
+
+#if defined SRC_DOUBLE && defined DST_SINGLE && !defined FP_TRUNC_DFSF_EMITTED
+#define FP_TRUNC_DFSF_EMITTED
+#define _FP_TRUNC_EMIT 1
+#elif defined SRC_QUAD && defined DST_DOUBLE && !defined FP_TRUNC_TFDF_EMITTED
+#define FP_TRUNC_TFDF_EMITTED
+#define _FP_TRUNC_EMIT 1
+#elif defined SRC_QUAD && defined DST_SINGLE && !defined FP_TRUNC_TFSF_EMITTED
+#define FP_TRUNC_TFSF_EMITTED
+#define _FP_TRUNC_EMIT 1
+#endif
+
+#ifdef _FP_TRUNC_EMIT
+#undef _FP_TRUNC_EMIT
+
+#if defined SRC_SINGLE
+typedef float src_t;
+typedef uint32_t src_rep_t;
+static const int srcBits = sizeof(src_t) * CHAR_BIT;
+static const int srcSigFracBits = 23;
+// -1 accounts for the sign bit.
+// srcBits - srcSigFracBits - 1
+static const int srcExpBits = 8;
+
+#elif defined SRC_DOUBLE
+typedef double src_t;
+typedef uint64_t src_rep_t;
+static const int srcBits = sizeof(src_t) * CHAR_BIT;
+static const int srcSigFracBits = 52;
+// -1 accounts for the sign bit.
+// srcBits - srcSigFracBits - 1
+static const int srcExpBits = 11;
+
+#elif defined SRC_QUAD
+typedef tf_float src_t;
+typedef __uint128_t src_rep_t;
+static const int srcBits = sizeof(src_t) * CHAR_BIT;
+static const int srcSigFracBits = 112;
+// -1 accounts for the sign bit.
+// srcBits - srcSigFracBits - 1
+static const int srcExpBits = 15;
+
+#endif // end source precision
+
+#if defined DST_DOUBLE
+typedef double dst_t;
+typedef uint64_t dst_rep_t;
+static const int dstBits = sizeof(dst_t) * CHAR_BIT;
+static const int dstSigFracBits = 52;
+// -1 accounts for the sign bit.
+// dstBits - dstSigFracBits - 1
+static const int dstExpBits = 11;
+
+#elif defined DST_80
+typedef xf_float dst_t;
+typedef __uint128_t dst_rep_t;
+static const int dstBits = 80;
+static const int dstSigFracBits = 63;
+// -1 accounts for the sign bit.
+// -1 accounts for the explicitly stored integer bit.
+// dstBits - dstSigFracBits - 1 - 1
+static const int dstExpBits = 15;
+
+#elif defined DST_SINGLE
+typedef float dst_t;
+typedef uint32_t dst_rep_t;
+static const int dstBits = sizeof(dst_t) * CHAR_BIT;
+static const int dstSigFracBits = 23;
+// -1 accounts for the sign bit.
+// dstBits - dstSigFracBits - 1
+static const int dstExpBits = 8;
+
+#elif defined DST_HALF
+#ifdef COMPILER_RT_HAS_FLOAT16
+typedef _Float16 dst_t;
+#else
+typedef uint16_t dst_t;
+#endif
+typedef uint16_t dst_rep_t;
+static const int dstBits = sizeof(dst_t) * CHAR_BIT;
+static const int dstSigFracBits = 10;
+// -1 accounts for the sign bit.
+// dstBits - dstSigFracBits - 1
+static const int dstExpBits = 5;
+
+#elif defined DST_BFLOAT
+typedef __bf16 dst_t;
+typedef uint16_t dst_rep_t;
+static const int dstBits = sizeof(dst_t) * CHAR_BIT;
+static const int dstSigFracBits = 7;
+// -1 accounts for the sign bit.
+// dstBits - dstSigFracBits - 1
+static const int dstExpBits = 8;
+
+#endif // end destination precision
+
+// TODO: These helper routines should be placed into fp_lib.h
+// Currently they depend on macros/constants defined above.
+
+static inline src_rep_t extract_sign_from_src(src_rep_t x) {
+  const src_rep_t srcSignMask = SRC_REP_C(1) << (srcBits - 1);
+  return (x & srcSignMask) >> (srcBits - 1);
+}
+
+static inline src_rep_t extract_exp_from_src(src_rep_t x) {
+  const int srcSigBits = srcBits - 1 - srcExpBits;
+  const src_rep_t srcExpMask = ((SRC_REP_C(1) << srcExpBits) - 1) << srcSigBits;
+  return (x & srcExpMask) >> srcSigBits;
+}
+
+static inline src_rep_t extract_sig_frac_from_src(src_rep_t x) {
+  const src_rep_t srcSigFracMask = (SRC_REP_C(1) << srcSigFracBits) - 1;
+  return x & srcSigFracMask;
+}
+
+static inline dst_rep_t construct_dst_rep(dst_rep_t sign, dst_rep_t exp, dst_rep_t sigFrac) {
+  dst_rep_t result = (sign << (dstBits - 1)) | (exp << (dstBits - 1 - dstExpBits)) | sigFrac;
+  // Set the explicit integer bit in F80 if present.
+  if (dstBits == 80 && exp) {
+    result |= (DST_REP_C(1) << dstSigFracBits);
+  }
+  return result;
+}
+
+// End of specialization parameters.  Two helper routines for conversion to and
+// from the representation of floating-point data as integer values follow.
+
+static inline src_rep_t srcToRep(src_t x) {
+  const union {
+    src_t f;
+    src_rep_t i;
+  } rep = {.f = x};
+  return rep.i;
+}
+
+static inline dst_t dstFromRep(dst_rep_t x) {
+  const union {
+    dst_t f;
+    dst_rep_t i;
+  } rep = {.i = x};
+  return rep.f;
+}
+
+#endif // _FP_TRUNC_EMIT
 
 #define __truncXfYf2__ _FP_TRUNC_PAIR(__truncXfYf2__)
 
diff --git a/lib/impl/int_to_fp.h b/lib/impl/int_to_fp.h
@@ -1,174 +0,0 @@
-//===-- int_to_fp.h - integer to floating point conversion ----------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Source/destination type setup for int → fp conversions. Caller defines
-// SRC_<I64/U64/I128/U128> and DST_<SINGLE/DOUBLE/QUAD> before each
-// inclusion.
-//
-// Re-includable. Names depending on the (src, dst) pair are emitted with
-// a `_<src><dst>` suffix exactly once per (TU, pair); bare-name #define
-// aliases are set every inclusion. Use the umbrella `fp_lib_undef.h` to
-// clear the bare-name aliases between sections in one TU.
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-// Self-clean any prior inclusion's per-(src, dst) state. May coexist with
-// fp_extend.h / fp_trunc.h which define overlapping bare-name aliases.
-#undef _INT_TO_FP_SRC_SUF
-#undef _INT_TO_FP_DST_SUF
-#undef SRC_REP_C
-#undef DST_REP_C
-#undef src_t
-#undef usrc_t
-#undef dst_t
-#undef dst_rep_t
-#undef clzSrcT
-#undef dstFromRep
-#undef dstSigBits
-
-#if defined SRC_I64
-#define _INT_TO_FP_SRC_SUF i64
-#elif defined SRC_U64
-#define _INT_TO_FP_SRC_SUF u64
-#elif defined SRC_I128
-#define _INT_TO_FP_SRC_SUF i128
-#elif defined SRC_U128
-#define _INT_TO_FP_SRC_SUF u128
-#else
-#error Source should be a handled integer type.
-#endif
-
-#if defined DST_SINGLE
-#define _INT_TO_FP_DST_SUF sf
-#elif defined DST_DOUBLE
-#define _INT_TO_FP_DST_SUF df
-#elif defined DST_QUAD
-#define _INT_TO_FP_DST_SUF tf
-#else
-#error Destination should be a handled floating point type
-#endif
-
-#define _INT_TO_FP_PASTE4_(a, b, c, d) a##b##c##d
-#define _INT_TO_FP_PASTE4(a, b, c, d)  _INT_TO_FP_PASTE4_(a, b, c, d)
-#define _INT_TO_FP_PAIR(stem)          _INT_TO_FP_PASTE4(stem, _, _INT_TO_FP_SRC_SUF, _INT_TO_FP_DST_SUF)
-
-// ---- Bare-name aliases (re-set every inclusion). ------------------------
-
-#define src_t                       _INT_TO_FP_PAIR(src_t)
-#define usrc_t                      _INT_TO_FP_PAIR(usrc_t)
-#define dst_t                       _INT_TO_FP_PAIR(dst_t)
-#define dst_rep_t                   _INT_TO_FP_PAIR(dst_rep_t)
-#define clzSrcT                     _INT_TO_FP_PAIR(clzSrcT)
-#define dstFromRep                  _INT_TO_FP_PAIR(dstFromRep)
-#define dstSigBits                  _INT_TO_FP_PAIR(dstSigBits)
-
-// DST_REP_C: simple textual macro per dst.
-#if defined DST_SINGLE
-#define DST_REP_C UINT32_C
-#elif defined DST_DOUBLE
-#define DST_REP_C UINT64_C
-#elif defined DST_QUAD
-#define DST_REP_C (__uint128_t)
-#endif
-
-// ---- One-time emission per (TU, src+dst pair). --------------------------
-// Pairs cfree uses: (i64,u64) × (sf,df) and (i128,u128) × (sf,df,tf).
-
-#if defined SRC_I64 && defined DST_SINGLE && !defined INT_TO_FP_I64SF_EMITTED
-#define INT_TO_FP_I64SF_EMITTED
-#define _INT_TO_FP_EMIT 1
-#elif defined SRC_I64 && defined DST_DOUBLE && !defined INT_TO_FP_I64DF_EMITTED
-#define INT_TO_FP_I64DF_EMITTED
-#define _INT_TO_FP_EMIT 1
-#elif defined SRC_U64 && defined DST_SINGLE && !defined INT_TO_FP_U64SF_EMITTED
-#define INT_TO_FP_U64SF_EMITTED
-#define _INT_TO_FP_EMIT 1
-#elif defined SRC_U64 && defined DST_DOUBLE && !defined INT_TO_FP_U64DF_EMITTED
-#define INT_TO_FP_U64DF_EMITTED
-#define _INT_TO_FP_EMIT 1
-#elif defined SRC_I128 && defined DST_SINGLE && !defined INT_TO_FP_I128SF_EMITTED
-#define INT_TO_FP_I128SF_EMITTED
-#define _INT_TO_FP_EMIT 1
-#elif defined SRC_I128 && defined DST_DOUBLE && !defined INT_TO_FP_I128DF_EMITTED
-#define INT_TO_FP_I128DF_EMITTED
-#define _INT_TO_FP_EMIT 1
-#elif defined SRC_I128 && defined DST_QUAD && !defined INT_TO_FP_I128TF_EMITTED
-#define INT_TO_FP_I128TF_EMITTED
-#define _INT_TO_FP_EMIT 1
-#elif defined SRC_U128 && defined DST_SINGLE && !defined INT_TO_FP_U128SF_EMITTED
-#define INT_TO_FP_U128SF_EMITTED
-#define _INT_TO_FP_EMIT 1
-#elif defined SRC_U128 && defined DST_DOUBLE && !defined INT_TO_FP_U128DF_EMITTED
-#define INT_TO_FP_U128DF_EMITTED
-#define _INT_TO_FP_EMIT 1
-#elif defined SRC_U128 && defined DST_QUAD && !defined INT_TO_FP_U128TF_EMITTED
-#define INT_TO_FP_U128TF_EMITTED
-#define _INT_TO_FP_EMIT 1
-#endif
-
-#ifdef _INT_TO_FP_EMIT
-#undef _INT_TO_FP_EMIT
-
-#if defined SRC_I64
-typedef int64_t src_t;
-typedef uint64_t usrc_t;
-static __inline int clzSrcT(usrc_t x) { return __builtin_clzll(x); }
-
-#elif defined SRC_U64
-typedef uint64_t src_t;
-typedef uint64_t usrc_t;
-static __inline int clzSrcT(usrc_t x) { return __builtin_clzll(x); }
-
-#elif defined SRC_I128
-typedef __int128_t src_t;
-typedef __uint128_t usrc_t;
-static __inline int clzSrcT(usrc_t x) { return __clzti2(x); }
-
-#elif defined SRC_U128
-typedef __uint128_t src_t;
-typedef __uint128_t usrc_t;
-static __inline int clzSrcT(usrc_t x) { return __clzti2(x); }
-
-#endif
-
-#if defined DST_SINGLE
-typedef float dst_t;
-typedef uint32_t dst_rep_t;
-
-enum {
-  dstSigBits = 23,
-};
-
-#elif defined DST_DOUBLE
-typedef double dst_t;
-typedef uint64_t dst_rep_t;
-
-enum {
-  dstSigBits = 52,
-};
-
-#elif defined DST_QUAD
-typedef tf_float dst_t;
-typedef __uint128_t dst_rep_t;
-
-enum {
-  dstSigBits = 112,
-};
-
-#endif
-
-static __inline dst_t dstFromRep(dst_rep_t x) {
-  const union {
-    dst_t f;
-    dst_rep_t i;
-  } rep = {.i = x};
-  return rep.f;
-}
-
-#endif // _INT_TO_FP_EMIT
diff --git a/lib/impl/int_to_fp_impl.inc b/lib/impl/int_to_fp_impl.inc
@@ -12,7 +12,163 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "int_to_fp.h"
+// ---- int_to_fp.h (was a separate header; merged) ----
+#include "int_lib.h"
+
+// Self-clean any prior inclusion's per-(src, dst) state. May coexist with the
+// fp_extend / fp_trunc setup, which defines overlapping bare-name aliases.
+#undef _INT_TO_FP_SRC_SUF
+#undef _INT_TO_FP_DST_SUF
+#undef SRC_REP_C
+#undef DST_REP_C
+#undef src_t
+#undef usrc_t
+#undef dst_t
+#undef dst_rep_t
+#undef clzSrcT
+#undef dstFromRep
+#undef dstSigBits
+
+#if defined SRC_I64
+#define _INT_TO_FP_SRC_SUF i64
+#elif defined SRC_U64
+#define _INT_TO_FP_SRC_SUF u64
+#elif defined SRC_I128
+#define _INT_TO_FP_SRC_SUF i128
+#elif defined SRC_U128
+#define _INT_TO_FP_SRC_SUF u128
+#else
+#error Source should be a handled integer type.
+#endif
+
+#if defined DST_SINGLE
+#define _INT_TO_FP_DST_SUF sf
+#elif defined DST_DOUBLE
+#define _INT_TO_FP_DST_SUF df
+#elif defined DST_QUAD
+#define _INT_TO_FP_DST_SUF tf
+#else
+#error Destination should be a handled floating point type
+#endif
+
+#define _INT_TO_FP_PASTE4_(a, b, c, d) a##b##c##d
+#define _INT_TO_FP_PASTE4(a, b, c, d)  _INT_TO_FP_PASTE4_(a, b, c, d)
+#define _INT_TO_FP_PAIR(stem)          _INT_TO_FP_PASTE4(stem, _, _INT_TO_FP_SRC_SUF, _INT_TO_FP_DST_SUF)
+
+// ---- Bare-name aliases (re-set every inclusion). ------------------------
+
+#define src_t                       _INT_TO_FP_PAIR(src_t)
+#define usrc_t                      _INT_TO_FP_PAIR(usrc_t)
+#define dst_t                       _INT_TO_FP_PAIR(dst_t)
+#define dst_rep_t                   _INT_TO_FP_PAIR(dst_rep_t)
+#define clzSrcT                     _INT_TO_FP_PAIR(clzSrcT)
+#define dstFromRep                  _INT_TO_FP_PAIR(dstFromRep)
+#define dstSigBits                  _INT_TO_FP_PAIR(dstSigBits)
+
+// DST_REP_C: simple textual macro per dst.
+#if defined DST_SINGLE
+#define DST_REP_C UINT32_C
+#elif defined DST_DOUBLE
+#define DST_REP_C UINT64_C
+#elif defined DST_QUAD
+#define DST_REP_C (__uint128_t)
+#endif
+
+// ---- One-time emission per (TU, src+dst pair). --------------------------
+// Pairs cfree uses: (i64,u64) × (sf,df) and (i128,u128) × (sf,df,tf).
+
+#if defined SRC_I64 && defined DST_SINGLE && !defined INT_TO_FP_I64SF_EMITTED
+#define INT_TO_FP_I64SF_EMITTED
+#define _INT_TO_FP_EMIT 1
+#elif defined SRC_I64 && defined DST_DOUBLE && !defined INT_TO_FP_I64DF_EMITTED
+#define INT_TO_FP_I64DF_EMITTED
+#define _INT_TO_FP_EMIT 1
+#elif defined SRC_U64 && defined DST_SINGLE && !defined INT_TO_FP_U64SF_EMITTED
+#define INT_TO_FP_U64SF_EMITTED
+#define _INT_TO_FP_EMIT 1
+#elif defined SRC_U64 && defined DST_DOUBLE && !defined INT_TO_FP_U64DF_EMITTED
+#define INT_TO_FP_U64DF_EMITTED
+#define _INT_TO_FP_EMIT 1
+#elif defined SRC_I128 && defined DST_SINGLE && !defined INT_TO_FP_I128SF_EMITTED
+#define INT_TO_FP_I128SF_EMITTED
+#define _INT_TO_FP_EMIT 1
+#elif defined SRC_I128 && defined DST_DOUBLE && !defined INT_TO_FP_I128DF_EMITTED
+#define INT_TO_FP_I128DF_EMITTED
+#define _INT_TO_FP_EMIT 1
+#elif defined SRC_I128 && defined DST_QUAD && !defined INT_TO_FP_I128TF_EMITTED
+#define INT_TO_FP_I128TF_EMITTED
+#define _INT_TO_FP_EMIT 1
+#elif defined SRC_U128 && defined DST_SINGLE && !defined INT_TO_FP_U128SF_EMITTED
+#define INT_TO_FP_U128SF_EMITTED
+#define _INT_TO_FP_EMIT 1
+#elif defined SRC_U128 && defined DST_DOUBLE && !defined INT_TO_FP_U128DF_EMITTED
+#define INT_TO_FP_U128DF_EMITTED
+#define _INT_TO_FP_EMIT 1
+#elif defined SRC_U128 && defined DST_QUAD && !defined INT_TO_FP_U128TF_EMITTED
+#define INT_TO_FP_U128TF_EMITTED
+#define _INT_TO_FP_EMIT 1
+#endif
+
+#ifdef _INT_TO_FP_EMIT
+#undef _INT_TO_FP_EMIT
+
+#if defined SRC_I64
+typedef int64_t src_t;
+typedef uint64_t usrc_t;
+static __inline int clzSrcT(usrc_t x) { return __builtin_clzll(x); }
+
+#elif defined SRC_U64
+typedef uint64_t src_t;
+typedef uint64_t usrc_t;
+static __inline int clzSrcT(usrc_t x) { return __builtin_clzll(x); }
+
+#elif defined SRC_I128
+typedef __int128_t src_t;
+typedef __uint128_t usrc_t;
+static __inline int clzSrcT(usrc_t x) { return __clzti2(x); }
+
+#elif defined SRC_U128
+typedef __uint128_t src_t;
+typedef __uint128_t usrc_t;
+static __inline int clzSrcT(usrc_t x) { return __clzti2(x); }
+
+#endif
+
+#if defined DST_SINGLE
+typedef float dst_t;
+typedef uint32_t dst_rep_t;
+
+enum {
+  dstSigBits = 23,
+};
+
+#elif defined DST_DOUBLE
+typedef double dst_t;
+typedef uint64_t dst_rep_t;
+
+enum {
+  dstSigBits = 52,
+};
+
+#elif defined DST_QUAD
+typedef tf_float dst_t;
+typedef __uint128_t dst_rep_t;
+
+enum {
+  dstSigBits = 112,
+};
+
+#endif
+
+static __inline dst_t dstFromRep(dst_rep_t x) {
+  const union {
+    dst_t f;
+    dst_rep_t i;
+  } rep = {.i = x};
+  return rep.f;
+}
+
+#endif // _INT_TO_FP_EMIT
 
 #define __floatXiYf__ _INT_TO_FP_PAIR(__floatXiYf__)
 
diff --git a/lib/include/common/fp_lib.h b/lib/include/common/fp_lib.h
@@ -26,7 +26,6 @@
 
 #include "int_lib.h"
 #include "int_math.h"
-#include "int_types.h"
 #include <limits.h>
 #include <stdbool.h>
 #include <stdint.h>
diff --git a/lib/include/ilp32_le/int_endianness.h b/lib/include/ilp32_le/int_endianness.h
@@ -1,13 +0,0 @@
-//===-- int_endianness.h - LP64 little-endian ----------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//===----------------------------------------------------------------------===//
-#ifndef INT_ENDIANNESS_H
-#define INT_ENDIANNESS_H
-
-#define _YUGA_LITTLE_ENDIAN 1
-#define _YUGA_BIG_ENDIAN    0
-
-#endif
diff --git a/lib/include/ilp32_le/int_lib.h b/lib/include/ilp32_le/int_lib.h
@@ -29,7 +29,48 @@
 #include <stdbool.h>
 #include <stdint.h>
 
-#include "int_types.h"
+// ---- int_types.h (merged) ----
+#ifndef INT_TYPES_H
+#define INT_TYPES_H
+
+// (was int_endianness.h — only defined the markers below)
+#define _YUGA_LITTLE_ENDIAN 1
+#define _YUGA_BIG_ENDIAN    0
+
+#ifdef si_int
+#undef si_int
+#endif
+typedef int32_t  si_int;
+typedef uint32_t su_int;
+#define clzsi __builtin_clz
+#define ctzsi __builtin_ctz
+
+typedef int64_t  di_int;
+typedef uint64_t du_int;
+
+typedef union {
+  di_int all;
+  struct { su_int low; si_int high; } s;
+} dwords;
+
+typedef union {
+  du_int all;
+  struct { su_int low; su_int high; } s;
+} udwords;
+
+#define CRT_HAS_FLOATING_POINT 1
+
+typedef union { su_int u; float f; } float_bits;
+typedef union { udwords u; double f; } double_bits;
+typedef struct { udwords low; udwords high; } uqwords;
+
+typedef float       _Complex Fcomplex;
+typedef double      _Complex Dcomplex;
+typedef long double _Complex Lcomplex;
+#define COMPLEX_REAL(x)      __real__(x)
+#define COMPLEX_IMAGINARY(x) __imag__(x)
+
+#endif // INT_TYPES_H
 #include "int_util.h"
 
 COMPILER_RT_ABI int    __paritysi2(si_int a);
diff --git a/lib/include/ilp32_le/int_types.h b/lib/include/ilp32_le/int_types.h
@@ -1,48 +0,0 @@
-//===-- int_types.h - ILP32 little-endian --------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-// cfree-tailored: ILP32 little-endian. No 128-bit integer support.
-//===----------------------------------------------------------------------===//
-
-#ifndef INT_TYPES_H
-#define INT_TYPES_H
-
-#include "int_endianness.h"
-
-#ifdef si_int
-#undef si_int
-#endif
-typedef int32_t  si_int;
-typedef uint32_t su_int;
-#define clzsi __builtin_clz
-#define ctzsi __builtin_ctz
-
-typedef int64_t  di_int;
-typedef uint64_t du_int;
-
-typedef union {
-  di_int all;
-  struct { su_int low; si_int high; } s;
-} dwords;
-
-typedef union {
-  du_int all;
-  struct { su_int low; su_int high; } s;
-} udwords;
-
-#define CRT_HAS_FLOATING_POINT 1
-
-typedef union { su_int u; float f; } float_bits;
-typedef union { udwords u; double f; } double_bits;
-typedef struct { udwords low; udwords high; } uqwords;
-
-typedef float       _Complex Fcomplex;
-typedef double      _Complex Dcomplex;
-typedef long double _Complex Lcomplex;
-#define COMPLEX_REAL(x)      __real__(x)
-#define COMPLEX_IMAGINARY(x) __imag__(x)
-
-#endif // INT_TYPES_H
diff --git a/lib/include/llp64_le/int_endianness.h b/lib/include/llp64_le/int_endianness.h
@@ -1,13 +0,0 @@
-//===-- int_endianness.h - LP64 little-endian ----------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//===----------------------------------------------------------------------===//
-#ifndef INT_ENDIANNESS_H
-#define INT_ENDIANNESS_H
-
-#define _YUGA_LITTLE_ENDIAN 1
-#define _YUGA_BIG_ENDIAN    0
-
-#endif
diff --git a/lib/include/llp64_le/int_lib.h b/lib/include/llp64_le/int_lib.h
@@ -23,7 +23,69 @@
 #include <stdbool.h>
 #include <stdint.h>
 
-#include "int_types.h"
+// ---- int_types.h (merged) ----
+#ifndef INT_TYPES_H
+#define INT_TYPES_H
+
+// (was int_endianness.h — only defined the markers below)
+#define _YUGA_LITTLE_ENDIAN 1
+#define _YUGA_BIG_ENDIAN    0
+
+#ifdef si_int
+#undef si_int
+#endif
+typedef int32_t  si_int;
+typedef uint32_t su_int;
+#define clzsi __builtin_clz
+#define ctzsi __builtin_ctz
+
+typedef int64_t  di_int;
+typedef uint64_t du_int;
+
+typedef union {
+  di_int all;
+  struct { su_int low; si_int high; } s;
+} dwords;
+
+typedef union {
+  du_int all;
+  struct { su_int low; su_int high; } s;
+} udwords;
+
+#define CRT_HAS_128BIT
+typedef int      ti_int __attribute__((mode(TI)));
+typedef unsigned tu_int __attribute__((mode(TI)));
+
+typedef union {
+  ti_int all;
+  struct { du_int low; di_int high; } s;
+} twords;
+
+typedef union {
+  tu_int all;
+  struct { du_int low; du_int high; } s;
+} utwords;
+
+static __inline ti_int make_ti(di_int h, di_int l) {
+  twords r; r.s.high = h; r.s.low = l; return r.all;
+}
+static __inline tu_int make_tu(du_int h, du_int l) {
+  utwords r; r.s.high = h; r.s.low = l; return r.all;
+}
+
+#define CRT_HAS_FLOATING_POINT 1
+
+typedef union { su_int u; float f; } float_bits;
+typedef union { udwords u; double f; } double_bits;
+typedef struct { udwords low; udwords high; } uqwords;
+
+typedef float       _Complex Fcomplex;
+typedef double      _Complex Dcomplex;
+typedef long double _Complex Lcomplex;
+#define COMPLEX_REAL(x)      __real__(x)
+#define COMPLEX_IMAGINARY(x) __imag__(x)
+
+#endif // INT_TYPES_H
 #include "int_util.h"
 
 COMPILER_RT_ABI int    __paritysi2(si_int a);
diff --git a/lib/include/llp64_le/int_types.h b/lib/include/llp64_le/int_types.h
@@ -1,69 +0,0 @@
-//===-- int_types.h - LLP64 little-endian --------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-// cfree-tailored: LLP64 little-endian. Has 128-bit integer (__int128) support.
-//===----------------------------------------------------------------------===//
-
-#ifndef INT_TYPES_H
-#define INT_TYPES_H
-
-#include "int_endianness.h"
-
-#ifdef si_int
-#undef si_int
-#endif
-typedef int32_t  si_int;
-typedef uint32_t su_int;
-#define clzsi __builtin_clz
-#define ctzsi __builtin_ctz
-
-typedef int64_t  di_int;
-typedef uint64_t du_int;
-
-typedef union {
-  di_int all;
-  struct { su_int low; si_int high; } s;
-} dwords;
-
-typedef union {
-  du_int all;
-  struct { su_int low; su_int high; } s;
-} udwords;
-
-#define CRT_HAS_128BIT
-typedef int      ti_int __attribute__((mode(TI)));
-typedef unsigned tu_int __attribute__((mode(TI)));
-
-typedef union {
-  ti_int all;
-  struct { du_int low; di_int high; } s;
-} twords;
-
-typedef union {
-  tu_int all;
-  struct { du_int low; du_int high; } s;
-} utwords;
-
-static __inline ti_int make_ti(di_int h, di_int l) {
-  twords r; r.s.high = h; r.s.low = l; return r.all;
-}
-static __inline tu_int make_tu(du_int h, du_int l) {
-  utwords r; r.s.high = h; r.s.low = l; return r.all;
-}
-
-#define CRT_HAS_FLOATING_POINT 1
-
-typedef union { su_int u; float f; } float_bits;
-typedef union { udwords u; double f; } double_bits;
-typedef struct { udwords low; udwords high; } uqwords;
-
-typedef float       _Complex Fcomplex;
-typedef double      _Complex Dcomplex;
-typedef long double _Complex Lcomplex;
-#define COMPLEX_REAL(x)      __real__(x)
-#define COMPLEX_IMAGINARY(x) __imag__(x)
-
-#endif // INT_TYPES_H
diff --git a/lib/include/lp64_le/int_endianness.h b/lib/include/lp64_le/int_endianness.h
@@ -1,13 +0,0 @@
-//===-- int_endianness.h - LP64 little-endian ----------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//===----------------------------------------------------------------------===//
-#ifndef INT_ENDIANNESS_H
-#define INT_ENDIANNESS_H
-
-#define _YUGA_LITTLE_ENDIAN 1
-#define _YUGA_BIG_ENDIAN    0
-
-#endif
diff --git a/lib/include/lp64_le/int_lib.h b/lib/include/lp64_le/int_lib.h
@@ -25,7 +25,74 @@
 #include <stdbool.h>
 #include <stdint.h>
 
-#include "int_types.h"
+// ---- int_types.h (merged) ----
+#ifndef INT_TYPES_H
+#define INT_TYPES_H
+
+// (was int_endianness.h — only defined the markers below)
+#define _YUGA_LITTLE_ENDIAN 1
+#define _YUGA_BIG_ENDIAN    0
+
+#ifdef si_int
+#undef si_int
+#endif
+typedef int32_t  si_int;
+typedef uint32_t su_int;
+#define clzsi __builtin_clz
+#define ctzsi __builtin_ctz
+
+typedef int64_t  di_int;
+typedef uint64_t du_int;
+
+typedef union {
+  di_int all;
+  struct { su_int low; si_int high; } s;
+} dwords;
+
+typedef union {
+  du_int all;
+  struct { su_int low; su_int high; } s;
+} udwords;
+
+#define CRT_HAS_128BIT
+typedef int      ti_int __attribute__((mode(TI)));
+typedef unsigned tu_int __attribute__((mode(TI)));
+
+typedef union {
+  ti_int all;
+  struct { du_int low; di_int high; } s;
+} twords;
+
+typedef union {
+  tu_int all;
+  struct { du_int low; du_int high; } s;
+} utwords;
+
+static __inline ti_int make_ti(di_int h, di_int l) {
+  twords r; r.s.high = h; r.s.low = l; return r.all;
+}
+static __inline tu_int make_tu(du_int h, du_int l) {
+  utwords r; r.s.high = h; r.s.low = l; return r.all;
+}
+
+#define CRT_HAS_FLOATING_POINT 1
+
+typedef union { su_int u; float f; } float_bits;
+typedef union { udwords u; double f; } double_bits;
+
+typedef struct { udwords low; udwords high; } uqwords;
+
+// IEEE binary128 (tf_float / CRT_HAS_TF_MODE) is supplied via
+// include/lp64_le_ldbl128/tf_supplement.h when compiling lib/fp_tf/. Not
+// available in this base header to keep it free of feature ifdefs.
+
+typedef float       _Complex Fcomplex;
+typedef double      _Complex Dcomplex;
+typedef long double _Complex Lcomplex;
+#define COMPLEX_REAL(x)      __real__(x)
+#define COMPLEX_IMAGINARY(x) __imag__(x)
+
+#endif // INT_TYPES_H
 #include "int_util.h"
 
 COMPILER_RT_ABI int    __paritysi2(si_int a);
diff --git a/lib/include/lp64_le/int_types.h b/lib/include/lp64_le/int_types.h
@@ -1,74 +0,0 @@
-//===-- int_types.h - LP64 little-endian ---------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-// cfree-tailored: LP64 little-endian. Has 128-bit integer (__int128) support.
-//===----------------------------------------------------------------------===//
-
-#ifndef INT_TYPES_H
-#define INT_TYPES_H
-
-#include "int_endianness.h"
-
-#ifdef si_int
-#undef si_int
-#endif
-typedef int32_t  si_int;
-typedef uint32_t su_int;
-#define clzsi __builtin_clz
-#define ctzsi __builtin_ctz
-
-typedef int64_t  di_int;
-typedef uint64_t du_int;
-
-typedef union {
-  di_int all;
-  struct { su_int low; si_int high; } s;
-} dwords;
-
-typedef union {
-  du_int all;
-  struct { su_int low; su_int high; } s;
-} udwords;
-
-#define CRT_HAS_128BIT
-typedef int      ti_int __attribute__((mode(TI)));
-typedef unsigned tu_int __attribute__((mode(TI)));
-
-typedef union {
-  ti_int all;
-  struct { du_int low; di_int high; } s;
-} twords;
-
-typedef union {
-  tu_int all;
-  struct { du_int low; du_int high; } s;
-} utwords;
-
-static __inline ti_int make_ti(di_int h, di_int l) {
-  twords r; r.s.high = h; r.s.low = l; return r.all;
-}
-static __inline tu_int make_tu(du_int h, du_int l) {
-  utwords r; r.s.high = h; r.s.low = l; return r.all;
-}
-
-#define CRT_HAS_FLOATING_POINT 1
-
-typedef union { su_int u; float f; } float_bits;
-typedef union { udwords u; double f; } double_bits;
-
-typedef struct { udwords low; udwords high; } uqwords;
-
-// IEEE binary128 (tf_float / CRT_HAS_TF_MODE) is supplied via
-// include/lp64_le_ldbl128/tf_supplement.h when compiling lib/fp_tf/. Not
-// available in this base header to keep it free of feature ifdefs.
-
-typedef float       _Complex Fcomplex;
-typedef double      _Complex Dcomplex;
-typedef long double _Complex Lcomplex;
-#define COMPLEX_REAL(x)      __real__(x)
-#define COMPLEX_IMAGINARY(x) __imag__(x)
-
-#endif // INT_TYPES_H
diff --git a/lib/include/lp64_le_ldbl128/tf_supplement.h b/lib/include/lp64_le_ldbl128/tf_supplement.h
@@ -8,11 +8,11 @@
 #ifndef TF_SUPPLEMENT_H
 #define TF_SUPPLEMENT_H
 
-// Pre-included before the translation unit, so the int_types.h pull happens
-// before stdint.h has been brought in by int_lib.h. Bring it in directly.
+// Pre-included before the translation unit; bring int_lib.h in directly so
+// the per-target typedefs (uqwords, Lcomplex) it defines are available below.
 #include <stdint.h>
 
-#include "int_types.h"
+#include "int_lib.h"
 
 typedef long double tf_float;
 #define CRT_LDBL_128BIT
diff --git a/lib/int/absvdi2.c b/lib/int/absvdi2.c
@@ -1,25 +0,0 @@
-//===-- absvdi2.c - Implement __absvdi2 -----------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __absvdi2 for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-// Returns: absolute value
-
-// Effects: aborts if abs(x) < 0
-
-COMPILER_RT_ABI di_int __absvdi2(di_int a) {
-  const int N = (int)(sizeof(di_int) * CHAR_BIT);
-  if (a == ((di_int)((du_int)1 << (N - 1))))
-    compilerrt_abort();
-  const di_int t = a >> (N - 1);
-  return (a ^ t) - t;
-}
diff --git a/lib/int/bswapdi2.c b/lib/int/bswapdi2.c
@@ -1,25 +0,0 @@
-//===-- bswapdi2.c - Implement __bswapdi2 ---------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __bswapdi2 for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-COMPILER_RT_ABI uint64_t __bswapdi2(uint64_t u) {
-  return (
-      (((u)&0xff00000000000000ULL) >> 56) |
-      (((u)&0x00ff000000000000ULL) >> 40) |
-      (((u)&0x0000ff0000000000ULL) >> 24) |
-      (((u)&0x000000ff00000000ULL) >> 8)  |
-      (((u)&0x00000000ff000000ULL) << 8)  |
-      (((u)&0x0000000000ff0000ULL) << 24) |
-      (((u)&0x000000000000ff00ULL) << 40) |
-      (((u)&0x00000000000000ffULL) << 56));
-}
diff --git a/lib/int/bswapsi2.c b/lib/int/bswapsi2.c
@@ -1,20 +0,0 @@
-//===-- bswapsi2.c - Implement __bswapsi2 ---------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __bswapsi2 for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-COMPILER_RT_ABI uint32_t __bswapsi2(uint32_t u) {
-  return ((((u)&0xff000000) >> 24) |
-          (((u)&0x00ff0000) >> 8)  |
-          (((u)&0x0000ff00) << 8)  |
-          (((u)&0x000000ff) << 24));
-}
diff --git a/lib/int/clzdi2.c b/lib/int/clzdi2.c
@@ -1,25 +0,0 @@
-//===-- clzdi2.c - Implement __clzdi2 -------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __clzdi2 for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-// Returns: the number of leading 0-bits
-
-// Precondition: a != 0
-
-COMPILER_RT_ABI int __clzdi2(di_int a) {
-  dwords x;
-  x.all = a;
-  const si_int f = -(x.s.high == 0);
-  return clzsi((x.s.high & ~f) | (x.s.low & f)) +
-         (f & ((si_int)(sizeof(si_int) * CHAR_BIT)));
-}
diff --git a/lib/int/clzsi2.c b/lib/int/clzsi2.c
@@ -1,48 +0,0 @@
-//===-- clzsi2.c - Implement __clzsi2 -------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __clzsi2 for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-// Returns: the number of leading 0-bits
-
-// Precondition: a != 0
-
-COMPILER_RT_ABI int __clzsi2(si_int a) {
-  su_int x = (su_int)a;
-  si_int t = ((x & 0xFFFF0000) == 0) << 4; // if (x is small) t = 16 else 0
-  x >>= 16 - t;                            // x = [0 - 0xFFFF]
-  su_int r = t;                            // r = [0, 16]
-  // return r + clz(x)
-  t = ((x & 0xFF00) == 0) << 3;
-  x >>= 8 - t; // x = [0 - 0xFF]
-  r += t;      // r = [0, 8, 16, 24]
-  // return r + clz(x)
-  t = ((x & 0xF0) == 0) << 2;
-  x >>= 4 - t; // x = [0 - 0xF]
-  r += t;      // r = [0, 4, 8, 12, 16, 20, 24, 28]
-  // return r + clz(x)
-  t = ((x & 0xC) == 0) << 1;
-  x >>= 2 - t; // x = [0 - 3]
-  r += t;      // r = [0 - 30] and is even
-  // return r + clz(x)
-  //     switch (x)
-  //     {
-  //     case 0:
-  //         return r + 2;
-  //     case 1:
-  //         return r + 1;
-  //     case 2:
-  //     case 3:
-  //         return r;
-  //     }
-  return r + ((2 - x) & -((x & 2) == 0));
-}
diff --git a/lib/int/cmpdi2.c b/lib/int/cmpdi2.c
@@ -1,34 +0,0 @@
-//===-- cmpdi2.c - Implement __cmpdi2 -------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __cmpdi2 for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-// Returns: if (a <  b) returns 0
-//           if (a == b) returns 1
-//           if (a >  b) returns 2
-
-COMPILER_RT_ABI si_int __cmpdi2(di_int a, di_int b) {
-  dwords x;
-  x.all = a;
-  dwords y;
-  y.all = b;
-  if (x.s.high < y.s.high)
-    return 0;
-  if (x.s.high > y.s.high)
-    return 2;
-  if (x.s.low < y.s.low)
-    return 0;
-  if (x.s.low > y.s.low)
-    return 2;
-  return 1;
-}
-
diff --git a/lib/int/ctzdi2.c b/lib/int/ctzdi2.c
@@ -1,25 +0,0 @@
-//===-- ctzdi2.c - Implement __ctzdi2 -------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __ctzdi2 for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-// Returns: the number of trailing 0-bits
-
-// Precondition: a != 0
-
-COMPILER_RT_ABI int __ctzdi2(di_int a) {
-  dwords x;
-  x.all = a;
-  const si_int f = -(x.s.low == 0);
-  return ctzsi((x.s.high & f) | (x.s.low & ~f)) +
-         (f & ((si_int)(sizeof(si_int) * CHAR_BIT)));
-}
diff --git a/lib/int/ctzsi2.c b/lib/int/ctzsi2.c
@@ -1,53 +0,0 @@
-//===-- ctzsi2.c - Implement __ctzsi2 -------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __ctzsi2 for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-// Returns: the number of trailing 0-bits
-
-// Precondition: a != 0
-
-COMPILER_RT_ABI int __ctzsi2(si_int a) {
-  su_int x = (su_int)a;
-  si_int t = ((x & 0x0000FFFF) == 0)
-             << 4; // if (x has no small bits) t = 16 else 0
-  x >>= t;         // x = [0 - 0xFFFF] + higher garbage bits
-  su_int r = t;    // r = [0, 16]
-  // return r + ctz(x)
-  t = ((x & 0x00FF) == 0) << 3;
-  x >>= t; // x = [0 - 0xFF] + higher garbage bits
-  r += t;  // r = [0, 8, 16, 24]
-  // return r + ctz(x)
-  t = ((x & 0x0F) == 0) << 2;
-  x >>= t; // x = [0 - 0xF] + higher garbage bits
-  r += t;  // r = [0, 4, 8, 12, 16, 20, 24, 28]
-  // return r + ctz(x)
-  t = ((x & 0x3) == 0) << 1;
-  x >>= t;
-  x &= 3; // x = [0 - 3]
-  r += t; // r = [0 - 30] and is even
-  // return r + ctz(x)
-
-  //  The branch-less return statement below is equivalent
-  //  to the following switch statement:
-  //     switch (x)
-  //    {
-  //     case 0:
-  //         return r + 2;
-  //     case 2:
-  //         return r + 1;
-  //     case 1:
-  //     case 3:
-  //         return r;
-  //     }
-  return r + ((2 - (x >> 1)) & -((x & 1) == 0));
-}
diff --git a/lib/int/divdi3.c b/lib/int/divdi3.c
@@ -1,25 +0,0 @@
-//===-- divdi3.c - Implement __divdi3 -------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __divdi3 for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-// Returns: a / b
-
-#define fixint_t di_int
-#define fixuint_t du_int
-#define INT_DIV_SUFFIX divdi3
-#define COMPUTE_UDIV(a, b) __udivmoddi4((a), (b), (du_int *)0)
-#include "int_div_impl.inc"
-
-COMPILER_RT_ABI di_int __divdi3(di_int a, di_int b) {
-  return __divXi3_divdi3(a, b);
-}
diff --git a/lib/int/divmoddi4.c b/lib/int/divmoddi4.c
@@ -1,28 +0,0 @@
-//===-- divmoddi4.c - Implement __divmoddi4 -------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __divmoddi4 for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-// Returns: a / b, *rem = a % b
-
-COMPILER_RT_ABI di_int __divmoddi4(di_int a, di_int b, di_int *rem) {
-  const int bits_in_dword_m1 = (int)(sizeof(di_int) * CHAR_BIT) - 1;
-  di_int s_a = a >> bits_in_dword_m1;                   // s_a = a < 0 ? -1 : 0
-  di_int s_b = b >> bits_in_dword_m1;                   // s_b = b < 0 ? -1 : 0
-  a = (du_int)(a ^ s_a) - s_a;                          // negate if s_a == -1
-  b = (du_int)(b ^ s_b) - s_b;                          // negate if s_b == -1
-  s_b ^= s_a;                                           // sign of quotient
-  du_int r;
-  di_int q = (__udivmoddi4(a, b, &r) ^ s_b) - s_b;      // negate if s_b == -1
-  *rem = (r ^ s_a) - s_a;                               // negate if s_a == -1
-  return q;
-}
diff --git a/lib/int/ffsdi2.c b/lib/int/ffsdi2.c
@@ -1,27 +0,0 @@
-//===-- ffsdi2.c - Implement __ffsdi2 -------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __ffsdi2 for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-// Returns: the index of the least significant 1-bit in a, or
-// the value zero if a is zero. The least significant bit is index one.
-
-COMPILER_RT_ABI int __ffsdi2(di_int a) {
-  dwords x;
-  x.all = a;
-  if (x.s.low == 0) {
-    if (x.s.high == 0)
-      return 0;
-    return ctzsi(x.s.high) + (1 + sizeof(si_int) * CHAR_BIT);
-  }
-  return ctzsi(x.s.low) + 1;
-}
diff --git a/lib/int/int.c b/lib/int/int.c
@@ -0,0 +1,558 @@
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Consolidated per-op runtime helpers for cfree's libcfree_rt.a.
+// The build compiles only this one file per directory; the former per-op .c
+// files are pasted inline below under "// ---- <name>.c ----" markers.
+// License: Apache-2.0 WITH LLVM-exception (see lib/LICENSE-compiler-rt.txt).
+
+// int_util.c first: defines __compilerrt_abort_impl used by other helpers.
+// ---- int_util.c ----
+#include "int_lib.h"
+// Abort hook: ignores its three arguments and stops via __builtin_trap().
+__attribute__((weak))
+__attribute__((visibility("hidden")))
+void __compilerrt_abort_impl(const char *file, int line, const char *function) {
+  (void)file;      // the location arguments are accepted but never used
+  (void)line;
+  (void)function;
+  __builtin_trap();
+}
+
+// udivmoddi4.c next: __udivmoddi4 is called by divdi3, moddi3, divmoddi4,
+// udivdi3, umoddi3 — must be defined before those callers.
+// ---- udivmoddi4.c ----
+#include "int_lib.h"
+
+// Unsigned du_int division with an optional remainder.
+// Effects: if rem != 0, *rem = a % b
+// Returns: a / b
+// Note: when b == 0 the special-case paths below divide an su_int by zero.
+// Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide
+
+COMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int *rem) {
+  const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT;
+  const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
+  udwords n;
+  n.all = a;
+  udwords d;
+  d.all = b;
+  udwords q;
+  udwords r;
+  unsigned sr;
+  // special cases, X is unknown, K != 0
+  if (n.s.high == 0) {
+    if (d.s.high == 0) {
+      // 0 X
+      // ---
+      // 0 X
+      if (rem)
+        *rem = n.s.low % d.s.low;
+      return n.s.low / d.s.low;
+    }
+    // 0 X
+    // ---
+    // K X
+    if (rem)
+      *rem = n.s.low;
+    return 0;
+  }
+  // n.s.high != 0
+  if (d.s.low == 0) {
+    if (d.s.high == 0) {
+      // K X
+      // ---
+      // 0 0   (b == 0: both operations below divide by zero)
+      if (rem)
+        *rem = n.s.high % d.s.low;
+      return n.s.high / d.s.low;
+    }
+    // d.s.high != 0
+    if (n.s.low == 0) {
+      // K 0
+      // ---
+      // K 0
+      if (rem) {
+        r.s.high = n.s.high % d.s.high;
+        r.s.low = 0;
+        *rem = r.all;
+      }
+      return n.s.high / d.s.high;
+    }
+    // K K
+    // ---
+    // K 0
+    if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */ {
+      if (rem) {
+        r.s.low = n.s.low;
+        r.s.high = n.s.high & (d.s.high - 1);
+        *rem = r.all;
+      }
+      return n.s.high >> ctzsi(d.s.high);
+    }
+    // K K
+    // ---
+    // K 0
+    sr = clzsi(d.s.high) - clzsi(n.s.high);
+    // 0 <= sr <= n_uword_bits - 2 or sr large
+    if (sr > n_uword_bits - 2) {
+      if (rem)
+        *rem = n.all;
+      return 0;
+    }
+    ++sr;
+    // 1 <= sr <= n_uword_bits - 1
+    // q.all = n.all << (n_udword_bits - sr);
+    q.s.low = 0;
+    q.s.high = n.s.low << (n_uword_bits - sr);
+    // r.all = n.all >> sr;
+    r.s.high = n.s.high >> sr;
+    r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
+  } else /* d.s.low != 0 */ {
+    if (d.s.high == 0) {
+      // K X
+      // ---
+      // 0 K
+      if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */ {
+        if (rem)
+          *rem = n.s.low & (d.s.low - 1);
+        if (d.s.low == 1)
+          return n.all;
+        sr = ctzsi(d.s.low);
+        q.s.high = n.s.high >> sr;
+        q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
+        return q.all;
+      }
+      // K X
+      // ---
+      // 0 K
+      sr = 1 + n_uword_bits + clzsi(d.s.low) - clzsi(n.s.high);
+      // 2 <= sr <= n_udword_bits - 1
+      // q.all = n.all << (n_udword_bits - sr);
+      // r.all = n.all >> sr;
+      if (sr == n_uword_bits) {
+        q.s.low = 0;
+        q.s.high = n.s.low;
+        r.s.high = 0;
+        r.s.low = n.s.high;
+      } else if (sr < n_uword_bits) /* 2 <= sr <= n_uword_bits - 1 */ {
+        q.s.low = 0;
+        q.s.high = n.s.low << (n_uword_bits - sr);
+        r.s.high = n.s.high >> sr;
+        r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
+      } else /* n_uword_bits + 1 <= sr <= n_udword_bits - 1 */ {
+        q.s.low = n.s.low << (n_udword_bits - sr);
+        q.s.high = (n.s.high << (n_udword_bits - sr)) |
+                   (n.s.low >> (sr - n_uword_bits));
+        r.s.high = 0;
+        r.s.low = n.s.high >> (sr - n_uword_bits);
+      }
+    } else {
+      // K X
+      // ---
+      // K K
+      sr = clzsi(d.s.high) - clzsi(n.s.high);
+      // 0 <= sr <= n_uword_bits - 1 or sr large
+      if (sr > n_uword_bits - 1) {
+        if (rem)
+          *rem = n.all;
+        return 0;
+      }
+      ++sr;
+      // 1 <= sr <= n_uword_bits
+      // q.all = n.all << (n_udword_bits - sr);
+      q.s.low = 0;
+      if (sr == n_uword_bits) {
+        q.s.high = n.s.low;
+        r.s.high = 0;
+        r.s.low = n.s.high;
+      } else {
+        q.s.high = n.s.low << (n_uword_bits - sr);
+        r.s.high = n.s.high >> sr;
+        r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
+      }
+    }
+  }
+  // Not a special case
+  // q and r are initialized with:
+  // q.all = n.all << (n_udword_bits - sr);
+  // r.all = n.all >> sr;
+  // 1 <= sr <= n_udword_bits - 1
+  su_int carry = 0;
+  for (; sr > 0; --sr) {
+    // r:q = ((r:q)  << 1) | carry
+    r.s.high = (r.s.high << 1) | (r.s.low >> (n_uword_bits - 1));
+    r.s.low = (r.s.low << 1) | (q.s.high >> (n_uword_bits - 1));
+    q.s.high = (q.s.high << 1) | (q.s.low >> (n_uword_bits - 1));
+    q.s.low = (q.s.low << 1) | carry;
+    // carry = 0;
+    // if (r.all >= d.all)
+    // {
+    //      r.all -= d.all;
+    //      carry = 1;
+    // }
+    const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1);
+    carry = s & 1;
+    r.all -= d.all & s;
+  }
+  q.all = (q.all << 1) | carry;
+  if (rem)
+    *rem = r.all;
+  return q.all;
+}
+
+
+// Leaf ops (no intra-directory dependencies):
+// ---- absvdi2.c ----
+#include "int_lib.h"
+
+// Returns: the absolute value of a.
+//
+// Effects: aborts if abs(a) would be negative, i.e. when a is the value with
+// only its top bit set.
+COMPILER_RT_ABI di_int __absvdi2(di_int a) {
+  const int N = (int)(sizeof(di_int) * CHAR_BIT);  // width of di_int in bits
+  if (a == ((di_int)((du_int)1 << (N - 1))))
+    compilerrt_abort();
+  const di_int t = a >> (N - 1);  // t = a < 0 ? -1 : 0
+  return (a ^ t) - t;             // negate if t == -1
+}
+// ---- bswapdi2.c ----
+#include "int_lib.h"
+// Returns: u with the order of its eight bytes reversed.
+COMPILER_RT_ABI uint64_t __bswapdi2(uint64_t u) {
+  return (
+      (((u)&0xff00000000000000ULL) >> 56) |  // top byte moves to the bottom
+      (((u)&0x00ff000000000000ULL) >> 40) |
+      (((u)&0x0000ff0000000000ULL) >> 24) |
+      (((u)&0x000000ff00000000ULL) >> 8)  |
+      (((u)&0x00000000ff000000ULL) << 8)  |
+      (((u)&0x0000000000ff0000ULL) << 24) |
+      (((u)&0x000000000000ff00ULL) << 40) |
+      (((u)&0x00000000000000ffULL) << 56));  // bottom byte moves to the top
+}
+// ---- bswapsi2.c ----
+#include "int_lib.h"
+// Returns: u with the order of its four bytes reversed.
+COMPILER_RT_ABI uint32_t __bswapsi2(uint32_t u) {
+  return ((((u)&0xff000000) >> 24) |  // top byte moves to the bottom
+          (((u)&0x00ff0000) >> 8)  |
+          (((u)&0x0000ff00) << 8)  |
+          (((u)&0x000000ff) << 24));  // bottom byte moves to the top
+}
+// ---- clzdi2.c ----
+#include "int_lib.h"
+
+// Returns: the number of leading 0-bits
+// Precondition: a != 0
+// f is all-ones when the high word is zero; the count then comes from the
+// low word plus the bit width of si_int. Otherwise f is 0 and only high counts.
+COMPILER_RT_ABI int __clzdi2(di_int a) {
+  dwords x;
+  x.all = a;
+  const si_int f = -(x.s.high == 0);
+  return clzsi((x.s.high & ~f) | (x.s.low & f)) +
+         (f & ((si_int)(sizeof(si_int) * CHAR_BIT)));
+}
+// ---- clzsi2.c ----
+#include "int_lib.h"
+
+// Returns: the number of leading 0-bits
+// Precondition: a != 0
+// Binary search: each step tests the upper half of the remaining field; when
+// it is empty its width is added to r, otherwise x is shifted down by it.
+COMPILER_RT_ABI int __clzsi2(si_int a) {
+  su_int x = (su_int)a;
+  si_int t = ((x & 0xFFFF0000) == 0) << 4; // if (x is small) t = 16 else 0
+  x >>= 16 - t;                            // x = [0 - 0xFFFF]
+  su_int r = t;                            // r = [0, 16]
+  // return r + clz(x)
+  t = ((x & 0xFF00) == 0) << 3;
+  x >>= 8 - t; // x = [0 - 0xFF]
+  r += t;      // r = [0, 8, 16, 24]
+  // return r + clz(x)
+  t = ((x & 0xF0) == 0) << 2;
+  x >>= 4 - t; // x = [0 - 0xF]
+  r += t;      // r = [0, 4, 8, 12, 16, 20, 24, 28]
+  // return r + clz(x)
+  t = ((x & 0xC) == 0) << 1;
+  x >>= 2 - t; // x = [0 - 3]
+  r += t;      // r = [0 - 30] and is even
+  // return r + clz(x)
+  //     switch (x)
+  //     {
+  //     case 0:
+  //         return r + 2;
+  //     case 1:
+  //         return r + 1;
+  //     case 2:
+  //     case 3:
+  //         return r;
+  //     }
+  return r + ((2 - x) & -((x & 2) == 0));
+}
+// ---- cmpdi2.c ----
+#include "int_lib.h"
+
+// Returns: if (a <  b) returns 0
+//           if (a == b) returns 1
+//           if (a >  b) returns 2
+// The high words decide first; the low words only break a tie between them.
+COMPILER_RT_ABI si_int __cmpdi2(di_int a, di_int b) {
+  dwords x;
+  x.all = a;
+  dwords y;
+  y.all = b;
+  if (x.s.high < y.s.high)
+    return 0;
+  if (x.s.high > y.s.high)
+    return 2;
+  if (x.s.low < y.s.low)
+    return 0;
+  if (x.s.low > y.s.low)
+    return 2;
+  return 1;
+}
+
+// ---- ctzdi2.c ----
+#include "int_lib.h"
+
+// Returns: the number of trailing 0-bits
+// Precondition: a != 0
+// f is all-ones when the low word is zero; the count then comes from the
+// high word plus the bit width of si_int. Otherwise f is 0 and only low counts.
+COMPILER_RT_ABI int __ctzdi2(di_int a) {
+  dwords x;
+  x.all = a;
+  const si_int f = -(x.s.low == 0);
+  return ctzsi((x.s.high & f) | (x.s.low & ~f)) +
+         (f & ((si_int)(sizeof(si_int) * CHAR_BIT)));
+}
+// ---- ctzsi2.c ----
+#include "int_lib.h"
+
+// Returns: the number of trailing 0-bits
+// Precondition: a != 0
+// Binary search: each step tests the lower half of the remaining field; when
+// it is empty, x is shifted down past it and its width is added to r.
+COMPILER_RT_ABI int __ctzsi2(si_int a) {
+  su_int x = (su_int)a;
+  si_int t = ((x & 0x0000FFFF) == 0)
+             << 4; // if (x has no small bits) t = 16 else 0
+  x >>= t;         // x = [0 - 0xFFFF] + higher garbage bits
+  su_int r = t;    // r = [0, 16]
+  // return r + ctz(x)
+  t = ((x & 0x00FF) == 0) << 3;
+  x >>= t; // x = [0 - 0xFF] + higher garbage bits
+  r += t;  // r = [0, 8, 16, 24]
+  // return r + ctz(x)
+  t = ((x & 0x0F) == 0) << 2;
+  x >>= t; // x = [0 - 0xF] + higher garbage bits
+  r += t;  // r = [0, 4, 8, 12, 16, 20, 24, 28]
+  // return r + ctz(x)
+  t = ((x & 0x3) == 0) << 1;
+  x >>= t;
+  x &= 3; // x = [0 - 3]
+  r += t; // r = [0 - 30] and is even
+  // return r + ctz(x)
+
+  //  The branch-less return statement below is equivalent
+  //  to the following switch statement:
+  //     switch (x)
+  //    {
+  //     case 0:
+  //         return r + 2;
+  //     case 2:
+  //         return r + 1;
+  //     case 1:
+  //     case 3:
+  //         return r;
+  //     }
+  return r + ((2 - (x >> 1)) & -((x & 1) == 0));
+}
+// ---- ffsdi2.c ----
+#include "int_lib.h"
+
+// Returns: the index of the least significant 1-bit in a, or
+// the value zero if a is zero. The least significant bit is index one.
+// A zero low word defers to the high word, offset by the bit width of si_int.
+COMPILER_RT_ABI int __ffsdi2(di_int a) {
+  dwords x;
+  x.all = a;
+  if (x.s.low == 0) {
+    if (x.s.high == 0)
+      return 0;
+    return ctzsi(x.s.high) + (1 + sizeof(si_int) * CHAR_BIT);
+  }
+  return ctzsi(x.s.low) + 1;
+}
+// ---- negdi2.c ----
+#include "int_lib.h"
+
+// Returns: -a
+// The negation is performed on the du_int cast of a, i.e. in unsigned math.
+COMPILER_RT_ABI di_int __negdi2(di_int a) {
+  // Note: this routine is here for API compatibility; any sane compiler
+  // should expand it inline.
+  return -(du_int)a;
+}
+// ---- paritydi2.c ----
+#include "int_lib.h"
+
+// Returns: 1 if number of bits is odd else returns 0
+// XOR-folds both words down to a 4-bit value, then looks its parity up.
+COMPILER_RT_ABI int __paritydi2(di_int a) {
+  dwords x;
+  x.all = a;
+  su_int x2 = x.s.high ^ x.s.low;
+  x2 ^= x2 >> 16;
+  x2 ^= x2 >> 8;
+  x2 ^= x2 >> 4;
+  return (0x6996 >> (x2 & 0xF)) & 1;  // bit i of 0x6996 is the parity of i
+}
+// ---- paritysi2.c ----
+#include "int_lib.h"
+
+// Returns: 1 if number of bits is odd else returns 0
+// XOR-folds the word down to a 4-bit value, then looks its parity up.
+COMPILER_RT_ABI int __paritysi2(si_int a) {
+  su_int x = (su_int)a;
+  x ^= x >> 16;
+  x ^= x >> 8;
+  x ^= x >> 4;
+  return (0x6996 >> (x & 0xF)) & 1;  // bit i of 0x6996 is the parity of i
+}
+// ---- popcountdi2.c ----
+#include "int_lib.h"
+
+// Returns: count of 1 bits
+// Sums adjacent bit fields in parallel, doubling the field width each step.
+COMPILER_RT_ABI int __popcountdi2(di_int a) {
+  du_int x2 = (du_int)a;
+  x2 = x2 - ((x2 >> 1) & 0x5555555555555555uLL);
+  // Every 2 bits holds the sum of every pair of bits (32)
+  x2 = ((x2 >> 2) & 0x3333333333333333uLL) + (x2 & 0x3333333333333333uLL);
+  // Every 4 bits holds the sum of every 4-set of bits (3 significant bits) (16)
+  x2 = (x2 + (x2 >> 4)) & 0x0F0F0F0F0F0F0F0FuLL;
+  // Every 8 bits holds the sum of every 8-set of bits (4 significant bits) (8)
+  su_int x = (su_int)(x2 + (x2 >> 32));
+  // Each byte of x holds the count for 16 input bits (5 significant bits).
+  //   The su_int cast discards the upper half of the sum.
+  x = x + (x >> 16);
+  // The lower two bytes each hold the count for 32 input bits (6 significant
+  //   bits). Upper 16 bits are garbage
+  return (x + (x >> 8)) & 0x0000007F; // (7 significant bits)
+}
+// ---- popcountsi2.c ----
+#include "int_lib.h"
+
+// Returns: count of 1 bits
+// Sums adjacent bit fields in parallel, doubling the field width each step.
+COMPILER_RT_ABI int __popcountsi2(si_int a) {
+  su_int x = (su_int)a;
+  x = x - ((x >> 1) & 0x55555555);
+  // Every 2 bits holds the sum of every pair of bits
+  x = ((x >> 2) & 0x33333333) + (x & 0x33333333);
+  // Every 4 bits holds the sum of every 4-set of bits (3 significant bits)
+  x = (x + (x >> 4)) & 0x0F0F0F0F;
+  // Every 8 bits holds the sum of every 8-set of bits (4 significant bits)
+  x = (x + (x >> 16));
+  // The lower two bytes each hold the count for 16 input bits (5 significant
+  //    bits). Upper 16 bits are garbage
+  return (x + (x >> 8)) & 0x0000003F; // (6 significant bits)
+}
+// ---- ucmpdi2.c ----
+#include "int_lib.h"
+
+// Returns:  if (a <  b) returns 0
+//           if (a == b) returns 1
+//           if (a >  b) returns 2
+// The high words decide first; the low words only break a tie between them.
+COMPILER_RT_ABI si_int __ucmpdi2(du_int a, du_int b) {
+  udwords x;
+  x.all = a;
+  udwords y;
+  y.all = b;
+  if (x.s.high < y.s.high)
+    return 0;
+  if (x.s.high > y.s.high)
+    return 2;
+  if (x.s.low < y.s.low)
+    return 0;
+  if (x.s.low > y.s.low)
+    return 2;
+  return 1;
+}
+
+
+// Callers of __udivmoddi4:
+// ---- udivdi3.c ----
+#include "int_lib.h"
+
+// Returns: a / b
+// NOTE(review): presumably int_div_impl.inc builds __udivXi3_<suffix> from these macros — confirm.
+#define fixint_t di_int
+#define fixuint_t du_int
+#define INT_DIV_SUFFIX udivdi3
+#include "int_div_impl.inc"
+// NOTE(review): later sections redefine INT_DIV_SUFFIX; verify the .inc #undefs it.
+COMPILER_RT_ABI du_int __udivdi3(du_int a, du_int b) {
+  return __udivXi3_udivdi3(a, b);
+}
+// ---- umoddi3.c ----
+#include "int_lib.h"
+
+// Returns: a % b
+// NOTE(review): presumably int_div_impl.inc builds __umodXi3_<suffix> from these macros — confirm.
+#define fixint_t di_int
+#define fixuint_t du_int
+#define INT_DIV_SUFFIX umoddi3
+#include "int_div_impl.inc"
+// Public entry point: forwards to the helper named with this section's suffix.
+COMPILER_RT_ABI du_int __umoddi3(du_int a, du_int b) {
+  return __umodXi3_umoddi3(a, b);
+}
+// ---- divdi3.c ----
+#include "int_lib.h"
+
+// Returns: a / b
+// COMPUTE_UDIV routes the unsigned divide to __udivmoddi4 with a null rem.
+#define fixint_t di_int
+#define fixuint_t du_int
+#define INT_DIV_SUFFIX divdi3
+#define COMPUTE_UDIV(a, b) __udivmoddi4((a), (b), (du_int *)0)
+#include "int_div_impl.inc"
+// NOTE(review): presumably __divXi3_divdi3 comes from int_div_impl.inc — confirm.
+COMPILER_RT_ABI di_int __divdi3(di_int a, di_int b) {
+  return __divXi3_divdi3(a, b);
+}
+// ---- moddi3.c ----
+#include "int_lib.h"
+
+// Returns: a % b
+// ASSIGN_UMOD passes &(res) to __udivmoddi4 as its remainder pointer.
+#define fixint_t di_int
+#define fixuint_t du_int
+#define INT_DIV_SUFFIX moddi3
+#define ASSIGN_UMOD(res, a, b) __udivmoddi4((a), (b), &(res))
+#include "int_div_impl.inc"
+// NOTE(review): presumably __modXi3_moddi3 comes from int_div_impl.inc — confirm.
+COMPILER_RT_ABI di_int __moddi3(di_int a, di_int b) {
+  return __modXi3_moddi3(a, b);
+}
+// ---- divmoddi4.c ----
+#include "int_lib.h"
+
+// Returns: a / b, *rem = a % b
+// Divides the magnitudes with __udivmoddi4, then reapplies the signs below.
+COMPILER_RT_ABI di_int __divmoddi4(di_int a, di_int b, di_int *rem) {
+  const int bits_in_dword_m1 = (int)(sizeof(di_int) * CHAR_BIT) - 1;
+  di_int s_a = a >> bits_in_dword_m1;                   // s_a = a < 0 ? -1 : 0
+  di_int s_b = b >> bits_in_dword_m1;                   // s_b = b < 0 ? -1 : 0
+  a = (du_int)(a ^ s_a) - s_a;                          // negate if s_a == -1
+  b = (du_int)(b ^ s_b) - s_b;                          // negate if s_b == -1
+  s_b ^= s_a;                                           // sign of quotient
+  du_int r;
+  di_int q = (__udivmoddi4(a, b, &r) ^ s_b) - s_b;      // negate if s_b == -1
+  *rem = (r ^ s_a) - s_a;                               // negate if s_a == -1
+  return q;
+}
diff --git a/lib/int/int_util.c b/lib/int/int_util.c
@@ -1,20 +0,0 @@
-//===-- int_util.c - Internal utilities for compiler-rt ------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-// cfree-tailored: freestanding only (cfree predefines __STDC_HOSTED__ == 0).
-// Aborts via __builtin_trap() with no libc dependency.
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-__attribute__((weak))
-__attribute__((visibility("hidden")))
-void __compilerrt_abort_impl(const char *file, int line, const char *function) {
-  (void)file;
-  (void)line;
-  (void)function;
-  __builtin_trap();
-}
diff --git a/lib/int/moddi3.c b/lib/int/moddi3.c
@@ -1,25 +0,0 @@
-//===-- moddi3.c - Implement __moddi3 -------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __moddi3 for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-// Returns: a % b
-
-#define fixint_t di_int
-#define fixuint_t du_int
-#define INT_DIV_SUFFIX moddi3
-#define ASSIGN_UMOD(res, a, b) __udivmoddi4((a), (b), &(res))
-#include "int_div_impl.inc"
-
-COMPILER_RT_ABI di_int __moddi3(di_int a, di_int b) {
-  return __modXi3_moddi3(a, b);
-}
diff --git a/lib/int/negdi2.c b/lib/int/negdi2.c
@@ -1,21 +0,0 @@
-//===-- negdi2.c - Implement __negdi2 -------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __negdi2 for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-// Returns: -a
-
-COMPILER_RT_ABI di_int __negdi2(di_int a) {
-  // Note: this routine is here for API compatibility; any sane compiler
-  // should expand it inline.
-  return -(du_int)a;
-}
diff --git a/lib/int/paritydi2.c b/lib/int/paritydi2.c
@@ -1,25 +0,0 @@
-//===-- paritydi2.c - Implement __paritydi2 -------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __paritydi2 for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-// Returns: 1 if number of bits is odd else returns 0
-
-COMPILER_RT_ABI int __paritydi2(di_int a) {
-  dwords x;
-  x.all = a;
-  su_int x2 = x.s.high ^ x.s.low;
-  x2 ^= x2 >> 16;
-  x2 ^= x2 >> 8;
-  x2 ^= x2 >> 4;
-  return (0x6996 >> (x2 & 0xF)) & 1;
-}
diff --git a/lib/int/paritysi2.c b/lib/int/paritysi2.c
@@ -1,23 +0,0 @@
-//===-- paritysi2.c - Implement __paritysi2 -------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __paritysi2 for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-// Returns: 1 if number of bits is odd else returns 0
-
-COMPILER_RT_ABI int __paritysi2(si_int a) {
-  su_int x = (su_int)a;
-  x ^= x >> 16;
-  x ^= x >> 8;
-  x ^= x >> 4;
-  return (0x6996 >> (x & 0xF)) & 1;
-}
diff --git a/lib/int/popcountdi2.c b/lib/int/popcountdi2.c
@@ -1,32 +0,0 @@
-//===-- popcountdi2.c - Implement __popcountdi2 ---------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __popcountdi2 for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-// Returns: count of 1 bits
-
-COMPILER_RT_ABI int __popcountdi2(di_int a) {
-  du_int x2 = (du_int)a;
-  x2 = x2 - ((x2 >> 1) & 0x5555555555555555uLL);
-  // Every 2 bits holds the sum of every pair of bits (32)
-  x2 = ((x2 >> 2) & 0x3333333333333333uLL) + (x2 & 0x3333333333333333uLL);
-  // Every 4 bits holds the sum of every 4-set of bits (3 significant bits) (16)
-  x2 = (x2 + (x2 >> 4)) & 0x0F0F0F0F0F0F0F0FuLL;
-  // Every 8 bits holds the sum of every 8-set of bits (4 significant bits) (8)
-  su_int x = (su_int)(x2 + (x2 >> 32));
-  // The lower 32 bits hold four 16 bit sums (5 significant bits).
-  //   Upper 32 bits are garbage
-  x = x + (x >> 16);
-  // The lower 16 bits hold two 32 bit sums (6 significant bits).
-  //   Upper 16 bits are garbage
-  return (x + (x >> 8)) & 0x0000007F; // (7 significant bits)
-}
diff --git a/lib/int/popcountsi2.c b/lib/int/popcountsi2.c
@@ -1,29 +0,0 @@
-//===-- popcountsi2.c - Implement __popcountsi2 ---------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __popcountsi2 for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-// Returns: count of 1 bits
-
-COMPILER_RT_ABI int __popcountsi2(si_int a) {
-  su_int x = (su_int)a;
-  x = x - ((x >> 1) & 0x55555555);
-  // Every 2 bits holds the sum of every pair of bits
-  x = ((x >> 2) & 0x33333333) + (x & 0x33333333);
-  // Every 4 bits holds the sum of every 4-set of bits (3 significant bits)
-  x = (x + (x >> 4)) & 0x0F0F0F0F;
-  // Every 8 bits holds the sum of every 8-set of bits (4 significant bits)
-  x = (x + (x >> 16));
-  // The lower 16 bits hold two 8 bit sums (5 significant bits).
-  //    Upper 16 bits are garbage
-  return (x + (x >> 8)) & 0x0000003F; // (6 significant bits)
-}
diff --git a/lib/int/ucmpdi2.c b/lib/int/ucmpdi2.c
@@ -1,34 +0,0 @@
-//===-- ucmpdi2.c - Implement __ucmpdi2 -----------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __ucmpdi2 for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-// Returns:  if (a <  b) returns 0
-//           if (a == b) returns 1
-//           if (a >  b) returns 2
-
-COMPILER_RT_ABI si_int __ucmpdi2(du_int a, du_int b) {
-  udwords x;
-  x.all = a;
-  udwords y;
-  y.all = b;
-  if (x.s.high < y.s.high)
-    return 0;
-  if (x.s.high > y.s.high)
-    return 2;
-  if (x.s.low < y.s.low)
-    return 0;
-  if (x.s.low > y.s.low)
-    return 2;
-  return 1;
-}
-
diff --git a/lib/int/udivdi3.c b/lib/int/udivdi3.c
@@ -1,24 +0,0 @@
-//===-- udivdi3.c - Implement __udivdi3 -----------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __udivdi3 for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-// Returns: a / b
-
-#define fixint_t di_int
-#define fixuint_t du_int
-#define INT_DIV_SUFFIX udivdi3
-#include "int_div_impl.inc"
-
-COMPILER_RT_ABI du_int __udivdi3(du_int a, du_int b) {
-  return __udivXi3_udivdi3(a, b);
-}
diff --git a/lib/int/udivmoddi4.c b/lib/int/udivmoddi4.c
@@ -1,191 +0,0 @@
-//===-- udivmoddi4.c - Implement __udivmoddi4 -----------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __udivmoddi4 for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-// Effects: if rem != 0, *rem = a % b
-// Returns: a / b
-
-// Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide
-
-
-COMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int *rem) {
-  const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT;
-  const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
-  udwords n;
-  n.all = a;
-  udwords d;
-  d.all = b;
-  udwords q;
-  udwords r;
-  unsigned sr;
-  // special cases, X is unknown, K != 0
-  if (n.s.high == 0) {
-    if (d.s.high == 0) {
-      // 0 X
-      // ---
-      // 0 X
-      if (rem)
-        *rem = n.s.low % d.s.low;
-      return n.s.low / d.s.low;
-    }
-    // 0 X
-    // ---
-    // K X
-    if (rem)
-      *rem = n.s.low;
-    return 0;
-  }
-  // n.s.high != 0
-  if (d.s.low == 0) {
-    if (d.s.high == 0) {
-      // K X
-      // ---
-      // 0 0
-      if (rem)
-        *rem = n.s.high % d.s.low;
-      return n.s.high / d.s.low;
-    }
-    // d.s.high != 0
-    if (n.s.low == 0) {
-      // K 0
-      // ---
-      // K 0
-      if (rem) {
-        r.s.high = n.s.high % d.s.high;
-        r.s.low = 0;
-        *rem = r.all;
-      }
-      return n.s.high / d.s.high;
-    }
-    // K K
-    // ---
-    // K 0
-    if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */ {
-      if (rem) {
-        r.s.low = n.s.low;
-        r.s.high = n.s.high & (d.s.high - 1);
-        *rem = r.all;
-      }
-      return n.s.high >> ctzsi(d.s.high);
-    }
-    // K K
-    // ---
-    // K 0
-    sr = clzsi(d.s.high) - clzsi(n.s.high);
-    // 0 <= sr <= n_uword_bits - 2 or sr large
-    if (sr > n_uword_bits - 2) {
-      if (rem)
-        *rem = n.all;
-      return 0;
-    }
-    ++sr;
-    // 1 <= sr <= n_uword_bits - 1
-    // q.all = n.all << (n_udword_bits - sr);
-    q.s.low = 0;
-    q.s.high = n.s.low << (n_uword_bits - sr);
-    // r.all = n.all >> sr;
-    r.s.high = n.s.high >> sr;
-    r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
-  } else /* d.s.low != 0 */ {
-    if (d.s.high == 0) {
-      // K X
-      // ---
-      // 0 K
-      if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */ {
-        if (rem)
-          *rem = n.s.low & (d.s.low - 1);
-        if (d.s.low == 1)
-          return n.all;
-        sr = ctzsi(d.s.low);
-        q.s.high = n.s.high >> sr;
-        q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
-        return q.all;
-      }
-      // K X
-      // ---
-      // 0 K
-      sr = 1 + n_uword_bits + clzsi(d.s.low) - clzsi(n.s.high);
-      // 2 <= sr <= n_udword_bits - 1
-      // q.all = n.all << (n_udword_bits - sr);
-      // r.all = n.all >> sr;
-      if (sr == n_uword_bits) {
-        q.s.low = 0;
-        q.s.high = n.s.low;
-        r.s.high = 0;
-        r.s.low = n.s.high;
-      } else if (sr < n_uword_bits) /* 2 <= sr <= n_uword_bits - 1 */ {
-        q.s.low = 0;
-        q.s.high = n.s.low << (n_uword_bits - sr);
-        r.s.high = n.s.high >> sr;
-        r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
-      } else /* n_uword_bits + 1 <= sr <= n_udword_bits - 1 */ {
-        q.s.low = n.s.low << (n_udword_bits - sr);
-        q.s.high = (n.s.high << (n_udword_bits - sr)) |
-                   (n.s.low >> (sr - n_uword_bits));
-        r.s.high = 0;
-        r.s.low = n.s.high >> (sr - n_uword_bits);
-      }
-    } else {
-      // K X
-      // ---
-      // K K
-      sr = clzsi(d.s.high) - clzsi(n.s.high);
-      // 0 <= sr <= n_uword_bits - 1 or sr large
-      if (sr > n_uword_bits - 1) {
-        if (rem)
-          *rem = n.all;
-        return 0;
-      }
-      ++sr;
-      // 1 <= sr <= n_uword_bits
-      // q.all = n.all << (n_udword_bits - sr);
-      q.s.low = 0;
-      if (sr == n_uword_bits) {
-        q.s.high = n.s.low;
-        r.s.high = 0;
-        r.s.low = n.s.high;
-      } else {
-        q.s.high = n.s.low << (n_uword_bits - sr);
-        r.s.high = n.s.high >> sr;
-        r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
-      }
-    }
-  }
-  // Not a special case
-  // q and r are initialized with:
-  // q.all = n.all << (n_udword_bits - sr);
-  // r.all = n.all >> sr;
-  // 1 <= sr <= n_udword_bits - 1
-  su_int carry = 0;
-  for (; sr > 0; --sr) {
-    // r:q = ((r:q)  << 1) | carry
-    r.s.high = (r.s.high << 1) | (r.s.low >> (n_uword_bits - 1));
-    r.s.low = (r.s.low << 1) | (q.s.high >> (n_uword_bits - 1));
-    q.s.high = (q.s.high << 1) | (q.s.low >> (n_uword_bits - 1));
-    q.s.low = (q.s.low << 1) | carry;
-    // carry = 0;
-    // if (r.all >= d.all)
-    // {
-    //      r.all -= d.all;
-    //      carry = 1;
-    // }
-    const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1);
-    carry = s & 1;
-    r.all -= d.all & s;
-  }
-  q.all = (q.all << 1) | carry;
-  if (rem)
-    *rem = r.all;
-  return q.all;
-}
-
diff --git a/lib/int/umoddi3.c b/lib/int/umoddi3.c
@@ -1,24 +0,0 @@
-//===-- umoddi3.c - Implement __umoddi3 -----------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __umoddi3 for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-// Returns: a % b
-
-#define fixint_t di_int
-#define fixuint_t du_int
-#define INT_DIV_SUFFIX umoddi3
-#include "int_div_impl.inc"
-
-COMPILER_RT_ABI du_int __umoddi3(du_int a, du_int b) {
-  return __umodXi3_umoddi3(a, b);
-}
diff --git a/lib/int32/ashldi3.c b/lib/int32/ashldi3.c
@@ -1,36 +0,0 @@
-// ====-- ashldi3.c - Implement __ashldi3 ---------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __ashldi3 for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-// Returns: a << b
-
-// Precondition:  0 <= b < bits_in_dword
-
-COMPILER_RT_ABI di_int __ashldi3(di_int a, int b) {
-  const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT);
-  dwords input;
-  dwords result;
-  input.all = a;
-  if (b & bits_in_word) /* bits_in_word <= b < bits_in_dword */ {
-    result.s.low = 0;
-    result.s.high = input.s.low << (b - bits_in_word);
-  } else /* 0 <= b < bits_in_word */ {
-    if (b == 0)
-      return a;
-    result.s.low = input.s.low << b;
-    result.s.high =
-        ((su_int)input.s.high << b) | (input.s.low >> (bits_in_word - b));
-  }
-  return result.all;
-}
-
diff --git a/lib/int32/ashrdi3.c b/lib/int32/ashrdi3.c
@@ -1,37 +0,0 @@
-//===-- ashrdi3.c - Implement __ashrdi3 -----------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __ashrdi3 for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-// Returns: arithmetic a >> b
-
-// Precondition:  0 <= b < bits_in_dword
-
-COMPILER_RT_ABI di_int __ashrdi3(di_int a, int b) {
-  const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT);
-  dwords input;
-  dwords result;
-  input.all = a;
-  if (b & bits_in_word) /* bits_in_word <= b < bits_in_dword */ {
-    // result.s.high = input.s.high < 0 ? -1 : 0
-    result.s.high = input.s.high >> (bits_in_word - 1);
-    result.s.low = input.s.high >> (b - bits_in_word);
-  } else /* 0 <= b < bits_in_word */ {
-    if (b == 0)
-      return a;
-    result.s.high = input.s.high >> b;
-    result.s.low =
-        ((su_int)input.s.high << (bits_in_word - b)) | (input.s.low >> b);
-  }
-  return result.all;
-}
-
diff --git a/lib/int32/int32.c b/lib/int32/int32.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Consolidated per-op runtime helpers for cfree's libcfree_rt.a.
+// The build compiles only this one file per directory; the former per-op .c
+// files are inlined below, each under a "// ---- <name>.c ----" marker.
+// License: Apache-2.0 WITH LLVM-exception (see lib/LICENSE-compiler-rt.txt).
+
+// ---- ashldi3.c ----
+#include "int_lib.h"
+
+// Returns: a << b, assembled from the low and high word halves of a.
+//
+// Precondition:  0 <= b < bits_in_dword
+// (the `b & bits_in_word` test below relies on b being inside that range)
+COMPILER_RT_ABI di_int __ashldi3(di_int a, int b) {
+  const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT);
+  dwords input;
+  dwords result;
+  input.all = a; // dwords (from int_lib.h, not shown here) is accessed as .all and as .s.low / .s.high
+  if (b & bits_in_word) /* bits_in_word <= b < bits_in_dword */ {
+    result.s.low = 0; // shift count covers the whole low word, so nothing remains in it
+    result.s.high = input.s.low << (b - bits_in_word);
+  } else /* 0 <= b < bits_in_word */ {
+    if (b == 0) // early out: otherwise the expression below would shift by the full bits_in_word
+      return a;
+    result.s.low = input.s.low << b;
+    result.s.high = // shifted high word OR'ed with the bits that move up out of the low word
+        ((su_int)input.s.high << b) | (input.s.low >> (bits_in_word - b));
+  }
+  return result.all;
+}
+
+// ---- ashrdi3.c ----
+#include "int_lib.h"
+
+// Returns: arithmetic (sign-propagating) a >> b, built from the word halves of a.
+//
+// Precondition:  0 <= b < bits_in_dword
+// (the `b & bits_in_word` test below relies on b being inside that range)
+COMPILER_RT_ABI di_int __ashrdi3(di_int a, int b) {
+  const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT);
+  dwords input;
+  dwords result;
+  input.all = a; // dwords (from int_lib.h, not shown here) is accessed as .all and as .s.low / .s.high
+  if (b & bits_in_word) /* bits_in_word <= b < bits_in_dword */ {
+    // Sign fill: result.s.high = input.s.high < 0 ? -1 : 0 (presumes >> on the high word is an arithmetic shift)
+    result.s.high = input.s.high >> (bits_in_word - 1);
+    result.s.low = input.s.high >> (b - bits_in_word); // low word comes entirely from the old high word
+  } else /* 0 <= b < bits_in_word */ {
+    if (b == 0) // early out: otherwise the expression below would shift by the full bits_in_word
+      return a;
+    result.s.high = input.s.high >> b;
+    result.s.low = // bits leaving the high word OR'ed above the shifted low word
+        ((su_int)input.s.high << (bits_in_word - b)) | (input.s.low >> b);
+  }
+  return result.all;
+}
+
+// ---- lshrdi3.c ----
+#include "int_lib.h"
+
+// Returns: logical (zero-filling) a >> b, built from the word halves of a.
+//
+// Precondition:  0 <= b < bits_in_dword
+// (the `b & bits_in_word` test below relies on b being inside that range)
+COMPILER_RT_ABI di_int __lshrdi3(di_int a, int b) {
+  const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT);
+  udwords input; // udwords comes from int_lib.h (not shown); presumably the unsigned counterpart of dwords
+  udwords result;
+  input.all = a;
+  if (b & bits_in_word) /* bits_in_word <= b < bits_in_dword */ {
+    result.s.high = 0; // logical shift: the vacated high word is zero, not sign bits
+    result.s.low = input.s.high >> (b - bits_in_word);
+  } else /* 0 <= b < bits_in_word */ {
+    if (b == 0) // early out: otherwise the expression below would shift by the full bits_in_word
+      return a;
+    result.s.high = input.s.high >> b;
+    result.s.low = (input.s.high << (bits_in_word - b)) | (input.s.low >> b); // high-word spill | shifted low word
+  }
+  return result.all;
+}
+
+// ---- muldi3.c ----
+#include "int_lib.h"
+
+// Returns: a * b as a double-word value, built from half-word partial products
+// so that no intermediate product needs more than one word.
+static di_int __muldsi3(su_int a, su_int b) {
+  dwords r;
+  const int bits_in_word_2 = (int)(sizeof(si_int) * CHAR_BIT) / 2; // width of half a word
+  const su_int lower_mask = (su_int)~0 >> bits_in_word_2;          // selects the low half of a word
+  r.s.low = (a & lower_mask) * (b & lower_mask); // a_lo * b_lo
+  su_int t = r.s.low >> bits_in_word_2;          // t starts as the upper half of that product
+  r.s.low &= lower_mask;                         // keep only the settled lower half
+  t += (a >> bits_in_word_2) * (b & lower_mask); // add a_hi * b_lo
+  r.s.low += (t & lower_mask) << bits_in_word_2; // low half of t becomes the upper half of r.s.low
+  r.s.high = t >> bits_in_word_2;                // the rest of t carries into the high word
+  t = r.s.low >> bits_in_word_2;                 // reload the upper half of the low word
+  r.s.low &= lower_mask;
+  t += (b >> bits_in_word_2) * (a & lower_mask); // add b_hi * a_lo
+  r.s.low += (t & lower_mask) << bits_in_word_2;
+  r.s.high += t >> bits_in_word_2;               // carry from the second cross term
+  r.s.high += (a >> bits_in_word_2) * (b >> bits_in_word_2); // a_hi * b_hi goes to the high word
+  return r.all;
+}
+
+// Returns: a * b, using the full low-by-low product from __muldsi3 plus the
+// two cross products; x.s.high * y.s.high is never computed here.
+COMPILER_RT_ABI di_int __muldi3(di_int a, di_int b) {
+  dwords x;
+  x.all = a; // split a into x.s.low / x.s.high
+  dwords y;
+  y.all = b; // split b into y.s.low / y.s.high
+  dwords r;
+  r.all = __muldsi3(x.s.low, y.s.low); // double-word product of the two low words
+  r.s.high += x.s.high * y.s.low + x.s.low * y.s.high; // cross terms are added to the high word only
+  return r.all;
+}
+
diff --git a/lib/int32/lshrdi3.c b/lib/int32/lshrdi3.c
@@ -1,35 +0,0 @@
-//===-- lshrdi3.c - Implement __lshrdi3 -----------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __lshrdi3 for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-// Returns: logical a >> b
-
-// Precondition:  0 <= b < bits_in_dword
-
-COMPILER_RT_ABI di_int __lshrdi3(di_int a, int b) {
-  const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT);
-  udwords input;
-  udwords result;
-  input.all = a;
-  if (b & bits_in_word) /* bits_in_word <= b < bits_in_dword */ {
-    result.s.high = 0;
-    result.s.low = input.s.high >> (b - bits_in_word);
-  } else /* 0 <= b < bits_in_word */ {
-    if (b == 0)
-      return a;
-    result.s.high = input.s.high >> b;
-    result.s.low = (input.s.high << (bits_in_word - b)) | (input.s.low >> b);
-  }
-  return result.all;
-}
-
diff --git a/lib/int32/muldi3.c b/lib/int32/muldi3.c
@@ -1,48 +0,0 @@
-//===-- muldi3.c - Implement __muldi3 -------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __muldi3 for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-// Returns: a * b
-
-static di_int __muldsi3(su_int a, su_int b) {
-  dwords r;
-  const int bits_in_word_2 = (int)(sizeof(si_int) * CHAR_BIT) / 2;
-  const su_int lower_mask = (su_int)~0 >> bits_in_word_2;
-  r.s.low = (a & lower_mask) * (b & lower_mask);
-  su_int t = r.s.low >> bits_in_word_2;
-  r.s.low &= lower_mask;
-  t += (a >> bits_in_word_2) * (b & lower_mask);
-  r.s.low += (t & lower_mask) << bits_in_word_2;
-  r.s.high = t >> bits_in_word_2;
-  t = r.s.low >> bits_in_word_2;
-  r.s.low &= lower_mask;
-  t += (b >> bits_in_word_2) * (a & lower_mask);
-  r.s.low += (t & lower_mask) << bits_in_word_2;
-  r.s.high += t >> bits_in_word_2;
-  r.s.high += (a >> bits_in_word_2) * (b >> bits_in_word_2);
-  return r.all;
-}
-
-// Returns: a * b
-
-COMPILER_RT_ABI di_int __muldi3(di_int a, di_int b) {
-  dwords x;
-  x.all = a;
-  dwords y;
-  y.all = b;
-  dwords r;
-  r.all = __muldsi3(x.s.low, y.s.low);
-  r.s.high += x.s.high * y.s.low + x.s.low * y.s.high;
-  return r.all;
-}
-
diff --git a/lib/int64/ashlti3.c b/lib/int64/ashlti3.c
@@ -1,37 +0,0 @@
-//===-- ashlti3.c - Implement __ashlti3 -----------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __ashlti3 for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-
-// Returns: a << b
-
-// Precondition:  0 <= b < bits_in_tword
-
-COMPILER_RT_ABI ti_int __ashlti3(ti_int a, int b) {
-  const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT);
-  twords input;
-  twords result;
-  input.all = a;
-  if (b & bits_in_dword) /* bits_in_dword <= b < bits_in_tword */ {
-    result.s.low = 0;
-    result.s.high = input.s.low << (b - bits_in_dword);
-  } else /* 0 <= b < bits_in_dword */ {
-    if (b == 0)
-      return a;
-    result.s.low = input.s.low << b;
-    result.s.high =
-        ((du_int)input.s.high << b) | (input.s.low >> (bits_in_dword - b));
-  }
-  return result.all;
-}
-
diff --git a/lib/int64/ashrti3.c b/lib/int64/ashrti3.c
@@ -1,38 +0,0 @@
-//===-- ashrti3.c - Implement __ashrti3 -----------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __ashrti3 for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-
-// Returns: arithmetic a >> b
-
-// Precondition:  0 <= b < bits_in_tword
-
-COMPILER_RT_ABI ti_int __ashrti3(ti_int a, int b) {
-  const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT);
-  twords input;
-  twords result;
-  input.all = a;
-  if (b & bits_in_dword) /* bits_in_dword <= b < bits_in_tword */ {
-    // result.s.high = input.s.high < 0 ? -1 : 0
-    result.s.high = input.s.high >> (bits_in_dword - 1);
-    result.s.low = input.s.high >> (b - bits_in_dword);
-  } else /* 0 <= b < bits_in_dword */ {
-    if (b == 0)
-      return a;
-    result.s.high = input.s.high >> b;
-    result.s.low =
-        ((du_int)input.s.high << (bits_in_dword - b)) | (input.s.low >> b);
-  }
-  return result.all;
-}
-
diff --git a/lib/int64/clzti2.c b/lib/int64/clzti2.c
@@ -1,27 +0,0 @@
-//===-- clzti2.c - Implement __clzti2 -------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __clzti2 for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-
-// Returns: the number of leading 0-bits
-
-// Precondition: a != 0
-
-COMPILER_RT_ABI int __clzti2(ti_int a) {
-  twords x;
-  x.all = a;
-  const di_int f = -(x.s.high == 0);
-  return __builtin_clzll((x.s.high & ~f) | (x.s.low & f)) +
-         ((si_int)f & ((si_int)(sizeof(di_int) * CHAR_BIT)));
-}
-
diff --git a/lib/int64/ctzti2.c b/lib/int64/ctzti2.c
@@ -1,27 +0,0 @@
-//===-- ctzti2.c - Implement __ctzti2 -------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __ctzti2 for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-
-// Returns: the number of trailing 0-bits
-
-// Precondition: a != 0
-
-COMPILER_RT_ABI int __ctzti2(ti_int a) {
-  twords x;
-  x.all = a;
-  const di_int f = -(x.s.low == 0);
-  return __builtin_ctzll((x.s.high & f) | (x.s.low & ~f)) +
-         ((si_int)f & ((si_int)(sizeof(di_int) * CHAR_BIT)));
-}
-
diff --git a/lib/int64/divmodti4.c b/lib/int64/divmodti4.c
@@ -1,30 +0,0 @@
-//===-- divmodti4.c - Implement __divmodti4 -------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __divmodti4 for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-
-// Returns: a / b, *rem = a % b
-
-COMPILER_RT_ABI ti_int __divmodti4(ti_int a, ti_int b, ti_int *rem) {
-  const int bits_in_tword_m1 = (int)(sizeof(ti_int) * CHAR_BIT) - 1;
-  ti_int s_a = a >> bits_in_tword_m1;                   // s_a = a < 0 ? -1 : 0
-  ti_int s_b = b >> bits_in_tword_m1;                   // s_b = b < 0 ? -1 : 0
-  a = (tu_int)(a ^ s_a) - s_a;                          // negate if s_a == -1
-  b = (tu_int)(b ^ s_b) - s_b;                          // negate if s_b == -1
-  s_b ^= s_a;                                           // sign of quotient
-  tu_int r;
-  ti_int q = (__udivmodti4(a, b, &r) ^ s_b) - s_b;      // negate if s_b == -1
-  *rem = (r ^ s_a) - s_a;                               // negate if s_a == -1
-  return q;
-}
-
diff --git a/lib/int64/divti3.c b/lib/int64/divti3.c
@@ -1,27 +0,0 @@
-//===-- divti3.c - Implement __divti3 -------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __divti3 for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-
-// Returns: a / b
-
-#define fixint_t ti_int
-#define fixuint_t tu_int
-#define INT_DIV_SUFFIX divti3
-#define COMPUTE_UDIV(a, b) __udivmodti4((a), (b), (tu_int *)0)
-#include "int_div_impl.inc"
-
-COMPILER_RT_ABI ti_int __divti3(ti_int a, ti_int b) {
-  return __divXi3_divti3(a, b);
-}
-
diff --git a/lib/int64/int64.c b/lib/int64/int64.c
@@ -0,0 +1,383 @@
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Consolidated per-op runtime helpers for cfree's libcfree_rt.a.
+// The build compiles only this one file per directory; the former per-op .c
+// files are pasted in below, each under a "// ---- name.c ----" marker.
+// License: Apache-2.0 WITH LLVM-exception (see lib/LICENSE-compiler-rt.txt).
+
+// udivmodti4.c first: __udivmodti4 is called by divmodti4, divti3, modti3,
+// udivti3, and umodti3 — must be defined before those callers.
+// ---- udivmodti4.c ----
+#include "int_lib.h"
+
+
+// Divides the 128-bit value u1:u0 (u1 is the high half) by the 64-bit v.
+// Returns the quotient, which must fit in 64 bits; the remainder goes to *r.
+// Taken and adjusted from libdivide libdivide_128_div_64_to_64 division
+// fallback. For a correctness proof see the reference for this algorithm
+// in Knuth, Volume 2, section 4.3.1, Algorithm D.
+UNUSED
+static inline du_int udiv128by64to64default(du_int u1, du_int u0, du_int v,
+                                            du_int *r) {
+  const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
+  const du_int b = (1ULL << (n_udword_bits / 2)); // Number base (32 bits)
+  du_int un1, un0;                                // Norm. dividend LSD's
+  du_int vn1, vn0;                                // Norm. divisor digits
+  du_int q1, q0;                                  // Quotient digits
+  du_int un64, un21, un10;                        // Dividend digit pairs
+  du_int rhat;                                    // A remainder
+  si_int s;                                       // Shift amount for normalization
+  // NOTE(review): v == 0 is not handled; __builtin_clzll(0) is undefined.
+  s = __builtin_clzll(v);
+  if (s > 0) {
+    // Normalize the divisor.
+    v = v << s;
+    un64 = (u1 << s) | (u0 >> (n_udword_bits - s));
+    un10 = u0 << s; // Shift dividend left
+  } else {
+    // Avoid undefined behavior of (u0 >> 64).
+    un64 = u1;
+    un10 = u0;
+  }
+
+  // Break divisor up into two 32-bit digits.
+  vn1 = v >> (n_udword_bits / 2);
+  vn0 = v & 0xFFFFFFFF;
+
+  // Break right half of dividend into two digits.
+  un1 = un10 >> (n_udword_bits / 2);
+  un0 = un10 & 0xFFFFFFFF;
+
+  // Compute the first quotient digit, q1.
+  q1 = un64 / vn1;
+  rhat = un64 - q1 * vn1;
+
+  // q1 has at most error 2. No more than 2 iterations.
+  while (q1 >= b || q1 * vn0 > b * rhat + un1) {
+    q1 = q1 - 1;
+    rhat = rhat + vn1;
+    if (rhat >= b)
+      break;
+  }
+  // Remove q1 * v from the dividend prefix (un64:un1); un21 is what is left.
+  un21 = un64 * b + un1 - q1 * v;
+
+  // Compute the second quotient digit.
+  q0 = un21 / vn1;
+  rhat = un21 - q0 * vn1;
+
+  // q0 has at most error 2. No more than 2 iterations.
+  while (q0 >= b || q0 * vn0 > b * rhat + un0) {
+    q0 = q0 - 1;
+    rhat = rhat + vn1;
+    if (rhat >= b)
+      break;
+  }
+  // Shifting right by s undoes the normalization applied to the dividend.
+  *r = (un21 * b + un0 - q0 * v) >> s;
+  return q1 * b + q0;
+}
+
+static inline du_int udiv128by64to64(du_int u1, du_int u0, du_int v,
+                                     du_int *r) {
+  return udiv128by64to64default(u1, u0, v, r); // sole implementation used here
+}
+
+// Effects: if rem != 0, *rem = a % b
+// Returns: a / b
+// NOTE(review): b == 0 is not rejected and ends in a divide by zero below.
+COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int *rem) {
+  const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT;
+  utwords dividend;
+  dividend.all = a;
+  utwords divisor;
+  divisor.all = b;
+  utwords quotient;
+  utwords remainder;
+  if (divisor.all > dividend.all) { // quotient is 0 and the remainder is a
+    if (rem)
+      *rem = dividend.all;
+    return 0;
+  }
+  // When the divisor fits in 64 bits, we can use an optimized path.
+  if (divisor.s.high == 0) {
+    remainder.s.high = 0;
+    if (dividend.s.high < divisor.s.low) {
+      // The result fits in 64 bits.
+      quotient.s.low = udiv128by64to64(dividend.s.high, dividend.s.low,
+                                       divisor.s.low, &remainder.s.low);
+      quotient.s.high = 0;
+    } else {
+      // First, divide with the high part to get the remainder in dividend.s.high.
+      // After that dividend.s.high < divisor.s.low.
+      quotient.s.high = dividend.s.high / divisor.s.low;
+      dividend.s.high = dividend.s.high % divisor.s.low;
+      quotient.s.low = udiv128by64to64(dividend.s.high, dividend.s.low,
+                                       divisor.s.low, &remainder.s.low);
+    }
+    if (rem)
+      *rem = remainder.all;
+    return quotient.all;
+  }
+  // 0 <= shift <= 63.
+  si_int shift =
+      __builtin_clzll(divisor.s.high) - __builtin_clzll(dividend.s.high);
+  divisor.all <<= shift; // line up the divisor's top set bit with the dividend's
+  quotient.s.high = 0;
+  quotient.s.low = 0;
+  for (; shift >= 0; --shift) { // produces shift + 1 quotient bits, high to low
+    quotient.s.low <<= 1;
+    // Branch free version of.
+    // if (dividend.all >= divisor.all)
+    // {
+    //    dividend.all -= divisor.all;
+    //    carry = 1;
+    // }
+    const ti_int s =
+        (ti_int)(divisor.all - dividend.all - 1) >> (n_utword_bits - 1);
+    quotient.s.low |= s & 1; // s is used as a mask: low bit feeds the quotient
+    dividend.all -= divisor.all & s;
+    divisor.all >>= 1;
+  }
+  if (rem)
+    *rem = dividend.all;
+  return quotient.all;
+}
+
+
+// Leaf ops (no intra-directory dependencies):
+// ---- ashlti3.c ----
+#include "int_lib.h"
+
+
+// Returns: a << b
+// Handled as two dwords; b & bits_in_dword picks the b >= bits_in_dword case.
+// Precondition:  0 <= b < bits_in_tword
+
+COMPILER_RT_ABI ti_int __ashlti3(ti_int a, int b) {
+  const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT);
+  twords input;
+  twords result;
+  input.all = a;
+  if (b & bits_in_dword) /* bits_in_dword <= b < bits_in_tword */ {
+    result.s.low = 0;
+    result.s.high = input.s.low << (b - bits_in_dword);
+  } else /* 0 <= b < bits_in_dword */ {
+    if (b == 0) // with b == 0 the (bits_in_dword - b) shift would be by bits_in_dword
+      return a;
+    result.s.low = input.s.low << b;
+    result.s.high =
+        ((du_int)input.s.high << b) | (input.s.low >> (bits_in_dword - b));
+  }
+  return result.all;
+}
+
+// ---- ashrti3.c ----
+#include "int_lib.h"
+
+
+// Returns: arithmetic a >> b
+// Handled as two dwords; b & bits_in_dword picks the b >= bits_in_dword case.
+// Precondition:  0 <= b < bits_in_tword
+
+COMPILER_RT_ABI ti_int __ashrti3(ti_int a, int b) {
+  const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT);
+  twords input;
+  twords result;
+  input.all = a;
+  if (b & bits_in_dword) /* bits_in_dword <= b < bits_in_tword */ {
+    // result.s.high = input.s.high < 0 ? -1 : 0
+    result.s.high = input.s.high >> (bits_in_dword - 1);
+    result.s.low = input.s.high >> (b - bits_in_dword);
+  } else /* 0 <= b < bits_in_dword */ {
+    if (b == 0) // with b == 0 the (bits_in_dword - b) shift would be by bits_in_dword
+      return a;
+    result.s.high = input.s.high >> b;
+    result.s.low =
+        ((du_int)input.s.high << (bits_in_dword - b)) | (input.s.low >> b);
+  }
+  return result.all;
+}
+
+// ---- clzti2.c ----
+#include "int_lib.h"
+
+
+// Returns: the number of leading 0-bits
+// f is -1 if the high dword is 0, else 0: it selects which dword is scanned.
+// Precondition: a != 0
+
+COMPILER_RT_ABI int __clzti2(ti_int a) {
+  twords x;
+  x.all = a;
+  const di_int f = -(x.s.high == 0);
+  return __builtin_clzll((x.s.high & ~f) | (x.s.low & f)) +
+         ((si_int)f & ((si_int)(sizeof(di_int) * CHAR_BIT))); // + dword bits if low was scanned
+}
+
+// ---- ctzti2.c ----
+#include "int_lib.h"
+
+
+// Returns: the number of trailing 0-bits
+// f is -1 if the low dword is 0, else 0: it selects which dword is scanned.
+// Precondition: a != 0
+
+COMPILER_RT_ABI int __ctzti2(ti_int a) {
+  twords x;
+  x.all = a;
+  const di_int f = -(x.s.low == 0);
+  return __builtin_ctzll((x.s.high & f) | (x.s.low & ~f)) +
+         ((si_int)f & ((si_int)(sizeof(di_int) * CHAR_BIT))); // + dword bits if high was scanned
+}
+
+// ---- lshrti3.c ----
+#include "int_lib.h"
+
+
+// Returns: logical a >> b
+// Handled as two dwords; b & bits_in_dword picks the b >= bits_in_dword case.
+// Precondition:  0 <= b < bits_in_tword
+
+COMPILER_RT_ABI ti_int __lshrti3(ti_int a, int b) {
+  const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT);
+  utwords input;
+  utwords result;
+  input.all = a;
+  if (b & bits_in_dword) /* bits_in_dword <= b < bits_in_tword */ {
+    result.s.high = 0;
+    result.s.low = input.s.high >> (b - bits_in_dword);
+  } else /* 0 <= b < bits_in_dword */ {
+    if (b == 0) // with b == 0 the (bits_in_dword - b) shift would be by bits_in_dword
+      return a;
+    result.s.high = input.s.high >> b;
+    result.s.low = (input.s.high << (bits_in_dword - b)) | (input.s.low >> b);
+  }
+  return result.all;
+}
+
+// ---- multi3.c ----
+#include "int_lib.h"
+
+
+// Returns: a * b
+// Full-width product assembled from the four half-dword partial products.
+static ti_int __mulddi3(du_int a, du_int b) {
+  twords r;
+  const int bits_in_dword_2 = (int)(sizeof(di_int) * CHAR_BIT) / 2;
+  const du_int lower_mask = (du_int)~0 >> bits_in_dword_2;
+  r.s.low = (a & lower_mask) * (b & lower_mask); // low(a) * low(b)
+  du_int t = r.s.low >> bits_in_dword_2;
+  r.s.low &= lower_mask;
+  t += (a >> bits_in_dword_2) * (b & lower_mask); // high(a) * low(b)
+  r.s.low += (t & lower_mask) << bits_in_dword_2;
+  r.s.high = t >> bits_in_dword_2;
+  t = r.s.low >> bits_in_dword_2;
+  r.s.low &= lower_mask;
+  t += (b >> bits_in_dword_2) * (a & lower_mask); // high(b) * low(a)
+  r.s.low += (t & lower_mask) << bits_in_dword_2;
+  r.s.high += t >> bits_in_dword_2;
+  r.s.high += (a >> bits_in_dword_2) * (b >> bits_in_dword_2); // high(a) * high(b)
+  return r.all;
+}
+
+// Returns: a * b
+// Low dwords get the full product; the cross terms only add into the high dword.
+COMPILER_RT_ABI ti_int __multi3(ti_int a, ti_int b) {
+  twords x;
+  x.all = a;
+  twords y;
+  y.all = b;
+  twords r;
+  r.all = __mulddi3(x.s.low, y.s.low);
+  r.s.high += x.s.high * y.s.low + x.s.low * y.s.high;
+  return r.all;
+}
+
+// ---- negti2.c ----
+#include "int_lib.h"
+
+
+// Returns: -a
+// Negates after casting to tu_int (presumably unsigned, so it wraps; see int_lib.h).
+COMPILER_RT_ABI ti_int __negti2(ti_int a) {
+  // Note: this routine is here for API compatibility; any sane compiler
+  // should expand it inline.
+  return -(tu_int)a;
+}
+
+
+// Callers of __udivmodti4:
+// ---- udivti3.c ----
+#include "int_lib.h"
+
+
+// Returns: a / b
+// The null rem argument makes __udivmodti4 skip storing a remainder.
+COMPILER_RT_ABI tu_int __udivti3(tu_int a, tu_int b) {
+  return __udivmodti4(a, b, 0);
+}
+
+// ---- umodti3.c ----
+#include "int_lib.h"
+
+
+// Returns: a % b
+// The quotient returned by __udivmodti4 is discarded; only r is used.
+COMPILER_RT_ABI tu_int __umodti3(tu_int a, tu_int b) {
+  tu_int r;
+  __udivmodti4(a, b, &r);
+  return r;
+}
+
+// ---- divmodti4.c ----
+#include "int_lib.h"
+
+
+// Returns: a / b, *rem = a % b
+// The remainder is negated under a's sign; the quotient under sign(a) ^ sign(b).
+COMPILER_RT_ABI ti_int __divmodti4(ti_int a, ti_int b, ti_int *rem) {
+  const int bits_in_tword_m1 = (int)(sizeof(ti_int) * CHAR_BIT) - 1;
+  ti_int s_a = a >> bits_in_tword_m1;                   // s_a = a < 0 ? -1 : 0
+  ti_int s_b = b >> bits_in_tword_m1;                   // s_b = b < 0 ? -1 : 0
+  a = (tu_int)(a ^ s_a) - s_a;                          // negate if s_a == -1
+  b = (tu_int)(b ^ s_b) - s_b;                          // negate if s_b == -1
+  s_b ^= s_a;                                           // sign of quotient
+  tu_int r;
+  ti_int q = (__udivmodti4(a, b, &r) ^ s_b) - s_b;      // negate if s_b == -1
+  *rem = (r ^ s_a) - s_a;                               // negate if s_a == -1
+  return q;
+}
+
+// ---- divti3.c ----
+#include "int_lib.h"
+
+
+// Returns: a / b
+// __divXi3_divti3 presumably comes from int_div_impl.inc via the macros below.
+#define fixint_t ti_int
+#define fixuint_t tu_int
+#define INT_DIV_SUFFIX divti3
+#define COMPUTE_UDIV(a, b) __udivmodti4((a), (b), (tu_int *)0)
+#include "int_div_impl.inc"
+
+COMPILER_RT_ABI ti_int __divti3(ti_int a, ti_int b) {
+  return __divXi3_divti3(a, b);
+}
+
+// ---- modti3.c ----
+#include "int_lib.h"
+
+
+// Returns: a % b. NOTE(review): INT_DIV_SUFFIX/COMPUTE_UDIV are still set from
+// the divti3 section unless int_div_impl.inc #undefs them; please confirm.
+#define fixint_t ti_int
+#define fixuint_t tu_int
+#define INT_DIV_SUFFIX modti3
+#define ASSIGN_UMOD(res, a, b) __udivmodti4((a), (b), &(res))
+#include "int_div_impl.inc"
+
+COMPILER_RT_ABI ti_int __modti3(ti_int a, ti_int b) {
+  return __modXi3_modti3(a, b);
+}
+
diff --git a/lib/int64/lshrti3.c b/lib/int64/lshrti3.c
@@ -1,36 +0,0 @@
-//===-- lshrti3.c - Implement __lshrti3 -----------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __lshrti3 for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-
-// Returns: logical a >> b
-
-// Precondition:  0 <= b < bits_in_tword
-
-COMPILER_RT_ABI ti_int __lshrti3(ti_int a, int b) {
-  const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT);
-  utwords input;
-  utwords result;
-  input.all = a;
-  if (b & bits_in_dword) /* bits_in_dword <= b < bits_in_tword */ {
-    result.s.high = 0;
-    result.s.low = input.s.high >> (b - bits_in_dword);
-  } else /* 0 <= b < bits_in_dword */ {
-    if (b == 0)
-      return a;
-    result.s.high = input.s.high >> b;
-    result.s.low = (input.s.high << (bits_in_dword - b)) | (input.s.low >> b);
-  }
-  return result.all;
-}
-
diff --git a/lib/int64/modti3.c b/lib/int64/modti3.c
@@ -1,27 +0,0 @@
-//===-- modti3.c - Implement __modti3 -------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __modti3 for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-
-// Returns: a % b
-
-#define fixint_t ti_int
-#define fixuint_t tu_int
-#define INT_DIV_SUFFIX modti3
-#define ASSIGN_UMOD(res, a, b) __udivmodti4((a), (b), &(res))
-#include "int_div_impl.inc"
-
-COMPILER_RT_ABI ti_int __modti3(ti_int a, ti_int b) {
-  return __modXi3_modti3(a, b);
-}
-
diff --git a/lib/int64/multi3.c b/lib/int64/multi3.c
@@ -1,49 +0,0 @@
-//===-- multi3.c - Implement __multi3 -------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __multi3 for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-
-// Returns: a * b
-
-static ti_int __mulddi3(du_int a, du_int b) {
-  twords r;
-  const int bits_in_dword_2 = (int)(sizeof(di_int) * CHAR_BIT) / 2;
-  const du_int lower_mask = (du_int)~0 >> bits_in_dword_2;
-  r.s.low = (a & lower_mask) * (b & lower_mask);
-  du_int t = r.s.low >> bits_in_dword_2;
-  r.s.low &= lower_mask;
-  t += (a >> bits_in_dword_2) * (b & lower_mask);
-  r.s.low += (t & lower_mask) << bits_in_dword_2;
-  r.s.high = t >> bits_in_dword_2;
-  t = r.s.low >> bits_in_dword_2;
-  r.s.low &= lower_mask;
-  t += (b >> bits_in_dword_2) * (a & lower_mask);
-  r.s.low += (t & lower_mask) << bits_in_dword_2;
-  r.s.high += t >> bits_in_dword_2;
-  r.s.high += (a >> bits_in_dword_2) * (b >> bits_in_dword_2);
-  return r.all;
-}
-
-// Returns: a * b
-
-COMPILER_RT_ABI ti_int __multi3(ti_int a, ti_int b) {
-  twords x;
-  x.all = a;
-  twords y;
-  y.all = b;
-  twords r;
-  r.all = __mulddi3(x.s.low, y.s.low);
-  r.s.high += x.s.high * y.s.low + x.s.low * y.s.high;
-  return r.all;
-}
-
diff --git a/lib/int64/negti2.c b/lib/int64/negti2.c
@@ -1,23 +0,0 @@
-//===-- negti2.c - Implement __negti2 -------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __negti2 for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-
-// Returns: -a
-
-COMPILER_RT_ABI ti_int __negti2(ti_int a) {
-  // Note: this routine is here for API compatibility; any sane compiler
-  // should expand it inline.
-  return -(tu_int)a;
-}
-
diff --git a/lib/int64/udivmodti4.c b/lib/int64/udivmodti4.c
@@ -1,148 +0,0 @@
-//===-- udivmodti4.c - Implement __udivmodti4 -----------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __udivmodti4 for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-
-// Returns the 128 bit division result by 64 bit. Result must fit in 64 bits.
-// Remainder stored in r.
-// Taken and adjusted from libdivide libdivide_128_div_64_to_64 division
-// fallback. For a correctness proof see the reference for this algorithm
-// in Knuth, Volume 2, section 4.3.1, Algorithm D.
-UNUSED
-static inline du_int udiv128by64to64default(du_int u1, du_int u0, du_int v,
-                                            du_int *r) {
-  const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
-  const du_int b = (1ULL << (n_udword_bits / 2)); // Number base (32 bits)
-  du_int un1, un0;                                // Norm. dividend LSD's
-  du_int vn1, vn0;                                // Norm. divisor digits
-  du_int q1, q0;                                  // Quotient digits
-  du_int un64, un21, un10;                        // Dividend digit pairs
-  du_int rhat;                                    // A remainder
-  si_int s;                                       // Shift amount for normalization
-
-  s = __builtin_clzll(v);
-  if (s > 0) {
-    // Normalize the divisor.
-    v = v << s;
-    un64 = (u1 << s) | (u0 >> (n_udword_bits - s));
-    un10 = u0 << s; // Shift dividend left
-  } else {
-    // Avoid undefined behavior of (u0 >> 64).
-    un64 = u1;
-    un10 = u0;
-  }
-
-  // Break divisor up into two 32-bit digits.
-  vn1 = v >> (n_udword_bits / 2);
-  vn0 = v & 0xFFFFFFFF;
-
-  // Break right half of dividend into two digits.
-  un1 = un10 >> (n_udword_bits / 2);
-  un0 = un10 & 0xFFFFFFFF;
-
-  // Compute the first quotient digit, q1.
-  q1 = un64 / vn1;
-  rhat = un64 - q1 * vn1;
-
-  // q1 has at most error 2. No more than 2 iterations.
-  while (q1 >= b || q1 * vn0 > b * rhat + un1) {
-    q1 = q1 - 1;
-    rhat = rhat + vn1;
-    if (rhat >= b)
-      break;
-  }
-
-  un21 = un64 * b + un1 - q1 * v;
-
-  // Compute the second quotient digit.
-  q0 = un21 / vn1;
-  rhat = un21 - q0 * vn1;
-
-  // q0 has at most error 2. No more than 2 iterations.
-  while (q0 >= b || q0 * vn0 > b * rhat + un0) {
-    q0 = q0 - 1;
-    rhat = rhat + vn1;
-    if (rhat >= b)
-      break;
-  }
-
-  *r = (un21 * b + un0 - q0 * v) >> s;
-  return q1 * b + q0;
-}
-
-static inline du_int udiv128by64to64(du_int u1, du_int u0, du_int v,
-                                     du_int *r) {
-  return udiv128by64to64default(u1, u0, v, r);
-}
-
-// Effects: if rem != 0, *rem = a % b
-// Returns: a / b
-
-COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int *rem) {
-  const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT;
-  utwords dividend;
-  dividend.all = a;
-  utwords divisor;
-  divisor.all = b;
-  utwords quotient;
-  utwords remainder;
-  if (divisor.all > dividend.all) {
-    if (rem)
-      *rem = dividend.all;
-    return 0;
-  }
-  // When the divisor fits in 64 bits, we can use an optimized path.
-  if (divisor.s.high == 0) {
-    remainder.s.high = 0;
-    if (dividend.s.high < divisor.s.low) {
-      // The result fits in 64 bits.
-      quotient.s.low = udiv128by64to64(dividend.s.high, dividend.s.low,
-                                       divisor.s.low, &remainder.s.low);
-      quotient.s.high = 0;
-    } else {
-      // First, divide with the high part to get the remainder in dividend.s.high.
-      // After that dividend.s.high < divisor.s.low.
-      quotient.s.high = dividend.s.high / divisor.s.low;
-      dividend.s.high = dividend.s.high % divisor.s.low;
-      quotient.s.low = udiv128by64to64(dividend.s.high, dividend.s.low,
-                                       divisor.s.low, &remainder.s.low);
-    }
-    if (rem)
-      *rem = remainder.all;
-    return quotient.all;
-  }
-  // 0 <= shift <= 63.
-  si_int shift =
-      __builtin_clzll(divisor.s.high) - __builtin_clzll(dividend.s.high);
-  divisor.all <<= shift;
-  quotient.s.high = 0;
-  quotient.s.low = 0;
-  for (; shift >= 0; --shift) {
-    quotient.s.low <<= 1;
-    // Branch free version of.
-    // if (dividend.all >= divisor.all)
-    // {
-    //    dividend.all -= divisor.all;
-    //    carry = 1;
-    // }
-    const ti_int s =
-        (ti_int)(divisor.all - dividend.all - 1) >> (n_utword_bits - 1);
-    quotient.s.low |= s & 1;
-    dividend.all -= divisor.all & s;
-    divisor.all >>= 1;
-  }
-  if (rem)
-    *rem = dividend.all;
-  return quotient.all;
-}
-
diff --git a/lib/int64/udivti3.c b/lib/int64/udivti3.c
@@ -1,21 +0,0 @@
-//===-- udivti3.c - Implement __udivti3 -----------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __udivti3 for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-
-// Returns: a / b
-
-COMPILER_RT_ABI tu_int __udivti3(tu_int a, tu_int b) {
-  return __udivmodti4(a, b, 0);
-}
-
diff --git a/lib/int64/umodti3.c b/lib/int64/umodti3.c
@@ -1,23 +0,0 @@
-//===-- umodti3.c - Implement __umodti3 -----------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements __umodti3 for the compiler_rt library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-
-// Returns: a % b
-
-COMPILER_RT_ABI tu_int __umodti3(tu_int a, tu_int b) {
-  tu_int r;
-  __udivmodti4(a, b, &r);
-  return r;
-}
-
diff --git a/lib/riscv/restore_rv32.S b/lib/riscv/restore_rv32.S
@@ -1,73 +0,0 @@
-//===-- restore_rv32.S - restore up to 12 callee-save registers (RV32) ---===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-  .text
-
-  .globl  __riscv_restore_12
-  .type   __riscv_restore_12,@function
-__riscv_restore_12:
-  lw      s11, 12(sp)
-  addi    sp, sp, 16
-  // fallthrough into __riscv_restore_11/10/9/8
-
-  .globl  __riscv_restore_11
-  .type   __riscv_restore_11,@function
-  .globl  __riscv_restore_10
-  .type   __riscv_restore_10,@function
-  .globl  __riscv_restore_9
-  .type   __riscv_restore_9,@function
-  .globl  __riscv_restore_8
-  .type   __riscv_restore_8,@function
-__riscv_restore_11:
-__riscv_restore_10:
-__riscv_restore_9:
-__riscv_restore_8:
-  lw      s10, 0(sp)
-  lw      s9,  4(sp)
-  lw      s8,  8(sp)
-  lw      s7,  12(sp)
-  addi    sp, sp, 16
-  // fallthrough into __riscv_restore_7/6/5/4
-
-  .globl  __riscv_restore_7
-  .type   __riscv_restore_7,@function
-  .globl  __riscv_restore_6
-  .type   __riscv_restore_6,@function
-  .globl  __riscv_restore_5
-  .type   __riscv_restore_5,@function
-  .globl  __riscv_restore_4
-  .type   __riscv_restore_4,@function
-__riscv_restore_7:
-__riscv_restore_6:
-__riscv_restore_5:
-__riscv_restore_4:
-  lw      s6,  0(sp)
-  lw      s5,  4(sp)
-  lw      s4,  8(sp)
-  lw      s3,  12(sp)
-  addi    sp, sp, 16
-  // fallthrough into __riscv_restore_3/2/1/0
-
-  .globl  __riscv_restore_3
-  .type   __riscv_restore_3,@function
-  .globl  __riscv_restore_2
-  .type   __riscv_restore_2,@function
-  .globl  __riscv_restore_1
-  .type   __riscv_restore_1,@function
-  .globl  __riscv_restore_0
-  .type   __riscv_restore_0,@function
-__riscv_restore_3:
-__riscv_restore_2:
-__riscv_restore_1:
-__riscv_restore_0:
-  lw      s2,  0(sp)
-  lw      s1,  4(sp)
-  lw      s0,  8(sp)
-  lw      ra,  12(sp)
-  addi    sp, sp, 16
-  ret
diff --git a/lib/riscv/restore_rv64.S b/lib/riscv/restore_rv64.S
@@ -1,82 +0,0 @@
-//===-- restore_rv64.S - restore up to 12 callee-save registers (RV64) ---===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-  .text
-
-  .globl  __riscv_restore_12
-  .type   __riscv_restore_12,@function
-__riscv_restore_12:
-  ld      s11, 8(sp)
-  addi    sp, sp, 16
-  // fallthrough into __riscv_restore_11/10
-
-  .globl  __riscv_restore_11
-  .type   __riscv_restore_11,@function
-  .globl  __riscv_restore_10
-  .type   __riscv_restore_10,@function
-__riscv_restore_11:
-__riscv_restore_10:
-  ld      s10, 0(sp)
-  ld      s9,  8(sp)
-  addi    sp, sp, 16
-  // fallthrough into __riscv_restore_9/8
-
-  .globl  __riscv_restore_9
-  .type   __riscv_restore_9,@function
-  .globl  __riscv_restore_8
-  .type   __riscv_restore_8,@function
-__riscv_restore_9:
-__riscv_restore_8:
-  ld      s8,  0(sp)
-  ld      s7,  8(sp)
-  addi    sp, sp, 16
-  // fallthrough into __riscv_restore_7/6
-
-  .globl  __riscv_restore_7
-  .type   __riscv_restore_7,@function
-  .globl  __riscv_restore_6
-  .type   __riscv_restore_6,@function
-__riscv_restore_7:
-__riscv_restore_6:
-  ld      s6,  0(sp)
-  ld      s5,  8(sp)
-  addi    sp, sp, 16
-  // fallthrough into __riscv_restore_5/4
-
-  .globl  __riscv_restore_5
-  .type   __riscv_restore_5,@function
-  .globl  __riscv_restore_4
-  .type   __riscv_restore_4,@function
-__riscv_restore_5:
-__riscv_restore_4:
-  ld      s4,  0(sp)
-  ld      s3,  8(sp)
-  addi    sp, sp, 16
-  // fallthrough into __riscv_restore_3/2
-
-  .globl  __riscv_restore_3
-  .type   __riscv_restore_3,@function
-  .globl  __riscv_restore_2
-  .type   __riscv_restore_2,@function
-__riscv_restore_3:
-__riscv_restore_2:
-  ld      s2,  0(sp)
-  ld      s1,  8(sp)
-  addi    sp, sp, 16
-  // fallthrough into __riscv_restore_1/0
-
-  .globl  __riscv_restore_1
-  .type   __riscv_restore_1,@function
-  .globl  __riscv_restore_0
-  .type   __riscv_restore_0,@function
-__riscv_restore_1:
-__riscv_restore_0:
-  ld      s0,  0(sp)
-  ld      ra,  8(sp)
-  addi    sp, sp, 16
-  ret
diff --git a/lib/riscv/rv32.S b/lib/riscv/rv32.S
@@ -0,0 +1,147 @@
+// Consolidated save/restore helpers for cfree's libcfree_rt.a (RISC-V rv32).
+// The build assembles only this one file for rv32; the former per-op files are
+// pasted in below, each under a "// ---- name.S ----" marker.
+// License: Apache-2.0 WITH LLVM-exception (see lib/LICENSE-compiler-rt.txt).
+
+// ---- save_rv32.S ----
+  .text
+// Save helpers: each entry stores s-registers and ra on the stack, then jumps to t0.
+  .globl  __riscv_save_12
+  .type   __riscv_save_12,@function
+__riscv_save_12:
+  addi   sp, sp, -64            // reserve the full 64-byte frame
+  mv     t1, zero               // t1 = bytes handed back before returning (none)
+  sw     s11, 12(sp)
+  j      .Lriscv_save_11_8
+
+  .globl  __riscv_save_11
+  .type   __riscv_save_11,@function
+  .globl  __riscv_save_10
+  .type   __riscv_save_10,@function
+  .globl  __riscv_save_9
+  .type   __riscv_save_9,@function
+  .globl  __riscv_save_8
+  .type   __riscv_save_8,@function
+__riscv_save_11:
+__riscv_save_10:
+__riscv_save_9:
+__riscv_save_8:
+  addi   sp, sp, -64
+  li     t1, 16                 // the lowest 16 bytes (s11's row) stay unused
+.Lriscv_save_11_8:
+  sw     s10, 16(sp)
+  sw     s9,  20(sp)
+  sw     s8,  24(sp)
+  sw     s7,  28(sp)
+  j      .Lriscv_save_7_4
+
+  .globl  __riscv_save_7
+  .type   __riscv_save_7,@function
+  .globl  __riscv_save_6
+  .type   __riscv_save_6,@function
+  .globl  __riscv_save_5
+  .type   __riscv_save_5,@function
+  .globl  __riscv_save_4
+  .type   __riscv_save_4,@function
+__riscv_save_7:
+__riscv_save_6:
+__riscv_save_5:
+__riscv_save_4:
+  addi   sp, sp, -64
+  li     t1, 32                 // the lowest 32 bytes stay unused
+.Lriscv_save_7_4:
+  sw     s6, 32(sp)
+  sw     s5, 36(sp)
+  sw     s4, 40(sp)
+  sw     s3, 44(sp)
+  sw     s2, 48(sp)
+  sw     s1, 52(sp)
+  sw     s0, 56(sp)
+  sw     ra, 60(sp)
+  add    sp, sp, t1             // give back the unused low part of the frame
+  jr     t0                     // return address is in t0; ra was stored above
+
+  .globl  __riscv_save_3
+  .type   __riscv_save_3,@function
+  .globl  __riscv_save_2
+  .type   __riscv_save_2,@function
+  .globl  __riscv_save_1
+  .type   __riscv_save_1,@function
+  .globl  __riscv_save_0
+  .type   __riscv_save_0,@function
+__riscv_save_3:
+__riscv_save_2:
+__riscv_save_1:
+__riscv_save_0:
+  addi    sp, sp, -16           // these four entries use a single 16-byte frame
+  sw      s2,  0(sp)
+  sw      s1,  4(sp)
+  sw      s0,  8(sp)
+  sw      ra,  12(sp)
+  jr      t0
+// ---- restore_rv32.S ----
+  .text
+// Restore helpers: entries fall through, popping 16 bytes per group, and end in ret.
+  .globl  __riscv_restore_12
+  .type   __riscv_restore_12,@function
+__riscv_restore_12:
+  lw      s11, 12(sp)
+  addi    sp, sp, 16
+  // fallthrough into __riscv_restore_11/10/9/8
+
+  .globl  __riscv_restore_11
+  .type   __riscv_restore_11,@function
+  .globl  __riscv_restore_10
+  .type   __riscv_restore_10,@function
+  .globl  __riscv_restore_9
+  .type   __riscv_restore_9,@function
+  .globl  __riscv_restore_8
+  .type   __riscv_restore_8,@function
+__riscv_restore_11:
+__riscv_restore_10:
+__riscv_restore_9:
+__riscv_restore_8:
+  lw      s10, 0(sp)
+  lw      s9,  4(sp)
+  lw      s8,  8(sp)
+  lw      s7,  12(sp)
+  addi    sp, sp, 16
+  // fallthrough into __riscv_restore_7/6/5/4
+
+  .globl  __riscv_restore_7
+  .type   __riscv_restore_7,@function
+  .globl  __riscv_restore_6
+  .type   __riscv_restore_6,@function
+  .globl  __riscv_restore_5
+  .type   __riscv_restore_5,@function
+  .globl  __riscv_restore_4
+  .type   __riscv_restore_4,@function
+__riscv_restore_7:
+__riscv_restore_6:
+__riscv_restore_5:
+__riscv_restore_4:
+  lw      s6,  0(sp)
+  lw      s5,  4(sp)
+  lw      s4,  8(sp)
+  lw      s3,  12(sp)
+  addi    sp, sp, 16
+  // fallthrough into __riscv_restore_3/2/1/0
+
+  .globl  __riscv_restore_3
+  .type   __riscv_restore_3,@function
+  .globl  __riscv_restore_2
+  .type   __riscv_restore_2,@function
+  .globl  __riscv_restore_1
+  .type   __riscv_restore_1,@function
+  .globl  __riscv_restore_0
+  .type   __riscv_restore_0,@function
+__riscv_restore_3:
+__riscv_restore_2:
+__riscv_restore_1:
+__riscv_restore_0:
+  lw      s2,  0(sp)
+  lw      s1,  4(sp)
+  lw      s0,  8(sp)
+  lw      ra,  12(sp)
+  addi    sp, sp, 16
+  ret                           // returns through the ra just reloaded above
diff --git a/lib/riscv/rv64.S b/lib/riscv/rv64.S
@@ -0,0 +1,169 @@
+// Consolidated save/restore helpers for cfree's libcfree_rt.a (RISC-V rv64).
+// The build assembles only this one file for rv64; the former per-op files
+// (save_rv64.S, restore_rv64.S) are inlined below rather than #included.
+// License: Apache-2.0 WITH LLVM-exception (see lib/LICENSE-compiler-rt.txt).
+
+// ---- save_rv64.S ----
+  .text
+  // __riscv_save_N (rv64): store ra and s-registers in a 112-byte frame, give back the unused t1 bytes, return through t0.
+  .globl  __riscv_save_12
+  .type   __riscv_save_12,@function
+__riscv_save_12:
+  addi   sp, sp, -112          // every entry in this chain first reserves the largest frame
+  mv     t1, zero              // 12-register entry keeps all 112 bytes
+  sd     s11, 8(sp)            // s11 goes at offset 8 of the lowest 16-byte slot; offset 0 is not written
+  j      .Lriscv_save_11_10    // skip the next entry's prologue and join its stores
+
+  .globl  __riscv_save_11
+  .type   __riscv_save_11,@function
+  .globl  __riscv_save_10
+  .type   __riscv_save_10,@function
+__riscv_save_11:
+__riscv_save_10:
+  addi   sp, sp, -112
+  li     t1, 16                // give back 16 bytes: the s11 slot is unused from this entry
+.Lriscv_save_11_10:
+  sd     s10, 16(sp)           // both labels share one entry, so s10 and s9 are stored for each
+  sd     s9,  24(sp)
+  j      .Lriscv_save_9_8
+
+  .globl  __riscv_save_9
+  .type   __riscv_save_9,@function
+  .globl  __riscv_save_8
+  .type   __riscv_save_8,@function
+__riscv_save_9:
+__riscv_save_8:
+  addi   sp, sp, -112
+  li     t1, 32                // give back 32 bytes (the s11 and s10/s9 slots)
+.Lriscv_save_9_8:
+  sd     s8,  32(sp)
+  sd     s7,  40(sp)
+  j      .Lriscv_save_7_6
+
+  .globl  __riscv_save_7
+  .type   __riscv_save_7,@function
+  .globl  __riscv_save_6
+  .type   __riscv_save_6,@function
+__riscv_save_7:
+__riscv_save_6:
+  addi   sp, sp, -112
+  li     t1, 48                // give back 48 bytes
+.Lriscv_save_7_6:
+  sd     s6,  48(sp)
+  sd     s5,  56(sp)
+  j      .Lriscv_save_5_4
+
+  .globl  __riscv_save_5
+  .type   __riscv_save_5,@function
+  .globl  __riscv_save_4
+  .type   __riscv_save_4,@function
+__riscv_save_5:
+__riscv_save_4:
+  addi   sp, sp, -112
+  li     t1, 64                // give back 64 bytes
+.Lriscv_save_5_4:
+  sd     s4, 64(sp)
+  sd     s3, 72(sp)
+  j      .Lriscv_save_3_2
+
+  .globl  __riscv_save_3
+  .type   __riscv_save_3,@function
+  .globl  __riscv_save_2
+  .type   __riscv_save_2,@function
+__riscv_save_3:
+__riscv_save_2:
+  addi   sp, sp, -112
+  li     t1, 80                // give back 80 bytes, leaving a 32-byte frame
+.Lriscv_save_3_2:
+  sd     s2, 80(sp)            // common tail reached from every entry above
+  sd     s1, 88(sp)
+  sd     s0, 96(sp)
+  sd     ra, 104(sp)           // ra is stored at the top of the frame
+  add    sp, sp, t1            // shrink the frame to 112 - t1 bytes; the stored registers stay above the new sp
+  jr     t0                    // return through t0, not ra; presumably callers link with t0 (confirm against the save/restore ABI)
+
+  .globl  __riscv_save_1
+  .type   __riscv_save_1,@function
+  .globl  __riscv_save_0
+  .type   __riscv_save_0,@function
+__riscv_save_1:
+__riscv_save_0:
+  addi   sp, sp, -16           // fixed 16-byte frame holding s0 and ra
+  sd     s0, 0(sp)             // both labels share this entry, so s0 is stored even for __riscv_save_0
+  sd     ra, 8(sp)
+  jr     t0                    // return through t0; ra is left unchanged after being stored
+// ---- restore_rv64.S ----
+  .text
+  // __riscv_restore_N (rv64): entries come in pairs; each pair reloads one 16-byte slot, pops it, and falls through.
+  .globl  __riscv_restore_12
+  .type   __riscv_restore_12,@function
+__riscv_restore_12:
+  ld      s11, 8(sp)           // only offset 8 of the first 16-byte slot is read
+  addi    sp, sp, 16           // pop that slot so the next pair can use offsets 0 and 8
+  // fallthrough into __riscv_restore_11/10
+
+  .globl  __riscv_restore_11
+  .type   __riscv_restore_11,@function
+  .globl  __riscv_restore_10
+  .type   __riscv_restore_10,@function
+__riscv_restore_11:
+__riscv_restore_10:
+  ld      s10, 0(sp)           // both labels share this address, so s10 and s9 are reloaded for each
+  ld      s9,  8(sp)
+  addi    sp, sp, 16           // pop the s10/s9 slot
+  // fallthrough into __riscv_restore_9/8
+
+  .globl  __riscv_restore_9
+  .type   __riscv_restore_9,@function
+  .globl  __riscv_restore_8
+  .type   __riscv_restore_8,@function
+__riscv_restore_9:
+__riscv_restore_8:
+  ld      s8,  0(sp)
+  ld      s7,  8(sp)
+  addi    sp, sp, 16           // pop the s8/s7 slot
+  // fallthrough into __riscv_restore_7/6
+
+  .globl  __riscv_restore_7
+  .type   __riscv_restore_7,@function
+  .globl  __riscv_restore_6
+  .type   __riscv_restore_6,@function
+__riscv_restore_7:
+__riscv_restore_6:
+  ld      s6,  0(sp)
+  ld      s5,  8(sp)
+  addi    sp, sp, 16           // pop the s6/s5 slot
+  // fallthrough into __riscv_restore_5/4
+
+  .globl  __riscv_restore_5
+  .type   __riscv_restore_5,@function
+  .globl  __riscv_restore_4
+  .type   __riscv_restore_4,@function
+__riscv_restore_5:
+__riscv_restore_4:
+  ld      s4,  0(sp)
+  ld      s3,  8(sp)
+  addi    sp, sp, 16           // pop the s4/s3 slot
+  // fallthrough into __riscv_restore_3/2
+
+  .globl  __riscv_restore_3
+  .type   __riscv_restore_3,@function
+  .globl  __riscv_restore_2
+  .type   __riscv_restore_2,@function
+__riscv_restore_3:
+__riscv_restore_2:
+  ld      s2,  0(sp)
+  ld      s1,  8(sp)
+  addi    sp, sp, 16           // pop the s2/s1 slot
+  // fallthrough into __riscv_restore_1/0
+
+  .globl  __riscv_restore_1
+  .type   __riscv_restore_1,@function
+  .globl  __riscv_restore_0
+  .type   __riscv_restore_0,@function
+__riscv_restore_1:
+__riscv_restore_0:
+  ld      s0,  0(sp)           // s0 is reloaded even for __riscv_restore_0
+  ld      ra,  8(sp)           // reload the return address that the ret below uses
+  addi    sp, sp, 16           // pop the last slot
+  ret                          // returns through the ra just reloaded; presumably reached by a tail jump from an epilogue (confirm)
diff --git a/lib/riscv/save_rv32.S b/lib/riscv/save_rv32.S
@@ -1,88 +0,0 @@
-//===-- save_rv32.S - save up to 12 callee-saved registers (RV32) --------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Multiple entry points depending on number of registers to save.
-// Entry points are grouped in 4s for rv32 to maintain 16-byte stack alignment.
-//
-//===----------------------------------------------------------------------===//
-
-  .text
-
-  .globl  __riscv_save_12
-  .type   __riscv_save_12,@function
-__riscv_save_12:
-  addi   sp, sp, -64
-  mv     t1, zero
-  sw     s11, 12(sp)
-  j      .Lriscv_save_11_8
-
-  .globl  __riscv_save_11
-  .type   __riscv_save_11,@function
-  .globl  __riscv_save_10
-  .type   __riscv_save_10,@function
-  .globl  __riscv_save_9
-  .type   __riscv_save_9,@function
-  .globl  __riscv_save_8
-  .type   __riscv_save_8,@function
-__riscv_save_11:
-__riscv_save_10:
-__riscv_save_9:
-__riscv_save_8:
-  addi   sp, sp, -64
-  li     t1, 16
-.Lriscv_save_11_8:
-  sw     s10, 16(sp)
-  sw     s9,  20(sp)
-  sw     s8,  24(sp)
-  sw     s7,  28(sp)
-  j      .Lriscv_save_7_4
-
-  .globl  __riscv_save_7
-  .type   __riscv_save_7,@function
-  .globl  __riscv_save_6
-  .type   __riscv_save_6,@function
-  .globl  __riscv_save_5
-  .type   __riscv_save_5,@function
-  .globl  __riscv_save_4
-  .type   __riscv_save_4,@function
-__riscv_save_7:
-__riscv_save_6:
-__riscv_save_5:
-__riscv_save_4:
-  addi   sp, sp, -64
-  li     t1, 32
-.Lriscv_save_7_4:
-  sw     s6, 32(sp)
-  sw     s5, 36(sp)
-  sw     s4, 40(sp)
-  sw     s3, 44(sp)
-  sw     s2, 48(sp)
-  sw     s1, 52(sp)
-  sw     s0, 56(sp)
-  sw     ra, 60(sp)
-  add    sp, sp, t1
-  jr     t0
-
-  .globl  __riscv_save_3
-  .type   __riscv_save_3,@function
-  .globl  __riscv_save_2
-  .type   __riscv_save_2,@function
-  .globl  __riscv_save_1
-  .type   __riscv_save_1,@function
-  .globl  __riscv_save_0
-  .type   __riscv_save_0,@function
-__riscv_save_3:
-__riscv_save_2:
-__riscv_save_1:
-__riscv_save_0:
-  addi    sp, sp, -16
-  sw      s2,  0(sp)
-  sw      s1,  4(sp)
-  sw      s0,  8(sp)
-  sw      ra,  12(sp)
-  jr      t0
diff --git a/lib/riscv/save_rv64.S b/lib/riscv/save_rv64.S
@@ -1,101 +0,0 @@
-//===-- save_rv64.S - save up to 12 callee-saved registers (RV64) --------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Multiple entry points depending on number of registers to save.
-// Entry points are grouped in 2s for rv64 to maintain 16-byte stack alignment.
-//
-//===----------------------------------------------------------------------===//
-
-  .text
-
-  .globl  __riscv_save_12
-  .type   __riscv_save_12,@function
-__riscv_save_12:
-  addi   sp, sp, -112
-  mv     t1, zero
-  sd     s11, 8(sp)
-  j      .Lriscv_save_11_10
-
-  .globl  __riscv_save_11
-  .type   __riscv_save_11,@function
-  .globl  __riscv_save_10
-  .type   __riscv_save_10,@function
-__riscv_save_11:
-__riscv_save_10:
-  addi   sp, sp, -112
-  li     t1, 16
-.Lriscv_save_11_10:
-  sd     s10, 16(sp)
-  sd     s9,  24(sp)
-  j      .Lriscv_save_9_8
-
-  .globl  __riscv_save_9
-  .type   __riscv_save_9,@function
-  .globl  __riscv_save_8
-  .type   __riscv_save_8,@function
-__riscv_save_9:
-__riscv_save_8:
-  addi   sp, sp, -112
-  li     t1, 32
-.Lriscv_save_9_8:
-  sd     s8,  32(sp)
-  sd     s7,  40(sp)
-  j      .Lriscv_save_7_6
-
-  .globl  __riscv_save_7
-  .type   __riscv_save_7,@function
-  .globl  __riscv_save_6
-  .type   __riscv_save_6,@function
-__riscv_save_7:
-__riscv_save_6:
-  addi   sp, sp, -112
-  li     t1, 48
-.Lriscv_save_7_6:
-  sd     s6,  48(sp)
-  sd     s5,  56(sp)
-  j      .Lriscv_save_5_4
-
-  .globl  __riscv_save_5
-  .type   __riscv_save_5,@function
-  .globl  __riscv_save_4
-  .type   __riscv_save_4,@function
-__riscv_save_5:
-__riscv_save_4:
-  addi   sp, sp, -112
-  li     t1, 64
-.Lriscv_save_5_4:
-  sd     s4, 64(sp)
-  sd     s3, 72(sp)
-  j      .Lriscv_save_3_2
-
-  .globl  __riscv_save_3
-  .type   __riscv_save_3,@function
-  .globl  __riscv_save_2
-  .type   __riscv_save_2,@function
-__riscv_save_3:
-__riscv_save_2:
-  addi   sp, sp, -112
-  li     t1, 80
-.Lriscv_save_3_2:
-  sd     s2, 80(sp)
-  sd     s1, 88(sp)
-  sd     s0, 96(sp)
-  sd     ra, 104(sp)
-  add    sp, sp, t1
-  jr     t0
-
-  .globl  __riscv_save_1
-  .type   __riscv_save_1,@function
-  .globl  __riscv_save_0
-  .type   __riscv_save_0,@function
-__riscv_save_1:
-__riscv_save_0:
-  addi   sp, sp, -16
-  sd     s0, 0(sp)
-  sd     ra, 8(sp)
-  jr     t0

	kit kit
	git clone https://git.ryansepassi.com/git/kit.git
	Log \| Files \| Refs \| README

M	lib/README.md	\|	269	++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------
A	lib/arm/aeabi.c	\|	20	++++++++++++++++++++
D	lib/arm/aeabi_dcmp.S	\|	45	---------------------------------------------
D	lib/arm/aeabi_drsub.c	\|	14	--------------
D	lib/arm/aeabi_fcmp.S	\|	45	---------------------------------------------
D	lib/arm/aeabi_frsub.c	\|	14	--------------
D	lib/arm/aeabi_idivmod.S	\|	33	---------------------------------
D	lib/arm/aeabi_idivmod_thumb1.S	\|	28	----------------------------
D	lib/arm/aeabi_ldivmod.S	\|	34	----------------------------------
D	lib/arm/aeabi_memcmp.S	\|	23	-----------------------
D	lib/arm/aeabi_memcmp_thumb1.S	\|	25	-------------------------
D	lib/arm/aeabi_memcpy.S	\|	23	-----------------------
D	lib/arm/aeabi_memcpy_thumb1.S	\|	25	-------------------------
D	lib/arm/aeabi_memmove.S	\|	22	----------------------
D	lib/arm/aeabi_memmove_thumb1.S	\|	24	------------------------
D	lib/arm/aeabi_memset.S	\|	37	-------------------------------------
D	lib/arm/aeabi_memset_thumb1.S	\|	42	------------------------------------------
A	lib/arm/aeabi_thumb1.S	\|	261	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	lib/arm/aeabi_thumb2.S	\|	268	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D	lib/arm/aeabi_uidivmod.S	\|	34	----------------------------------
D	lib/arm/aeabi_uidivmod_thumb1.S	\|	31	-------------------------------
D	lib/arm/aeabi_uldivmod.S	\|	34	----------------------------------
M	lib/build.sh	\|	172	++++++++++++++++++++++++++++++-------------------------------------------------
D	lib/fp/adddf3.c	\|	17	-----------------
D	lib/fp/addsf3.c	\|	17	-----------------
D	lib/fp/comparedf2.c	\|	52	----------------------------------------------------
D	lib/fp/comparesf2.c	\|	52	----------------------------------------------------
D	lib/fp/divdf3.c	\|	25	-------------------------
D	lib/fp/divsf3.c	\|	27	---------------------------
D	lib/fp/extendsfdf2.c	\|	17	-----------------
D	lib/fp/fixdfdi.c	\|	26	--------------------------
D	lib/fp/fixdfsi.c	\|	21	---------------------
D	lib/fp/fixsfdi.c	\|	26	--------------------------
D	lib/fp/fixsfsi.c	\|	21	---------------------
D	lib/fp/fixunsdfdi.c	\|	24	------------------------
D	lib/fp/fixunsdfsi.c	\|	19	-------------------
D	lib/fp/fixunssfdi.c	\|	24	------------------------
D	lib/fp/fixunssfsi.c	\|	23	-----------------------
D	lib/fp/floatdidf.c	\|	35	-----------------------------------
D	lib/fp/floatdisf.c	\|	30	------------------------------
D	lib/fp/floatsidf.c	\|	51	---------------------------------------------------
D	lib/fp/floatsisf.c	\|	59	-----------------------------------------------------------
D	lib/fp/floatundidf.c	\|	35	-----------------------------------
D	lib/fp/floatundisf.c	\|	30	------------------------------
D	lib/fp/floatunsidf.c	\|	40	----------------------------------------
D	lib/fp/floatunsisf.c	\|	50	--------------------------------------------------
A	lib/fp/fp.c	\|	512	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D	lib/fp/fp_mode.c	\|	22	----------------------
D	lib/fp/muldf3.c	\|	18	------------------
D	lib/fp/mulsf3.c	\|	18	------------------
D	lib/fp/negdf2.c	\|	17	-----------------
D	lib/fp/negsf2.c	\|	17	-----------------
D	lib/fp/subdf3.c	\|	20	--------------------
D	lib/fp/subsf3.c	\|	20	--------------------
D	lib/fp/truncdfsf2.c	\|	17	-----------------
D	lib/fp_tf/addtf3.c	\|	21	---------------------
D	lib/fp_tf/comparetf2.c	\|	51	---------------------------------------------------
D	lib/fp_tf/divtf3.c	\|	27	---------------------------
D	lib/fp_tf/extenddftf2.c	\|	20	--------------------
D	lib/fp_tf/extendsftf2.c	\|	20	--------------------
D	lib/fp_tf/fixtfdi.c	\|	21	---------------------
D	lib/fp_tf/fixtfsi.c	\|	21	---------------------
D	lib/fp_tf/fixtfti.c	\|	21	---------------------
D	lib/fp_tf/fixunstfdi.c	\|	19	-------------------
D	lib/fp_tf/fixunstfsi.c	\|	19	-------------------
D	lib/fp_tf/fixunstfti.c	\|	19	-------------------
D	lib/fp_tf/floatditf.c	\|	47	-----------------------------------------------
D	lib/fp_tf/floatsitf.c	\|	47	-----------------------------------------------
D	lib/fp_tf/floattitf.c	\|	36	------------------------------------
D	lib/fp_tf/floatunditf.c	\|	38	--------------------------------------
D	lib/fp_tf/floatunsitf.c	\|	38	--------------------------------------
D	lib/fp_tf/floatuntitf.c	\|	36	------------------------------------
A	lib/fp_tf/fp_tf.c	\|	387	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D	lib/fp_tf/multf3.c	\|	20	--------------------
D	lib/fp_tf/subtf3.c	\|	22	----------------------
D	lib/fp_tf/trunctfdf2.c	\|	20	--------------------
D	lib/fp_tf/trunctfsf2.c	\|	20	--------------------
D	lib/fp_ti/fixdfti.c	\|	24	------------------------
D	lib/fp_ti/fixsfti.c	\|	24	------------------------
D	lib/fp_ti/fixunsdfti.c	\|	20	--------------------
D	lib/fp_ti/fixunssfti.c	\|	23	-----------------------
D	lib/fp_ti/floattidf.c	\|	32	--------------------------------
D	lib/fp_ti/floattisf.c	\|	31	-------------------------------
D	lib/fp_ti/floatuntidf.c	\|	32	--------------------------------
D	lib/fp_ti/floatuntisf.c	\|	31	-------------------------------
A	lib/fp_ti/fp_ti.c	\|	158	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D	lib/impl/fp_extend.h	\|	277	-------------------------------------------------------------------------------
M	lib/impl/fp_extend_impl.inc	\|	259	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
D	lib/impl/fp_trunc.h	\|	259	-------------------------------------------------------------------------------
M	lib/impl/fp_trunc_impl.inc	\|	243	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
D	lib/impl/int_to_fp.h	\|	174	-------------------------------------------------------------------------------
M	lib/impl/int_to_fp_impl.inc	\|	158	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
M	lib/include/common/fp_lib.h	\|	1	-
D	lib/include/ilp32_le/int_endianness.h	\|	13	-------------
M	lib/include/ilp32_le/int_lib.h	\|	43	++++++++++++++++++++++++++++++++++++++++++-
D	lib/include/ilp32_le/int_types.h	\|	48	------------------------------------------------
D	lib/include/llp64_le/int_endianness.h	\|	13	-------------
M	lib/include/llp64_le/int_lib.h	\|	64	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
D	lib/include/llp64_le/int_types.h	\|	69	---------------------------------------------------------------------
D	lib/include/lp64_le/int_endianness.h	\|	13	-------------
M	lib/include/lp64_le/int_lib.h	\|	69	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
D	lib/include/lp64_le/int_types.h	\|	74	--------------------------------------------------------------------------
M	lib/include/lp64_le_ldbl128/tf_supplement.h	\|	6	+++---
D	lib/int/absvdi2.c	\|	25	-------------------------
D	lib/int/bswapdi2.c	\|	25	-------------------------
D	lib/int/bswapsi2.c	\|	20	--------------------
D	lib/int/clzdi2.c	\|	25	-------------------------
D	lib/int/clzsi2.c	\|	48	------------------------------------------------
D	lib/int/cmpdi2.c	\|	34	----------------------------------
D	lib/int/ctzdi2.c	\|	25	-------------------------
D	lib/int/ctzsi2.c	\|	53	-----------------------------------------------------
D	lib/int/divdi3.c	\|	25	-------------------------
D	lib/int/divmoddi4.c	\|	28	----------------------------
D	lib/int/ffsdi2.c	\|	27	---------------------------
A	lib/int/int.c	\|	558	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D	lib/int/int_util.c	\|	20	--------------------
D	lib/int/moddi3.c	\|	25	-------------------------
D	lib/int/negdi2.c	\|	21	---------------------
D	lib/int/paritydi2.c	\|	25	-------------------------
D	lib/int/paritysi2.c	\|	23	-----------------------
D	lib/int/popcountdi2.c	\|	32	--------------------------------
D	lib/int/popcountsi2.c	\|	29	-----------------------------
D	lib/int/ucmpdi2.c	\|	34	----------------------------------
D	lib/int/udivdi3.c	\|	24	------------------------
D	lib/int/udivmoddi4.c	\|	191	-------------------------------------------------------------------------------
D	lib/int/umoddi3.c	\|	24	------------------------
D	lib/int32/ashldi3.c	\|	36	------------------------------------
D	lib/int32/ashrdi3.c	\|	37	-------------------------------------
A	lib/int32/int32.c	\|	119	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D	lib/int32/lshrdi3.c	\|	35	-----------------------------------
D	lib/int32/muldi3.c	\|	48	------------------------------------------------
D	lib/int64/ashlti3.c	\|	37	-------------------------------------
D	lib/int64/ashrti3.c	\|	38	--------------------------------------
D	lib/int64/clzti2.c	\|	27	---------------------------
D	lib/int64/ctzti2.c	\|	27	---------------------------
D	lib/int64/divmodti4.c	\|	30	------------------------------
D	lib/int64/divti3.c	\|	27	---------------------------
A	lib/int64/int64.c	\|	383	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D	lib/int64/lshrti3.c	\|	36	------------------------------------
D	lib/int64/modti3.c	\|	27	---------------------------
D	lib/int64/multi3.c	\|	49	-------------------------------------------------
D	lib/int64/negti2.c	\|	23	-----------------------
D	lib/int64/udivmodti4.c	\|	148	-------------------------------------------------------------------------------
D	lib/int64/udivti3.c	\|	21	---------------------
D	lib/int64/umodti3.c	\|	23	-----------------------
D	lib/riscv/restore_rv32.S	\|	73	-------------------------------------------------------------------------
D	lib/riscv/restore_rv64.S	\|	82	-------------------------------------------------------------------------------
A	lib/riscv/rv32.S	\|	147	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	lib/riscv/rv64.S	\|	169	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D	lib/riscv/save_rv32.S	\|	88	-------------------------------------------------------------------------------
D	lib/riscv/save_rv64.S	\|	101	-------------------------------------------------------------------------------