commit 05db6b973f7e1398455d363ebd3ba815c3f82d53
parent 6fcad2bdde71cb0814a384cf7a415bf6328688eb
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Thu, 7 May 2026 11:15:29 -0700
make compiler-rt templates re-includable; add build.sh
Each impl/*.inc and the fp_lib.h header is parameterized so multiple
inclusions in one TU (different precisions, src/dst pairs, or callsites)
emit uniquely-suffixed static helpers. Bare-name aliases let existing
per-op .c files compile unchanged. fp_lib_undef.h clears the aliases
between sections of a future consolidated TU.
build.sh exhaustively enumerates the (target, mode) tuples cfree
documents and builds libcfree_rt-<variant>.a with clang for each
(13 variants).
Diffstat:
59 files changed, 1205 insertions(+), 138 deletions(-)
diff --git a/lib/build.sh b/lib/build.sh
@@ -0,0 +1,225 @@
+#!/usr/bin/env bash
+#
+# build.sh — exhaustive build of libcfree_rt.a for every (target, mode) tuple
+# cfree supports. Compiles every required source file with clang for each
+# variant; on success, archives the objects with llvm-ar (if present) into
+# $OUT/libcfree_rt-<variant>.a.
+#
+# Reports OK / FAIL per variant; exits non-zero if any fail.
+# Override defaults via env: CC=clang OUT=/tmp/cfree_rt_build AR=llvm-ar.
+#
+# Run from anywhere — the script self-locates and cd's to lib/ first.
+
+# Treat unset variables as errors and make a pipeline fail if any stage fails.
+# -e is not enabled: failures are checked explicitly (the cd below, and each
+# compile inside build_variant) so one bad source does not stop the run.
+set -uo pipefail
+
+# All source globs and -I paths below are relative to this script's directory.
+cd "$(dirname "$0")" || exit 1
+
+# Tool and output-directory defaults; each can be overridden from the
+# environment.
+CC=${CC:-clang}
+AR=${AR:-llvm-ar}
+OUT=${OUT:-/tmp/cfree_rt_build}
+
+# Freestanding, warnings on, common headers reachable. -fno-builtin keeps the
+# compiler from rewriting our memcpy/memset/etc. into recursive calls.
+COMMON_FLAGS=(-c -ffreestanding -fno-builtin -Wall -Wextra
+ -Iinclude/common -Iimpl)
+
+mkdir -p "$OUT"
+# Tallies updated by build_variant and reported in the final summary.
+OK=0
+FAIL=0
+FAILED_VARIANTS=()
+
+#-------------------------------------------------------------------------------
+# build_variant <name> "<extra cc flags>" <source files...>
+#
+# Compiles every source file for one variant and, when all of them build,
+# archives the objects into $OUT/libcfree_rt-<name>.a.
+#
+# Arguments:
+#   <name>             variant name; names the object directory $OUT/<name>/,
+#                      its build.log and the archive
+#   <extra cc flags>   ONE string of whitespace-separated compiler flags,
+#                      passed after COMMON_FLAGS
+#   <source files...>  sources to compile, relative to the current directory
+# Globals:
+#   reads CC, AR, OUT, COMMON_FLAGS; updates OK, FAIL, FAILED_VARIANTS
+# Output:
+#   one status line on stdout; compiler output and archiver errors are
+#   appended to $OUT/<name>/build.log
+#
+# A variant counts as failed only when a source fails to compile. A missing or
+# failing archiver is mentioned on the status line but still counts as OK.
+#-------------------------------------------------------------------------------
+build_variant() {
+  local name=$1
+  local flags_str=$2
+  shift 2
+  local sources=("$@")
+  # Intentionally unquoted so the flag string is split into separate words.
+  # shellcheck disable=SC2206
+  local flags=( $flags_str )
+
+  local out="$OUT/$name"
+  mkdir -p "$out"
+  local log="$out/build.log"
+  : >"$log"   # start every run with an empty log
+
+  local objs=()
+  local nfail=0
+  local src obj
+  for src in "${sources[@]}"; do
+    # Flatten the path (int/foo.c -> int_foo.c.o) so sources that share a
+    # basename in different directories get distinct object files.
+    obj="$out/${src//\//_}.o"
+    if "$CC" "${COMMON_FLAGS[@]}" "${flags[@]}" -o "$obj" "$src" >>"$log" 2>&1; then
+      objs+=("$obj")
+    else
+      nfail=$((nfail+1))
+      echo "FAIL: $src" >>"$log"
+    fi
+  done
+
+  local total=${#sources[@]}
+  if [ "$nfail" -ne 0 ]; then
+    printf " %-32s FAIL (%d/%d sources failed; see %s)\n" \
+      "$name" "$nfail" "$total" "$log"
+    FAIL=$((FAIL+1))
+    FAILED_VARIANTS+=("$name")
+    return
+  fi
+
+  # Archive (best-effort; not fatal if AR is missing or fails).
+  if command -v "$AR" >/dev/null 2>&1; then
+    local archive="$OUT/libcfree_rt-$name.a"
+    # `ar r` only inserts or replaces the members it is handed, so an archive
+    # left in $OUT by an earlier run would keep objects whose sources have
+    # since been removed or renamed. Always rebuild it from scratch.
+    rm -f -- "$archive"
+    if "$AR" rcs "$archive" "${objs[@]}" 2>>"$log"; then
+      printf " %-32s OK (%d objs → %s)\n" \
+        "$name" "$total" "$(basename "$archive")"
+    else
+      printf " %-32s OK (%d objs; %s failed to archive)\n" \
+        "$name" "$total" "$AR"
+    fi
+  else
+    printf " %-32s OK (%d objs; %s not found, no archive)\n" \
+      "$name" "$total" "$AR"
+  fi
+  OK=$((OK+1))
+}
+
+#-------------------------------------------------------------------------------
+# Source sets
+#-------------------------------------------------------------------------------
+
+# Each list is a glob expanded relative to the current directory (lib/).
+# nullglob is not enabled, so a pattern that matches nothing stays in the list
+# as a literal string.
+INT_C=( int/*.c )
+INT32_C=( int32/*.c )
+INT64_C=( int64/*.c )
+FP_C=( fp/*.c )
+FP_TF_C=( fp_tf/*.c )
+FP_TI_C=( fp_ti/*.c )
+MEM_C=( mem/mem.c )
+ATOMIC_C=( atomic/atomic_freestanding.c )
+
+# ARM AEABI: 6 base files have a *_thumb1.S companion (idivmod, uidivmod,
+# memcpy, memmove, memset, memcmp). The Thumb2 build uses base files only;
+# the Thumb1 build uses *_thumb1.S in place of those base files, plus the
+# ISA-agnostic ones (ldivmod, uldivmod, dcmp, fcmp, drsub, frsub).
+ARM_AEABI_THUMB2=( arm/aeabi_*.S arm/aeabi_*.c )
+# Filter out *_thumb1.S from the Thumb2 list.
+_t2=()
+for f in "${ARM_AEABI_THUMB2[@]}"; do
+ case "$f" in *_thumb1.S) ;; *) _t2+=("$f") ;; esac
+done
+ARM_AEABI_THUMB2=("${_t2[@]}")
+
+# Build the Thumb1 list: every *_thumb1.S, plus any base file whose stem has
+# no *_thumb1.S partner.
+_thumb1_stems=()
+for f in arm/aeabi_*_thumb1.S; do
+ # Stem = file name without directory and without the _thumb1.S suffix.
+ _thumb1_stems+=( "$(basename "$f" _thumb1.S)" )
+done
+ARM_AEABI_THUMB1=( arm/aeabi_*_thumb1.S )
+for f in "${ARM_AEABI_THUMB2[@]}"; do
+ # Strip whichever extension the base file has; basename only removes a
+ # suffix that matches, so one of the two calls leaves the name unchanged.
+ stem=$(basename "$f" .S); stem=$(basename "$stem" .c)
+ has_t1=0
+ for s in "${_thumb1_stems[@]}"; do
+ [ "$stem" = "$s" ] && { has_t1=1; break; }
+ done
+ # Keep the base file only when no Thumb1-specific replacement exists.
+ [ $has_t1 -eq 0 ] && ARM_AEABI_THUMB1+=("$f")
+done
+
+# RISC-V save/restore assembly sources; passed only to the *-save-restore
+# variants below.
+RV32_SR=( riscv/save_rv32.S riscv/restore_rv32.S )
+RV64_SR=( riscv/save_rv64.S riscv/restore_rv64.S )
+
+#-------------------------------------------------------------------------------
+# Variants — each combination of (data model, target, mode) cfree supports.
+#-------------------------------------------------------------------------------
+# Banner: show the effective tool settings before the per-variant status lines.
+echo "Building libcfree_rt for every supported (target, mode) tuple"
+echo " CC=$CC, AR=$AR, OUT=$OUT"
+echo
+
+# ---- LP64 little-endian ------------------------------------------------------
+
+# x86_64 Linux / Darwin / RV64 / aarch64 base: int + int64 + fp + atomic + mem.
+# binary64 long double (no fp_tf).
+LP64_BASE=( "${INT_C[@]}" "${INT64_C[@]}" "${FP_C[@]}" "${MEM_C[@]}" "${ATOMIC_C[@]}" )
+
+build_variant x86_64-linux \
+ "--target=x86_64-linux-gnu -Iinclude/lp64_le -DHAS_INT128=1" \
+ "${LP64_BASE[@]}"
+
+build_variant x86_64-apple-darwin \
+ "--target=x86_64-apple-darwin -Iinclude/lp64_le -DHAS_INT128=1" \
+ "${LP64_BASE[@]}"
+
+# aarch64-linux: long double is binary128 by default (no flag to change),
+# so the build pulls in fp_tf + fp_ti and pre-includes tf_supplement.h.
+# include/lp64_le_ldbl128 is listed before include/lp64_le, so it is searched
+# first.
+build_variant aarch64-linux \
+ "--target=aarch64-linux-gnu \
+ -Iinclude/lp64_le_ldbl128 -Iinclude/lp64_le -DHAS_INT128=1 \
+ -include include/lp64_le_ldbl128/tf_supplement.h" \
+ "${INT_C[@]}" "${INT64_C[@]}" "${FP_C[@]}" "${FP_TF_C[@]}" "${FP_TI_C[@]}" \
+ "${MEM_C[@]}" "${ATOMIC_C[@]}"
+
+# aarch64-apple-darwin: long double is binary64 (no fp_tf needed).
+build_variant aarch64-apple-darwin \
+ "--target=aarch64-apple-darwin -Iinclude/lp64_le -DHAS_INT128=1" \
+ "${LP64_BASE[@]}"
+
+# RISC-V 64 (soft-float) — with and without -msave-restore.
+# -mabi=lp64 is the soft-float calling convention; -march still enables the
+# F and D extensions. The save-restore variant additionally builds RV64_SR.
+build_variant riscv64-elf \
+ "--target=riscv64-unknown-elf -mabi=lp64 -march=rv64imafd \
+ -Iinclude/lp64_le -DHAS_INT128=1" \
+ "${LP64_BASE[@]}"
+
+build_variant riscv64-elf-save-restore \
+ "--target=riscv64-unknown-elf -mabi=lp64 -march=rv64imafd -msave-restore \
+ -Iinclude/lp64_le -DHAS_INT128=1" \
+ "${LP64_BASE[@]}" "${RV64_SR[@]}"
+
+# ---- LLP64 little-endian (Win64) --------------------------------------------
+
+# Same source lists as LP64_BASE, spelled out; compared with x86_64-linux only
+# the target triple and the include directory (llp64_le) differ.
+build_variant x86_64-pc-windows \
+ "--target=x86_64-pc-windows-msvc -Iinclude/llp64_le -DHAS_INT128=1" \
+ "${INT_C[@]}" "${INT64_C[@]}" "${FP_C[@]}" "${MEM_C[@]}" "${ATOMIC_C[@]}"
+
+# ---- ILP32 little-endian -----------------------------------------------------
+
+# ILP32 base: int + int32 + fp + mem + atomic. Every variant in this section
+# uses include/ilp32_le and is built with -DHAS_INT128=0.
+ILP32_BASE=( "${INT_C[@]}" "${INT32_C[@]}" "${FP_C[@]}" "${MEM_C[@]}" "${ATOMIC_C[@]}" )
+
+build_variant i386-linux \
+ "--target=i386-linux-gnu -Iinclude/ilp32_le -DHAS_INT128=0" \
+ "${ILP32_BASE[@]}"
+
+build_variant wasm32 \
+ "--target=wasm32-unknown-unknown -Iinclude/ilp32_le -DHAS_INT128=0" \
+ "${ILP32_BASE[@]}"
+
+# RISC-V 32 — with and without -msave-restore; the save-restore variant
+# additionally builds RV32_SR.
+build_variant riscv32-elf \
+ "--target=riscv32-unknown-elf -mabi=ilp32 -march=rv32imafd \
+ -Iinclude/ilp32_le -DHAS_INT128=0" \
+ "${ILP32_BASE[@]}"
+
+build_variant riscv32-elf-save-restore \
+ "--target=riscv32-unknown-elf -mabi=ilp32 -march=rv32imafd -msave-restore \
+ -Iinclude/ilp32_le -DHAS_INT128=0" \
+ "${ILP32_BASE[@]}" "${RV32_SR[@]}"
+
+# ARM32 ARMv7+/Thumb2 (AEABI). The .S files use the AEABI base PCS regardless
+# of FPU presence, so one variant covers soft- and hard-float targets.
+build_variant arm-eabi-thumb2 \
+ "--target=arm-none-eabi -march=armv7-a -mthumb -mfloat-abi=soft \
+ -Iinclude/ilp32_le -DHAS_INT128=0" \
+ "${ILP32_BASE[@]}" "${ARM_AEABI_THUMB2[@]}"
+
+# ARM32 ARMv6-M / Cortex-M0/M0+/M1 (Thumb1).
+# Same flags as the Thumb2 variant apart from -march; the AEABI sources come
+# from ARM_AEABI_THUMB1 instead.
+build_variant arm-eabi-thumb1 \
+ "--target=arm-none-eabi -march=armv6-m -mthumb -mfloat-abi=soft \
+ -Iinclude/ilp32_le -DHAS_INT128=0" \
+ "${ILP32_BASE[@]}" "${ARM_AEABI_THUMB1[@]}"
+
+#-------------------------------------------------------------------------------
+# Final report: totals first, then one line per failed variant with the path
+# of its build log. The script exits 1 if any variant failed.
+printf '\n'
+printf '%s\n' "Summary: $OK ok, $FAIL failed"
+if (( FAIL != 0 )); then
+  printf '%s\n' "Failed variants:"
+  for failed in "${FAILED_VARIANTS[@]}"; do
+    printf '%s\n' " - $failed ($OUT/$failed/build.log)"
+  done
+  exit 1
+fi
diff --git a/lib/fp/divdf3.c b/lib/fp/divdf3.c
@@ -20,3 +20,6 @@
COMPILER_RT_ABI fp_t __divdf3(fp_t a, fp_t b) { return __divXf3__(a, b); }
+
+#undef NUMBER_OF_HALF_ITERATIONS
+#undef NUMBER_OF_FULL_ITERATIONS
diff --git a/lib/fp/divsf3.c b/lib/fp/divsf3.c
@@ -21,3 +21,7 @@
COMPILER_RT_ABI fp_t __divsf3(fp_t a, fp_t b) { return __divXf3__(a, b); }
+
+#undef NUMBER_OF_HALF_ITERATIONS
+#undef NUMBER_OF_FULL_ITERATIONS
+#undef USE_NATIVE_FULL_ITERATIONS
diff --git a/lib/fp/extendsfdf2.c b/lib/fp/extendsfdf2.c
@@ -12,3 +12,6 @@
COMPILER_RT_ABI double __extendsfdf2(float a) { return __extendXfYf2__(a); }
+
+#undef SRC_SINGLE
+#undef DST_DOUBLE
diff --git a/lib/fp/fixdfdi.c b/lib/fp/fixdfdi.c
@@ -13,9 +13,14 @@
// flags to set, and we don't want to code-gen to an unknown soft-float
// implementation.
-typedef di_int fixint_t;
-typedef du_int fixuint_t;
+#define fixint_t di_int
+#define fixuint_t du_int
+#define FP_FIX_SUFFIX fixdfdi
#include "fp_fixint_impl.inc"
COMPILER_RT_ABI di_int __fixdfdi(fp_t a) { return __fixint(a); }
+
+#undef fixint_t
+#undef fixuint_t
+#undef FP_FIX_SUFFIX
diff --git a/lib/fp/fixdfsi.c b/lib/fp/fixdfsi.c
@@ -8,9 +8,14 @@
#define DOUBLE_PRECISION
#include "fp_lib.h"
-typedef si_int fixint_t;
-typedef su_int fixuint_t;
+#define fixint_t si_int
+#define fixuint_t su_int
+#define FP_FIX_SUFFIX fixdfsi
#include "fp_fixint_impl.inc"
COMPILER_RT_ABI si_int __fixdfsi(fp_t a) { return __fixint(a); }
+
+#undef fixint_t
+#undef fixuint_t
+#undef FP_FIX_SUFFIX
diff --git a/lib/fp/fixsfdi.c b/lib/fp/fixsfdi.c
@@ -13,9 +13,14 @@
// flags to set, and we don't want to code-gen to an unknown soft-float
// implementation.
-typedef di_int fixint_t;
-typedef du_int fixuint_t;
+#define fixint_t di_int
+#define fixuint_t du_int
+#define FP_FIX_SUFFIX fixsfdi
#include "fp_fixint_impl.inc"
COMPILER_RT_ABI di_int __fixsfdi(fp_t a) { return __fixint(a); }
+
+#undef fixint_t
+#undef fixuint_t
+#undef FP_FIX_SUFFIX
diff --git a/lib/fp/fixsfsi.c b/lib/fp/fixsfsi.c
@@ -8,9 +8,14 @@
#define SINGLE_PRECISION
#include "fp_lib.h"
-typedef si_int fixint_t;
-typedef su_int fixuint_t;
+#define fixint_t si_int
+#define fixuint_t su_int
+#define FP_FIX_SUFFIX fixsfsi
#include "fp_fixint_impl.inc"
COMPILER_RT_ABI si_int __fixsfsi(fp_t a) { return __fixint(a); }
+
+#undef fixint_t
+#undef fixuint_t
+#undef FP_FIX_SUFFIX
diff --git a/lib/fp/fixunsdfdi.c b/lib/fp/fixunsdfdi.c
@@ -13,8 +13,12 @@
// flags to set, and we don't want to code-gen to an unknown soft-float
// implementation.
-typedef du_int fixuint_t;
+#define fixuint_t du_int
+#define FP_FIX_SUFFIX fixunsdfdi
#include "fp_fixuint_impl.inc"
COMPILER_RT_ABI du_int __fixunsdfdi(fp_t a) { return __fixuint(a); }
+
+#undef fixuint_t
+#undef FP_FIX_SUFFIX
diff --git a/lib/fp/fixunsdfsi.c b/lib/fp/fixunsdfsi.c
@@ -8,8 +8,12 @@
#define DOUBLE_PRECISION
#include "fp_lib.h"
-typedef su_int fixuint_t;
+#define fixuint_t su_int
+#define FP_FIX_SUFFIX fixunsdfsi
#include "fp_fixuint_impl.inc"
COMPILER_RT_ABI su_int __fixunsdfsi(fp_t a) { return __fixuint(a); }
+
+#undef fixuint_t
+#undef FP_FIX_SUFFIX
diff --git a/lib/fp/fixunssfdi.c b/lib/fp/fixunssfdi.c
@@ -13,8 +13,12 @@
// flags to set, and we don't want to code-gen to an unknown soft-float
// implementation.
-typedef du_int fixuint_t;
+#define fixuint_t du_int
+#define FP_FIX_SUFFIX fixunssfdi
#include "fp_fixuint_impl.inc"
COMPILER_RT_ABI du_int __fixunssfdi(fp_t a) { return __fixuint(a); }
+
+#undef fixuint_t
+#undef FP_FIX_SUFFIX
diff --git a/lib/fp/fixunssfsi.c b/lib/fp/fixunssfsi.c
@@ -12,8 +12,12 @@
#define SINGLE_PRECISION
#include "fp_lib.h"
-typedef su_int fixuint_t;
+#define fixuint_t su_int
+#define FP_FIX_SUFFIX fixunssfsi
#include "fp_fixuint_impl.inc"
COMPILER_RT_ABI su_int __fixunssfsi(fp_t a) { return __fixuint(a); }
+
+#undef fixuint_t
+#undef FP_FIX_SUFFIX
diff --git a/lib/fp/floatdidf.c b/lib/fp/floatdidf.c
@@ -30,3 +30,6 @@
COMPILER_RT_ABI double __floatdidf(di_int a) { return __floatXiYf__(a); }
+
+#undef SRC_I64
+#undef DST_DOUBLE
diff --git a/lib/fp/floatdisf.c b/lib/fp/floatdisf.c
@@ -25,3 +25,6 @@
COMPILER_RT_ABI float __floatdisf(di_int a) { return __floatXiYf__(a); }
+
+#undef SRC_I64
+#undef DST_SINGLE
diff --git a/lib/fp/floatundidf.c b/lib/fp/floatundidf.c
@@ -30,3 +30,6 @@
COMPILER_RT_ABI double __floatundidf(du_int a) { return __floatXiYf__(a); }
+
+#undef SRC_U64
+#undef DST_DOUBLE
diff --git a/lib/fp/floatundisf.c b/lib/fp/floatundisf.c
@@ -25,3 +25,6 @@
COMPILER_RT_ABI float __floatundisf(du_int a) { return __floatXiYf__(a); }
+
+#undef SRC_U64
+#undef DST_SINGLE
diff --git a/lib/fp/truncdfsf2.c b/lib/fp/truncdfsf2.c
@@ -12,3 +12,6 @@
COMPILER_RT_ABI float __truncdfsf2(double a) { return __truncXfYf2__(a); }
+
+#undef SRC_DOUBLE
+#undef DST_SINGLE
diff --git a/lib/fp_tf/divtf3.c b/lib/fp_tf/divtf3.c
@@ -22,3 +22,6 @@
COMPILER_RT_ABI fp_t __divtf3(fp_t a, fp_t b) { return __divXf3__(a, b); }
+
+#undef NUMBER_OF_HALF_ITERATIONS
+#undef NUMBER_OF_FULL_ITERATIONS
diff --git a/lib/fp_tf/extenddftf2.c b/lib/fp_tf/extenddftf2.c
@@ -15,3 +15,6 @@
COMPILER_RT_ABI dst_t __extenddftf2(src_t a) { return __extendXfYf2__(a); }
+
+#undef SRC_DOUBLE
+#undef DST_QUAD
diff --git a/lib/fp_tf/extendsftf2.c b/lib/fp_tf/extendsftf2.c
@@ -15,3 +15,6 @@
COMPILER_RT_ABI dst_t __extendsftf2(src_t a) { return __extendXfYf2__(a); }
+
+#undef SRC_SINGLE
+#undef DST_QUAD
diff --git a/lib/fp_tf/fixtfdi.c b/lib/fp_tf/fixtfdi.c
@@ -9,8 +9,13 @@
#define QUAD_PRECISION
#include "fp_lib.h"
-typedef di_int fixint_t;
-typedef du_int fixuint_t;
+#define fixint_t di_int
+#define fixuint_t du_int
+#define FP_FIX_SUFFIX fixtfdi
#include "fp_fixint_impl.inc"
COMPILER_RT_ABI di_int __fixtfdi(fp_t a) { return __fixint(a); }
+
+#undef fixint_t
+#undef fixuint_t
+#undef FP_FIX_SUFFIX
diff --git a/lib/fp_tf/fixtfsi.c b/lib/fp_tf/fixtfsi.c
@@ -9,8 +9,13 @@
#define QUAD_PRECISION
#include "fp_lib.h"
-typedef si_int fixint_t;
-typedef su_int fixuint_t;
+#define fixint_t si_int
+#define fixuint_t su_int
+#define FP_FIX_SUFFIX fixtfsi
#include "fp_fixint_impl.inc"
COMPILER_RT_ABI si_int __fixtfsi(fp_t a) { return __fixint(a); }
+
+#undef fixint_t
+#undef fixuint_t
+#undef FP_FIX_SUFFIX
diff --git a/lib/fp_tf/fixtfti.c b/lib/fp_tf/fixtfti.c
@@ -9,8 +9,13 @@
#define QUAD_PRECISION
#include "fp_lib.h"
-typedef ti_int fixint_t;
-typedef tu_int fixuint_t;
+#define fixint_t ti_int
+#define fixuint_t tu_int
+#define FP_FIX_SUFFIX fixtfti
#include "fp_fixint_impl.inc"
COMPILER_RT_ABI ti_int __fixtfti(fp_t a) { return __fixint(a); }
+
+#undef fixint_t
+#undef fixuint_t
+#undef FP_FIX_SUFFIX
diff --git a/lib/fp_tf/fixunstfdi.c b/lib/fp_tf/fixunstfdi.c
@@ -9,7 +9,11 @@
#define QUAD_PRECISION
#include "fp_lib.h"
-typedef du_int fixuint_t;
+#define fixuint_t du_int
+#define FP_FIX_SUFFIX fixunstfdi
#include "fp_fixuint_impl.inc"
COMPILER_RT_ABI du_int __fixunstfdi(fp_t a) { return __fixuint(a); }
+
+#undef fixuint_t
+#undef FP_FIX_SUFFIX
diff --git a/lib/fp_tf/fixunstfsi.c b/lib/fp_tf/fixunstfsi.c
@@ -9,7 +9,11 @@
#define QUAD_PRECISION
#include "fp_lib.h"
-typedef su_int fixuint_t;
+#define fixuint_t su_int
+#define FP_FIX_SUFFIX fixunstfsi
#include "fp_fixuint_impl.inc"
COMPILER_RT_ABI su_int __fixunstfsi(fp_t a) { return __fixuint(a); }
+
+#undef fixuint_t
+#undef FP_FIX_SUFFIX
diff --git a/lib/fp_tf/fixunstfti.c b/lib/fp_tf/fixunstfti.c
@@ -9,7 +9,11 @@
#define QUAD_PRECISION
#include "fp_lib.h"
-typedef tu_int fixuint_t;
+#define fixuint_t tu_int
+#define FP_FIX_SUFFIX fixunstfti
#include "fp_fixuint_impl.inc"
COMPILER_RT_ABI tu_int __fixunstfti(fp_t a) { return __fixuint(a); }
+
+#undef fixuint_t
+#undef FP_FIX_SUFFIX
diff --git a/lib/fp_tf/floattitf.c b/lib/fp_tf/floattitf.c
@@ -31,3 +31,6 @@
COMPILER_RT_ABI fp_t __floattitf(ti_int a) { return __floatXiYf__(a); }
+
+#undef SRC_I128
+#undef DST_QUAD
diff --git a/lib/fp_tf/floatuntitf.c b/lib/fp_tf/floatuntitf.c
@@ -31,3 +31,6 @@
COMPILER_RT_ABI fp_t __floatuntitf(tu_int a) { return __floatXiYf__(a); }
+
+#undef SRC_U128
+#undef DST_QUAD
diff --git a/lib/fp_tf/trunctfdf2.c b/lib/fp_tf/trunctfdf2.c
@@ -15,3 +15,6 @@
COMPILER_RT_ABI dst_t __trunctfdf2(src_t a) { return __truncXfYf2__(a); }
+
+#undef SRC_QUAD
+#undef DST_DOUBLE
diff --git a/lib/fp_tf/trunctfsf2.c b/lib/fp_tf/trunctfsf2.c
@@ -15,3 +15,6 @@
COMPILER_RT_ABI dst_t __trunctfsf2(src_t a) { return __truncXfYf2__(a); }
+
+#undef SRC_QUAD
+#undef DST_SINGLE
diff --git a/lib/fp_ti/fixdfti.c b/lib/fp_ti/fixdfti.c
@@ -11,9 +11,14 @@
#define DOUBLE_PRECISION
#include "fp_lib.h"
-typedef ti_int fixint_t;
-typedef tu_int fixuint_t;
+#define fixint_t ti_int
+#define fixuint_t tu_int
+#define FP_FIX_SUFFIX fixdfti
#include "fp_fixint_impl.inc"
COMPILER_RT_ABI ti_int __fixdfti(fp_t a) { return __fixint(a); }
+
+#undef fixint_t
+#undef fixuint_t
+#undef FP_FIX_SUFFIX
diff --git a/lib/fp_ti/fixsfti.c b/lib/fp_ti/fixsfti.c
@@ -11,9 +11,14 @@
#define SINGLE_PRECISION
#include "fp_lib.h"
-typedef ti_int fixint_t;
-typedef tu_int fixuint_t;
+#define fixint_t ti_int
+#define fixuint_t tu_int
+#define FP_FIX_SUFFIX fixsfti
#include "fp_fixint_impl.inc"
COMPILER_RT_ABI ti_int __fixsfti(fp_t a) { return __fixint(a); }
+
+#undef fixint_t
+#undef fixuint_t
+#undef FP_FIX_SUFFIX
diff --git a/lib/fp_ti/fixunsdfti.c b/lib/fp_ti/fixunsdfti.c
@@ -10,7 +10,11 @@
#define DOUBLE_PRECISION
#include "fp_lib.h"
-typedef tu_int fixuint_t;
+#define fixuint_t tu_int
+#define FP_FIX_SUFFIX fixunsdfti
#include "fp_fixuint_impl.inc"
COMPILER_RT_ABI tu_int __fixunsdfti(fp_t a) { return __fixuint(a); }
+
+#undef fixuint_t
+#undef FP_FIX_SUFFIX
diff --git a/lib/fp_ti/fixunssfti.c b/lib/fp_ti/fixunssfti.c
@@ -13,7 +13,11 @@
#define SINGLE_PRECISION
#include "fp_lib.h"
-typedef tu_int fixuint_t;
+#define fixuint_t tu_int
+#define FP_FIX_SUFFIX fixunssfti
#include "fp_fixuint_impl.inc"
COMPILER_RT_ABI tu_int __fixunssfti(fp_t a) { return __fixuint(a); }
+
+#undef fixuint_t
+#undef FP_FIX_SUFFIX
diff --git a/lib/fp_ti/floattidf.c b/lib/fp_ti/floattidf.c
@@ -27,3 +27,6 @@
COMPILER_RT_ABI double __floattidf(ti_int a) { return __floatXiYf__(a); }
+
+#undef SRC_I128
+#undef DST_DOUBLE
diff --git a/lib/fp_ti/floattisf.c b/lib/fp_ti/floattisf.c
@@ -26,3 +26,6 @@
COMPILER_RT_ABI float __floattisf(ti_int a) { return __floatXiYf__(a); }
+
+#undef SRC_I128
+#undef DST_SINGLE
diff --git a/lib/fp_ti/floatuntidf.c b/lib/fp_ti/floatuntidf.c
@@ -27,3 +27,6 @@
COMPILER_RT_ABI double __floatuntidf(tu_int a) { return __floatXiYf__(a); }
+
+#undef SRC_U128
+#undef DST_DOUBLE
diff --git a/lib/fp_ti/floatuntisf.c b/lib/fp_ti/floatuntisf.c
@@ -26,3 +26,6 @@
COMPILER_RT_ABI float __floatuntisf(tu_int a) { return __floatXiYf__(a); }
+
+#undef SRC_U128
+#undef DST_SINGLE
diff --git a/lib/impl/fp_add_impl.inc b/lib/impl/fp_add_impl.inc
@@ -14,6 +14,22 @@
#include "fp_lib.h"
#include "fp_mode.h"
+#define __addXf3__ _FP_NAME(__addXf3__)
+
+#if defined SINGLE_PRECISION && !defined FP_ADD_SF_EMITTED
+#define FP_ADD_SF_EMITTED
+#define _FP_ADD_EMIT 1
+#elif defined DOUBLE_PRECISION && !defined FP_ADD_DF_EMITTED
+#define FP_ADD_DF_EMITTED
+#define _FP_ADD_EMIT 1
+#elif defined QUAD_PRECISION && !defined FP_ADD_TF_EMITTED
+#define FP_ADD_TF_EMITTED
+#define _FP_ADD_EMIT 1
+#endif
+
+#ifdef _FP_ADD_EMIT
+#undef _FP_ADD_EMIT
+
static __inline fp_t __addXf3__(fp_t a, fp_t b) {
rep_t aRep = toRep(a);
rep_t bRep = toRep(b);
@@ -170,3 +186,5 @@ static __inline fp_t __addXf3__(fp_t a, fp_t b) {
__fe_raise_inexact();
return fromRep(result);
}
+
+#endif // _FP_ADD_EMIT
diff --git a/lib/impl/fp_compare_impl.inc b/lib/impl/fp_compare_impl.inc
@@ -8,6 +8,12 @@
#include "fp_lib.h"
+// CMP_RESULT and the LE_*/GE_* sentinels are precision-independent; emit
+// them once per TU. The static __inline comparators (__leXf2__ etc.) are
+// per-precision and gated below.
+#ifndef FP_COMPARE_COMMON_EMITTED
+#define FP_COMPARE_COMMON_EMITTED
+
// GCC uses long (at least for x86_64) as the return type of the comparison
// functions. We need to ensure that the return value is sign-extended in the
// same way as GCC expects (since otherwise GCC-generated __builtin_isinf
@@ -41,6 +47,34 @@ enum {
LE_UNORDERED = 1,
};
+enum {
+ GE_LESS = -1,
+ GE_EQUAL = 0,
+ GE_GREATER = 1,
+ GE_UNORDERED = -1 // Note: different from LE_UNORDERED
+};
+
+#endif // FP_COMPARE_COMMON_EMITTED
+
+// Bare-name aliases (re-set every inclusion, suffix-renamed via fp_lib).
+#define __leXf2__ _FP_NAME(__leXf2__)
+#define __geXf2__ _FP_NAME(__geXf2__)
+#define __unordXf2__ _FP_NAME(__unordXf2__)
+
+#if defined SINGLE_PRECISION && !defined FP_COMPARE_SF_EMITTED
+#define FP_COMPARE_SF_EMITTED
+#define _FP_COMPARE_EMIT 1
+#elif defined DOUBLE_PRECISION && !defined FP_COMPARE_DF_EMITTED
+#define FP_COMPARE_DF_EMITTED
+#define _FP_COMPARE_EMIT 1
+#elif defined QUAD_PRECISION && !defined FP_COMPARE_TF_EMITTED
+#define FP_COMPARE_TF_EMITTED
+#define _FP_COMPARE_EMIT 1
+#endif
+
+#ifdef _FP_COMPARE_EMIT
+#undef _FP_COMPARE_EMIT
+
static inline CMP_RESULT __leXf2__(fp_t a, fp_t b) {
const srep_t aInt = toRep(a);
const srep_t bInt = toRep(b);
@@ -78,13 +112,6 @@ static inline CMP_RESULT __leXf2__(fp_t a, fp_t b) {
}
}
-enum {
- GE_LESS = -1,
- GE_EQUAL = 0,
- GE_GREATER = 1,
- GE_UNORDERED = -1 // Note: different from LE_UNORDERED
-};
-
static inline CMP_RESULT __geXf2__(fp_t a, fp_t b) {
const srep_t aInt = toRep(a);
const srep_t bInt = toRep(b);
@@ -117,3 +144,5 @@ static inline CMP_RESULT __unordXf2__(fp_t a, fp_t b) {
const rep_t bAbs = toRep(b) & absMask;
return aAbs > infRep || bAbs > infRep;
}
+
+#endif // _FP_COMPARE_EMIT
diff --git a/lib/impl/fp_div_impl.inc b/lib/impl/fp_div_impl.inc
@@ -13,6 +13,22 @@
#include "fp_lib.h"
+#define __divXf3__ _FP_NAME(__divXf3__)
+
+#if defined SINGLE_PRECISION && !defined FP_DIV_SF_EMITTED
+#define FP_DIV_SF_EMITTED
+#define _FP_DIV_EMIT 1
+#elif defined DOUBLE_PRECISION && !defined FP_DIV_DF_EMITTED
+#define FP_DIV_DF_EMITTED
+#define _FP_DIV_EMIT 1
+#elif defined QUAD_PRECISION && !defined FP_DIV_TF_EMITTED
+#define FP_DIV_TF_EMITTED
+#define _FP_DIV_EMIT 1
+#endif
+
+#ifdef _FP_DIV_EMIT
+#undef _FP_DIV_EMIT
+
// The __divXf3__ function implements Newton-Raphson floating point division.
// It uses 3 iterations for float32, 4 for float64 and 5 for float128,
// respectively. Due to number of significant bits being roughly doubled
@@ -417,3 +433,9 @@ static __inline fp_t __divXf3__(fp_t a, fp_t b) {
#endif
return fromRep(absResult | quotientSign);
}
+
+#undef HW
+#undef loMask
+#undef RECIPROCAL_PRECISION
+
+#endif // _FP_DIV_EMIT
diff --git a/lib/impl/fp_extend.h b/lib/impl/fp_extend.h
@@ -1,5 +1,4 @@
-//===-lib/fp_extend.h - low precision -> high precision conversion -*- C
-//-*-===//
+//===-lib/fp_extend.h - low precision -> high precision conversion -*- C -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -7,19 +6,134 @@
//
//===----------------------------------------------------------------------===//
//
-// Set source and destination setting
+// Source/destination type setup for the extending FP conversions. Caller
+// defines SRC_<X> (SINGLE/DOUBLE/80/HALF) and DST_<Y> (SINGLE/DOUBLE/QUAD)
+// before each inclusion.
//
+// Re-includable. Names that depend on the (src, dst) pair are emitted with
+// a `_<src><dst>` suffix exactly once per (TU, pair); bare-name #define
+// aliases are set on every inclusion so the body in `fp_extend_impl.inc`
+// (and the caller) uses bare names that resolve to the right suffixed
+// entity. Use the umbrella `fp_lib_undef.h` between sections in one TU to
+// clear the bare-name aliases.
//===----------------------------------------------------------------------===//
-#ifndef FP_EXTEND_HEADER
-#define FP_EXTEND_HEADER
-
#include "int_lib.h"
+// Self-clean any prior inclusion's per-(src, dst) state. May coexist with
+// fp_trunc.h / int_to_fp.h which define overlapping bare-name aliases.
+#undef _FP_EXT_SRC_SUF
+#undef _FP_EXT_DST_SUF
+#undef SRC_REP_C
+#undef DST_REP_C
+#undef src_t
+#undef src_rep_t
+#undef dst_t
+#undef dst_rep_t
+#undef srcBits
+#undef srcSigFracBits
+#undef srcExpBits
+#undef dstBits
+#undef dstSigFracBits
+#undef dstExpBits
+#undef src_rep_t_clz
+#undef src_rep_t_clz_impl
+#undef srcToRep
+#undef dstFromRep
+#undef extract_sign_from_src
+#undef extract_exp_from_src
+#undef extract_sig_frac_from_src
+#undef clz_in_sig_frac
+#undef construct_dst_rep
+
+#if defined SRC_SINGLE
+#define _FP_EXT_SRC_SUF sf
+#elif defined SRC_DOUBLE
+#define _FP_EXT_SRC_SUF df
+#elif defined SRC_80
+#define _FP_EXT_SRC_SUF xf
+#elif defined SRC_HALF
+#define _FP_EXT_SRC_SUF hf
+#else
+#error Source should be half, single, or double precision!
+#endif
+
+#if defined DST_SINGLE
+#define _FP_EXT_DST_SUF sf
+#elif defined DST_DOUBLE
+#define _FP_EXT_DST_SUF df
+#elif defined DST_QUAD
+#define _FP_EXT_DST_SUF tf
+#else
+#error Destination should be single, double, or quad precision!
+#endif
+
+#define _FP_EXT_PASTE4_(a, b, c, d) a##b##c##d
+#define _FP_EXT_PASTE4(a, b, c, d) _FP_EXT_PASTE4_(a, b, c, d)
+#define _FP_EXT_PAIR(stem) _FP_EXT_PASTE4(stem, _, _FP_EXT_SRC_SUF, _FP_EXT_DST_SUF)
+
+// ---- Bare-name aliases (re-set every inclusion). ------------------------
+
+#define src_t _FP_EXT_PAIR(src_t)
+#define src_rep_t _FP_EXT_PAIR(src_rep_t)
+#define dst_t _FP_EXT_PAIR(dst_t)
+#define dst_rep_t _FP_EXT_PAIR(dst_rep_t)
+#define srcBits _FP_EXT_PAIR(srcBits)
+#define srcSigFracBits _FP_EXT_PAIR(srcSigFracBits)
+#define srcExpBits _FP_EXT_PAIR(srcExpBits)
+#define dstBits _FP_EXT_PAIR(dstBits)
+#define dstSigFracBits _FP_EXT_PAIR(dstSigFracBits)
+#define dstExpBits _FP_EXT_PAIR(dstExpBits)
+#define src_rep_t_clz_impl _FP_EXT_PAIR(src_rep_t_clz_impl)
+#define srcToRep _FP_EXT_PAIR(srcToRep)
+#define dstFromRep _FP_EXT_PAIR(dstFromRep)
+#define extract_sign_from_src _FP_EXT_PAIR(extract_sign_from_src)
+#define extract_exp_from_src _FP_EXT_PAIR(extract_exp_from_src)
+#define extract_sig_frac_from_src _FP_EXT_PAIR(extract_sig_frac_from_src)
+#define clz_in_sig_frac _FP_EXT_PAIR(clz_in_sig_frac)
+#define construct_dst_rep _FP_EXT_PAIR(construct_dst_rep)
+
+// SRC_REP_C / DST_REP_C: textual macros (UINT32_C etc.); same body each
+// inclusion within a precision.
+
+#if defined SRC_SINGLE
+#define SRC_REP_C UINT32_C
+#elif defined SRC_DOUBLE
+#define SRC_REP_C UINT64_C
+#elif defined SRC_80
+#define SRC_REP_C (__uint128_t)
+#elif defined SRC_HALF
+#define SRC_REP_C UINT16_C
+#endif
+
+#if defined DST_SINGLE
+#define DST_REP_C UINT32_C
+#elif defined DST_DOUBLE
+#define DST_REP_C UINT64_C
+#elif defined DST_QUAD
+#define DST_REP_C (__uint128_t)
+#endif
+
+// ---- One-time emission per (TU, src+dst pair). --------------------------
+// Enumerate the pairs cfree actually uses (sf→df, sf→tf, df→tf).
+
+#if defined SRC_SINGLE && defined DST_DOUBLE && !defined FP_EXT_SFDF_EMITTED
+#define FP_EXT_SFDF_EMITTED
+#define _FP_EXT_EMIT 1
+#elif defined SRC_SINGLE && defined DST_QUAD && !defined FP_EXT_SFTF_EMITTED
+#define FP_EXT_SFTF_EMITTED
+#define _FP_EXT_EMIT 1
+#elif defined SRC_DOUBLE && defined DST_QUAD && !defined FP_EXT_DFTF_EMITTED
+#define FP_EXT_DFTF_EMITTED
+#define _FP_EXT_EMIT 1
+#endif
+
+#ifdef _FP_EXT_EMIT
+#undef _FP_EXT_EMIT
+
#if defined SRC_SINGLE
typedef float src_t;
typedef uint32_t src_rep_t;
-#define SRC_REP_C UINT32_C
static const int srcBits = sizeof(src_t) * CHAR_BIT;
static const int srcSigFracBits = 23;
// -1 accounts for the sign bit.
@@ -30,7 +144,6 @@ static const int srcExpBits = 8;
#elif defined SRC_DOUBLE
typedef double src_t;
typedef uint64_t src_rep_t;
-#define SRC_REP_C UINT64_C
static const int srcBits = sizeof(src_t) * CHAR_BIT;
static const int srcSigFracBits = 52;
// -1 accounts for the sign bit.
@@ -52,7 +165,6 @@ static inline int src_rep_t_clz_impl(src_rep_t a) {
#elif defined SRC_80
typedef xf_float src_t;
typedef __uint128_t src_rep_t;
-#define SRC_REP_C (__uint128_t)
// sign bit, exponent and significand occupy the lower 80 bits.
static const int srcBits = 80;
static const int srcSigFracBits = 63;
@@ -68,7 +180,6 @@ typedef _Float16 src_t;
typedef uint16_t src_t;
#endif
typedef uint16_t src_rep_t;
-#define SRC_REP_C UINT16_C
static const int srcBits = sizeof(src_t) * CHAR_BIT;
static const int srcSigFracBits = 10;
// -1 accounts for the sign bit.
@@ -78,17 +189,13 @@ static const int srcExpBits = 5;
static inline int src_rep_t_clz_impl(src_rep_t a) {
return __builtin_clz(a) - 16;
}
-
#define src_rep_t_clz src_rep_t_clz_impl
-#else
-#error Source should be half, single, or double precision!
#endif // end source precision
#if defined DST_SINGLE
typedef float dst_t;
typedef uint32_t dst_rep_t;
-#define DST_REP_C UINT32_C
static const int dstBits = sizeof(dst_t) * CHAR_BIT;
static const int dstSigFracBits = 23;
// -1 accounts for the sign bit.
@@ -98,7 +205,6 @@ static const int dstExpBits = 8;
#elif defined DST_DOUBLE
typedef double dst_t;
typedef uint64_t dst_rep_t;
-#define DST_REP_C UINT64_C
static const int dstBits = sizeof(dst_t) * CHAR_BIT;
static const int dstSigFracBits = 52;
// -1 accounts for the sign bit.
@@ -108,15 +214,12 @@ static const int dstExpBits = 11;
#elif defined DST_QUAD
typedef tf_float dst_t;
typedef __uint128_t dst_rep_t;
-#define DST_REP_C (__uint128_t)
static const int dstBits = sizeof(dst_t) * CHAR_BIT;
static const int dstSigFracBits = 112;
// -1 accounts for the sign bit.
// dstBits - dstSigFracBits - 1
static const int dstExpBits = 15;
-#else
-#error Destination should be single, double, or quad precision!
#endif // end destination precision
// End of specialization parameters.
@@ -171,4 +274,4 @@ static inline dst_t dstFromRep(dst_rep_t x) {
}
// End helper routines. Conversion implementation follows.
-#endif // FP_EXTEND_HEADER
+#endif // _FP_EXT_EMIT
diff --git a/lib/impl/fp_extend_impl.inc b/lib/impl/fp_extend_impl.inc
@@ -37,6 +37,22 @@
#include "fp_extend.h"
+#define __extendXfYf2__ _FP_EXT_PAIR(__extendXfYf2__)
+
+#if defined SRC_SINGLE && defined DST_DOUBLE && !defined FP_EXT_IMPL_SFDF_EMITTED
+#define FP_EXT_IMPL_SFDF_EMITTED
+#define _FP_EXT_IMPL_EMIT 1
+#elif defined SRC_SINGLE && defined DST_QUAD && !defined FP_EXT_IMPL_SFTF_EMITTED
+#define FP_EXT_IMPL_SFTF_EMITTED
+#define _FP_EXT_IMPL_EMIT 1
+#elif defined SRC_DOUBLE && defined DST_QUAD && !defined FP_EXT_IMPL_DFTF_EMITTED
+#define FP_EXT_IMPL_DFTF_EMITTED
+#define _FP_EXT_IMPL_EMIT 1
+#endif
+
+#ifdef _FP_EXT_IMPL_EMIT
+#undef _FP_EXT_IMPL_EMIT
+
// The source type may use a usual IEEE-754 interchange format or Intel 80-bit
// format. In particular, for the source type srcSigFracBits may be not equal to
// srcSigBits. The destination type is assumed to be one of IEEE-754 standard
@@ -106,3 +122,5 @@ static __inline dst_t __extendXfYf2__(src_t a) {
const dst_rep_t result = construct_dst_rep(dstSign, dstExp, dstSigFrac);
return dstFromRep(result);
}
+
+#endif // _FP_EXT_IMPL_EMIT
diff --git a/lib/impl/fp_fixint_impl.inc b/lib/impl/fp_fixint_impl.inc
@@ -13,6 +13,20 @@
#include "fp_lib.h"
+#ifndef FP_FIX_SUFFIX
+#error "fp_fixint_impl.inc: FP_FIX_SUFFIX must be defined before #include"
+#endif
+
+#ifndef FP_FIX_IMPL_PASTE_
+#define FP_FIX_IMPL_PASTE_(a, b) a##_##b
+#define FP_FIX_IMPL_PASTE(a, b) FP_FIX_IMPL_PASTE_(a, b)
+#endif
+
+#ifdef __fixint
+#undef __fixint
+#endif
+#define __fixint FP_FIX_IMPL_PASTE(__fixint, FP_FIX_SUFFIX)
+
static __inline fixint_t __fixint(fp_t a) {
const fixint_t fixint_max = (fixint_t)((~(fixuint_t)0) / 2);
const fixint_t fixint_min = -fixint_max - 1;
@@ -38,3 +52,9 @@ static __inline fixint_t __fixint(fp_t a) {
else
return sign * ((fixuint_t)significand << (exponent - significandBits));
}
+
+// FP_FIX_SUFFIX, fixint_t, fixuint_t intentionally left defined: the
+// caller's `return __fixint(a);` line is parsed AFTER this include, and
+// the `__fixint` macro re-expands FP_FIX_SUFFIX at that point. The
+// caller (per-op .c) is responsible for #undef'ing them at its bottom
+// so consolidated builds can stack multiple per-op snippets in one TU.
diff --git a/lib/impl/fp_fixuint_impl.inc b/lib/impl/fp_fixuint_impl.inc
@@ -13,6 +13,20 @@
#include "fp_lib.h"
+#ifndef FP_FIX_SUFFIX
+#error "fp_fixuint_impl.inc: FP_FIX_SUFFIX must be defined before #include"
+#endif
+
+#ifndef FP_FIX_IMPL_PASTE_
+#define FP_FIX_IMPL_PASTE_(a, b) a##_##b
+#define FP_FIX_IMPL_PASTE(a, b) FP_FIX_IMPL_PASTE_(a, b)
+#endif
+
+#ifdef __fixuint
+#undef __fixuint
+#endif
+#define __fixuint FP_FIX_IMPL_PASTE(__fixuint, FP_FIX_SUFFIX)
+
static __inline fixuint_t __fixuint(fp_t a) {
// Break a into sign, exponent, significand parts.
const rep_t aRep = toRep(a);
@@ -36,3 +50,5 @@ static __inline fixuint_t __fixuint(fp_t a) {
else
return (fixuint_t)significand << (exponent - significandBits);
}
+
+// FP_FIX_SUFFIX, fixuint_t intentionally left defined; see fp_fixint_impl.inc.
diff --git a/lib/impl/fp_mul_impl.inc b/lib/impl/fp_mul_impl.inc
@@ -13,6 +13,22 @@
#include "fp_lib.h"
+#define __mulXf3__ _FP_NAME(__mulXf3__)
+
+#if defined SINGLE_PRECISION && !defined FP_MUL_SF_EMITTED
+#define FP_MUL_SF_EMITTED
+#define _FP_MUL_EMIT 1
+#elif defined DOUBLE_PRECISION && !defined FP_MUL_DF_EMITTED
+#define FP_MUL_DF_EMITTED
+#define _FP_MUL_EMIT 1
+#elif defined QUAD_PRECISION && !defined FP_MUL_TF_EMITTED
+#define FP_MUL_TF_EMITTED
+#define _FP_MUL_EMIT 1
+#endif
+
+#ifdef _FP_MUL_EMIT
+#undef _FP_MUL_EMIT
+
static __inline fp_t __mulXf3__(fp_t a, fp_t b) {
const unsigned int aExponent = toRep(a) >> significandBits & maxExponent;
const unsigned int bExponent = toRep(b) >> significandBits & maxExponent;
@@ -126,3 +142,5 @@ static __inline fp_t __mulXf3__(fp_t a, fp_t b) {
productHi += productHi & 1;
return fromRep(productHi);
}
+
+#endif // _FP_MUL_EMIT
diff --git a/lib/impl/fp_trunc.h b/lib/impl/fp_trunc.h
@@ -6,19 +6,131 @@
//
//===----------------------------------------------------------------------===//
//
-// Set source and destination precision setting
+// Source/destination precision setup for the truncating FP conversions.
+// Caller defines SRC_<X> (SINGLE/DOUBLE/QUAD) and DST_<Y>
+// (SINGLE/DOUBLE/80/HALF/BFLOAT) before each inclusion.
//
+// Re-includable. Names that depend on the (src, dst) pair are emitted with
+// a `_<src><dst>` suffix once per (TU, pair); bare-name #define aliases are
+// reset every inclusion (the #undef block below clears the previous pair's).
+// The caller must #undef its SRC_*/DST_* selectors before switching pairs.
//===----------------------------------------------------------------------===//
-#ifndef FP_TRUNC_HEADER
-#define FP_TRUNC_HEADER
-
#include "int_lib.h"
+// Self-clean any prior inclusion's per-(src, dst) state. May coexist with
+// fp_extend.h / int_to_fp.h which define overlapping bare-name aliases.
+#undef _FP_TRUNC_SRC_SUF
+#undef _FP_TRUNC_DST_SUF
+#undef SRC_REP_C
+#undef DST_REP_C
+#undef src_t
+#undef src_rep_t
+#undef dst_t
+#undef dst_rep_t
+#undef srcBits
+#undef srcSigFracBits
+#undef srcExpBits
+#undef dstBits
+#undef dstSigFracBits
+#undef dstExpBits
+#undef srcToRep
+#undef dstFromRep
+#undef extract_sign_from_src
+#undef extract_exp_from_src
+#undef extract_sig_frac_from_src
+#undef construct_dst_rep
+
+#if defined SRC_SINGLE
+#define _FP_TRUNC_SRC_SUF sf
+#elif defined SRC_DOUBLE
+#define _FP_TRUNC_SRC_SUF df
+#elif defined SRC_QUAD
+#define _FP_TRUNC_SRC_SUF tf
+#else
+#error Source should be single, double, or quad precision!
+#endif // end source suffix selection
+
+#if defined DST_SINGLE
+#define _FP_TRUNC_DST_SUF sf
+#elif defined DST_DOUBLE
+#define _FP_TRUNC_DST_SUF df
+#elif defined DST_80
+#define _FP_TRUNC_DST_SUF xf
+#elif defined DST_HALF
+#define _FP_TRUNC_DST_SUF hf
+#elif defined DST_BFLOAT
+#define _FP_TRUNC_DST_SUF bf
+#else
+#error Destination should be single, double, 80-bit, half, or bfloat precision!
+#endif // end destination suffix selection
+
+#define _FP_TRUNC_PASTE4_(a, b, c, d) a##b##c##d
+#define _FP_TRUNC_PASTE4(a, b, c, d) _FP_TRUNC_PASTE4_(a, b, c, d)
+#define _FP_TRUNC_PAIR(stem) _FP_TRUNC_PASTE4(stem, _, _FP_TRUNC_SRC_SUF, _FP_TRUNC_DST_SUF)
+
+// ---- Bare-name aliases (re-set every inclusion). ------------------------
+// fp_extend.h uses identical bare names; the aliases here suffix-rename
+// to fp_trunc-specific symbols (different pair tokens), so the two
+// headers can coexist in one TU without colliding.
+
+#define src_t _FP_TRUNC_PAIR(src_t)
+#define src_rep_t _FP_TRUNC_PAIR(src_rep_t)
+#define dst_t _FP_TRUNC_PAIR(dst_t)
+#define dst_rep_t _FP_TRUNC_PAIR(dst_rep_t)
+#define srcBits _FP_TRUNC_PAIR(srcBits)
+#define srcSigFracBits _FP_TRUNC_PAIR(srcSigFracBits)
+#define srcExpBits _FP_TRUNC_PAIR(srcExpBits)
+#define dstBits _FP_TRUNC_PAIR(dstBits)
+#define dstSigFracBits _FP_TRUNC_PAIR(dstSigFracBits)
+#define dstExpBits _FP_TRUNC_PAIR(dstExpBits)
+#define srcToRep _FP_TRUNC_PAIR(srcToRep)
+#define dstFromRep _FP_TRUNC_PAIR(dstFromRep)
+#define extract_sign_from_src _FP_TRUNC_PAIR(extract_sign_from_src)
+#define extract_exp_from_src _FP_TRUNC_PAIR(extract_exp_from_src)
+#define extract_sig_frac_from_src _FP_TRUNC_PAIR(extract_sig_frac_from_src)
+#define construct_dst_rep _FP_TRUNC_PAIR(construct_dst_rep)
+
+#if defined SRC_SINGLE
+#define SRC_REP_C UINT32_C
+#elif defined SRC_DOUBLE
+#define SRC_REP_C UINT64_C
+#elif defined SRC_QUAD
+#define SRC_REP_C (__uint128_t)
+#endif
+
+#if defined DST_SINGLE
+#define DST_REP_C UINT32_C
+#elif defined DST_DOUBLE
+#define DST_REP_C UINT64_C
+#elif defined DST_80
+#define DST_REP_C (__uint128_t)
+#elif defined DST_HALF
+#define DST_REP_C UINT16_C
+#elif defined DST_BFLOAT
+#define DST_REP_C UINT16_C
+#endif
+
+// ---- One-time emission per (TU, src+dst pair); other pairs emit nothing.
+// Pairs cfree uses: df→sf, tf→df, tf→sf.
+
+#if defined SRC_DOUBLE && defined DST_SINGLE && !defined FP_TRUNC_DFSF_EMITTED
+#define FP_TRUNC_DFSF_EMITTED
+#define _FP_TRUNC_EMIT 1
+#elif defined SRC_QUAD && defined DST_DOUBLE && !defined FP_TRUNC_TFDF_EMITTED
+#define FP_TRUNC_TFDF_EMITTED
+#define _FP_TRUNC_EMIT 1
+#elif defined SRC_QUAD && defined DST_SINGLE && !defined FP_TRUNC_TFSF_EMITTED
+#define FP_TRUNC_TFSF_EMITTED
+#define _FP_TRUNC_EMIT 1
+#endif
+
+#ifdef _FP_TRUNC_EMIT
+#undef _FP_TRUNC_EMIT
+
#if defined SRC_SINGLE
typedef float src_t;
typedef uint32_t src_rep_t;
-#define SRC_REP_C UINT32_C
static const int srcBits = sizeof(src_t) * CHAR_BIT;
static const int srcSigFracBits = 23;
// -1 accounts for the sign bit.
@@ -28,7 +140,6 @@ static const int srcExpBits = 8;
#elif defined SRC_DOUBLE
typedef double src_t;
typedef uint64_t src_rep_t;
-#define SRC_REP_C UINT64_C
static const int srcBits = sizeof(src_t) * CHAR_BIT;
static const int srcSigFracBits = 52;
// -1 accounts for the sign bit.
@@ -38,21 +149,17 @@ static const int srcExpBits = 11;
#elif defined SRC_QUAD
typedef tf_float src_t;
typedef __uint128_t src_rep_t;
-#define SRC_REP_C (__uint128_t)
static const int srcBits = sizeof(src_t) * CHAR_BIT;
static const int srcSigFracBits = 112;
// -1 accounts for the sign bit.
// srcBits - srcSigFracBits - 1
static const int srcExpBits = 15;
-#else
-#error Source should be double precision or quad precision!
#endif // end source precision
#if defined DST_DOUBLE
typedef double dst_t;
typedef uint64_t dst_rep_t;
-#define DST_REP_C UINT64_C
static const int dstBits = sizeof(dst_t) * CHAR_BIT;
static const int dstSigFracBits = 52;
// -1 accounts for the sign bit.
@@ -62,7 +169,6 @@ static const int dstExpBits = 11;
#elif defined DST_80
typedef xf_float dst_t;
typedef __uint128_t dst_rep_t;
-#define DST_REP_C (__uint128_t)
static const int dstBits = 80;
static const int dstSigFracBits = 63;
// -1 accounts for the sign bit.
@@ -73,7 +179,6 @@ static const int dstExpBits = 15;
#elif defined DST_SINGLE
typedef float dst_t;
typedef uint32_t dst_rep_t;
-#define DST_REP_C UINT32_C
static const int dstBits = sizeof(dst_t) * CHAR_BIT;
static const int dstSigFracBits = 23;
// -1 accounts for the sign bit.
@@ -87,7 +192,6 @@ typedef _Float16 dst_t;
typedef uint16_t dst_t;
#endif
typedef uint16_t dst_rep_t;
-#define DST_REP_C UINT16_C
static const int dstBits = sizeof(dst_t) * CHAR_BIT;
static const int dstSigFracBits = 10;
// -1 accounts for the sign bit.
@@ -97,15 +201,12 @@ static const int dstExpBits = 5;
#elif defined DST_BFLOAT
typedef __bf16 dst_t;
typedef uint16_t dst_rep_t;
-#define DST_REP_C UINT16_C
static const int dstBits = sizeof(dst_t) * CHAR_BIT;
static const int dstSigFracBits = 7;
// -1 accounts for the sign bit.
// dstBits - dstSigFracBits - 1
static const int dstExpBits = 8;
-#else
-#error Destination should be single precision or double precision!
#endif // end destination precision
// TODO: These helper routines should be placed into fp_lib.h
@@ -155,4 +256,4 @@ static inline dst_t dstFromRep(dst_rep_t x) {
return rep.f;
}
-#endif // FP_TRUNC_HEADER
+#endif // _FP_TRUNC_EMIT
diff --git a/lib/impl/fp_trunc_impl.inc b/lib/impl/fp_trunc_impl.inc
@@ -38,6 +38,22 @@
#include "fp_trunc.h"
+#define __truncXfYf2__ _FP_TRUNC_PAIR(__truncXfYf2__)
+
+#if defined SRC_DOUBLE && defined DST_SINGLE && !defined FP_TRUNC_IMPL_DFSF_EMITTED
+#define FP_TRUNC_IMPL_DFSF_EMITTED
+#define _FP_TRUNC_IMPL_EMIT 1
+#elif defined SRC_QUAD && defined DST_DOUBLE && !defined FP_TRUNC_IMPL_TFDF_EMITTED
+#define FP_TRUNC_IMPL_TFDF_EMITTED
+#define _FP_TRUNC_IMPL_EMIT 1
+#elif defined SRC_QUAD && defined DST_SINGLE && !defined FP_TRUNC_IMPL_TFSF_EMITTED
+#define FP_TRUNC_IMPL_TFSF_EMITTED
+#define _FP_TRUNC_IMPL_EMIT 1
+#endif
+
+#ifdef _FP_TRUNC_IMPL_EMIT
+#undef _FP_TRUNC_IMPL_EMIT
+
// The destination type may use a usual IEEE-754 interchange format or Intel
// 80-bit format. In particular, for the destination type dstSigFracBits may be
// not equal to dstSigBits. The source type is assumed to be one of IEEE-754
@@ -153,3 +169,5 @@ static __inline dst_t __truncXfYf2__(src_t a) {
return dstFromRep(construct_dst_rep(dstSign, dstExp, dstSigFrac));
}
+
+#endif // _FP_TRUNC_IMPL_EMIT
diff --git a/lib/impl/int_div_impl.inc b/lib/impl/int_div_impl.inc
@@ -8,15 +8,46 @@
//
// Helpers used by __udivsi3, __umodsi3, __udivdi3, and __umodsi3.
//
+// Re-includable; safe to use multiple times in one TU. Inputs (caller
+// must #define before each #include):
+// fixint_t, fixuint_t -- the signed/unsigned integer width
+// INT_DIV_SUFFIX -- a unique token; helper names get suffixed
+// with `_<INT_DIV_SUFFIX>` so concurrent
+// inclusions don't collide
+// Optional inputs (gate emission of the signed wrappers):
+// COMPUTE_UDIV(a, b) -- expression yielding unsigned quotient
+// ASSIGN_UMOD(res, a, b)-- statement assigning unsigned remainder to res
+//
+// Outputs (always emitted as `static __inline`):
+// __udivXi3_<suffix>, __umodXi3_<suffix>
+// Plus, conditionally:
+// __divXi3_<suffix> iff COMPUTE_UDIV is defined
+// __modXi3_<suffix> iff ASSIGN_UMOD is defined
+//
+// At exit the inc #undef's all of its inputs (including INT_DIV_SUFFIX,
+// COMPUTE_UDIV, ASSIGN_UMOD, fixint_t, fixuint_t) so it's clean to
+// re-include with new settings.
+//
//===----------------------------------------------------------------------===//
-#define clz(a) (sizeof(a) == sizeof(unsigned long long) ? __builtin_clzll(a) : clzsi(a))
+#ifndef INT_DIV_IMPL_INC_GUARD
+#define INT_DIV_IMPL_INC_GUARD
+#define INT_DIV_IMPL_CAT_(a, b) a##b
+#define INT_DIV_IMPL_CAT(a, b) INT_DIV_IMPL_CAT_(a, b)
+#define INT_DIV_IMPL_CLZ(a) \
+ (sizeof(a) == sizeof(unsigned long long) ? __builtin_clzll(a) : clzsi(a))
+#endif
+
+#ifndef INT_DIV_SUFFIX
+#error "int_div_impl.inc: INT_DIV_SUFFIX must be defined before #include"
+#endif
// Adapted from Figure 3-40 of The PowerPC Compiler Writer's Guide
-static __inline fixuint_t __udivXi3(fixuint_t n, fixuint_t d) {
+static __inline fixuint_t
+INT_DIV_IMPL_CAT(__udivXi3_, INT_DIV_SUFFIX)(fixuint_t n, fixuint_t d) {
const unsigned N = sizeof(fixuint_t) * CHAR_BIT;
// d == 0 cases are unspecified.
- unsigned sr = (d ? clz(d) : N) - (n ? clz(n) : N);
+ unsigned sr = (d ? INT_DIV_IMPL_CLZ(d) : N) - (n ? INT_DIV_IMPL_CLZ(n) : N);
// 0 <= sr <= N - 1 or sr is very large.
if (sr > N - 1) // n < d
return 0;
@@ -42,10 +73,11 @@ static __inline fixuint_t __udivXi3(fixuint_t n, fixuint_t d) {
}
// Mostly identical to __udivXi3 but the return values are different.
-static __inline fixuint_t __umodXi3(fixuint_t n, fixuint_t d) {
+static __inline fixuint_t
+INT_DIV_IMPL_CAT(__umodXi3_, INT_DIV_SUFFIX)(fixuint_t n, fixuint_t d) {
const unsigned N = sizeof(fixuint_t) * CHAR_BIT;
// d == 0 cases are unspecified.
- unsigned sr = (d ? clz(d) : N) - (n ? clz(n) : N);
+ unsigned sr = (d ? INT_DIV_IMPL_CLZ(d) : N) - (n ? INT_DIV_IMPL_CLZ(n) : N);
// 0 <= sr <= N - 1 or sr is very large.
if (sr > N - 1) // n < d
return n;
@@ -70,7 +102,8 @@ static __inline fixuint_t __umodXi3(fixuint_t n, fixuint_t d) {
}
#ifdef COMPUTE_UDIV
-static __inline fixint_t __divXi3(fixint_t a, fixint_t b) {
+static __inline fixint_t
+INT_DIV_IMPL_CAT(__divXi3_, INT_DIV_SUFFIX)(fixint_t a, fixint_t b) {
const int N = (int)(sizeof(fixint_t) * CHAR_BIT) - 1;
fixint_t s_a = a >> N; // s_a = a < 0 ? -1 : 0
fixint_t s_b = b >> N; // s_b = b < 0 ? -1 : 0
@@ -82,7 +115,8 @@ static __inline fixint_t __divXi3(fixint_t a, fixint_t b) {
#endif // COMPUTE_UDIV
#ifdef ASSIGN_UMOD
-static __inline fixint_t __modXi3(fixint_t a, fixint_t b) {
+static __inline fixint_t
+INT_DIV_IMPL_CAT(__modXi3_, INT_DIV_SUFFIX)(fixint_t a, fixint_t b) {
const int N = (int)(sizeof(fixint_t) * CHAR_BIT) - 1;
fixint_t s = b >> N; // s = b < 0 ? -1 : 0
fixuint_t b_u = (fixuint_t)(b ^ s) + (-s); // negate if s == -1
@@ -93,3 +127,9 @@ static __inline fixint_t __modXi3(fixint_t a, fixint_t b) {
return (res ^ s) + (-s); // negate if s == -1
}
#endif // ASSIGN_UMOD
+
+#undef INT_DIV_SUFFIX
+#undef COMPUTE_UDIV
+#undef ASSIGN_UMOD
+#undef fixint_t
+#undef fixuint_t
diff --git a/lib/impl/int_to_fp.h b/lib/impl/int_to_fp.h
@@ -6,16 +6,115 @@
//
//===----------------------------------------------------------------------===//
//
-// Set source and destination defines in order to use a correctly
-// parameterised floatXiYf implementation.
+// Source/destination type setup for int → fp conversions. Caller defines
+// SRC_<I64/U64/I128/U128> and DST_<SINGLE/DOUBLE/QUAD> before each
+// inclusion.
//
+// Re-includable. Names depending on the (src, dst) pair are emitted with
+// a `_<src><dst>` suffix once per (TU, pair); bare-name #define aliases are
+// reset every inclusion (the #undef block below clears the previous pair's).
+// The caller must #undef its SRC_*/DST_* selectors before switching pairs.
//===----------------------------------------------------------------------===//
-#ifndef INT_TO_FP_H
-#define INT_TO_FP_H
-
#include "int_lib.h"
+// Self-clean any prior inclusion's per-(src, dst) state. May coexist with
+// fp_extend.h / fp_trunc.h which define overlapping bare-name aliases.
+#undef _INT_TO_FP_SRC_SUF
+#undef _INT_TO_FP_DST_SUF
+#undef SRC_REP_C
+#undef DST_REP_C
+#undef src_t
+#undef usrc_t
+#undef dst_t
+#undef dst_rep_t
+#undef clzSrcT
+#undef dstFromRep
+#undef dstSigBits
+
+#if defined SRC_I64
+#define _INT_TO_FP_SRC_SUF i64
+#elif defined SRC_U64
+#define _INT_TO_FP_SRC_SUF u64
+#elif defined SRC_I128
+#define _INT_TO_FP_SRC_SUF i128
+#elif defined SRC_U128
+#define _INT_TO_FP_SRC_SUF u128
+#else
+#error Source should be a handled integer type.
+#endif
+
+#if defined DST_SINGLE
+#define _INT_TO_FP_DST_SUF sf
+#elif defined DST_DOUBLE
+#define _INT_TO_FP_DST_SUF df
+#elif defined DST_QUAD
+#define _INT_TO_FP_DST_SUF tf
+#else
+#error Destination should be a handled floating point type
+#endif
+
+#define _INT_TO_FP_PASTE4_(a, b, c, d) a##b##c##d
+#define _INT_TO_FP_PASTE4(a, b, c, d) _INT_TO_FP_PASTE4_(a, b, c, d)
+#define _INT_TO_FP_PAIR(stem) _INT_TO_FP_PASTE4(stem, _, _INT_TO_FP_SRC_SUF, _INT_TO_FP_DST_SUF)
+
+// ---- Bare-name aliases (re-set every inclusion). ------------------------
+
+#define src_t _INT_TO_FP_PAIR(src_t)
+#define usrc_t _INT_TO_FP_PAIR(usrc_t)
+#define dst_t _INT_TO_FP_PAIR(dst_t)
+#define dst_rep_t _INT_TO_FP_PAIR(dst_rep_t)
+#define clzSrcT _INT_TO_FP_PAIR(clzSrcT)
+#define dstFromRep _INT_TO_FP_PAIR(dstFromRep)
+#define dstSigBits _INT_TO_FP_PAIR(dstSigBits)
+
+// DST_REP_C: simple textual macro per dst.
+#if defined DST_SINGLE
+#define DST_REP_C UINT32_C
+#elif defined DST_DOUBLE
+#define DST_REP_C UINT64_C
+#elif defined DST_QUAD
+#define DST_REP_C (__uint128_t)
+#endif
+
+// ---- One-time emission per (TU, src+dst pair); other pairs emit nothing.
+// Pairs cfree uses: (i64,u64) × (sf,df) and (i128,u128) × (sf,df,tf).
+
+#if defined SRC_I64 && defined DST_SINGLE && !defined INT_TO_FP_I64SF_EMITTED
+#define INT_TO_FP_I64SF_EMITTED
+#define _INT_TO_FP_EMIT 1
+#elif defined SRC_I64 && defined DST_DOUBLE && !defined INT_TO_FP_I64DF_EMITTED
+#define INT_TO_FP_I64DF_EMITTED
+#define _INT_TO_FP_EMIT 1
+#elif defined SRC_U64 && defined DST_SINGLE && !defined INT_TO_FP_U64SF_EMITTED
+#define INT_TO_FP_U64SF_EMITTED
+#define _INT_TO_FP_EMIT 1
+#elif defined SRC_U64 && defined DST_DOUBLE && !defined INT_TO_FP_U64DF_EMITTED
+#define INT_TO_FP_U64DF_EMITTED
+#define _INT_TO_FP_EMIT 1
+#elif defined SRC_I128 && defined DST_SINGLE && !defined INT_TO_FP_I128SF_EMITTED
+#define INT_TO_FP_I128SF_EMITTED
+#define _INT_TO_FP_EMIT 1
+#elif defined SRC_I128 && defined DST_DOUBLE && !defined INT_TO_FP_I128DF_EMITTED
+#define INT_TO_FP_I128DF_EMITTED
+#define _INT_TO_FP_EMIT 1
+#elif defined SRC_I128 && defined DST_QUAD && !defined INT_TO_FP_I128TF_EMITTED
+#define INT_TO_FP_I128TF_EMITTED
+#define _INT_TO_FP_EMIT 1
+#elif defined SRC_U128 && defined DST_SINGLE && !defined INT_TO_FP_U128SF_EMITTED
+#define INT_TO_FP_U128SF_EMITTED
+#define _INT_TO_FP_EMIT 1
+#elif defined SRC_U128 && defined DST_DOUBLE && !defined INT_TO_FP_U128DF_EMITTED
+#define INT_TO_FP_U128DF_EMITTED
+#define _INT_TO_FP_EMIT 1
+#elif defined SRC_U128 && defined DST_QUAD && !defined INT_TO_FP_U128TF_EMITTED
+#define INT_TO_FP_U128TF_EMITTED
+#define _INT_TO_FP_EMIT 1
+#endif
+
+#ifdef _INT_TO_FP_EMIT
+#undef _INT_TO_FP_EMIT
+
#if defined SRC_I64
typedef int64_t src_t;
typedef uint64_t usrc_t;
@@ -36,14 +135,11 @@ typedef __uint128_t src_t;
typedef __uint128_t usrc_t;
static __inline int clzSrcT(usrc_t x) { return __clzti2(x); }
-#else
-#error Source should be a handled integer type.
#endif
#if defined DST_SINGLE
typedef float dst_t;
typedef uint32_t dst_rep_t;
-#define DST_REP_C UINT32_C
enum {
dstSigBits = 23,
@@ -52,7 +148,6 @@ enum {
#elif defined DST_DOUBLE
typedef double dst_t;
typedef uint64_t dst_rep_t;
-#define DST_REP_C UINT64_C
enum {
dstSigBits = 52,
@@ -61,14 +156,11 @@ enum {
#elif defined DST_QUAD
typedef tf_float dst_t;
typedef __uint128_t dst_rep_t;
-#define DST_REP_C (__uint128_t)
enum {
dstSigBits = 112,
};
-#else
-#error Destination should be a handled floating point type
#endif
static __inline dst_t dstFromRep(dst_rep_t x) {
@@ -79,4 +171,4 @@ static __inline dst_t dstFromRep(dst_rep_t x) {
return rep.f;
}
-#endif // INT_TO_FP_H
+#endif // _INT_TO_FP_EMIT
diff --git a/lib/impl/int_to_fp_impl.inc b/lib/impl/int_to_fp_impl.inc
@@ -14,6 +14,43 @@
#include "int_to_fp.h"
+#define __floatXiYf__ _INT_TO_FP_PAIR(__floatXiYf__)
+
+#if defined SRC_I64 && defined DST_SINGLE && !defined INT_TO_FP_IMPL_I64SF_EMITTED
+#define INT_TO_FP_IMPL_I64SF_EMITTED
+#define _INT_TO_FP_IMPL_EMIT 1
+#elif defined SRC_I64 && defined DST_DOUBLE && !defined INT_TO_FP_IMPL_I64DF_EMITTED
+#define INT_TO_FP_IMPL_I64DF_EMITTED
+#define _INT_TO_FP_IMPL_EMIT 1
+#elif defined SRC_U64 && defined DST_SINGLE && !defined INT_TO_FP_IMPL_U64SF_EMITTED
+#define INT_TO_FP_IMPL_U64SF_EMITTED
+#define _INT_TO_FP_IMPL_EMIT 1
+#elif defined SRC_U64 && defined DST_DOUBLE && !defined INT_TO_FP_IMPL_U64DF_EMITTED
+#define INT_TO_FP_IMPL_U64DF_EMITTED
+#define _INT_TO_FP_IMPL_EMIT 1
+#elif defined SRC_I128 && defined DST_SINGLE && !defined INT_TO_FP_IMPL_I128SF_EMITTED
+#define INT_TO_FP_IMPL_I128SF_EMITTED
+#define _INT_TO_FP_IMPL_EMIT 1
+#elif defined SRC_I128 && defined DST_DOUBLE && !defined INT_TO_FP_IMPL_I128DF_EMITTED
+#define INT_TO_FP_IMPL_I128DF_EMITTED
+#define _INT_TO_FP_IMPL_EMIT 1
+#elif defined SRC_I128 && defined DST_QUAD && !defined INT_TO_FP_IMPL_I128TF_EMITTED
+#define INT_TO_FP_IMPL_I128TF_EMITTED
+#define _INT_TO_FP_IMPL_EMIT 1
+#elif defined SRC_U128 && defined DST_SINGLE && !defined INT_TO_FP_IMPL_U128SF_EMITTED
+#define INT_TO_FP_IMPL_U128SF_EMITTED
+#define _INT_TO_FP_IMPL_EMIT 1
+#elif defined SRC_U128 && defined DST_DOUBLE && !defined INT_TO_FP_IMPL_U128DF_EMITTED
+#define INT_TO_FP_IMPL_U128DF_EMITTED
+#define _INT_TO_FP_IMPL_EMIT 1
+#elif defined SRC_U128 && defined DST_QUAD && !defined INT_TO_FP_IMPL_U128TF_EMITTED
+#define INT_TO_FP_IMPL_U128TF_EMITTED
+#define _INT_TO_FP_IMPL_EMIT 1
+#endif
+
+#ifdef _INT_TO_FP_IMPL_EMIT
+#undef _INT_TO_FP_IMPL_EMIT
+
static __inline dst_t __floatXiYf__(src_t a) {
if (a == 0)
return 0.0;
@@ -70,3 +107,5 @@ static __inline dst_t __floatXiYf__(src_t a) {
((dst_rep_t)(a) & dstSignificandMask);
return dstFromRep(result);
}
+
+#endif // _INT_TO_FP_IMPL_EMIT
diff --git a/lib/include/common/fp_lib.h b/lib/include/common/fp_lib.h
@@ -11,11 +11,19 @@
//
// Selected by the includer via #define SINGLE_PRECISION | DOUBLE_PRECISION
// | QUAD_PRECISION before #include "fp_lib.h".
+//
+// Re-includable. On each inclusion, fp_lib.h:
+// 1. emits per-precision typedefs and static inlines exactly once per
+// (TU, precision), with names suffix-renamed (e.g. rep_t_sf),
+// 2. sets bare-name #define aliases (rep_t → rep_t_sf, ...) so caller
+// code using bare names resolves to the right suffixed entity.
+//
+// To switch precision in the same TU, #include "fp_lib_undef.h" between
+// the two #include "fp_lib.h" calls; that clears the bare aliases and
+// the SINGLE/DOUBLE/QUAD_PRECISION marker so the next inclusion can
+// install a fresh set.
//===----------------------------------------------------------------------===//
-#ifndef FP_LIB_HEADER
-#define FP_LIB_HEADER
-
#include "int_lib.h"
#include "int_math.h"
#include "int_types.h"
@@ -24,15 +32,93 @@
#include <stdint.h>
#if defined SINGLE_PRECISION
+#define FP_LIB_SUFFIX sf
+#elif defined DOUBLE_PRECISION
+#define FP_LIB_SUFFIX df
+#elif defined QUAD_PRECISION
+#define FP_LIB_SUFFIX tf
+#else
+#error SINGLE_PRECISION, DOUBLE_PRECISION or QUAD_PRECISION must be defined.
+#endif
+
+#define _FP_PASTE_(a, b) a##_##b
+#define _FP_PASTE(a, b) _FP_PASTE_(a, b)
+#define _FP_NAME(stem) _FP_PASTE(stem, FP_LIB_SUFFIX)
+
+// ---- Bare-name aliases (re-set every inclusion). ------------------------
+// Map the bare names callers use onto the suffix-renamed implementations
+// emitted in the gated section below.
+
+#define half_rep_t _FP_NAME(half_rep_t)
+#define rep_t _FP_NAME(rep_t)
+#define srep_t _FP_NAME(srep_t)
+#define fp_t _FP_NAME(fp_t)
+#if defined SINGLE_PRECISION
+#define twice_rep_t _FP_NAME(twice_rep_t)
+#endif
+
+#define rep_clz _FP_NAME(rep_clz)
+#define wideMultiply _FP_NAME(wideMultiply)
+#define toRep _FP_NAME(toRep)
+#define fromRep _FP_NAME(fromRep)
+#define normalize _FP_NAME(normalize)
+#define wideLeftShift _FP_NAME(wideLeftShift)
+#define wideRightShiftWithSticky _FP_NAME(wideRightShiftWithSticky)
+#define __compiler_rt_logbX _FP_NAME(__compiler_rt_logbX)
+#define __compiler_rt_scalbnX _FP_NAME(__compiler_rt_scalbnX)
+#define __compiler_rt_fmaxX _FP_NAME(__compiler_rt_fmaxX)
+
+// ---- Per-precision values (bare macros; re-#define'd every inclusion). --
+
+#if defined SINGLE_PRECISION
+
+#define HALF_REP_C UINT16_C
+#define REP_C UINT32_C
+#define significandBits 23
+
+#elif defined DOUBLE_PRECISION
+
+#define HALF_REP_C UINT32_C
+#define REP_C UINT64_C
+#define significandBits 52
+
+#elif defined QUAD_PRECISION
+
+#define HALF_REP_C UINT64_C
+#define REP_C (__uint128_t)
+#define significandBits 112
+#define TF_MANT_DIG (significandBits + 1)
+
+#endif
+
+#define typeWidth (sizeof(rep_t) * CHAR_BIT)
+
+#define exponentBits (typeWidth - significandBits - 1)
+#define maxExponent ((1 << exponentBits) - 1)
+#define exponentBias (maxExponent >> 1)
+
+#define implicitBit (REP_C(1) << significandBits)
+#define significandMask (implicitBit - 1U)
+#define signBit (REP_C(1) << (significandBits + exponentBits))
+#define absMask (signBit - 1U)
+#define exponentMask (absMask ^ significandMask)
+#define oneRep ((rep_t)exponentBias << significandBits)
+#define infRep exponentMask
+#define quietBit (implicitBit >> 1)
+#define qnanRep (exponentMask | quietBit)
+
+// ---- One-time emission per (TU, precision). -----------------------------
+// Typedefs and static inlines, written in bare-name form so the aliases
+// above suffix-rename them to a unique identifier per precision.
+
+#if defined SINGLE_PRECISION && !defined FP_LIB_SF_EMITTED
+#define FP_LIB_SF_EMITTED
typedef uint16_t half_rep_t;
typedef uint32_t rep_t;
typedef uint64_t twice_rep_t;
typedef int32_t srep_t;
typedef float fp_t;
-#define HALF_REP_C UINT16_C
-#define REP_C UINT32_C
-#define significandBits 23
static __inline int rep_clz(rep_t a) { return clzsi(a); }
@@ -43,15 +129,13 @@ static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
}
COMPILER_RT_ABI fp_t __addsf3(fp_t a, fp_t b);
-#elif defined DOUBLE_PRECISION
+#elif defined DOUBLE_PRECISION && !defined FP_LIB_DF_EMITTED
+#define FP_LIB_DF_EMITTED
typedef uint32_t half_rep_t;
typedef uint64_t rep_t;
typedef int64_t srep_t;
typedef double fp_t;
-#define HALF_REP_C UINT32_C
-#define REP_C UINT64_C
-#define significandBits 52
static __inline int rep_clz(rep_t a) { return __builtin_clzll(a); }
@@ -73,17 +157,14 @@ static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
COMPILER_RT_ABI fp_t __adddf3(fp_t a, fp_t b);
-#elif defined QUAD_PRECISION
+#elif defined QUAD_PRECISION && !defined FP_LIB_TF_EMITTED
+#define FP_LIB_TF_EMITTED
// Requires tf_supplement.h to be pre-included so CRT_HAS_TF_MODE and
// CRT_HAS_IEEE_TF are defined and tf_float is typedef'd.
typedef uint64_t half_rep_t;
typedef __uint128_t rep_t;
typedef __int128_t srep_t;
typedef tf_float fp_t;
-#define HALF_REP_C UINT64_C
-#define REP_C (__uint128_t)
-#define significandBits 112
-#define TF_MANT_DIG (significandBits + 1)
static __inline int rep_clz(rep_t a) {
const union {
@@ -147,11 +228,26 @@ static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
#undef Word_HiMask
#undef Word_LoMask
#undef Word_FullMask
-#else
-#error SINGLE_PRECISION, DOUBLE_PRECISION or QUAD_PRECISION must be defined.
+
#endif
-#define typeWidth (sizeof(rep_t) * CHAR_BIT)
+// ---- One-time emission per (TU, precision): shared static inlines. ------
+// These bodies use the bare-name value macros above; the aliases at the
+// top of the file ensure the entity names are suffix-renamed.
+
+#if defined SINGLE_PRECISION && !defined FP_LIB_SF_COMMON_EMITTED
+#define FP_LIB_SF_COMMON_EMITTED
+#define _FP_LIB_EMIT_COMMON 1
+#elif defined DOUBLE_PRECISION && !defined FP_LIB_DF_COMMON_EMITTED
+#define FP_LIB_DF_COMMON_EMITTED
+#define _FP_LIB_EMIT_COMMON 1
+#elif defined QUAD_PRECISION && !defined FP_LIB_TF_COMMON_EMITTED
+#define FP_LIB_TF_COMMON_EMITTED
+#define _FP_LIB_EMIT_COMMON 1
+#endif
+
+#ifdef _FP_LIB_EMIT_COMMON
+#undef _FP_LIB_EMIT_COMMON
static __inline rep_t toRep(fp_t x) {
const union { fp_t f; rep_t i; } rep = {.f = x};
@@ -163,20 +259,6 @@ static __inline fp_t fromRep(rep_t x) {
return rep.f;
}
-#define exponentBits (typeWidth - significandBits - 1)
-#define maxExponent ((1 << exponentBits) - 1)
-#define exponentBias (maxExponent >> 1)
-
-#define implicitBit (REP_C(1) << significandBits)
-#define significandMask (implicitBit - 1U)
-#define signBit (REP_C(1) << (significandBits + exponentBits))
-#define absMask (signBit - 1U)
-#define exponentMask (absMask ^ significandMask)
-#define oneRep ((rep_t)exponentBias << significandBits)
-#define infRep exponentMask
-#define quietBit (implicitBit >> 1)
-#define qnanRep (exponentMask | quietBit)
-
static __inline int normalize(rep_t *significand) {
const int shift = rep_clz(*significand) - rep_clz(implicitBit);
*significand <<= shift;
@@ -271,11 +353,16 @@ static __inline fp_t __compiler_rt_fmax(fp_t x, fp_t y) { return __compiler_
static __inline tf_float __compiler_rt_logbtf(tf_float x) { return __compiler_rt_logbX(x); }
static __inline tf_float __compiler_rt_scalbntf(tf_float x, int y){ return __compiler_rt_scalbnX(x, y); }
static __inline tf_float __compiler_rt_fmaxtf(tf_float x, tf_float y){ return __compiler_rt_fmaxX(x, y); }
+#endif
+
+#endif // _FP_LIB_EMIT_COMMON
+
+// Long-double aliases for QUAD targets. Idempotent (same text every
+// inclusion), so set outside the one-time emission gate.
+#if defined QUAD_PRECISION
#define __compiler_rt_logbl __compiler_rt_logbtf
#define __compiler_rt_scalbnl __compiler_rt_scalbntf
#define __compiler_rt_fmaxl __compiler_rt_fmaxtf
#define crt_fabstf crt_fabsf128
#define crt_copysigntf crt_copysignf128
#endif
-
-#endif // FP_LIB_HEADER
diff --git a/lib/include/common/fp_lib_undef.h b/lib/include/common/fp_lib_undef.h
@@ -0,0 +1,68 @@
+//===-- fp_lib_undef.h - Reset bare-name aliases set by fp_lib.h ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Use between two #include "fp_lib.h" calls in one TU when switching
+// precision (e.g. SINGLE → DOUBLE in a consolidated soft-float source).
+// Clears the bare-name #define aliases (rep_t, fp_t, signBit, ...) and
+// the SINGLE/DOUBLE/QUAD_PRECISION marker so the next fp_lib.h
+// inclusion can set up a fresh set. Leaves the suffix-renamed
+// definitions (rep_t_sf, etc.) intact for any code that still
+// references them post-undef.
+//===----------------------------------------------------------------------===//
+
+// Bare-name aliases for typedefs and static inlines.
+#undef half_rep_t
+#undef rep_t
+#undef srep_t
+#undef fp_t
+#undef twice_rep_t
+#undef rep_clz
+#undef wideMultiply
+#undef toRep
+#undef fromRep
+#undef normalize
+#undef wideLeftShift
+#undef wideRightShiftWithSticky
+#undef __compiler_rt_logbX
+#undef __compiler_rt_scalbnX
+#undef __compiler_rt_fmaxX
+
+// Per-precision value macros.
+#undef HALF_REP_C
+#undef REP_C
+#undef significandBits
+#undef TF_MANT_DIG
+
+// Width-derived value macros.
+#undef typeWidth
+#undef exponentBits
+#undef maxExponent
+#undef exponentBias
+#undef implicitBit
+#undef significandMask
+#undef signBit
+#undef absMask
+#undef exponentMask
+#undef oneRep
+#undef infRep
+#undef quietBit
+#undef qnanRep
+
+// QUAD-only long-double aliases.
+#undef __compiler_rt_logbl
+#undef __compiler_rt_scalbnl
+#undef __compiler_rt_fmaxl
+#undef crt_fabstf
+#undef crt_copysigntf
+
+// Precision selector and internal helpers.
+#undef SINGLE_PRECISION
+#undef DOUBLE_PRECISION
+#undef QUAD_PRECISION
+#undef FP_LIB_SUFFIX
+#undef _FP_PASTE_
+#undef _FP_PASTE
+#undef _FP_NAME
diff --git a/lib/int/divdi3.c b/lib/int/divdi3.c
@@ -16,7 +16,10 @@
#define fixint_t di_int
#define fixuint_t du_int
+#define INT_DIV_SUFFIX divdi3
#define COMPUTE_UDIV(a, b) __udivmoddi4((a), (b), (du_int *)0)
#include "int_div_impl.inc"
-COMPILER_RT_ABI di_int __divdi3(di_int a, di_int b) { return __divXi3(a, b); }
+COMPILER_RT_ABI di_int __divdi3(di_int a, di_int b) {
+ return __divXi3_divdi3(a, b);
+}
diff --git a/lib/int/moddi3.c b/lib/int/moddi3.c
@@ -16,7 +16,10 @@
#define fixint_t di_int
#define fixuint_t du_int
+#define INT_DIV_SUFFIX moddi3
#define ASSIGN_UMOD(res, a, b) __udivmoddi4((a), (b), &(res))
#include "int_div_impl.inc"
-COMPILER_RT_ABI di_int __moddi3(di_int a, di_int b) { return __modXi3(a, b); }
+COMPILER_RT_ABI di_int __moddi3(di_int a, di_int b) {
+ return __modXi3_moddi3(a, b);
+}
diff --git a/lib/int/udivdi3.c b/lib/int/udivdi3.c
@@ -12,12 +12,13 @@
#include "int_lib.h"
-typedef du_int fixuint_t;
-typedef di_int fixint_t;
-#include "int_div_impl.inc"
-
// Returns: a / b
+#define fixint_t di_int
+#define fixuint_t du_int
+#define INT_DIV_SUFFIX udivdi3
+#include "int_div_impl.inc"
+
COMPILER_RT_ABI du_int __udivdi3(du_int a, du_int b) {
- return __udivXi3(a, b);
+ return __udivXi3_udivdi3(a, b);
}
diff --git a/lib/int/umoddi3.c b/lib/int/umoddi3.c
@@ -12,12 +12,13 @@
#include "int_lib.h"
-typedef du_int fixuint_t;
-typedef di_int fixint_t;
-#include "int_div_impl.inc"
-
// Returns: a % b
+#define fixint_t di_int
+#define fixuint_t du_int
+#define INT_DIV_SUFFIX umoddi3
+#include "int_div_impl.inc"
+
COMPILER_RT_ABI du_int __umoddi3(du_int a, du_int b) {
- return __umodXi3(a, b);
+ return __umodXi3_umoddi3(a, b);
}
diff --git a/lib/int64/divti3.c b/lib/int64/divti3.c
@@ -17,8 +17,11 @@
#define fixint_t ti_int
#define fixuint_t tu_int
+#define INT_DIV_SUFFIX divti3
#define COMPUTE_UDIV(a, b) __udivmodti4((a), (b), (tu_int *)0)
#include "int_div_impl.inc"
-COMPILER_RT_ABI ti_int __divti3(ti_int a, ti_int b) { return __divXi3(a, b); }
+COMPILER_RT_ABI ti_int __divti3(ti_int a, ti_int b) {
+ return __divXi3_divti3(a, b);
+}
diff --git a/lib/int64/modti3.c b/lib/int64/modti3.c
@@ -17,8 +17,11 @@
#define fixint_t ti_int
#define fixuint_t tu_int
+#define INT_DIV_SUFFIX modti3
#define ASSIGN_UMOD(res, a, b) __udivmodti4((a), (b), &(res))
#include "int_div_impl.inc"
-COMPILER_RT_ABI ti_int __modti3(ti_int a, ti_int b) { return __modXi3(a, b); }
+COMPILER_RT_ABI ti_int __modti3(ti_int a, ti_int b) {
+ return __modXi3_modti3(a, b);
+}